openclaw/src/agents/tools/agent-step.ts
Clawd 7d11b9246b feat(agents): support OpenRouter image generation models
Add support for extracting and saving generated images from OpenRouter
models that return images (like gpt-5-image-mini).

- Extract base64 images from assistant responses via extractAssistantImages()
- Save generated images using standard media store ("generated" subdir)
- Send generated images to users via message channels
- Support both direct agent responses and subagent image generation

This enables use of OpenRouter's image generation models in Moltbot
conversations and subagent tasks.
2026-01-29 12:37:42 -06:00

90 lines
3.0 KiB
TypeScript

import crypto from "node:crypto";
import { callGateway } from "../../gateway/call.js";
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
import { AGENT_LANE_NESTED } from "../lanes.js";
import {
extractAssistantText,
extractAssistantImages,
stripToolMessages,
} from "./sessions-helpers.js";
/**
 * Text and image payload pulled out of a single assistant reply.
 *
 * Both members are optional: a reply may carry only text, only images,
 * both, or neither.
 */
export interface AssistantReplyContent {
  /** Textual part of the reply, when there is one. */
  text?: string;
  /**
   * Images attached to the reply. Each entry pairs a MIME type with the
   * image payload as a string (presumably base64-encoded — confirm against
   * `extractAssistantImages`).
   */
  images?: { mimeType: string; data: string }[];
}
/**
 * Fetch the session's chat history through the gateway and return the text
 * of its most recent message once tool messages have been stripped.
 *
 * @param params.sessionKey - Session whose history is requested.
 * @param params.limit - Maximum number of history entries to request; 50 when omitted.
 * @returns Whatever `extractAssistantText` yields for the last remaining
 *   message, or `undefined` when no message remains after stripping.
 */
export async function readLatestAssistantReply(params: {
  sessionKey: string;
  limit?: number;
}): Promise<string | undefined> {
  const history = (await callGateway({
    method: "chat.history",
    params: { sessionKey: params.sessionKey, limit: params.limit ?? 50 },
  })) as { messages?: unknown[] };

  // Treat a missing or malformed `messages` field as an empty history.
  const rawMessages = history?.messages;
  const visible = stripToolMessages(Array.isArray(rawMessages) ? rawMessages : []);
  if (visible.length === 0) {
    return undefined;
  }

  const latest = visible[visible.length - 1];
  if (!latest) {
    return undefined;
  }
  return extractAssistantText(latest);
}
/**
 * Fetch the session's chat history through the gateway and return both the
 * text and the images of its most recent message once tool messages have
 * been stripped. Used for the subagent announce flow, where images need to
 * be forwarded.
 *
 * @param params.sessionKey - Session whose history is requested.
 * @param params.limit - Maximum number of history entries to request; 50 when omitted.
 * @returns An empty object when no message remains after stripping.
 *   Otherwise an object whose `text` is `undefined` for empty/missing text and
 *   whose `images` is `undefined` when no images were extracted.
 */
export async function readLatestAssistantReplyWithMedia(params: {
  sessionKey: string;
  limit?: number;
}): Promise<AssistantReplyContent> {
  const history = (await callGateway({
    method: "chat.history",
    params: { sessionKey: params.sessionKey, limit: params.limit ?? 50 },
  })) as { messages?: unknown[] };

  // Treat a missing or malformed `messages` field as an empty history.
  const rawMessages = history?.messages;
  const visible = stripToolMessages(Array.isArray(rawMessages) ? rawMessages : []);
  const latest = visible.length === 0 ? undefined : visible[visible.length - 1];
  if (!latest) {
    return {};
  }

  const replyText = extractAssistantText(latest);
  const replyImages = extractAssistantImages(latest);
  const hasImages = replyImages.length > 0;
  return {
    // Collapse empty text to undefined so callers can test for presence.
    text: replyText ? replyText : undefined,
    images: hasImages ? replyImages : undefined,
  };
}
/**
 * Submit one message to an agent session through the gateway, wait for that
 * run to finish, and return the latest assistant reply text from the session.
 *
 * The request is sent with `deliver: false`. The channel defaults to
 * `INTERNAL_MESSAGE_CHANNEL` and the lane to `AGENT_LANE_NESTED` when the
 * caller does not supply them.
 *
 * @param params.sessionKey - Session the message is sent to and read back from.
 * @param params.message - Message text handed to the agent.
 * @param params.extraSystemPrompt - Additional system prompt forwarded with the request.
 * @param params.timeoutMs - Wait budget for the run; capped at 60 seconds.
 * @param params.channel - Optional channel override.
 * @param params.lane - Optional lane override.
 * @returns The latest assistant reply text, or `undefined` when the wait does
 *   not report status `"ok"` or no reply text is available.
 */
export async function runAgentStep(params: {
  sessionKey: string;
  message: string;
  extraSystemPrompt: string;
  timeoutMs: number;
  channel?: string;
  lane?: string;
}): Promise<string | undefined> {
  const idempotencyKey = crypto.randomUUID();

  const accepted = (await callGateway({
    method: "agent",
    params: {
      message: params.message,
      sessionKey: params.sessionKey,
      idempotencyKey,
      deliver: false,
      channel: params.channel ?? INTERNAL_MESSAGE_CHANNEL,
      lane: params.lane ?? AGENT_LANE_NESTED,
      extraSystemPrompt: params.extraSystemPrompt,
    },
    timeoutMs: 10_000,
  })) as { runId?: string; acceptedAt?: number };

  // Fall back to the idempotency key when the gateway reports no usable run id.
  const reportedRunId = accepted?.runId;
  const runId =
    typeof reportedRunId === "string" && reportedRunId.length > 0
      ? reportedRunId
      : idempotencyKey;

  const waitBudgetMs = Math.min(params.timeoutMs, 60_000);
  const outcome = (await callGateway({
    method: "agent.wait",
    params: {
      runId,
      timeoutMs: waitBudgetMs,
    },
    // Give the gateway call a little slack beyond the wait it is asked to perform.
    timeoutMs: waitBudgetMs + 2000,
  })) as { status?: string };

  if (outcome?.status === "ok") {
    return await readLatestAssistantReply({ sessionKey: params.sessionKey });
  }
  return undefined;
}