Add support for extracting and saving generated images from OpenRouter
models that return images (like gpt-5-image-mini).
- Extract base64 images from assistant responses via extractAssistantImages()
- Save generated images using standard media store ("generated" subdir)
- Send generated images to users via message channels
- Support both direct agent responses and subagent image generation
This enables use of OpenRouter's image generation models in Moltbot
conversations and subagent tasks.
90 lines
3.0 KiB
TypeScript
90 lines
3.0 KiB
TypeScript
import crypto from "node:crypto";
|
|
|
|
import { callGateway } from "../../gateway/call.js";
|
|
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
|
|
import { AGENT_LANE_NESTED } from "../lanes.js";
|
|
import {
|
|
extractAssistantText,
|
|
extractAssistantImages,
|
|
stripToolMessages,
|
|
} from "./sessions-helpers.js";
|
|
|
|
/**
 * Text and image content pulled from a single assistant reply, as returned by
 * `readLatestAssistantReplyWithMedia`. Either field is omitted when the reply
 * contained nothing of that kind.
 */
export interface AssistantReplyContent {
  /** Reply text; omitted when no (non-empty) text was extracted. */
  text?: string;
  /**
   * Images attached to the reply; omitted when none were extracted.
   * NOTE(review): `data` is presumably the base64 payload produced by
   * `extractAssistantImages` — confirm against that helper.
   */
  images?: Array<{ mimeType: string; data: string }>;
}
|
|
|
|
/**
 * Read the text of the most recent non-tool message in a session.
 *
 * Requests `chat.history` from the gateway, removes tool messages with
 * `stripToolMessages`, and runs `extractAssistantText` on the last message
 * that remains.
 *
 * @param params.sessionKey - Session whose history is requested.
 * @param params.limit - Maximum number of history entries to request
 *   (defaults to 50).
 * @returns Whatever `extractAssistantText` yields for the last remaining
 *   message, or `undefined` when no message remains after filtering.
 */
export async function readLatestAssistantReply(params: {
  sessionKey: string;
  limit?: number;
}): Promise<string | undefined> {
  const history = (await callGateway({
    method: "chat.history",
    params: { sessionKey: params.sessionKey, limit: params.limit ?? 50 },
  })) as { messages?: unknown[] };

  // The gateway payload is only asserted, not validated, so tolerate a
  // missing or non-array `messages` field by treating it as empty history.
  const messages = Array.isArray(history?.messages) ? history.messages : [];
  const filtered = stripToolMessages(messages);
  const last = filtered.length > 0 ? filtered[filtered.length - 1] : undefined;
  return last ? extractAssistantText(last) : undefined;
}
|
|
|
|
/**
 * Read the latest assistant reply including both text and images.
 * Used for subagent announce flow where images need to be forwarded.
 *
 * Requests `chat.history` from the gateway, removes tool messages with
 * `stripToolMessages`, and extracts text and images from the last message
 * that remains.
 *
 * @param params.sessionKey - Session whose history is requested.
 * @param params.limit - Maximum number of history entries to request
 *   (defaults to 50).
 * @returns An object whose `text` is set only when non-empty text was
 *   extracted and whose `images` is set only when at least one image was
 *   extracted; an empty object when no message remains after filtering.
 */
export async function readLatestAssistantReplyWithMedia(params: {
  sessionKey: string;
  limit?: number;
}): Promise<AssistantReplyContent> {
  const history = (await callGateway({
    method: "chat.history",
    params: { sessionKey: params.sessionKey, limit: params.limit ?? 50 },
  })) as { messages?: unknown[] };

  // The gateway payload is only asserted, not validated, so tolerate a
  // missing or non-array `messages` field by treating it as empty history.
  const messages = Array.isArray(history?.messages) ? history.messages : [];
  const filtered = stripToolMessages(messages);
  const last = filtered.length > 0 ? filtered[filtered.length - 1] : undefined;
  if (!last) return {};

  const text = extractAssistantText(last);
  const images = extractAssistantImages(last);
  return {
    // `||` is deliberate: an empty string is reported as "no text".
    text: text || undefined,
    images: images.length > 0 ? images : undefined,
  };
}
|
|
|
|
/**
 * Run one agent turn through the gateway and return the reply text.
 *
 * Submits the message with the `agent` method (`deliver: false`), waits for
 * the run via `agent.wait`, and on success reads the reply back with
 * `readLatestAssistantReply`.
 *
 * @param params.sessionKey - Session the message is submitted to and the
 *   reply is read from.
 * @param params.message - Message passed to the agent.
 * @param params.extraSystemPrompt - Forwarded unchanged to the `agent` call.
 * @param params.timeoutMs - Requested wait budget; the wait is capped at
 *   60 seconds regardless of this value.
 * @param params.channel - Channel for the run; defaults to
 *   `INTERNAL_MESSAGE_CHANNEL`.
 * @param params.lane - Lane for the run; defaults to `AGENT_LANE_NESTED`.
 * @returns The latest assistant reply text, or `undefined` when the wait
 *   does not finish with status `"ok"` or no reply text is available.
 */
export async function runAgentStep(params: {
  sessionKey: string;
  message: string;
  extraSystemPrompt: string;
  timeoutMs: number;
  channel?: string;
  lane?: string;
}): Promise<string | undefined> {
  const submitTimeoutMs = 10_000;
  const maxWaitMs = 60_000;
  // Extra time given to the gateway call on top of the wait it carries.
  // NOTE(review): presumably so `agent.wait` can report its own timeout
  // before the call itself times out — confirm.
  const waitGraceMs = 2000;

  const stepIdem = crypto.randomUUID();
  const response = (await callGateway({
    method: "agent",
    params: {
      message: params.message,
      sessionKey: params.sessionKey,
      idempotencyKey: stepIdem,
      deliver: false,
      channel: params.channel ?? INTERNAL_MESSAGE_CHANNEL,
      lane: params.lane ?? AGENT_LANE_NESTED,
      extraSystemPrompt: params.extraSystemPrompt,
    },
    timeoutMs: submitTimeoutMs,
  })) as { runId?: string; acceptedAt?: number };

  // Use the run id from the response when it is a non-empty string;
  // otherwise fall back to the idempotency key sent with the request.
  const resolvedRunId =
    typeof response?.runId === "string" && response.runId ? response.runId : stepIdem;

  const stepWaitMs = Math.min(params.timeoutMs, maxWaitMs);
  const wait = (await callGateway({
    method: "agent.wait",
    params: {
      runId: resolvedRunId,
      timeoutMs: stepWaitMs,
    },
    timeoutMs: stepWaitMs + waitGraceMs,
  })) as { status?: string };

  if (wait?.status !== "ok") return undefined;
  return await readLatestAssistantReply({ sessionKey: params.sessionKey });
}
|