import { type Api, type AssistantMessage, type Context, complete, type Model, } from "@mariozechner/pi-ai"; import { discoverAuthStorage, discoverModels, } from "@mariozechner/pi-coding-agent"; import { Type } from "@sinclair/typebox"; import type { ClawdbotConfig } from "../../config/config.js"; import { resolveUserPath } from "../../utils.js"; import { loadWebMedia } from "../../web/media.js"; import { getApiKeyForModel } from "../model-auth.js"; import { runWithImageModelFallback } from "../model-fallback.js"; import { ensureClawdbotModelsJson } from "../models-config.js"; import { extractAssistantText } from "../pi-embedded-utils.js"; import type { AnyAgentTool } from "./common.js"; const DEFAULT_PROMPT = "Describe the image."; function ensureImageToolConfigured(cfg?: ClawdbotConfig): boolean { const imageModel = cfg?.agents?.defaults?.imageModel as | { primary?: string; fallbacks?: string[] } | string | undefined; const primary = typeof imageModel === "string" ? imageModel.trim() : imageModel?.primary; const fallbacks = typeof imageModel === "object" ? (imageModel?.fallbacks ?? []) : []; return Boolean(primary?.trim() || fallbacks.length > 0); } function pickMaxBytes( cfg?: ClawdbotConfig, maxBytesMb?: number, ): number | undefined { if ( typeof maxBytesMb === "number" && Number.isFinite(maxBytesMb) && maxBytesMb > 0 ) { return Math.floor(maxBytesMb * 1024 * 1024); } const configured = cfg?.agents?.defaults?.mediaMaxMb; if ( typeof configured === "number" && Number.isFinite(configured) && configured > 0 ) { return Math.floor(configured * 1024 * 1024); } return undefined; } function buildImageContext( prompt: string, base64: string, mimeType: string, ): Context { return { messages: [ { role: "user", content: [ { type: "text", text: prompt }, { type: "image", data: base64, mimeType }, ], timestamp: Date.now(), }, ], }; } async function runImagePrompt(params: { cfg?: ClawdbotConfig; agentDir: string; modelOverride?: string; prompt: string; base64: string; mimeType: string; }): Promise<{ text: string; provider: string; model: string }> { await ensureClawdbotModelsJson(params.cfg, params.agentDir); const authStorage = discoverAuthStorage(params.agentDir); const modelRegistry = discoverModels(authStorage, params.agentDir); const result = await runWithImageModelFallback({ cfg: params.cfg, modelOverride: params.modelOverride, run: async (provider, modelId) => { const model = modelRegistry.find(provider, modelId) as Model | null; if (!model) { throw new Error(`Unknown model: ${provider}/${modelId}`); } if (!model.input?.includes("image")) { throw new Error( `Model does not support images: ${provider}/${modelId}`, ); } const apiKeyInfo = await getApiKeyForModel({ model, cfg: params.cfg, agentDir: params.agentDir, }); authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey); const context = buildImageContext( params.prompt, params.base64, params.mimeType, ); const message = (await complete(model, context, { apiKey: apiKeyInfo.apiKey, maxTokens: 512, temperature: 0, })) as AssistantMessage; return message; }, }); const text = extractAssistantText(result.result); return { text: text || "(no text returned)", provider: result.provider, model: result.model, }; } export function createImageTool(options?: { config?: ClawdbotConfig; agentDir?: string; }): AnyAgentTool | null { if (!ensureImageToolConfigured(options?.config)) return null; const agentDir = options?.agentDir; if (!agentDir?.trim()) { throw new Error("createImageTool requires agentDir when enabled"); } return { label: "Image", name: "image", description: "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL.", parameters: Type.Object({ prompt: Type.Optional(Type.String()), image: Type.String(), model: Type.Optional(Type.String()), maxBytesMb: Type.Optional(Type.Number()), }), execute: async (_toolCallId, args) => { const record = args && typeof args === "object" ? (args as Record) : {}; const imageRaw = typeof record.image === "string" ? record.image.trim() : ""; if (!imageRaw) throw new Error("image required"); const promptRaw = typeof record.prompt === "string" && record.prompt.trim() ? record.prompt.trim() : DEFAULT_PROMPT; const modelOverride = typeof record.model === "string" && record.model.trim() ? record.model.trim() : undefined; const maxBytesMb = typeof record.maxBytesMb === "number" ? record.maxBytesMb : undefined; const maxBytes = pickMaxBytes(options?.config, maxBytesMb); const resolvedImage = imageRaw.startsWith("~") ? resolveUserPath(imageRaw) : imageRaw; const media = await loadWebMedia(resolvedImage, maxBytes); if (media.kind !== "image") { throw new Error(`Unsupported media type: ${media.kind}`); } const mimeType = media.contentType ?? "image/png"; const base64 = media.buffer.toString("base64"); const result = await runImagePrompt({ cfg: options?.config, agentDir, modelOverride, prompt: promptRaw, base64, mimeType, }); return { content: [{ type: "text", text: result.text }], details: { model: `${result.provider}/${result.model}`, image: resolvedImage, }, }; }, }; }