import type { ClawdbotConfig } from "../config/config.js";
import type { MsgContext } from "../auto-reply/templating.js";
import type {
  MediaUnderstandingConfig,
  MediaUnderstandingModelConfig,
  MediaUnderstandingScopeConfig,
} from "../config/types.tools.js";
import { logVerbose, shouldLogVerbose } from "../globals.js";
import {
  DEFAULT_MAX_BYTES,
  DEFAULT_MAX_CHARS_BY_CAPABILITY,
  DEFAULT_MEDIA_CONCURRENCY,
  DEFAULT_PROMPT,
} from "./defaults.js";
import { normalizeMediaProviderId } from "./providers/index.js";
import { normalizeMediaUnderstandingChatType, resolveMediaUnderstandingScope } from "./scope.js";
import type { MediaUnderstandingCapability } from "./types.js";

/**
 * Converts a timeout expressed in seconds into whole milliseconds.
 *
 * @param seconds - Configured timeout; ignored unless it is a finite number.
 * @param fallbackSeconds - Used when `seconds` is missing or not finite. It is
 *   not validated here.
 * @returns The timeout in milliseconds, floored and clamped to at least 1000.
 */
export function resolveTimeoutMs(seconds: number | undefined, fallbackSeconds: number): number {
  const value = typeof seconds === "number" && Number.isFinite(seconds) ? seconds : fallbackSeconds;
  return Math.max(1000, Math.floor(value * 1000));
}

/**
 * Builds the prompt text for a capability.
 *
 * @param capability - Capability the prompt is for.
 * @param prompt - Optional custom prompt; when missing or blank after trimming,
 *   `DEFAULT_PROMPT[capability]` is used instead.
 * @param maxChars - Optional character limit. When falsy (e.g. `undefined` or
 *   `0`), or when the capability is `"audio"`, no limit sentence is appended.
 * @returns The base prompt, optionally followed by a length instruction.
 */
export function resolvePrompt(
  capability: MediaUnderstandingCapability,
  prompt?: string,
  maxChars?: number,
): string {
  const base = prompt?.trim() || DEFAULT_PROMPT[capability];
  if (!maxChars || capability === "audio") return base;
  return `${base} Respond in at most ${maxChars} characters.`;
}

/**
 * Resolves the character limit for a model entry.
 *
 * Precedence: `entry.maxChars`, then `params.config.maxChars`, then
 * `cfg.tools.media[capability].maxChars`, and finally
 * `DEFAULT_MAX_CHARS_BY_CAPABILITY[capability]`.
 *
 * @returns The first numeric value found, or whatever the defaults table holds
 *   for the capability (the return type allows `undefined`).
 */
export function resolveMaxChars(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
  cfg: ClawdbotConfig;
  config?: MediaUnderstandingConfig;
}): number | undefined {
  const { capability, entry, cfg } = params;
  const configured =
    entry.maxChars ?? params.config?.maxChars ?? cfg.tools?.media?.[capability]?.maxChars;
  if (typeof configured === "number") return configured;
  return DEFAULT_MAX_CHARS_BY_CAPABILITY[capability];
}

/**
 * Resolves the byte limit for a model entry.
 *
 * Precedence: `entry.maxBytes`, then `params.config.maxBytes`, then
 * `cfg.tools.media[capability].maxBytes`, and finally
 * `DEFAULT_MAX_BYTES[capability]`.
 *
 * @returns The first numeric value found, otherwise the default for the capability.
 */
export function resolveMaxBytes(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
  cfg: ClawdbotConfig;
  config?: MediaUnderstandingConfig;
}): number {
  const configured =
    params.entry.maxBytes ??
    params.config?.maxBytes ??
    params.cfg.tools?.media?.[params.capability]?.maxBytes;
  if (typeof configured === "number") return configured;
  return DEFAULT_MAX_BYTES[params.capability];
}

/**
 * Looks up the per-capability media configuration.
 *
 * @returns `cfg.tools.media[capability]`, or `undefined` when any part of that
 *   path is absent.
 */
export function resolveCapabilityConfig(
  cfg: ClawdbotConfig,
  capability: MediaUnderstandingCapability,
): MediaUnderstandingConfig | undefined {
  return cfg.tools?.media?.[capability];
}

/**
 * Evaluates a scope configuration against a message context by delegating to
 * `resolveMediaUnderstandingScope`.
 *
 * The channel is taken from `ctx.Surface`, falling back to `ctx.Provider`; the
 * chat type is passed through `normalizeMediaUnderstandingChatType` first.
 *
 * @returns `"allow"` or `"deny"` as decided by the scope resolver.
 */
export function resolveScopeDecision(params: {
  scope?: MediaUnderstandingScopeConfig;
  ctx: MsgContext;
}): "allow" | "deny" {
  return resolveMediaUnderstandingScope({
    scope: params.scope,
    sessionKey: params.ctx.SessionKey,
    channel: params.ctx.Surface ?? params.ctx.Provider,
    chatType: normalizeMediaUnderstandingChatType(params.ctx.ChatType),
  });
}

/**
 * Infers which capabilities a model entry supports when none are declared.
 *
 * An entry is treated as a CLI entry when `type` is `"cli"`, or when `type` is
 * unset and a `command` is present; CLI entries are assumed to handle every
 * capability. Provider entries are mapped by their normalized provider id.
 *
 * @returns The inferred capability list, or `undefined` when the provider is
 *   missing or not one of the ids listed below.
 */
function inferCapabilities(
  entry: MediaUnderstandingModelConfig,
): MediaUnderstandingCapability[] | undefined {
  if ((entry.type ?? (entry.command ? "cli" : "provider")) === "cli") {
    return ["image", "audio", "video"];
  }
  const provider = normalizeMediaProviderId(entry.provider ?? "");
  if (!provider) return undefined;
  if (provider === "openai" || provider === "anthropic" || provider === "minimax") {
    return ["image"];
  }
  if (provider === "google") {
    return ["image", "audio", "video"];
  }
  if (provider === "groq") {
    return ["audio"];
  }
  return undefined;
}

/**
 * Collects the model entries applicable to a capability.
 *
 * Entries from the capability's own config come first, followed by the shared
 * `cfg.tools.media.models` list. Each entry is then filtered:
 * - a non-empty `capabilities` list on the entry is authoritative;
 * - otherwise, shared entries have their capabilities inferred, while
 *   capability-scoped entries are kept as-is;
 * - shared entries whose capabilities cannot be determined are dropped (with a
 *   verbose log line when verbose logging is on).
 *
 * @returns The matching entries in the order described above.
 */
export function resolveModelEntries(params: {
  cfg: ClawdbotConfig;
  capability: MediaUnderstandingCapability;
  config?: MediaUnderstandingConfig;
}): MediaUnderstandingModelConfig[] {
  const { cfg, capability, config } = params;
  const sharedModels = cfg.tools?.media?.models ?? [];
  const entries = [
    ...(config?.models ?? []).map((entry) => ({ entry, source: "capability" as const })),
    ...sharedModels.map((entry) => ({ entry, source: "shared" as const })),
  ];
  if (entries.length === 0) return [];

  return entries
    .filter(({ entry, source }) => {
      const caps =
        entry.capabilities && entry.capabilities.length > 0
          ? entry.capabilities
          : source === "shared"
            ? inferCapabilities(entry)
            : undefined;
      if (!caps || caps.length === 0) {
        if (source === "shared") {
          if (shouldLogVerbose()) {
            logVerbose(
              `Skipping shared media model without capabilities: ${entry.provider ?? entry.command ?? "unknown"}`,
            );
          }
          return false;
        }
        // Capability-scoped entries without a declared list apply to their own capability.
        return true;
      }
      return caps.includes(capability);
    })
    .map(({ entry }) => entry);
}

/**
 * Resolves how many media tasks may run concurrently.
 *
 * @returns `cfg.tools.media.concurrency` floored to an integer when it is a
 *   finite positive number, never less than 1; otherwise
 *   `DEFAULT_MEDIA_CONCURRENCY`.
 */
export function resolveConcurrency(cfg: ClawdbotConfig): number {
  const configured = cfg.tools?.media?.concurrency;
  if (typeof configured === "number" && Number.isFinite(configured) && configured > 0) {
    // A fractional value in (0, 1) would floor to 0; clamp so the result is
    // always a usable positive integer.
    return Math.max(1, Math.floor(configured));
  }
  return DEFAULT_MEDIA_CONCURRENCY;
}

/**
 * Decides whether a capability is enabled.
 *
 * @returns `false` when the capability config sets `enabled: false`; otherwise
 *   `true` only if at least one model is configured, either on the capability
 *   config or in the shared `cfg.tools.media.models` list.
 */
export function resolveCapabilityEnabled(params: {
  cfg: ClawdbotConfig;
  config?: MediaUnderstandingConfig;
}): boolean {
  if (params.config?.enabled === false) return false;
  const sharedModels = params.cfg.tools?.media?.models ?? [];
  return (params.config?.models?.length ?? 0) > 0 || sharedModels.length > 0;
}