From c33905a2bc59c72cec3739e5e55117a5d4987d7a Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 30 Jan 2026 06:24:02 +0000 Subject: [PATCH] feat: add image analysis support - Agent now supports multimodal messages (text + images) - Telegram handler downloads photos and sends to Claude/GPT-4V - Works with both Anthropic and OpenAI vision models - Updates branding to AssureBot in messages https://claude.ai/code/session_015VqJ7gN4vaxtYfYc92UjLs --- secure/agent.ts | 123 +++++++++++++++++++++++++++++++++++++++++---- secure/telegram.ts | 85 +++++++++++++++++++++++++++---- 2 files changed, 187 insertions(+), 21 deletions(-) diff --git a/secure/agent.ts b/secure/agent.ts index 8a98c029a..5381ccaa9 100644 --- a/secure/agent.ts +++ b/secure/agent.ts @@ -1,7 +1,7 @@ /** - * Moltbot Secure - Agent Core + * AssureBot - Agent Core * - * Minimal AI agent that handles conversations. + * Minimal AI agent that handles conversations with image support. * Direct API calls to Anthropic or OpenAI - no intermediaries. */ @@ -10,9 +10,22 @@ import OpenAI from "openai"; import type { SecureConfig } from "./config.js"; import type { AuditLogger } from "./audit.js"; +export type ImageContent = { + type: "image"; + data: string; // base64 + mediaType: "image/jpeg" | "image/png" | "image/gif" | "image/webp"; +}; + +export type TextContent = { + type: "text"; + text: string; +}; + +export type MessageContent = string | (TextContent | ImageContent)[]; + export type Message = { role: "user" | "assistant"; - content: string; + content: MessageContent; }; export type AgentResponse = { @@ -25,6 +38,7 @@ export type AgentResponse = { export type AgentCore = { chat: (messages: Message[], systemPrompt?: string) => Promise; + analyzeImage: (imageData: string, mediaType: ImageContent["mediaType"], prompt?: string) => Promise; provider: "anthropic" | "openai"; }; @@ -53,8 +67,28 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo const model = config.ai.model || DEFAULT_ANTHROPIC_MODEL; + function convertContent(content: MessageContent): Anthropic.MessageParam["content"] { + if (typeof content === "string") { + return content; + } + return content.map((part) => { + if (part.type === "text") { + return { type: "text" as const, text: part.text }; + } + return { + type: "image" as const, + source: { + type: "base64" as const, + media_type: part.mediaType, + data: part.data, + }, + }; + }); + } + return { provider: "anthropic", + async chat(messages: Message[], systemPrompt?: string): Promise { try { const response = await client.messages.create({ @@ -63,7 +97,7 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo system: systemPrompt || DEFAULT_SYSTEM_PROMPT, messages: messages.map((m) => ({ role: m.role, - content: m.content, + content: convertContent(m.content), })), }); @@ -86,6 +120,23 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo throw err; } }, + + async analyzeImage( + imageData: string, + mediaType: ImageContent["mediaType"], + prompt = "What's in this image? Describe it in detail." + ): Promise { + const messages: Message[] = [ + { + role: "user", + content: [ + { type: "image", data: imageData, mediaType }, + { type: "text", text: prompt }, + ], + }, + ]; + return this.chat(messages); + }, }; } @@ -96,20 +147,53 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore const model = config.ai.model || DEFAULT_OPENAI_MODEL; + type OpenAIContent = OpenAI.ChatCompletionContentPart[]; + + function convertContent(content: MessageContent): string | OpenAIContent { + if (typeof content === "string") { + return content; + } + return content.map((part) => { + if (part.type === "text") { + return { type: "text" as const, text: part.text }; + } + return { + type: "image_url" as const, + image_url: { + url: `data:${part.mediaType};base64,${part.data}`, + }, + }; + }); + } + return { provider: "openai", + async chat(messages: Message[], systemPrompt?: string): Promise { try { + const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt || DEFAULT_SYSTEM_PROMPT }, + ]; + + for (const m of messages) { + if (m.role === "user") { + openaiMessages.push({ + role: "user", + content: convertContent(m.content), + }); + } else { + // Assistant messages are always text + openaiMessages.push({ + role: "assistant", + content: typeof m.content === "string" ? m.content : "", + }); + } + } + const response = await client.chat.completions.create({ model, max_tokens: 4096, - messages: [ - { role: "system", content: systemPrompt || DEFAULT_SYSTEM_PROMPT }, - ...messages.map((m) => ({ - role: m.role as "user" | "assistant", - content: m.content, - })), - ], + messages: openaiMessages, }); const text = response.choices[0]?.message?.content || ""; @@ -130,6 +214,23 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore throw err; } }, + + async analyzeImage( + imageData: string, + mediaType: ImageContent["mediaType"], + prompt = "What's in this image? Describe it in detail." + ): Promise { + const messages: Message[] = [ + { + role: "user", + content: [ + { type: "image", data: imageData, mediaType }, + { type: "text", text: prompt }, + ], + }, + ]; + return this.chat(messages); + }, }; } diff --git a/secure/telegram.ts b/secure/telegram.ts index aa92c9446..4108404ad 100644 --- a/secure/telegram.ts +++ b/secure/telegram.ts @@ -1,14 +1,14 @@ /** - * Moltbot Secure - Telegram Channel + * AssureBot - Telegram Channel * - * Minimal, secure Telegram bot handler. + * Minimal, secure Telegram bot handler with image analysis. * Allowlist-only: only approved users can interact. */ import { Bot, Context } from "grammy"; import type { SecureConfig } from "./config.js"; import type { AuditLogger } from "./audit.js"; -import type { AgentCore, ConversationStore } from "./agent.js"; +import type { AgentCore, ConversationStore, ImageContent } from "./agent.js"; export type TelegramBot = { bot: Bot; @@ -62,7 +62,7 @@ export function createTelegramBot(deps: TelegramDeps): TelegramBot { } await ctx.reply( - `Welcome to Moltbot Secure. + `Welcome to AssureBot. You are authorized to use this bot. @@ -72,7 +72,10 @@ Commands: /status - Check bot status /help - Show help -Just send me a message to chat!` +Features: +- Send text messages to chat +- Send images for analysis +- Forward content for analysis` ); }); @@ -113,12 +116,13 @@ Just send me a message to chat!` } await ctx.reply( - `Moltbot Secure Help + `AssureBot Help -This is a secure, self-hosted AI assistant. +A secure, self-hosted AI assistant. Features: - Chat with AI (text messages) +- Image analysis (send photos) - Forward content for analysis - Receive webhook notifications @@ -255,13 +259,74 @@ Security: // Handle photos bot.on("message:photo", async (ctx) => { const userId = ctx.from?.id; + const username = formatUsername(ctx); + if (!userId || !isUserAllowed(userId, config.telegram.allowedUsers)) { + audit.messageBlocked({ + userId: userId || 0, + username, + reason: "User not in allowlist", + }); return; } - await ctx.reply( - "I received your image. Image analysis is available with Claude - please describe what you'd like me to analyze." - ); + const startTime = Date.now(); + const caption = ctx.message.caption || "What's in this image? Describe it in detail."; + + try { + await ctx.replyWithChatAction("typing"); + + // Get the largest photo (last in array) + const photos = ctx.message.photo; + const photo = photos[photos.length - 1]; + + // Get file info + const file = await ctx.api.getFile(photo.file_id); + if (!file.file_path) { + await ctx.reply("Sorry, I couldn't download the image."); + return; + } + + // Download the file + const fileUrl = `https://api.telegram.org/file/bot${config.telegram.botToken}/${file.file_path}`; + const response = await fetch(fileUrl); + if (!response.ok) { + await ctx.reply("Sorry, I couldn't download the image."); + return; + } + + const buffer = await response.arrayBuffer(); + const base64 = Buffer.from(buffer).toString("base64"); + + // Determine media type from file path + const ext = file.file_path.split(".").pop()?.toLowerCase(); + let mediaType: ImageContent["mediaType"] = "image/jpeg"; + if (ext === "png") mediaType = "image/png"; + else if (ext === "gif") mediaType = "image/gif"; + else if (ext === "webp") mediaType = "image/webp"; + + // Analyze with AI + const result = await agent.analyzeImage(base64, mediaType, caption); + + await ctx.reply(result.text, { parse_mode: "Markdown" }).catch(async () => { + await ctx.reply(result.text); + }); + + audit.message({ + userId, + username, + text: `[IMAGE] ${caption}`, + response: result.text, + durationMs: Date.now() - startTime, + }); + } catch (err) { + const errorMsg = err instanceof Error ? err.message : String(err); + audit.error({ + error: `Failed to analyze image: ${errorMsg}`, + metadata: { userId, username }, + }); + await ctx.reply("Sorry, I couldn't analyze that image. Please try again."); + } }); // Handle documents