feat: add image analysis support
- Agent now supports multimodal messages (text + images)
- Telegram handler downloads photos and sends them to Claude/GPT-4V
- Works with both Anthropic and OpenAI vision models
- Updates branding to AssureBot in messages

https://claude.ai/code/session_015VqJ7gN4vaxtYfYc92UjLs
This commit is contained in:
parent
b53fda5942
commit
d8daefb66a
123
secure/agent.ts
123
secure/agent.ts
@ -1,7 +1,7 @@
|
|||||||
/**
|
/**
|
||||||
* Moltbot Secure - Agent Core
|
* AssureBot - Agent Core
|
||||||
*
|
*
|
||||||
* Minimal AI agent that handles conversations.
|
* Minimal AI agent that handles conversations with image support.
|
||||||
* Direct API calls to Anthropic or OpenAI - no intermediaries.
|
* Direct API calls to Anthropic or OpenAI - no intermediaries.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -10,9 +10,22 @@ import OpenAI from "openai";
|
|||||||
import type { SecureConfig } from "./config.js";
|
import type { SecureConfig } from "./config.js";
|
||||||
import type { AuditLogger } from "./audit.js";
|
import type { AuditLogger } from "./audit.js";
|
||||||
|
|
||||||
|
/** An inline image part of a multimodal message. */
export type ImageContent = {
  type: "image";
  /** Image bytes encoded as base64 (no `data:` URL prefix; the provider adapters add one if needed). */
  data: string;
  /** MIME type of the encoded image. */
  mediaType: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
};

/** A plain-text part of a multimodal message. */
export type TextContent = {
  type: "text";
  text: string;
};

/**
 * Message payload: either a bare string (text-only message) or an ordered
 * list of text/image parts.
 */
export type MessageContent = string | Array<TextContent | ImageContent>;

/** One conversation turn, attributed to the user or the assistant. */
export type Message = {
  role: "user" | "assistant";
  content: MessageContent;
};
|
||||||
|
|
||||||
export type AgentResponse = {
|
export type AgentResponse = {
|
||||||
@ -25,6 +38,7 @@ export type AgentResponse = {
|
|||||||
|
|
||||||
/** Provider-agnostic agent interface, implemented once per AI backend. */
export type AgentCore = {
  /**
   * Sends the conversation to the model.
   *
   * @param messages - Conversation turns; content may be text or text + images.
   * @param systemPrompt - Optional override for the default system prompt.
   */
  chat: (messages: Message[], systemPrompt?: string) => Promise<AgentResponse>;

  /**
   * Asks the model about a single image.
   *
   * @param imageData - Base64-encoded image bytes.
   * @param mediaType - MIME type of the image.
   * @param prompt - Optional question/instruction to accompany the image.
   */
  analyzeImage: (
    imageData: string,
    mediaType: ImageContent["mediaType"],
    prompt?: string
  ) => Promise<AgentResponse>;

  /** Which backend this agent talks to. */
  provider: "anthropic" | "openai";
};
|
||||||
|
|
||||||
@ -53,8 +67,28 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
|
|||||||
|
|
||||||
const model = config.ai.model || DEFAULT_ANTHROPIC_MODEL;
|
const model = config.ai.model || DEFAULT_ANTHROPIC_MODEL;
|
||||||
|
|
||||||
|
/**
 * Converts provider-neutral message content into the Anthropic Messages API shape.
 *
 * Strings pass through unchanged. Part arrays are mapped one-to-one: text parts
 * stay text blocks, image parts become base64 `image` source blocks.
 *
 * @param content - Text or an ordered list of text/image parts.
 * @returns Content suitable for `Anthropic.MessageParam["content"]`.
 * @throws Error if a part has a `type` this adapter does not know how to convert.
 */
function convertContent(content: MessageContent): Anthropic.MessageParam["content"] {
  if (typeof content === "string") {
    return content;
  }

  return content.map((part) => {
    switch (part.type) {
      case "text":
        return { type: "text" as const, text: part.text };
      case "image":
        return {
          type: "image" as const,
          source: {
            type: "base64" as const,
            media_type: part.mediaType,
            data: part.data,
          },
        };
      default: {
        // Exhaustiveness check: adding a new part variant must be handled here
        // explicitly instead of being silently sent as a malformed image block.
        const unsupported: never = part;
        const kind = String((unsupported as { type?: unknown }).type);
        throw new Error(`Unsupported message content part type: ${kind}`);
      }
    }
  });
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
provider: "anthropic",
|
provider: "anthropic",
|
||||||
|
|
||||||
async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
|
async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
|
||||||
try {
|
try {
|
||||||
const response = await client.messages.create({
|
const response = await client.messages.create({
|
||||||
@ -63,7 +97,7 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
|
|||||||
system: systemPrompt || DEFAULT_SYSTEM_PROMPT,
|
system: systemPrompt || DEFAULT_SYSTEM_PROMPT,
|
||||||
messages: messages.map((m) => ({
|
messages: messages.map((m) => ({
|
||||||
role: m.role,
|
role: m.role,
|
||||||
content: m.content,
|
content: convertContent(m.content),
|
||||||
})),
|
})),
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -86,6 +120,23 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
|
|||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
 * Asks the model about a single image by wrapping the image and the prompt
 * in one user message and delegating to `chat` (default system prompt).
 *
 * @param imageData - Base64-encoded image bytes.
 * @param mediaType - MIME type of the image.
 * @param prompt - Instruction sent alongside the image.
 */
async analyzeImage(
  imageData: string,
  mediaType: ImageContent["mediaType"],
  prompt = "What's in this image? Describe it in detail."
): Promise<AgentResponse> {
  // Image part first, then the instruction text.
  const userTurn: Message = {
    role: "user",
    content: [
      { type: "image", data: imageData, mediaType },
      { type: "text", text: prompt },
    ],
  };
  return this.chat([userTurn]);
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,20 +147,53 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
|
|||||||
|
|
||||||
const model = config.ai.model || DEFAULT_OPENAI_MODEL;
|
const model = config.ai.model || DEFAULT_OPENAI_MODEL;
|
||||||
|
|
||||||
|
/** Multimodal content-part array accepted by OpenAI chat completions. */
type OpenAIContent = OpenAI.ChatCompletionContentPart[];

/**
 * Converts provider-neutral message content into the OpenAI chat-completions shape.
 *
 * Strings pass through unchanged. Part arrays are mapped one-to-one: text parts
 * stay text parts, image parts become `image_url` parts carrying a base64 data URL.
 *
 * @param content - Text or an ordered list of text/image parts.
 * @returns A string or an array of OpenAI content parts.
 * @throws Error if a part has a `type` this adapter does not know how to convert.
 */
function convertContent(content: MessageContent): string | OpenAIContent {
  if (typeof content === "string") {
    return content;
  }

  return content.map((part) => {
    switch (part.type) {
      case "text":
        return { type: "text" as const, text: part.text };
      case "image":
        return {
          type: "image_url" as const,
          image_url: {
            url: `data:${part.mediaType};base64,${part.data}`,
          },
        };
      default: {
        // Exhaustiveness check: adding a new part variant must be handled here
        // explicitly instead of being silently sent as a bogus image URL.
        const unsupported: never = part;
        const kind = String((unsupported as { type?: unknown }).type);
        throw new Error(`Unsupported message content part type: ${kind}`);
      }
    }
  });
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
provider: "openai",
|
provider: "openai",
|
||||||
|
|
||||||
async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
|
async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
|
||||||
try {
|
try {
|
||||||
|
const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [
|
||||||
|
{ role: "system", content: systemPrompt || DEFAULT_SYSTEM_PROMPT },
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const m of messages) {
|
||||||
|
if (m.role === "user") {
|
||||||
|
openaiMessages.push({
|
||||||
|
role: "user",
|
||||||
|
content: convertContent(m.content),
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// Assistant messages are always text
|
||||||
|
openaiMessages.push({
|
||||||
|
role: "assistant",
|
||||||
|
content: typeof m.content === "string" ? m.content : "",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const response = await client.chat.completions.create({
|
const response = await client.chat.completions.create({
|
||||||
model,
|
model,
|
||||||
max_tokens: 4096,
|
max_tokens: 4096,
|
||||||
messages: [
|
messages: openaiMessages,
|
||||||
{ role: "system", content: systemPrompt || DEFAULT_SYSTEM_PROMPT },
|
|
||||||
...messages.map((m) => ({
|
|
||||||
role: m.role as "user" | "assistant",
|
|
||||||
content: m.content,
|
|
||||||
})),
|
|
||||||
],
|
|
||||||
});
|
});
|
||||||
|
|
||||||
const text = response.choices[0]?.message?.content || "";
|
const text = response.choices[0]?.message?.content || "";
|
||||||
@ -130,6 +214,23 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
|
|||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
 * Asks the model about a single image by wrapping the image and the prompt
 * in one user message and delegating to `chat` (default system prompt).
 *
 * @param imageData - Base64-encoded image bytes.
 * @param mediaType - MIME type of the image.
 * @param prompt - Instruction sent alongside the image.
 */
async analyzeImage(
  imageData: string,
  mediaType: ImageContent["mediaType"],
  prompt = "What's in this image? Describe it in detail."
): Promise<AgentResponse> {
  // Image part first, then the instruction text.
  const userTurn: Message = {
    role: "user",
    content: [
      { type: "image", data: imageData, mediaType },
      { type: "text", text: prompt },
    ],
  };
  return this.chat([userTurn]);
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,14 +1,14 @@
|
|||||||
/**
|
/**
|
||||||
* Moltbot Secure - Telegram Channel
|
* AssureBot - Telegram Channel
|
||||||
*
|
*
|
||||||
* Minimal, secure Telegram bot handler.
|
* Minimal, secure Telegram bot handler with image analysis.
|
||||||
* Allowlist-only: only approved users can interact.
|
* Allowlist-only: only approved users can interact.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Bot, Context } from "grammy";
|
import { Bot, Context } from "grammy";
|
||||||
import type { SecureConfig } from "./config.js";
|
import type { SecureConfig } from "./config.js";
|
||||||
import type { AuditLogger } from "./audit.js";
|
import type { AuditLogger } from "./audit.js";
|
||||||
import type { AgentCore, ConversationStore } from "./agent.js";
|
import type { AgentCore, ConversationStore, ImageContent } from "./agent.js";
|
||||||
|
|
||||||
export type TelegramBot = {
|
export type TelegramBot = {
|
||||||
bot: Bot;
|
bot: Bot;
|
||||||
@ -62,7 +62,7 @@ export function createTelegramBot(deps: TelegramDeps): TelegramBot {
|
|||||||
}
|
}
|
||||||
|
|
||||||
await ctx.reply(
|
await ctx.reply(
|
||||||
`Welcome to Moltbot Secure.
|
`Welcome to AssureBot.
|
||||||
|
|
||||||
You are authorized to use this bot.
|
You are authorized to use this bot.
|
||||||
|
|
||||||
@ -72,7 +72,10 @@ Commands:
|
|||||||
/status - Check bot status
|
/status - Check bot status
|
||||||
/help - Show help
|
/help - Show help
|
||||||
|
|
||||||
Just send me a message to chat!`
|
Features:
|
||||||
|
- Send text messages to chat
|
||||||
|
- Send images for analysis
|
||||||
|
- Forward content for analysis`
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -113,12 +116,13 @@ Just send me a message to chat!`
|
|||||||
}
|
}
|
||||||
|
|
||||||
await ctx.reply(
|
await ctx.reply(
|
||||||
`Moltbot Secure Help
|
`AssureBot Help
|
||||||
|
|
||||||
This is a secure, self-hosted AI assistant.
|
A secure, self-hosted AI assistant.
|
||||||
|
|
||||||
Features:
|
Features:
|
||||||
- Chat with AI (text messages)
|
- Chat with AI (text messages)
|
||||||
|
- Image analysis (send photos)
|
||||||
- Forward content for analysis
|
- Forward content for analysis
|
||||||
- Receive webhook notifications
|
- Receive webhook notifications
|
||||||
|
|
||||||
@ -255,13 +259,74 @@ Security:
|
|||||||
// Handle photos
|
// Handle photos
|
||||||
bot.on("message:photo", async (ctx) => {
  // Photo handler: allowlist check, download the photo from Telegram,
  // send it to the agent for analysis, reply, and record the exchange.
  const userId = ctx.from?.id;
  const username = formatUsername(ctx);

  if (!userId || !isUserAllowed(userId, config.telegram.allowedUsers)) {
    audit.messageBlocked({
      userId: userId || 0,
      username,
      reason: "User not in allowlist",
    });
    return;
  }

  const startTime = Date.now();
  // The caption doubles as the analysis prompt; fall back to a generic one.
  const caption = ctx.message.caption || "What's in this image? Describe it in detail.";

  try {
    await ctx.replyWithChatAction("typing");

    // Get the largest photo (last in array)
    const photos = ctx.message.photo;
    const photo = photos[photos.length - 1];
    if (!photo) {
      // Guard the indexed access instead of dereferencing undefined.
      audit.error({
        error: "Failed to analyze image: photo message contained no sizes",
        metadata: { userId, username },
      });
      await ctx.reply("Sorry, I couldn't download the image.");
      return;
    }

    // Get file info
    const file = await ctx.api.getFile(photo.file_id);
    if (!file.file_path) {
      // Record the failure so it is visible in the audit trail.
      audit.error({
        error: "Failed to analyze image: Telegram returned no file_path",
        metadata: { userId, username },
      });
      await ctx.reply("Sorry, I couldn't download the image.");
      return;
    }

    // Download the file.
    // NOTE: this URL embeds the bot token — never log or echo it.
    const fileUrl = `https://api.telegram.org/file/bot${config.telegram.botToken}/${file.file_path}`;
    const response = await fetch(fileUrl);
    if (!response.ok) {
      audit.error({
        error: `Failed to analyze image: download returned HTTP ${response.status}`,
        metadata: { userId, username },
      });
      await ctx.reply("Sorry, I couldn't download the image.");
      return;
    }

    const buffer = await response.arrayBuffer();
    const base64 = Buffer.from(buffer).toString("base64");

    // Determine media type from file path; anything unrecognized is treated as JPEG.
    const ext = file.file_path.split(".").pop()?.toLowerCase();
    let mediaType: ImageContent["mediaType"] = "image/jpeg";
    if (ext === "png") mediaType = "image/png";
    else if (ext === "gif") mediaType = "image/gif";
    else if (ext === "webp") mediaType = "image/webp";

    // Analyze with AI
    const result = await agent.analyzeImage(base64, mediaType, caption);

    // Try Markdown first; if Telegram rejects it, resend as plain text.
    await ctx.reply(result.text, { parse_mode: "Markdown" }).catch(async () => {
      await ctx.reply(result.text);
    });

    audit.message({
      userId,
      username,
      text: `[IMAGE] ${caption}`,
      response: result.text,
      durationMs: Date.now() - startTime,
    });
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    audit.error({
      error: `Failed to analyze image: ${errorMsg}`,
      metadata: { userId, username },
    });
    await ctx.reply("Sorry, I couldn't analyze that image. Please try again.");
  }
});
|
||||||
|
|
||||||
// Handle documents
|
// Handle documents
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user