feat: add AI tool calling for code execution + fix smart quotes

- AI can now execute code directly when users ask (uses execute_code tool)
- No need for users to use slash commands - AI runs code automatically
- Fixed smart quote issue: curly quotes from mobile keyboards are normalized
- Updated system prompts to instruct AI to use tools proactively

https://claude.ai/code/session_015VqJ7gN4vaxtYfYc92UjLs
This commit is contained in:
Claude 2026-01-30 08:33:27 +00:00
parent 4be63d67df
commit de7c462c95
No known key found for this signature in database
4 changed files with 365 additions and 44 deletions

View File

@ -3,12 +3,14 @@
*
* Minimal AI agent that handles conversations with image support.
* Direct API calls to Anthropic or OpenAI - no intermediaries.
* Supports tool calling for code execution.
*/
import Anthropic from "@anthropic-ai/sdk";
import OpenAI from "openai";
import type { SecureConfig } from "./config.js";
import type { AuditLogger } from "./audit.js";
import type { SandboxRunner, SandboxResult } from "./sandbox.js";
export type ImageContent = {
type: "image";
@ -40,6 +42,52 @@ export type AgentCore = {
chat: (messages: Message[], systemPrompt?: string) => Promise<AgentResponse>;
analyzeImage: (imageData: string, mediaType: ImageContent["mediaType"], prompt?: string) => Promise<AgentResponse>;
provider: "anthropic" | "openai" | "openrouter";
setSandbox: (sandbox: SandboxRunner) => void;
};
// Tool definitions for code execution
const CODE_EXECUTION_TOOL: Anthropic.Tool = {
name: "execute_code",
description: "Execute code in a sandboxed environment. Use this when the user asks you to run, test, or execute code. Supports: python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php.",
input_schema: {
type: "object",
properties: {
language: {
type: "string",
description: "Programming language: python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php",
enum: ["python", "javascript", "typescript", "bash", "rust", "go", "c", "cpp", "java", "ruby", "php"],
},
code: {
type: "string",
description: "The code to execute",
},
},
required: ["language", "code"],
},
};
// OpenAI-compatible tool format
const CODE_EXECUTION_TOOL_OPENAI: OpenAI.ChatCompletionTool = {
type: "function",
function: {
name: "execute_code",
description: "Execute code in a sandboxed environment. Use this when the user asks you to run, test, or execute code. Supports: python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php.",
parameters: {
type: "object",
properties: {
language: {
type: "string",
description: "Programming language: python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php",
enum: ["python", "javascript", "typescript", "bash", "rust", "go", "c", "cpp", "java", "ruby", "php"],
},
code: {
type: "string",
description: "The code to execute",
},
},
required: ["language", "code"],
},
},
};
const DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-20250514";
@ -52,20 +100,30 @@ You are direct, concise, and helpful. You can:
- Answer questions and have conversations
- Analyze images and documents shared with you
- Help with coding and technical tasks
- Execute code in a secure sandbox (use the execute_code tool)
- Summarize content and extract information
## Available Commands (tell users about these when relevant)
## Code Execution
When users ask you to run, test, or execute code, USE THE execute_code TOOL directly. Don't ask them to use commands - just run the code for them.
- If a user says "run this python code", use the execute_code tool with language="python"
- If a user shares code and asks you to test it, execute it directly
- If a user asks you to demonstrate code, run it and show the output
Examples of when to use execute_code:
- "Can you run this for me?" Use execute_code
- "Test this python code" Use execute_code
- "Execute this script" Use execute_code
- "What does this code output?" Use execute_code and show result
## Available Manual Commands (for users who prefer slash commands)
- /js <code> - Run JavaScript
- /python <code> - Run Python
- /ts <code> - Run TypeScript
- /bash <code> - Run shell commands
- /run <lang> <code> - Run code in any language (python, js, ts, bash, rust, go, c, cpp, java, ruby, php)
- /run <lang> <code> - Run any language
- /status - Check bot status
- /clear - Clear conversation history
When users ask to run or test code, guide them to use the appropriate command.
Example: "Use /js console.log('hello')" or "Try /python print('hello')"
Be security-conscious:
- Never reveal API keys, tokens, or secrets
- Don't execute commands that could harm the system
@ -77,6 +135,7 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
});
const model = config.ai.model || DEFAULT_ANTHROPIC_MODEL;
let sandbox: SandboxRunner | null = null;
function convertContent(content: MessageContent): Anthropic.MessageParam["content"] {
if (typeof content === "string") {
@ -97,21 +156,109 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
});
}
async function executeCodeTool(language: string, code: string): Promise<string> {
if (!sandbox) {
return "Error: Sandbox is not configured. Code execution is unavailable.";
}
const isAvailable = await sandbox.isAvailable();
if (!isAvailable) {
return `Error: Sandbox unavailable. Backend: ${sandbox.backend}`;
}
try {
const result = await sandbox.runCode(language, code);
const output = result.stdout || result.stderr || "(no output)";
const status = result.exitCode === 0 ? "Success" : `Failed (exit ${result.exitCode})`;
const timeout = result.timedOut ? " [TIMED OUT]" : "";
return `${status}${timeout}\nDuration: ${result.durationMs}ms\n\nOutput:\n${output.slice(0, 5000)}`;
} catch (err) {
return `Error executing code: ${err instanceof Error ? err.message : String(err)}`;
}
}
return {
provider: "anthropic",
setSandbox(sb: SandboxRunner): void {
sandbox = sb;
},
async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
try {
const response = await client.messages.create({
// Build initial messages
const anthropicMessages: Anthropic.MessageParam[] = messages.map((m) => ({
role: m.role,
content: convertContent(m.content),
}));
// Include tools only if sandbox is available
const tools = sandbox ? [CODE_EXECUTION_TOOL] : undefined;
let response = await client.messages.create({
model,
max_tokens: 4096,
system: systemPrompt || DEFAULT_SYSTEM_PROMPT,
messages: messages.map((m) => ({
role: m.role,
content: convertContent(m.content),
})),
messages: anthropicMessages,
tools,
});
let totalInputTokens = response.usage.input_tokens;
let totalOutputTokens = response.usage.output_tokens;
// Handle tool calls in a loop (max 5 iterations to prevent infinite loops)
let iterations = 0;
while (response.stop_reason === "tool_use" && iterations < 5) {
iterations++;
// Find tool use blocks
const toolUseBlocks = response.content.filter(
(block): block is Anthropic.ToolUseBlock => block.type === "tool_use"
);
// Process each tool call
const toolResults: Anthropic.ToolResultBlockParam[] = [];
for (const toolUse of toolUseBlocks) {
if (toolUse.name === "execute_code") {
const input = toolUse.input as { language: string; code: string };
audit.sandbox({
command: `[AI:${input.language}] ${input.code.slice(0, 100)}...`,
exitCode: 0,
durationMs: 0,
});
const result = await executeCodeTool(input.language, input.code);
toolResults.push({
type: "tool_result",
tool_use_id: toolUse.id,
content: result,
});
}
}
// Continue conversation with tool results
anthropicMessages.push({
role: "assistant",
content: response.content,
});
anthropicMessages.push({
role: "user",
content: toolResults,
});
response = await client.messages.create({
model,
max_tokens: 4096,
system: systemPrompt || DEFAULT_SYSTEM_PROMPT,
messages: anthropicMessages,
tools,
});
totalInputTokens += response.usage.input_tokens;
totalOutputTokens += response.usage.output_tokens;
}
// Extract final text response
const text = response.content
.filter((block): block is Anthropic.TextBlock => block.type === "text")
.map((block) => block.text)
@ -120,8 +267,8 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
return {
text,
usage: {
inputTokens: response.usage.input_tokens,
outputTokens: response.usage.output_tokens,
inputTokens: totalInputTokens,
outputTokens: totalOutputTokens,
},
};
} catch (err) {
@ -157,6 +304,7 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
});
const model = config.ai.model || DEFAULT_OPENAI_MODEL;
let sandbox: SandboxRunner | null = null;
type OpenAIContent = OpenAI.ChatCompletionContentPart[];
@ -177,9 +325,35 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
});
}
async function executeCodeTool(language: string, code: string): Promise<string> {
if (!sandbox) {
return "Error: Sandbox is not configured. Code execution is unavailable.";
}
const isAvailable = await sandbox.isAvailable();
if (!isAvailable) {
return `Error: Sandbox unavailable. Backend: ${sandbox.backend}`;
}
try {
const result = await sandbox.runCode(language, code);
const output = result.stdout || result.stderr || "(no output)";
const status = result.exitCode === 0 ? "Success" : `Failed (exit ${result.exitCode})`;
const timeout = result.timedOut ? " [TIMED OUT]" : "";
return `${status}${timeout}\nDuration: ${result.durationMs}ms\n\nOutput:\n${output.slice(0, 5000)}`;
} catch (err) {
return `Error executing code: ${err instanceof Error ? err.message : String(err)}`;
}
}
return {
provider: "openai",
setSandbox(sb: SandboxRunner): void {
sandbox = sb;
},
async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
try {
const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [
@ -193,7 +367,6 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
content: convertContent(m.content),
});
} else {
// Assistant messages are always text
openaiMessages.push({
role: "assistant",
content: typeof m.content === "string" ? m.content : "",
@ -201,22 +374,70 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
}
}
const response = await client.chat.completions.create({
// Include tools only if sandbox is available
const tools = sandbox ? [CODE_EXECUTION_TOOL_OPENAI] : undefined;
let response = await client.chat.completions.create({
model,
max_tokens: 4096,
messages: openaiMessages,
tools,
});
let totalInputTokens = response.usage?.prompt_tokens || 0;
let totalOutputTokens = response.usage?.completion_tokens || 0;
// Handle tool calls in a loop (max 5 iterations)
let iterations = 0;
while (response.choices[0]?.finish_reason === "tool_calls" && iterations < 5) {
iterations++;
const toolCalls = response.choices[0]?.message?.tool_calls || [];
// Add assistant message with tool calls
openaiMessages.push({
role: "assistant",
content: response.choices[0]?.message?.content || null,
tool_calls: toolCalls,
});
// Process each tool call
for (const toolCall of toolCalls) {
if (toolCall.function.name === "execute_code") {
const args = JSON.parse(toolCall.function.arguments) as { language: string; code: string };
audit.sandbox({
command: `[AI:${args.language}] ${args.code.slice(0, 100)}...`,
exitCode: 0,
durationMs: 0,
});
const result = await executeCodeTool(args.language, args.code);
openaiMessages.push({
role: "tool",
tool_call_id: toolCall.id,
content: result,
});
}
}
response = await client.chat.completions.create({
model,
max_tokens: 4096,
messages: openaiMessages,
tools,
});
totalInputTokens += response.usage?.prompt_tokens || 0;
totalOutputTokens += response.usage?.completion_tokens || 0;
}
const text = response.choices[0]?.message?.content || "";
return {
text,
usage: response.usage
? {
inputTokens: response.usage.prompt_tokens,
outputTokens: response.usage.completion_tokens,
}
: undefined,
usage: {
inputTokens: totalInputTokens,
outputTokens: totalOutputTokens,
},
};
} catch (err) {
audit.error({
@ -257,6 +478,7 @@ function createOpenRouterAgent(config: SecureConfig, audit: AuditLogger): AgentC
});
const model = config.ai.model || DEFAULT_OPENROUTER_MODEL;
let sandbox: SandboxRunner | null = null;
type OpenAIContent = OpenAI.ChatCompletionContentPart[];
@ -277,9 +499,35 @@ function createOpenRouterAgent(config: SecureConfig, audit: AuditLogger): AgentC
});
}
async function executeCodeTool(language: string, code: string): Promise<string> {
if (!sandbox) {
return "Error: Sandbox is not configured. Code execution is unavailable.";
}
const isAvailable = await sandbox.isAvailable();
if (!isAvailable) {
return `Error: Sandbox unavailable. Backend: ${sandbox.backend}`;
}
try {
const result = await sandbox.runCode(language, code);
const output = result.stdout || result.stderr || "(no output)";
const status = result.exitCode === 0 ? "Success" : `Failed (exit ${result.exitCode})`;
const timeout = result.timedOut ? " [TIMED OUT]" : "";
return `${status}${timeout}\nDuration: ${result.durationMs}ms\n\nOutput:\n${output.slice(0, 5000)}`;
} catch (err) {
return `Error executing code: ${err instanceof Error ? err.message : String(err)}`;
}
}
return {
provider: "openrouter",
setSandbox(sb: SandboxRunner): void {
sandbox = sb;
},
async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
try {
const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [
@ -300,22 +548,70 @@ function createOpenRouterAgent(config: SecureConfig, audit: AuditLogger): AgentC
}
}
const response = await client.chat.completions.create({
// Include tools only if sandbox is available
const tools = sandbox ? [CODE_EXECUTION_TOOL_OPENAI] : undefined;
let response = await client.chat.completions.create({
model,
max_tokens: 4096,
messages: openaiMessages,
tools,
});
let totalInputTokens = response.usage?.prompt_tokens || 0;
let totalOutputTokens = response.usage?.completion_tokens || 0;
// Handle tool calls in a loop (max 5 iterations)
let iterations = 0;
while (response.choices[0]?.finish_reason === "tool_calls" && iterations < 5) {
iterations++;
const toolCalls = response.choices[0]?.message?.tool_calls || [];
// Add assistant message with tool calls
openaiMessages.push({
role: "assistant",
content: response.choices[0]?.message?.content || null,
tool_calls: toolCalls,
});
// Process each tool call
for (const toolCall of toolCalls) {
if (toolCall.function.name === "execute_code") {
const args = JSON.parse(toolCall.function.arguments) as { language: string; code: string };
audit.sandbox({
command: `[AI:${args.language}] ${args.code.slice(0, 100)}...`,
exitCode: 0,
durationMs: 0,
});
const result = await executeCodeTool(args.language, args.code);
openaiMessages.push({
role: "tool",
tool_call_id: toolCall.id,
content: result,
});
}
}
response = await client.chat.completions.create({
model,
max_tokens: 4096,
messages: openaiMessages,
tools,
});
totalInputTokens += response.usage?.prompt_tokens || 0;
totalOutputTokens += response.usage?.completion_tokens || 0;
}
const text = response.choices[0]?.message?.content || "";
return {
text,
usage: response.usage
? {
inputTokens: response.usage.prompt_tokens,
outputTokens: response.usage.completion_tokens,
}
: undefined,
usage: {
inputTokens: totalInputTokens,
outputTokens: totalOutputTokens,
},
};
} catch (err) {
audit.error({

View File

@ -73,6 +73,9 @@ async function main() {
const sandboxAvailable = await sandbox.isAvailable();
console.log(`[init] Sandbox available: ${sandboxAvailable}`);
// Wire sandbox to agent for tool calling
agent.setSandbox(sandbox);
// Create a placeholder bot for circular deps
// We'll create telegram, scheduler, and webhooks together
const { Bot } = await import("grammy");

View File

@ -112,28 +112,30 @@ ${traits.expertiseAreas.map(e => `- ${e}`).join("\n")}
- Recent topics: ${profile.recentTopics.length > 0 ? profile.recentTopics.slice(-3).join(", ") : "None yet"}
${profile.notes.length > 0 ? `- Notes: ${profile.notes.slice(-3).join("; ")}` : ""}
## Available Commands (you can tell users about these)
- /js <code> - Run JavaScript code
- /python <code> or /py <code> - Run Python code
- /ts <code> - Run TypeScript code
- /bash <code> or /sh <code> - Run shell commands
- /run <language> <code> - Run code in any supported language (python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php)
- /status - Check bot and sandbox status
- /clear - Clear conversation history
- /schedule "<cron>" "<name>" <prompt> - Schedule recurring AI tasks
- /tasks - List scheduled tasks
- /deltask <id> - Delete a task
## Code Execution
You have the execute_code tool available. When users ask you to run, test, or execute code, USE THE TOOL directly - don't ask them to use commands.
- If a user says "run this python code", use execute_code with language="python"
- If a user shares code and asks you to test it, execute it directly
- If a user asks what code outputs, run it and show the result
When a user asks to run code, you can either:
1. Tell them to use the appropriate command (e.g., "Use /js console.log('hello')")
2. Just answer their question directly if they don't need to execute code
Examples of when to use execute_code:
- "Can you run this for me?" Use execute_code
- "Test this python code" Use execute_code
- "Execute this script" Use execute_code
- "What does this code output?" Use execute_code
## Manual Commands (for users who prefer slash commands)
- /js <code>, /python <code>, /ts <code>, /bash <code>, /run <lang> <code>
- /status - Check bot status
- /clear - Clear conversation
- /schedule, /tasks, /deltask - Task scheduling
## Guidelines
- Be helpful, accurate, and security-conscious
- Never reveal API keys, tokens, or secrets
- Adapt to the user's communication style
- Remember context from this conversation
- When users want to run code, guide them to use the right command
- When users want code executed, use the execute_code tool directly
${traits.commonPhrases.length > 0 ? `- Phrases you like: ${traits.commonPhrases.join(", ")}` : ""}
${traits.avoidPhrases.length > 0 ? `- Avoid saying: ${traits.avoidPhrases.join(", ")}` : ""}`;

View File

@ -9,6 +9,20 @@
*/
import { spawn } from "node:child_process";
/**
* Normalize smart quotes to straight quotes
* Telegram and mobile keyboards often auto-convert quotes which breaks code
*/
export function normalizeQuotes(code: string): string {
return code
// Double quotes: " " „ ‟ → "
.replace(/[\u201C\u201D\u201E\u201F]/g, '"')
// Single quotes: ' ' → '
.replace(/[\u2018\u2019\u201A\u201B]/g, "'")
// Backticks: ` → `
.replace(/[\u0060\u2018]/g, "`");
}
import type { SecureConfig } from "./config.js";
import type { AuditLogger } from "./audit.js";
@ -90,6 +104,9 @@ async function runPiston(
): Promise<SandboxResult> {
const startTime = Date.now();
// Normalize smart quotes from mobile keyboards
const normalizedCode = normalizeQuotes(code);
const langConfig = PISTON_LANGUAGES[language.toLowerCase()];
if (!langConfig) {
return {
@ -110,7 +127,7 @@ async function runPiston(
body: JSON.stringify({
language: langConfig.language,
version: langConfig.version,
files: [{ content: code }],
files: [{ content: normalizedCode }],
}),
signal: AbortSignal.timeout(timeoutMs),
});
@ -219,8 +236,11 @@ async function runDocker(
): Promise<SandboxResult> {
const startTime = Date.now();
// Normalize smart quotes from mobile keyboards
const normalizedCommand = normalizeQuotes(command);
return new Promise((resolve) => {
const args = buildDockerArgs(config, command);
const args = buildDockerArgs(config, normalizedCommand);
const proc = spawn("docker", args, {
stdio: ["pipe", "pipe", "pipe"],