feat: add AI tool calling for code execution + fix smart quotes

- AI can now execute code directly when users ask (uses the execute_code tool)
- No need for users to use slash commands - the AI runs code automatically
- Fixed smart quote issue: curly quotes from mobile keyboards are normalized
- Updated system prompts to instruct the AI to use tools proactively

https://claude.ai/code/session_015VqJ7gN4vaxtYfYc92UjLs
2026-01-30 08:33:27 +00:00 · 2026-01-30 08:33:27 +00:00 · de7c462c95
commit de7c462c95
parent 4be63d67df
4 changed files with 365 additions and 44 deletions
--- a/secure/agent.ts
+++ b/secure/agent.ts
@ -3,12 +3,14 @@
 *
 * Minimal AI agent that handles conversations with image support.
 * Direct API calls to Anthropic or OpenAI - no intermediaries.
+ * Supports tool calling for code execution.
 */

 import Anthropic from "@anthropic-ai/sdk";
 import OpenAI from "openai";
 import type { SecureConfig } from "./config.js";
 import type { AuditLogger } from "./audit.js";
+import type { SandboxRunner, SandboxResult } from "./sandbox.js";

 export type ImageContent = {
  type: "image";
@ -40,6 +42,52 @@ export type AgentCore = {
  chat: (messages: Message[], systemPrompt?: string) => Promise<AgentResponse>;
  analyzeImage: (imageData: string, mediaType: ImageContent["mediaType"], prompt?: string) => Promise<AgentResponse>;
  provider: "anthropic" | "openai" | "openrouter";
+  setSandbox: (sandbox: SandboxRunner) => void;
+};
+
+// Anthropic-format definition of the execute_code tool: its name, the description shown to the model, and the JSON schema of its input.
+const CODE_EXECUTION_TOOL: Anthropic.Tool = {
+  name: "execute_code",
+  description: "Execute code in a sandboxed environment. Use this when the user asks you to run, test, or execute code. Supports: python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php.",
+  input_schema: {
+    type: "object",
+    properties: {
+      language: {
+        type: "string",
+        description: "Programming language: python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php",
+        enum: ["python", "javascript", "typescript", "bash", "rust", "go", "c", "cpp", "java", "ruby", "php"], // NOTE(review): the same list is spelled out in both description strings; keep them in sync
+      },
+      code: {
+        type: "string",
+        description: "The code to execute",
+      },
+    },
+    required: ["language", "code"], // both arguments are mandatory
+  },
+};
+
+// OpenAI chat-completions ("function" tool) form of execute_code; its name, description, and parameter schema repeat the Anthropic-format constant.
+const CODE_EXECUTION_TOOL_OPENAI: OpenAI.ChatCompletionTool = {
+  type: "function",
+  function: {
+    name: "execute_code",
+    description: "Execute code in a sandboxed environment. Use this when the user asks you to run, test, or execute code. Supports: python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php.",
+    parameters: {
+      type: "object",
+      properties: {
+        language: {
+          type: "string",
+          description: "Programming language: python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php",
+          enum: ["python", "javascript", "typescript", "bash", "rust", "go", "c", "cpp", "java", "ruby", "php"], // NOTE(review): duplicated in the description strings and in CODE_EXECUTION_TOOL; keep them in sync
+        },
+        code: {
+          type: "string",
+          description: "The code to execute",
+        },
+      },
+      required: ["language", "code"], // both arguments are mandatory
+    },
+  },
 };

 const DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-20250514";
@ -52,20 +100,30 @@ You are direct, concise, and helpful. You can:
 - Answer questions and have conversations
 - Analyze images and documents shared with you
 - Help with coding and technical tasks
+- Execute code in a secure sandbox (use the execute_code tool)
 - Summarize content and extract information

-## Available Commands (tell users about these when relevant)
+## Code Execution
+When users ask you to run, test, or execute code, USE THE execute_code TOOL directly. Don't ask them to use commands - just run the code for them.
+- If a user says "run this python code", use the execute_code tool with language="python"
+- If a user shares code and asks you to test it, execute it directly
+- If a user asks you to demonstrate code, run it and show the output
+
+Examples of when to use execute_code:
+- "Can you run this for me?" → Use execute_code
+- "Test this python code" → Use execute_code
+- "Execute this script" → Use execute_code
+- "What does this code output?" → Use execute_code and show result
+
+## Available Manual Commands (for users who prefer slash commands)
 - /js <code> - Run JavaScript
 - /python <code> - Run Python
 - /ts <code> - Run TypeScript
 - /bash <code> - Run shell commands
- /run <lang> <code> - Run code in any language (python, js, ts, bash, rust, go, c, cpp, java, ruby, php)
+- /run <lang> <code> - Run any language
 - /status - Check bot status
 - /clear - Clear conversation history

-When users ask to run or test code, guide them to use the appropriate command.
-Example: "Use /js console.log('hello')" or "Try /python print('hello')"
-
 Be security-conscious:
 - Never reveal API keys, tokens, or secrets
 - Don't execute commands that could harm the system
@ -77,6 +135,7 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
  });

  const model = config.ai.model || DEFAULT_ANTHROPIC_MODEL;
+  let sandbox: SandboxRunner | null = null;

  function convertContent(content: MessageContent): Anthropic.MessageParam["content"] {
    if (typeof content === "string") {
@ -97,21 +156,109 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
    });
  }

+  async function executeCodeTool(language: string, code: string): Promise<string> { // Runs `code` in the wired sandbox and returns a plain-text report used as the tool result. NOTE(review): identical copies exist in the OpenAI and OpenRouter factories.
+    if (!sandbox) { // still null: setSandbox() has not been called
+      return "Error: Sandbox is not configured. Code execution is unavailable.";
+    }
+
+    const isAvailable = await sandbox.isAvailable(); // NOTE(review): awaited outside the try below, so a rejection here propagates to the caller
+    if (!isAvailable) {
+      return `Error: Sandbox unavailable. Backend: ${sandbox.backend}`;
+    }
+
+    try {
+      const result = await sandbox.runCode(language, code);
+      const output = result.stdout || result.stderr || "(no output)"; // NOTE(review): stderr is shown only when stdout is empty
+      const status = result.exitCode === 0 ? "Success" : `Failed (exit ${result.exitCode})`;
+      const timeout = result.timedOut ? " [TIMED OUT]" : "";
+
+      return `${status}${timeout}\nDuration: ${result.durationMs}ms\n\nOutput:\n${output.slice(0, 5000)}`; // output is cut to its first 5000 characters
+    } catch (err) {
+      return `Error executing code: ${err instanceof Error ? err.message : String(err)}`; // a runCode failure is reported as text, not rethrown
+    }
+  }
+
  return {
    provider: "anthropic",

+    setSandbox(sb: SandboxRunner): void { // Stores the sandbox for this agent; while it is unset, chat() passes `tools: undefined` to the API.
+      sandbox = sb;
+    },
+
    async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
      try {
-        const response = await client.messages.create({
+        // Build initial messages
+        const anthropicMessages: Anthropic.MessageParam[] = messages.map((m) => ({
+          role: m.role,
+          content: convertContent(m.content),
+        }));
+
+        // Include tools only if sandbox is available
+        const tools = sandbox ? [CODE_EXECUTION_TOOL] : undefined;
+
+        let response = await client.messages.create({
          model,
          max_tokens: 4096,
          system: systemPrompt || DEFAULT_SYSTEM_PROMPT,
-          messages: messages.map((m) => ({
-            role: m.role,
-            content: convertContent(m.content),
-          })),
+          messages: anthropicMessages,
+          tools,
        });

+        let totalInputTokens = response.usage.input_tokens;
+        let totalOutputTokens = response.usage.output_tokens;
+
+        // Handle tool calls in a loop (max 5 iterations to prevent infinite loops)
+        let iterations = 0;
+        while (response.stop_reason === "tool_use" && iterations < 5) {
+          iterations++;
+
+          // Find tool use blocks
+          const toolUseBlocks = response.content.filter(
+            (block): block is Anthropic.ToolUseBlock => block.type === "tool_use"
+          );
+
+          // Process each tool call
+          const toolResults: Anthropic.ToolResultBlockParam[] = [];
+          for (const toolUse of toolUseBlocks) {
+            if (toolUse.name === "execute_code") {
+              const input = toolUse.input as { language: string; code: string };
+              audit.sandbox({
+                command: `[AI:${input.language}] ${input.code.slice(0, 100)}...`,
+                exitCode: 0,
+                durationMs: 0,
+              });
+              const result = await executeCodeTool(input.language, input.code);
+              toolResults.push({
+                type: "tool_result",
+                tool_use_id: toolUse.id,
+                content: result,
+              });
+            }
+          }
+
+          // Continue conversation with tool results
+          anthropicMessages.push({
+            role: "assistant",
+            content: response.content,
+          });
+          anthropicMessages.push({
+            role: "user",
+            content: toolResults,
+          });
+
+          response = await client.messages.create({
+            model,
+            max_tokens: 4096,
+            system: systemPrompt || DEFAULT_SYSTEM_PROMPT,
+            messages: anthropicMessages,
+            tools,
+          });
+
+          totalInputTokens += response.usage.input_tokens;
+          totalOutputTokens += response.usage.output_tokens;
+        }
+
+        // Extract final text response
        const text = response.content
          .filter((block): block is Anthropic.TextBlock => block.type === "text")
          .map((block) => block.text)
@ -120,8 +267,8 @@ function createAnthropicAgent(config: SecureConfig, audit: AuditLogger): AgentCo
        return {
          text,
          usage: {
-            inputTokens: response.usage.input_tokens,
-            outputTokens: response.usage.output_tokens,
+            inputTokens: totalInputTokens,
+            outputTokens: totalOutputTokens,
          },
        };
      } catch (err) {
@ -157,6 +304,7 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
  });

  const model = config.ai.model || DEFAULT_OPENAI_MODEL;
+  let sandbox: SandboxRunner | null = null;

  type OpenAIContent = OpenAI.ChatCompletionContentPart[];

@ -177,9 +325,35 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
    });
  }

+  async function executeCodeTool(language: string, code: string): Promise<string> { // Runs `code` in the wired sandbox and returns a plain-text report used as the "tool" message content. NOTE(review): identical copies exist in the Anthropic and OpenRouter factories.
+    if (!sandbox) { // still null: setSandbox() has not been called
+      return "Error: Sandbox is not configured. Code execution is unavailable.";
+    }
+
+    const isAvailable = await sandbox.isAvailable(); // NOTE(review): awaited outside the try below, so a rejection here propagates to the caller
+    if (!isAvailable) {
+      return `Error: Sandbox unavailable. Backend: ${sandbox.backend}`;
+    }
+
+    try {
+      const result = await sandbox.runCode(language, code);
+      const output = result.stdout || result.stderr || "(no output)"; // NOTE(review): stderr is shown only when stdout is empty
+      const status = result.exitCode === 0 ? "Success" : `Failed (exit ${result.exitCode})`;
+      const timeout = result.timedOut ? " [TIMED OUT]" : "";
+
+      return `${status}${timeout}\nDuration: ${result.durationMs}ms\n\nOutput:\n${output.slice(0, 5000)}`; // output is cut to its first 5000 characters
+    } catch (err) {
+      return `Error executing code: ${err instanceof Error ? err.message : String(err)}`; // a runCode failure is reported as text, not rethrown
+    }
+  }
+
  return {
    provider: "openai",

+    setSandbox(sb: SandboxRunner): void { // Stores the sandbox for this agent; while it is unset, chat() passes `tools: undefined` to the API.
+      sandbox = sb;
+    },
+
    async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
      try {
        const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [
@ -193,7 +367,6 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
              content: convertContent(m.content),
            });
          } else {
-            // Assistant messages are always text
            openaiMessages.push({
              role: "assistant",
              content: typeof m.content === "string" ? m.content : "",
@ -201,22 +374,70 @@ function createOpenAIAgent(config: SecureConfig, audit: AuditLogger): AgentCore
          }
        }

-        const response = await client.chat.completions.create({
+        // Include tools only if sandbox is available
+        const tools = sandbox ? [CODE_EXECUTION_TOOL_OPENAI] : undefined;
+
+        let response = await client.chat.completions.create({
          model,
          max_tokens: 4096,
          messages: openaiMessages,
+          tools,
        });

+        let totalInputTokens = response.usage?.prompt_tokens || 0;
+        let totalOutputTokens = response.usage?.completion_tokens || 0;
+
+        // Handle tool calls in a loop (max 5 iterations)
+        let iterations = 0;
+        while (response.choices[0]?.finish_reason === "tool_calls" && iterations < 5) {
+          iterations++;
+
+          const toolCalls = response.choices[0]?.message?.tool_calls || [];
+
+          // Add assistant message with tool calls
+          openaiMessages.push({
+            role: "assistant",
+            content: response.choices[0]?.message?.content || null,
+            tool_calls: toolCalls,
+          });
+
+          // Process each tool call
+          for (const toolCall of toolCalls) {
+            if (toolCall.function.name === "execute_code") {
+              const args = JSON.parse(toolCall.function.arguments) as { language: string; code: string };
+              audit.sandbox({
+                command: `[AI:${args.language}] ${args.code.slice(0, 100)}...`,
+                exitCode: 0,
+                durationMs: 0,
+              });
+              const result = await executeCodeTool(args.language, args.code);
+              openaiMessages.push({
+                role: "tool",
+                tool_call_id: toolCall.id,
+                content: result,
+              });
+            }
+          }
+
+          response = await client.chat.completions.create({
+            model,
+            max_tokens: 4096,
+            messages: openaiMessages,
+            tools,
+          });
+
+          totalInputTokens += response.usage?.prompt_tokens || 0;
+          totalOutputTokens += response.usage?.completion_tokens || 0;
+        }
+
        const text = response.choices[0]?.message?.content || "";

        return {
          text,
-          usage: response.usage
-            ? {
-                inputTokens: response.usage.prompt_tokens,
-                outputTokens: response.usage.completion_tokens,
-              }
-            : undefined,
+          usage: {
+            inputTokens: totalInputTokens,
+            outputTokens: totalOutputTokens,
+          },
        };
      } catch (err) {
        audit.error({
@ -257,6 +478,7 @@ function createOpenRouterAgent(config: SecureConfig, audit: AuditLogger): AgentC
  });

  const model = config.ai.model || DEFAULT_OPENROUTER_MODEL;
+  let sandbox: SandboxRunner | null = null;

  type OpenAIContent = OpenAI.ChatCompletionContentPart[];

@ -277,9 +499,35 @@ function createOpenRouterAgent(config: SecureConfig, audit: AuditLogger): AgentC
    });
  }

+  async function executeCodeTool(language: string, code: string): Promise<string> { // Runs `code` in the wired sandbox and returns a plain-text report used as the "tool" message content. NOTE(review): identical copies exist in the Anthropic and OpenAI factories.
+    if (!sandbox) { // still null: setSandbox() has not been called
+      return "Error: Sandbox is not configured. Code execution is unavailable.";
+    }
+
+    const isAvailable = await sandbox.isAvailable(); // NOTE(review): awaited outside the try below, so a rejection here propagates to the caller
+    if (!isAvailable) {
+      return `Error: Sandbox unavailable. Backend: ${sandbox.backend}`;
+    }
+
+    try {
+      const result = await sandbox.runCode(language, code);
+      const output = result.stdout || result.stderr || "(no output)"; // NOTE(review): stderr is shown only when stdout is empty
+      const status = result.exitCode === 0 ? "Success" : `Failed (exit ${result.exitCode})`;
+      const timeout = result.timedOut ? " [TIMED OUT]" : "";
+
+      return `${status}${timeout}\nDuration: ${result.durationMs}ms\n\nOutput:\n${output.slice(0, 5000)}`; // output is cut to its first 5000 characters
+    } catch (err) {
+      return `Error executing code: ${err instanceof Error ? err.message : String(err)}`; // a runCode failure is reported as text, not rethrown
+    }
+  }
+
  return {
    provider: "openrouter",

+    setSandbox(sb: SandboxRunner): void { // Stores the sandbox for this agent; while it is unset, chat() passes `tools: undefined` to the API.
+      sandbox = sb;
+    },
+
    async chat(messages: Message[], systemPrompt?: string): Promise<AgentResponse> {
      try {
        const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [
@ -300,22 +548,70 @@ function createOpenRouterAgent(config: SecureConfig, audit: AuditLogger): AgentC
          }
        }

-        const response = await client.chat.completions.create({
+        // Include tools only if sandbox is available
+        const tools = sandbox ? [CODE_EXECUTION_TOOL_OPENAI] : undefined;
+
+        let response = await client.chat.completions.create({
          model,
          max_tokens: 4096,
          messages: openaiMessages,
+          tools,
        });

+        let totalInputTokens = response.usage?.prompt_tokens || 0;
+        let totalOutputTokens = response.usage?.completion_tokens || 0;
+
+        // Handle tool calls in a loop (max 5 iterations)
+        let iterations = 0;
+        while (response.choices[0]?.finish_reason === "tool_calls" && iterations < 5) {
+          iterations++;
+
+          const toolCalls = response.choices[0]?.message?.tool_calls || [];
+
+          // Add assistant message with tool calls
+          openaiMessages.push({
+            role: "assistant",
+            content: response.choices[0]?.message?.content || null,
+            tool_calls: toolCalls,
+          });
+
+          // Process each tool call
+          for (const toolCall of toolCalls) {
+            if (toolCall.function.name === "execute_code") {
+              const args = JSON.parse(toolCall.function.arguments) as { language: string; code: string };
+              audit.sandbox({
+                command: `[AI:${args.language}] ${args.code.slice(0, 100)}...`,
+                exitCode: 0,
+                durationMs: 0,
+              });
+              const result = await executeCodeTool(args.language, args.code);
+              openaiMessages.push({
+                role: "tool",
+                tool_call_id: toolCall.id,
+                content: result,
+              });
+            }
+          }
+
+          response = await client.chat.completions.create({
+            model,
+            max_tokens: 4096,
+            messages: openaiMessages,
+            tools,
+          });
+
+          totalInputTokens += response.usage?.prompt_tokens || 0;
+          totalOutputTokens += response.usage?.completion_tokens || 0;
+        }
+
        const text = response.choices[0]?.message?.content || "";

        return {
          text,
-          usage: response.usage
-            ? {
-                inputTokens: response.usage.prompt_tokens,
-                outputTokens: response.usage.completion_tokens,
-              }
-            : undefined,
+          usage: {
+            inputTokens: totalInputTokens,
+            outputTokens: totalOutputTokens,
+          },
        };
      } catch (err) {
        audit.error({
--- a/secure/index.ts
+++ b/secure/index.ts
@ -73,6 +73,9 @@ async function main() {
  const sandboxAvailable = await sandbox.isAvailable();
  console.log(`[init] Sandbox available: ${sandboxAvailable}`);

+  // Wire sandbox to agent for tool calling
+  agent.setSandbox(sandbox);
+
  // Create a placeholder bot for circular deps
  // We'll create telegram, scheduler, and webhooks together
  const { Bot } = await import("grammy");
--- a/secure/personality.ts
+++ b/secure/personality.ts
@ -112,28 +112,30 @@ ${traits.expertiseAreas.map(e => `- ${e}`).join("\n")}
 - Recent topics: ${profile.recentTopics.length > 0 ? profile.recentTopics.slice(-3).join(", ") : "None yet"}
 ${profile.notes.length > 0 ? `- Notes: ${profile.notes.slice(-3).join("; ")}` : ""}

-## Available Commands (you can tell users about these)
- /js <code> - Run JavaScript code
- /python <code> or /py <code> - Run Python code
- /ts <code> - Run TypeScript code
- /bash <code> or /sh <code> - Run shell commands
- /run <language> <code> - Run code in any supported language (python, javascript, typescript, bash, rust, go, c, cpp, java, ruby, php)
- /status - Check bot and sandbox status
- /clear - Clear conversation history
- /schedule "<cron>" "<name>" <prompt> - Schedule recurring AI tasks
- /tasks - List scheduled tasks
- /deltask <id> - Delete a task
+## Code Execution
+You have the execute_code tool available. When users ask you to run, test, or execute code, USE THE TOOL directly - don't ask them to use commands.
+- If a user says "run this python code", use execute_code with language="python"
+- If a user shares code and asks you to test it, execute it directly
+- If a user asks what code outputs, run it and show the result

-When a user asks to run code, you can either:
-1. Tell them to use the appropriate command (e.g., "Use /js console.log('hello')")
-2. Just answer their question directly if they don't need to execute code
+Examples of when to use execute_code:
+- "Can you run this for me?" → Use execute_code
+- "Test this python code" → Use execute_code
+- "Execute this script" → Use execute_code
+- "What does this code output?" → Use execute_code
+
+## Manual Commands (for users who prefer slash commands)
+- /js <code>, /python <code>, /ts <code>, /bash <code>, /run <lang> <code>
+- /status - Check bot status
+- /clear - Clear conversation
+- /schedule, /tasks, /deltask - Task scheduling

 ## Guidelines
 - Be helpful, accurate, and security-conscious
 - Never reveal API keys, tokens, or secrets
 - Adapt to the user's communication style
 - Remember context from this conversation
- When users want to run code, guide them to use the right command
+- When users want code executed, use the execute_code tool directly
 ${traits.commonPhrases.length > 0 ? `- Phrases you like: ${traits.commonPhrases.join(", ")}` : ""}
 ${traits.avoidPhrases.length > 0 ? `- Avoid saying: ${traits.avoidPhrases.join(", ")}` : ""}`;

--- a/secure/sandbox.ts
+++ b/secure/sandbox.ts
@ -9,6 +9,20 @@
 */

 import { spawn } from "node:child_process";
+
+/**
+ * Return `code` with typographic ("smart") double and single quotes replaced by their ASCII equivalents; the replacement is global, so quotes inside string literals are rewritten too.
+ * Telegram and mobile keyboards often auto-convert quotes, which breaks code.
+ */
+export function normalizeQuotes(code: string): string {
+  return code
+    // Double quotes: U+201C, U+201D, U+201E, U+201F → "
+    .replace(/[\u201C\u201D\u201E\u201F]/g, '"')
+    // Single quotes: U+2018, U+2019, U+201A, U+201B → '
+    .replace(/[\u2018\u2019\u201A\u201B]/g, "'")
+    // NOTE(review): effectively a no-op — U+0060 already is a backtick, and every U+2018 was replaced by the previous step
+    .replace(/[\u0060\u2018]/g, "`");
+}
 import type { SecureConfig } from "./config.js";
 import type { AuditLogger } from "./audit.js";

@ -90,6 +104,9 @@ async function runPiston(
 ): Promise<SandboxResult> {
  const startTime = Date.now();

+  // Normalize smart quotes from mobile keyboards
+  const normalizedCode = normalizeQuotes(code);
+
  const langConfig = PISTON_LANGUAGES[language.toLowerCase()];
  if (!langConfig) {
    return {
@ -110,7 +127,7 @@ async function runPiston(
      body: JSON.stringify({
        language: langConfig.language,
        version: langConfig.version,
-        files: [{ content: code }],
+        files: [{ content: normalizedCode }],
      }),
      signal: AbortSignal.timeout(timeoutMs),
    });
@ -219,8 +236,11 @@ async function runDocker(
 ): Promise<SandboxResult> {
  const startTime = Date.now();

+  // Normalize smart quotes from mobile keyboards
+  const normalizedCommand = normalizeQuotes(command);
+
  return new Promise((resolve) => {
-    const args = buildDockerArgs(config, command);
+    const args = buildDockerArgs(config, normalizedCommand);

    const proc = spawn("docker", args, {
      stdio: ["pipe", "pipe", "pipe"],