diff --git a/src/agents/pi-embedded-runner/capability-validator.ts b/src/agents/pi-embedded-runner/capability-validator.ts
new file mode 100644
index 000000000..16f7baf8d
--- /dev/null
+++ b/src/agents/pi-embedded-runner/capability-validator.ts
@@ -0,0 +1,220 @@
+/**
+ * Capability Validator
+ *
+ * Validates that a model's promised actions are actually achievable
+ * with the available tools and their constraints.
+ *
+ * Runs BEFORE response is sent to user to catch impossible promises early.
+ */
+
+import { TOOL_CAPABILITIES, canPromise, getNegativePromises } from "../tool-capabilities.js";
+
+export interface CapabilityValidationIssue {
+  type: "impossible_promise" | "missing_tool" | "wrong_scope" | "requires_setup";
+  toolName?: string;
+  promiseType?: string;
+  message: string;
+  suggestedFix: string;
+}
+
+export interface CapabilityValidationResult {
+  /** True when no hard issue was found; advisory warnings never affect it. */
+  isValid: boolean;
+  issues: CapabilityValidationIssue[];
+  /** Low-confidence findings for manual review; omitted when there are none. */
+  warnings?: string[];
+}
+
+/**
+ * Phrases that signal a promise the named tool cannot keep.
+ *
+ * Every entry is tested on its own, so one response can raise several issues.
+ */
+const IMPOSSIBLE_PROMISE_PATTERNS = [
+  {
+    pattern: /message.*when.*(?:online|back|startup|restart|reboot)/i,
+    toolName: "message",
+    promiseType: "message_when_system_online",
+    message: "Cannot promise to message when system comes online (message tool is reactive-only)",
+    fix: 'Use cron job instead: "I can set up a cron job to check and notify you when ready"',
+  },
+  {
+    pattern: /message.*(?:event|happen|trigger|fire|detect)/i,
+    toolName: "message",
+    promiseType: "message_on_event",
+    message: "Cannot promise to message on external events (message tool cannot monitor events)",
+    fix: 'Consider alternative: "I can check periodically if you set up a cron job" or "I can respond immediately when you ask"',
+  },
+  {
+    pattern: /monitor.*background/i,
+    toolName: "exec",
+    promiseType: "background_monitoring",
+    message:
+      "Cannot promise background monitoring with exec (processes terminate when session ends)",
+    fix: 'Use cron instead: "I can set up a cron job to check periodically"',
+  },
+  {
+    pattern: /(?:run|spawn|start).*(?:background|daemon|service|process)/i,
+    toolName: "exec",
+    promiseType: "start_daemon",
+    message: "Cannot spawn persistent background processes (exec runs only during this session)",
+    fix: "Either: (1) Run it now and show output, or (2) Create a cron job for periodic execution",
+  },
+  {
+    pattern: /watch.*(?:file|directory|folder|path)/i,
+    toolName: "exec",
+    promiseType: "watch_file",
+    message: "Cannot watch files continuously (exec cannot hook into system events)",
+    fix: 'Alternative: "I can check the file now" or "Set up a cron job to check periodically"',
+  },
+  {
+    pattern: /(?:monitor|track|watch).*(?:website|page|url)/i,
+    toolName: "browser",
+    promiseType: "continuous_monitoring",
+    message: "Cannot monitor websites continuously (browser cannot auto-refresh)",
+    fix: "Alternatives: (1) Check now and show state, or (2) Create cron job to check periodically",
+  },
+  {
+    pattern: /the moment(?:\s+you|\s+it|\s+when)/i,
+    toolName: "message",
+    promiseType: "message_when_system_online",
+    message: "Cannot promise immediate notification on external event (requires active monitoring)",
+    fix: 'Be honest: "I cannot monitor. But I CAN: [alternatives]"',
+  },
+];
+
+/** Acknowledgement phrases that usually precede a commitment. */
+const ACKNOWLEDGEMENT_PATTERN = /Got it|Understood|Will do|I will|I'll|Can do|No problem/i;
+
+/** An action verb followed, later on the same line, by a condition word. */
+const CONDITIONAL_ACTION_PATTERN =
+  /(?:message|monitor|watch|check|notify|alert).*(?:when|if|once|after)/i;
+
+/** Vocabulary suggesting work that continues after the current turn. */
+const BACKGROUND_LANGUAGE_PATTERN = /(?:background|daemon|continuously|periodically|ongoing)/i;
+
+/**
+ * Scan a model response for promises the available tools cannot keep.
+ *
+ * @param modelResponse - Full assistant text to inspect.
+ * @param availableTools - Names of the tools usable in this session.
+ * @returns Hard `issues` (any of which makes `isValid` false) plus optional advisory
+ *   `warnings`, which never change `isValid`.
+ */
+export function validateResponseCapabilities(
+  modelResponse: string,
+  availableTools: Set<string>,
+): CapabilityValidationResult {
+  const issues: CapabilityValidationIssue[] = [];
+
+  for (const { pattern, toolName, promiseType, message, fix } of IMPOSSIBLE_PROMISE_PATTERNS) {
+    if (!pattern.test(modelResponse)) continue;
+
+    if (!availableTools.has(toolName)) {
+      issues.push({
+        type: "missing_tool",
+        toolName,
+        promiseType,
+        message: `${message} (${toolName} tool not available)`,
+        suggestedFix: fix,
+      });
+    } else if (!canPromise(toolName, promiseType)) {
+      issues.push({
+        type: "impossible_promise",
+        toolName,
+        promiseType,
+        message,
+        suggestedFix: fix,
+      });
+    }
+  }
+
+  // Low-confidence heuristic: an acknowledgement combined with conditional or background
+  // wording is reported as a warning only, so callers can log it without rejecting the response.
+  const warnings: string[] = [];
+  if (
+    ACKNOWLEDGEMENT_PATTERN.test(modelResponse) &&
+    (CONDITIONAL_ACTION_PATTERN.test(modelResponse) ||
+      BACKGROUND_LANGUAGE_PATTERN.test(modelResponse))
+  ) {
+    warnings.push(
+      "Response pairs an acknowledgement with conditional or background language; review manually",
+    );
+  }
+
+  return {
+    isValid: issues.length === 0,
+    issues,
+    ...(warnings.length > 0 ? { warnings } : {}),
+  };
+}
+
+/**
+ * Format validation issues for the model to read and self-correct
+ */
+export function formatValidationIssuesForModel(issues: CapabilityValidationIssue[]): string {
+  if (issues.length === 0) return "";
+
+  const lines = [
+    "⚠️ **Capability Validation Issues Detected:**",
+    "",
+    "Your response contains promises that cannot be fulfilled with available tools:",
+    "",
+  ];
+
+  for (const issue of issues) {
+    lines.push(`- **[${issue.type}]** ${issue.message}`);
+    lines.push(` Fix: ${issue.suggestedFix}`);
+    lines.push("");
+  }
+
+  lines.push(
+    "Please revise your response to remove impossible promises and suggest realistic alternatives.",
+  );
+
+  return lines.join("\n");
+}
+
+/**
+ * Check if response mentions tool by name
+ */
+function mentionsTool(response: string, toolName: string): boolean {
+  // Escape regex metacharacters so the tool name is always matched literally.
+  const escapedName = toolName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  return new RegExp(`\\b${escapedName}\\b`, "i").test(response);
+}
+
+/**
+ * Check if response contains event-driven language
+ */
+function mentionsEventDriven(response: string): boolean {
+  return /(?:when|if|once|as soon as|the moment|upon|trigger|event|happen|fire|detect)/i.test(
+    response,
+  );
+}
+
+/**
+ * Report a validation result through the supplied logger.
+ *
+ * Hard issues are summarised in one `warn` call and advisory warnings in one `debug` call.
+ * Nothing is logged when the matching callback is absent or there is nothing to report.
+ */
+export function logCapabilityValidation(
+  validationResult: CapabilityValidationResult,
+  agentLogger?: {
+    warn?: (msg: string) => void;
+    debug?: (msg: string) => void;
+  },
+): void {
+  if (agentLogger?.warn && validationResult.issues.length > 0) {
+    const summary = validationResult.issues
+      .map((i) => `${i.toolName}/${i.promiseType}: ${i.message}`)
+      .join("; ");
+    agentLogger.warn(`Capability validation: ${summary}`);
+  }
+
+  const advisories = validationResult.warnings ?? [];
+  if (agentLogger?.debug && advisories.length > 0) {
+    agentLogger.debug(`Capability validation (advisory): ${advisories.join("; ")}`);
+  }
+}
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index 46a53bd8f..737599769 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -844,6 +844,33 @@ export async function runEmbeddedAttempt(
         .reverse()
         .find((m) => (m as AgentMessage)?.role === "assistant") as AssistantMessage | undefined;
 
+      // === HONESTY VALIDATION HOOK ===
+      // Validates that the assistant response doesn't contain impossible promises
+      // before returning to the user. Logs warnings for issues detected.
+      if (lastAssistant && assistantTexts.length > 0) {
+        try {
+          const { validateResponseCapabilities, logCapabilityValidation } =
+            await import("../capability-validator.js");
+          const fullResponseText = assistantTexts.join("\n");
+          const availableToolNames = new Set(tools.map((t) => t.name.toLowerCase()));
+          const validationResult = validateResponseCapabilities(
+            fullResponseText,
+            availableToolNames,
+          );
+
+          // logCapabilityValidation skips empty results, so no isValid pre-check is needed.
+          const runContext = `runId=${params.runId} sessionId=${params.sessionId}`;
+          logCapabilityValidation(validationResult, {
+            warn: (msg: string) => log.warn(`[HONESTY] ${msg} | ${runContext}`),
+            debug: (msg: string) => log.debug(`[HONESTY] ${msg} | ${runContext}`),
+          });
+        } catch (validationErr) {
+          // Non-fatal: validation is advisory, don't block the response
+          log.debug(`capability validation skipped: ${String(validationErr)}`);
+        }
+      }
+      // === END HONESTY VALIDATION HOOK ===
+
       const toolMetasNormalized = toolMetas
         .filter(
           (entry): entry is { toolName: string; meta?: string } =>
diff --git a/src/agents/pi-embedded-runner/tool-execution-guard.ts b/src/agents/pi-embedded-runner/tool-execution-guard.ts
new file mode 100644
index 000000000..1527c8e14
--- /dev/null
+++ b/src/agents/pi-embedded-runner/tool-execution-guard.ts
@@ -0,0 +1,185 @@
+/**
+ * Tool Execution Guard
+ *
+ * Validates that tool invocations match their promised capabilities.
+ * Prevents misuse of tools for things they cannot do.
+ *
+ * Runs BEFORE tool is executed.
+ */
+
+import { TOOL_CAPABILITIES } from "../tool-capabilities.js";
+
+export interface ToolExecutionGuardResult {
+  allowed: boolean;
+  reason?: string;
+  warning?: string;
+}
+
+/**
+ * Matches a shell control `&` that sends a job to the background.
+ *
+ * `&&`, `|&` and redirections such as `2>&1`, `&>file` or `<&3` also contain `&` but keep
+ * the command in the foreground, so they are excluded.
+ */
+const BACKGROUND_AMPERSAND = /(?<![&<>|])&(?![&>])/;
+
+/**
+ * Guard configuration - what to prevent
+ */
+const EXECUTION_GUARDS = {
+  message: {
+    // Detect: "message tool to monitor for event X"
+    preventPatterns: [
+      {
+        check: (args: Record<string, unknown>) => {
+          const message = String(args.message || "").toLowerCase();
+          return (
+            message.includes("notify you when") ||
+            message.includes("message you when") ||
+            message.includes("alert you when")
+          );
+        },
+        reason:
+          "Cannot use message tool to send conditional notifications (requires cron job setup first)",
+      },
+    ],
+  },
+
+  exec: {
+    preventPatterns: [
+      {
+        check: (args: Record<string, unknown>) => {
+          const command = String(args.command || "").toLowerCase();
+          return (
+            command.includes("daemon") ||
+            BACKGROUND_AMPERSAND.test(command) ||
+            command.includes("nohup") ||
+            command.includes("background")
+          );
+        },
+        reason:
+          "Backgrounding processes is not reliable in exec tool (they terminate with session)",
+      },
+    ],
+  },
+
+  browser: {
+    preventPatterns: [
+      {
+        check: (args: Record<string, unknown>) => {
+          const interval = Number(args.interval) || Number(args.pollingInterval) || 0;
+          return interval > 0;
+        },
+        reason:
+          "Browser tool cannot handle polling/continuous monitoring (would require external scheduler)",
+      },
+    ],
+  },
+};
+
+/**
+ * Decide whether a tool call matches a known misuse pattern.
+ *
+ * @param params.toolName - Tool being invoked; matched case-insensitively.
+ * @param params.toolArgs - Arguments passed to the tool.
+ * @param params.agentPromise - Not consulted by the guard at present.
+ * @returns `allowed: false` with the `reason` of the first matching pattern, otherwise
+ *   `allowed: true` (also for tools that have no guard configured).
+ */
+export function guardToolExecution(params: {
+  toolName: string;
+  toolArgs: Record<string, unknown>;
+  agentPromise?: string; // What did the agent claim it would do?
+}): ToolExecutionGuardResult {
+  const toolName = params.toolName.toLowerCase();
+  // Own-property check: a name like "constructor" must not resolve to an inherited member.
+  if (!Object.prototype.hasOwnProperty.call(EXECUTION_GUARDS, toolName)) {
+    return { allowed: true };
+  }
+  const guards = EXECUTION_GUARDS[toolName as keyof typeof EXECUTION_GUARDS];
+  // Tools invoked without arguments are checked against an empty argument object.
+  const toolArgs = params.toolArgs ?? {};
+
+  // Check prevention patterns
+  for (const pattern of guards.preventPatterns) {
+    if (pattern.check(toolArgs)) {
+      return {
+        allowed: false,
+        reason: pattern.reason,
+      };
+    }
+  }
+
+  return { allowed: true };
+}
+
+/**
+ * Validate tool result matches what was promised
+ */
+export function validateToolResult(params: {
+  toolName: string;
+  toolArgs: Record<string, unknown>;
+  result: unknown;
+  agentPromise?: string;
+}): { valid: boolean; warning?: string } {
+  // Example: if agent said "I'll monitor for changes" but exec only ran once
+  // then the promise was not fulfilled
+
+  if (params.toolName.toLowerCase() === "exec") {
+    const command = String(params.toolArgs.command || "").toLowerCase();
+    const promiseText = (params.agentPromise || "").toLowerCase();
+
+    // If agent promised monitoring but only ran command once
+    if (
+      (promiseText.includes("monitor") || promiseText.includes("watch")) &&
+      !command.includes("watch") &&
+      !command.includes("tail") &&
+      !command.includes("loop")
+    ) {
+      return {
+        valid: false,
+        warning:
+          "Agent promised monitoring but exec only ran once. Promise not fulfilled. Consider cron for periodic checks.",
+      };
+    }
+  }
+
+  return { valid: true };
+}
+
+/**
+ * Format guard rejection for user
+ */
+export function formatGuardRejection(result: ToolExecutionGuardResult): string {
+  return `⚠️ Tool execution blocked: ${result.reason || "Guard prevented execution"}`;
+}
+
+/**
+ * Check if tool scope matches usage context
+ */
+export function checkToolScope(params: {
+  toolName: string;
+  context: "reactive" | "synchronous" | "scheduled";
+}): { matches: boolean; warning?: string } {
+  const toolKey = params.toolName.toLowerCase();
+  // Own-property check so inherited names (e.g. "constructor") count as unknown tools.
+  if (!Object.prototype.hasOwnProperty.call(TOOL_CAPABILITIES, toolKey)) {
+    return { matches: true };
+  }
+  const tool = TOOL_CAPABILITIES[toolKey];
+
+  const scopeOk = {
+    reactive: tool.scope === "reactive",
+    synchronous: tool.scope === "synchronous",
+    scheduled: tool.scope === "persistent",
+  };
+
+  if (!scopeOk[params.context]) {
+    return {
+      matches: false,
+      warning: `Tool ${params.toolName} has scope "${tool.scope}" but is being used in "${params.context}" context`,
+    };
+  }
+
+  return { matches: true };
+}
diff --git a/src/agents/pi-tools.ts b/src/agents/pi-tools.ts
index d763393a4..5e913754c 100644
--- a/src/agents/pi-tools.ts
+++ b/src/agents/pi-tools.ts
@@ -413,8 +413,43 @@ export function createMoltbotCodingTools(options?: {
     ? normalized.map((tool) => wrapToolWithAbortSignal(tool, options.abortSignal))
     : normalized;
 
+  // === HONESTY EXECUTION GUARD ===
+  // Wraps tool execution to flag misuse (e.g., trying to use exec for background monitoring).
+  // Non-fatal: logs warnings but does not block execution (advisory mode).
+  const withHonestyGuard = withAbort.map((tool) => {
+    const originalExecute = tool.execute;
+    if (!originalExecute) return tool;
+
+    // Wrap execute with guard check (preserving original signature)
+    const wrappedExecute: typeof originalExecute = async (toolCallId, args, signal, onUpdate) => {
+      try {
+        // Dynamic import to avoid bundling issues
+        const { guardToolExecution } = await import("./pi-embedded-runner/tool-execution-guard.js");
+        const guardResult = guardToolExecution({
+          toolName: tool.name,
+          toolArgs: args as Record<string, unknown>,
+        });
+
+        if (!guardResult.allowed) {
+          // Advisory mode: log warning but don't block
+          logWarn(`[HONESTY GUARD] Tool ${tool.name} execution flagged: ${guardResult.reason}`);
+        }
+      } catch (guardErr) {
+        logWarn(`[HONESTY GUARD] Guard check failed for ${tool.name}: ${String(guardErr)}`);
+      }
+
+      return originalExecute.call(tool, toolCallId, args, signal, onUpdate);
+    };
+
+    return {
+      ...tool,
+      execute: wrappedExecute,
+    } as typeof tool;
+  });
+  // === END HONESTY EXECUTION GUARD ===
+
   // NOTE: Keep canonical (lowercase) tool names here.
   // pi-ai's Anthropic OAuth transport remaps tool names to Claude Code-style names
   // on the wire and maps them back for tool dispatch.
-  return withAbort;
+  return withHonestyGuard;
 }
diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts
index ed97fd539..2107825c2 100644
--- a/src/agents/system-prompt.ts
+++ b/src/agents/system-prompt.ts
@@ -110,6 +110,64 @@ function buildVoiceSection(params: { isMinimal: boolean; ttsHint?: string }) {
   return ["## Voice (TTS)", hint, ""];
 }
 
+/**
+ * Build the "Session-Specific Constraints" prompt section.
+ *
+ * Returns no lines in minimal mode; otherwise a heading plus one bullet per capability
+ * area, worded according to whether the cron, message and exec tools are available.
+ */
+function buildSessionConstraintsSection(params: {
+  isMinimal: boolean;
+  availableTools: Set<string>;
+  hasMessageTool: boolean;
+  hasExecTool: boolean;
+  hasCronTool: boolean;
+}): string[] {
+  if (params.isMinimal) return [];
+
+  const constraints: string[] = [];
+
+  if (!params.hasCronTool) {
+    constraints.push(
+      "- ❌ **Background monitoring:** No cron tool available. Cannot schedule periodic checks without explicit cron job setup first.",
+    );
+  } else {
+    constraints.push(
+      "- ⚠️ **Background monitoring:** Available only via cron jobs. Must create job explicitly; no auto-triggers.",
+    );
+  }
+
+  if (!params.hasMessageTool) {
+    constraints.push("- ❌ **Messaging:** No message tool available.");
+  } else {
+    constraints.push(
+      "- ⚠️ **Proactive messaging:** Cannot do event-triggered sends. Message tool responds to incoming messages; cannot initiate on external events.",
+    );
+  }
+
+  if (!params.hasExecTool) {
+    constraints.push("- ❌ **Shell commands:** No exec tool available.");
+  } else {
+    constraints.push(
+      "- ⚠️ **Process execution:** Commands run synchronously. Cannot spawn persistent daemons or background services.",
+    );
+  }
+
+  constraints.push(
+    "- ⚠️ **Cross-session state:** Cannot directly affect other sessions' state. Use sessions_send for coordination.",
+  );
+  constraints.push(
+    "- ⚠️ **Proactive actions:** Nothing happens when you're not running. Only reactive to user input or scheduled cron jobs.",
+  );
+
+  return [
+    "## Session-Specific Constraints",
+    "This session has these limitations (what you CANNOT promise):",
+    ...constraints,
+    "",
+  ];
+}
+
 function buildDocsSection(params: { docsPath?: string; isMinimal: boolean; readToolName: string }) {
   const docsPath = params.docsPath?.trim();
   if (!docsPath || params.isMinimal) return [];
@@ -513,6 +571,44 @@ export function buildAgentSystemPrompt(params: {
     }
   }
 
+  // Session constraints section (applies to full mode only)
+  lines.push(
+    ...buildSessionConstraintsSection({
+      isMinimal,
+      availableTools,
+      hasMessageTool: availableTools.has("message"),
+      hasExecTool: availableTools.has("exec"),
+      hasCronTool: availableTools.has("cron"),
+    }),
+  );
+
+  // Honesty Protocol section (applies to all modes)
+  lines.push(
+    "## Honesty Protocol",
+    "",
+    "**Critical: Do not say 'Got it!' or 'I will do X' unless you can verify:**",
+    "1. You have the tool to do it (listed in Tooling above)",
+    "2. The tool can actually achieve what the user asked",
+    "3. You understand the tool's constraints and scope",
+    "",
+    "**Tool Scope Reference:**",
+    "- **message tool**: Reactive only. Responds when users contact you. CANNOT monitor external events or promise 'I'll message when X happens' without explicit cron job setup.",
+    "- **exec tool**: Runs during this session only. CANNOT spawn persistent background processes or daemons.",
+    "- **cron tool**: Requires explicit job creation. Does NOT auto-trigger on events; only on schedule.",
+    "- **browser tool**: Requires active session. CANNOT monitor websites continuously.",
+    "",
+    "**When you cannot do something, say so immediately.**",
+    "Examples:",
+    "- User: 'Message me the moment you come back online'",
+    " Your response: 'I can't monitor for that (message tool is reactive-only), but I CAN: [alternatives]'",
+    "",
+    "- User: 'Monitor this file in the background'",
+    " Your response: 'I can't monitor continuously, but I CAN: [alternatives]'",
+    "",
+    "**The Golden Rule:** Saying 'Got it!' when you can't is a broken promise. Honesty > appearing helpful.",
+    "",
+  );
+
   // Skip silent replies for subagent/none modes
   if (!isMinimal) {
     lines.push(
diff --git a/src/agents/tool-capabilities.ts b/src/agents/tool-capabilities.ts
new file mode 100644
index 000000000..2950bd099
--- /dev/null
+++ b/src/agents/tool-capabilities.ts
@@ -0,0 +1,252 @@
+/**
+ * Tool Capability Descriptors
+ *
+ * Defines the scope, constraints, and promise boundaries for each tool.
+ * Used to prevent the agent from promising things that tools cannot do.
+ */
+
+export interface ToolCapabilityConstraint {
+  description: string;
+  category: "scope" | "timing" | "state" | "event" | "persistence";
+}
+
+export interface ToolCapability {
+  name: string;
+  summary: string;
+  scope: "reactive" | "synchronous" | "persistent" | "query";
+  constraints: ToolCapabilityConstraint[];
+  canPromise: Record<string, boolean>;
+}
+
+export const TOOL_CAPABILITIES: Record<string, ToolCapability> = {
+  message: {
+    name: "message",
+    summary: "Send messages and reactions",
+    scope: "reactive",
+    constraints: [
+      {
+        description: "Only sends when agent is actively running",
+        category: "timing",
+      },
+      {
+        description: "Cannot monitor for external events or state changes",
+        category: "event",
+      },
+      {
+        description: "Cannot be used for proactive notifications without explicit cron job setup",
+        category: "event",
+      },
+      {
+        description: "Requires user or system to trigger delivery",
+        category: "scope",
+      },
+    ],
+    canPromise: {
+      message_in_reply: true,
+      message_when_triggered: true,
+      message_on_event: false,
+      message_periodically: false,
+      message_when_system_online: false,
+      message_when_file_changes: false,
+      proactive_notification: false,
+    },
+  },
+
+  exec: {
+    name: "exec",
+    summary: "Run shell commands",
+    scope: "synchronous",
+    constraints: [
+      {
+        description: "Runs synchronously during this session only",
+        category: "timing",
+      },
+      {
+        description: "Cannot spawn persistent background processes or daemons",
+        category: "persistence",
+      },
+      {
+        description: "Terminates when agent session ends",
+        category: "persistence",
+      },
+      {
+        description: "Cannot hook into system events or watchdirs",
+        category: "event",
+      },
+    ],
+    canPromise: {
+      run_command: true,
+      run_and_wait: true,
+      spawn_background_process: false,
+      start_daemon: false,
+      monitor_directory: false,
+      watch_file: false,
+      background_monitoring: false,
+    },
+  },
+
+  cron: {
+    name: "cron",
+    summary: "Manage scheduled jobs and wake events",
+    scope: "persistent",
+    constraints: [
+      {
+        description: "Requires explicit job creation; does not auto-create",
+        category: "scope",
+      },
+      {
+        description: "Only triggers on predefined schedules, not on arbitrary events",
+        category: "event",
+      },
+      {
+        description: "Cannot listen for system events (file changes, network events, etc.)",
+        category: "event",
+      },
+      {
+        description: "Job must be explicitly created before first execution",
+        category: "state",
+      },
+    ],
+    canPromise: {
+      schedule_periodic_job: true,
+      create_cron_job: true,
+      run_at_time: true,
+      monitor_file: false,
+      listen_for_event: false,
+      auto_trigger: false,
+      periodic_checks: true,
+    },
+  },
+
+  browser: {
+    name: "browser",
+    summary: "Control web browser",
+    scope: "synchronous",
+    constraints: [
+      {
+        description: "Runs during active session only",
+        category: "timing",
+      },
+      {
+        description: "Cannot monitor websites continuously",
+        category: "persistence",
+      },
+      {
+        description: "Cannot auto-refresh or auto-check",
+        category: "event",
+      },
+      {
+        description: "Requires explicit user action to check current state",
+        category: "scope",
+      },
+    ],
+    canPromise: {
+      navigate_and_check: true,
+      take_screenshot: true,
+      submit_form: true,
+      continuous_monitoring: false,
+      auto_refresh: false,
+      watch_for_changes: false,
+      poll_website: false,
+    },
+  },
+
+  sessions_send: {
+    name: "sessions_send",
+    summary: "Send message to another session",
+    scope: "reactive",
+    constraints: [
+      {
+        description: "Other session must be actively running to receive message",
+        category: "timing",
+      },
+      {
+        description: "Cannot guarantee delivery if target session ends",
+        category: "state",
+      },
+      {
+        description: "Requires target session key or label",
+        category: "scope",
+      },
+    ],
+    canPromise: {
+      send_to_active_session: true,
+      coordinate_across_sessions: true,
+      send_to_offline_session: false,
+      guarantee_delivery: false,
+    },
+  },
+
+  sessions_spawn: {
+    name: "sessions_spawn",
+    summary: "Spawn sub-agent session",
+    scope: "persistent",
+    constraints: [
+      {
+        description: "Creates isolated session; cannot directly share state",
+        category: "state",
+      },
+      {
+        description: "Sub-agent runs independently; parent cannot forcibly stop it",
+        category: "scope",
+      },
+      {
+        description: "Results delivered asynchronously",
+        category: "timing",
+      },
+    ],
+    canPromise: {
+      spawn_subagent: true,
+      delegate_task: true,
+      parallel_work: true,
+      real_time_control: false,
+      immediate_results: false,
+    },
+  },
+};
+
+/**
+ * Look up a tool descriptor by name, case-insensitively.
+ *
+ * Uses an own-property check so inherited keys such as "constructor" or "toString"
+ * are treated as unknown tools instead of resolving to Object.prototype members.
+ */
+function findToolCapability(toolName: string): ToolCapability | undefined {
+  const key = toolName.toLowerCase();
+  return Object.prototype.hasOwnProperty.call(TOOL_CAPABILITIES, key)
+    ? TOOL_CAPABILITIES[key]
+    : undefined;
+}
+
+/**
+ * Check if a tool can fulfill a specific promise
+ *
+ * Unknown tools and unknown promise types are treated as "cannot promise".
+ */
+export function canPromise(toolName: string, promiseType: string): boolean {
+  const tool = findToolCapability(toolName);
+  if (!tool) return false;
+  // Own-property + strict comparison keeps inherited keys (e.g. "toString") from reading as true.
+  return (
+    Object.prototype.hasOwnProperty.call(tool.canPromise, promiseType) &&
+    tool.canPromise[promiseType] === true
+  );
+}
+
+/**
+ * Get all constraints for a tool
+ */
+export function getToolConstraints(toolName: string): ToolCapabilityConstraint[] {
+  return findToolCapability(toolName)?.constraints ?? [];
+}
+
+/**
+ * Get summary of what tool CANNOT do
+ */
+export function getNegativePromises(toolName: string): string[] {
+  const tool = findToolCapability(toolName);
+  if (!tool) return [];
+  return Object.entries(tool.canPromise)
+    .filter(([, allowed]) => !allowed)
+    .map(([promise]) => promise);
+}