This commit is contained in:
Steven Eijzermans 2026-01-30 15:44:30 +00:00 committed by GitHub
commit f323433ecc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 748 additions and 1 deletions

View File

@ -0,0 +1,194 @@
/**
* Capability Validator
*
* Validates that a model's promised actions are actually achievable
* with the available tools and their constraints.
*
* Runs BEFORE response is sent to user to catch impossible promises early.
*/
import { TOOL_CAPABILITIES, canPromise, getNegativePromises } from "../tool-capabilities.js";
/**
 * One problem found in a model response by the capability validator.
 */
export interface CapabilityValidationIssue {
/** Category of the problem. */
type: "impossible_promise" | "missing_tool" | "wrong_scope" | "requires_setup";
/** Name of the tool the flagged promise relies on, when known. */
toolName?: string;
/** Promise key associated with the flagged wording, when known. */
promiseType?: string;
/** Human-readable explanation of why the promise cannot be kept. */
message: string;
/** Suggested rewording or alternative the model can offer instead. */
suggestedFix: string;
}
/**
 * Outcome of validating a single model response.
 */
export interface CapabilityValidationResult {
/** Whether the response passed validation. */
isValid: boolean;
/** The problems that were detected. */
issues: CapabilityValidationIssue[];
}
/**
 * Wording that signals a promise the named tool cannot keep.
 *
 * Each entry pairs a case-insensitive regex with the tool it concerns, the
 * promise key that describes the claim, an explanation (`message`) and a
 * suggested rewording (`fix`). The regexes carry no `g` flag, so `.test()` is
 * stateless and the entries can be reused across calls.
 */
const IMPOSSIBLE_PROMISE_PATTERNS = [
  {
    pattern: /message.*when.*(?:online|back|startup|restart|reboot)/i,
    toolName: "message",
    promiseType: "message_when_system_online",
    message: "Cannot promise to message when system comes online (message tool is reactive-only)",
    fix: 'Use cron job instead: "I can set up a cron job to check and notify you when ready"',
  },
  {
    pattern: /message.*(?:event|happen|trigger|fire|detect)/i,
    toolName: "message",
    promiseType: "message_on_event",
    message: "Cannot promise to message on external events (message tool cannot monitor events)",
    fix: 'Consider alternative: "I can check periodically if you set up a cron job" or "I can respond immediately when you ask"',
  },
  {
    pattern: /monitor.*background/i,
    toolName: "exec",
    promiseType: "background_monitoring",
    message:
      "Cannot promise background monitoring with exec (processes terminate when session ends)",
    fix: 'Use cron instead: "I can set up a cron job to check periodically"',
  },
  {
    pattern: /(?:run|spawn|start).*(?:background|daemon|service|process)/i,
    toolName: "exec",
    promiseType: "start_daemon",
    message: "Cannot spawn persistent background processes (exec runs only during this session)",
    fix: "Either: (1) Run it now and show output, or (2) Create a cron job for periodic execution",
  },
  {
    pattern: /watch.*(?:file|directory|folder|path)/i,
    toolName: "exec",
    promiseType: "watch_file",
    message: "Cannot watch files continuously (exec cannot hook into system events)",
    fix: 'Alternative: "I can check the file now" or "Set up a cron job to check periodically"',
  },
  {
    pattern: /(?:monitor|track|watch).*(?:website|page|url)/i,
    toolName: "browser",
    promiseType: "continuous_monitoring",
    message: "Cannot monitor websites continuously (browser cannot auto-refresh)",
    fix: "Alternatives: (1) Check now and show state, or (2) Create cron job to check periodically",
  },
  {
    pattern: /the moment(?:\s+you|\s+it|\s+when)/i,
    toolName: "message",
    promiseType: "message_when_system_online",
    message: "Cannot promise immediate notification on external event (requires active monitoring)",
    fix: 'Be honest: "I cannot monitor. But I CAN: [alternatives]"',
  },
];
/**
 * Scan a model response for promises that the available tools cannot keep.
 *
 * Every entry of IMPOSSIBLE_PROMISE_PATTERNS is tested against the response.
 * For each match:
 *  - if the tool is not in `availableTools`, a "missing_tool" issue is recorded;
 *  - otherwise, if `canPromise` rejects the promise type, an
 *    "impossible_promise" issue is recorded.
 *
 * Only these explicit patterns produce issues; no looser heuristic is applied.
 *
 * @param modelResponse Full text of the assistant response.
 * @param availableTools Names of the tools available to the agent; looked up
 *   with the pattern's `toolName` exactly as written there.
 * @returns `isValid` is true when no issues were recorded.
 */
export function validateResponseCapabilities(
  modelResponse: string,
  availableTools: Set<string>,
): CapabilityValidationResult {
  const issues: CapabilityValidationIssue[] = [];

  for (const { pattern, toolName, promiseType, message, fix } of IMPOSSIBLE_PROMISE_PATTERNS) {
    if (!pattern.test(modelResponse)) {
      continue;
    }

    if (!availableTools.has(toolName)) {
      issues.push({
        type: "missing_tool",
        toolName,
        promiseType,
        message: `${message} (${toolName} tool not available)`,
        suggestedFix: fix,
      });
    } else if (!canPromise(toolName, promiseType)) {
      issues.push({
        type: "impossible_promise",
        toolName,
        promiseType,
        message,
        suggestedFix: fix,
      });
    }
  }

  return {
    isValid: issues.length === 0,
    issues,
  };
}
/**
 * Render validation issues as a markdown note the model can use to revise
 * its answer.
 *
 * @param issues Issues to render.
 * @returns An empty string when `issues` is empty; otherwise a header, one
 *   bullet plus "Fix:" line per issue, and a closing instruction, joined
 *   with newlines.
 */
export function formatValidationIssuesForModel(issues: CapabilityValidationIssue[]): string {
  if (issues.length === 0) {
    return "";
  }

  const header = [
    "⚠️ **Capability Validation Issues Detected:**",
    "",
    "Your response contains promises that cannot be fulfilled with available tools:",
    "",
  ];
  // Each issue contributes its bullet, its fix line, and a blank separator.
  const body = issues.flatMap((entry) => [
    `- **[${entry.type}]** ${entry.message}`,
    ` Fix: ${entry.suggestedFix}`,
    "",
  ]);
  const footer =
    "Please revise your response to remove impossible promises and suggest realistic alternatives.";

  return [...header, ...body, footer].join("\n");
}
/**
 * Check whether a response mentions a tool by name, as a whole word and
 * ignoring case.
 *
 * The tool name is escaped before being embedded in the regex, so names that
 * contain regex metacharacters are matched literally instead of being
 * interpreted as a pattern (or throwing on invalid syntax).
 *
 * @param response Text to search.
 * @param toolName Tool name to look for.
 * @returns true when the name occurs between word boundaries.
 */
function mentionsTool(response: string, toolName: string): boolean {
  const escapedName = toolName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return new RegExp(`\\b${escapedName}\\b`, "i").test(response);
}
/**
 * Report whether a response contains event-driven wording.
 *
 * Matching is case-insensitive and substring-based: a term counts even when
 * it appears inside a longer word.
 *
 * @param response Text to inspect.
 * @returns true when any of the listed terms occurs in the text.
 */
function mentionsEventDriven(response: string): boolean {
  const eventTerms = [
    "when",
    "if",
    "once",
    "as soon as",
    "the moment",
    "upon",
    "trigger",
    "event",
    "happen",
    "fire",
    "detect",
  ];
  const eventLanguage = new RegExp(`(?:${eventTerms.join("|")})`, "i");
  return eventLanguage.test(response);
}
/**
 * Emit a one-line warning summarising capability validation issues.
 *
 * Nothing is logged when no `warn` function is supplied or when the result
 * has no issues. The `debug` member of the logger is accepted but not used.
 *
 * @param validationResult Result whose issues should be reported.
 * @param agentLogger Optional logger; only `warn` is called.
 */
export function logCapabilityValidation(
  validationResult: CapabilityValidationResult,
  agentLogger?: {
    warn?: (msg: string) => void;
    debug?: (msg: string) => void;
  },
): void {
  if (!agentLogger?.warn) {
    return;
  }
  if (validationResult.issues.length === 0) {
    return;
  }

  const parts: string[] = [];
  for (const issue of validationResult.issues) {
    parts.push(`${issue.toolName}/${issue.promiseType}: ${issue.message}`);
  }
  // Called as a method so a logger that relies on `this` keeps working.
  agentLogger.warn(`Capability validation: ${parts.join("; ")}`);
}

View File

@ -844,6 +844,33 @@ export async function runEmbeddedAttempt(
.reverse()
.find((m) => (m as AgentMessage)?.role === "assistant") as AssistantMessage | undefined;
// === HONESTY VALIDATION HOOK ===
// Validates that the assistant response doesn't contain impossible promises
// before returning to the user. Logs warnings for issues detected.
if (lastAssistant && assistantTexts.length > 0) {
try {
const { validateResponseCapabilities, logCapabilityValidation } =
await import("../capability-validator.js");
const fullResponseText = assistantTexts.join("\n");
const availableToolNames = new Set(tools.map((t) => t.name.toLowerCase()));
const validationResult = validateResponseCapabilities(
fullResponseText,
availableToolNames,
);
if (!validationResult.isValid) {
logCapabilityValidation(validationResult, {
warn: (msg: string) =>
log.warn(`[HONESTY] ${msg} | runId=${params.runId} sessionId=${params.sessionId}`),
});
}
} catch (validationErr) {
// Non-fatal: validation is advisory, don't block the response
log.debug(`capability validation skipped: ${String(validationErr)}`);
}
}
// === END HONESTY VALIDATION HOOK ===
const toolMetasNormalized = toolMetas
.filter(
(entry): entry is { toolName: string; meta?: string } =>

View File

@ -0,0 +1,165 @@
/**
* Tool Execution Guard
*
* Validates that tool invocations match their promised capabilities.
* Prevents misuse of tools for things they cannot do.
*
* Runs BEFORE tool is executed.
*/
import { TOOL_CAPABILITIES } from "../tool-capabilities.js";
/**
 * Verdict returned by the tool execution guard.
 */
export interface ToolExecutionGuardResult {
/** false when a prevention pattern matched the tool arguments. */
allowed: boolean;
/** Explanation attached to a rejection. */
reason?: string;
/** Optional non-blocking note. */
warning?: string;
}
/**
 * Guard configuration: per-tool checks that flag argument shapes the tool
 * cannot honour.
 *
 * Each `check` receives the raw tool arguments and returns true when the call
 * should be flagged; `reason` explains why.
 */
const EXECUTION_GUARDS = {
  message: {
    // Detect message text that promises a conditional, future notification.
    preventPatterns: [
      {
        check: (args: Record<string, unknown>) => {
          const message = String(args.message || "").toLowerCase();
          return (
            message.includes("notify you when") ||
            message.includes("message you when") ||
            message.includes("alert you when")
          );
        },
        reason:
          "Cannot use message tool to send conditional notifications (requires cron job setup first)",
      },
    ],
  },
  exec: {
    preventPatterns: [
      {
        check: (args: Record<string, unknown>) => {
          const command = String(args.command || "").toLowerCase();
          // A lone "&" is the shell's background operator. "&&" (chaining) and
          // redirections such as "2>&1", "&>", "<&" or "|&" are not
          // backgrounding and must not be flagged.
          const backgroundsWithAmpersand = /(?<![&<>|])&(?![&>])/.test(command);
          return (
            command.includes("daemon") ||
            backgroundsWithAmpersand ||
            command.includes("nohup") ||
            command.includes("background")
          );
        },
        reason:
          "Backgrounding processes is not reliable in exec tool (they terminate with session)",
      },
    ],
  },
  browser: {
    preventPatterns: [
      {
        check: (args: Record<string, unknown>) => {
          // Non-numeric values become NaN, which falls through to 0.
          const interval = Number(args.interval) || Number(args.pollingInterval) || 0;
          return interval > 0;
        },
        reason:
          "Browser tool cannot handle polling/continuous monitoring (would require external scheduler)",
      },
    ],
  },
};
/**
 * Check a tool invocation against the prevention patterns configured for
 * that tool.
 *
 * @param params.toolName Tool being invoked; compared case-insensitively.
 * @param params.toolArgs Arguments of the invocation; a missing value is
 *   treated as an empty object so checks never dereference null/undefined.
 * @param params.agentPromise Accepted for API compatibility; not consulted.
 * @returns `{ allowed: true }` when the tool has no guards or none match;
 *   otherwise `{ allowed: false, reason }` for the first matching pattern.
 */
export function guardToolExecution(params: {
  toolName: string;
  toolArgs: Record<string, unknown>;
  agentPromise?: string; // What did the agent claim it would do?
}): ToolExecutionGuardResult {
  const toolName = params.toolName.toLowerCase();

  // Own-property check: names such as "constructor" or "toString" must not
  // resolve to members inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(EXECUTION_GUARDS, toolName)) {
    return { allowed: true };
  }
  const guards = EXECUTION_GUARDS[toolName as keyof typeof EXECUTION_GUARDS];
  const toolArgs = params.toolArgs ?? {};

  for (const guard of guards.preventPatterns) {
    if (guard.check(toolArgs)) {
      return {
        allowed: false,
        reason: guard.reason,
      };
    }
  }
  return { allowed: true };
}
/**
 * Compare what the agent promised with what an exec invocation actually ran.
 *
 * Only the "exec" tool is inspected. A promise mentioning "monitor" or
 * "watch" is considered unfulfilled unless the command text contains
 * "watch", "tail" or "loop".
 *
 * @param params.toolName Tool that was run; compared case-insensitively.
 * @param params.toolArgs Arguments of the invocation; `command` is read.
 * @param params.result Tool result; not inspected.
 * @param params.agentPromise Text of what the agent said it would do.
 * @returns `{ valid: true }`, or `{ valid: false, warning }` on a mismatch.
 */
export function validateToolResult(params: {
  toolName: string;
  toolArgs: Record<string, unknown>;
  result: unknown;
  agentPromise?: string;
}): { valid: boolean; warning?: string } {
  if (params.toolName.toLowerCase() !== "exec") {
    return { valid: true };
  }

  const command = String(params.toolArgs.command || "").toLowerCase();
  const promiseText = (params.agentPromise || "").toLowerCase();

  const promisedMonitoring = ["monitor", "watch"].some((word) => promiseText.includes(word));
  const commandKeepsRunning = ["watch", "tail", "loop"].some((word) => command.includes(word));

  if (promisedMonitoring && !commandKeepsRunning) {
    return {
      valid: false,
      warning:
        "Agent promised monitoring but exec only ran once. Promise not fulfilled. Consider cron for periodic checks.",
    };
  }
  return { valid: true };
}
/**
 * Build the user-facing line shown when the guard rejects a tool call.
 *
 * @param result Guard verdict; a missing or empty `reason` falls back to a
 *   generic explanation.
 * @returns The formatted rejection message.
 */
export function formatGuardRejection(result: ToolExecutionGuardResult): string {
  const explanation = result.reason || "Guard prevented execution";
  return "⚠️ Tool execution blocked: " + explanation;
}
/**
 * Check whether a tool's declared scope fits the context it is used in.
 *
 * Context "reactive" expects scope "reactive", "synchronous" expects
 * "synchronous", and "scheduled" expects "persistent". Tools without a
 * capability entry are treated as matching.
 *
 * @param params.toolName Tool to look up; compared case-insensitively.
 * @param params.context Usage context to check the tool against.
 * @returns `{ matches: true }`, or `{ matches: false, warning }` on a mismatch.
 */
export function checkToolScope(params: {
  toolName: string;
  context: "reactive" | "synchronous" | "scheduled";
}): { matches: boolean; warning?: string } {
  const tool = TOOL_CAPABILITIES[params.toolName.toLowerCase()];
  if (!tool) {
    return { matches: true };
  }

  // Scope each usage context requires.
  const scopeForContext = {
    reactive: "reactive",
    synchronous: "synchronous",
    scheduled: "persistent",
  } as const;

  if (tool.scope === scopeForContext[params.context]) {
    return { matches: true };
  }
  return {
    matches: false,
    warning: `Tool ${params.toolName} has scope "${tool.scope}" but is being used in "${params.context}" context`,
  };
}

View File

@ -413,8 +413,43 @@ export function createOpenClawCodingTools(options?: {
? normalized.map((tool) => wrapToolWithAbortSignal(tool, options.abortSignal))
: normalized;
// === HONESTY EXECUTION GUARD ===
// Wraps tool execution to prevent misuse (e.g., trying to use exec for background monitoring).
// Non-fatal: logs warnings but does not block execution (advisory mode).
const withHonestyGuard = withAbort.map((tool) => {
const originalExecute = tool.execute;
if (!originalExecute) return tool;
// Wrap execute with guard check (preserving original signature)
const wrappedExecute: typeof originalExecute = async (toolCallId, args, signal, onUpdate) => {
try {
// Dynamic import to avoid bundling issues
const { guardToolExecution } = await import("./pi-embedded-runner/tool-execution-guard.js");
const guardResult = guardToolExecution({
toolName: tool.name,
toolArgs: args as Record<string, unknown>,
});
if (!guardResult.allowed) {
// Advisory mode: log warning but don't block
logWarn(`[HONESTY GUARD] Tool ${tool.name} execution flagged: ${guardResult.reason}`);
}
} catch {
// Silently continue if guard module not available
}
return originalExecute.call(tool, toolCallId, args, signal, onUpdate);
};
return {
...tool,
execute: wrappedExecute,
} as typeof tool;
});
// === END HONESTY EXECUTION GUARD ===
// NOTE: Keep canonical (lowercase) tool names here.
// pi-ai's Anthropic OAuth transport remaps tool names to Claude Code-style names
// on the wire and maps them back for tool dispatch.
return withAbort;
return withHonestyGuard;
}

View File

@ -110,6 +110,60 @@ function buildVoiceSection(params: { isMinimal: boolean; ttsHint?: string }) {
return ["## Voice (TTS)", hint, ""];
}
/**
 * Build the "Session-Specific Constraints" section of the system prompt.
 *
 * One line is emitted for each of cron, message and exec, worded according
 * to whether that tool is available, followed by two constraints that always
 * apply. The list is therefore never empty in non-minimal mode.
 *
 * @param params.isMinimal When true the section is omitted entirely.
 * @param params.availableTools Accepted for API compatibility; not consulted.
 * @param params.hasMessageTool Whether the message tool is available.
 * @param params.hasExecTool Whether the exec tool is available.
 * @param params.hasCronTool Whether the cron tool is available.
 * @returns The section's lines, or an empty array in minimal mode.
 */
function buildSessionConstraintsSection(params: {
  isMinimal: boolean;
  availableTools: Set<string>;
  hasMessageTool: boolean;
  hasExecTool: boolean;
  hasCronTool: boolean;
}) {
  if (params.isMinimal) return [];

  const constraints: string[] = [
    params.hasCronTool
      ? "- ⚠️ **Background monitoring:** Available only via cron jobs. Must create job explicitly; no auto-triggers."
      : "- ❌ **Background monitoring:** No cron tool available. Cannot schedule periodic checks without explicit cron job setup first.",
    params.hasMessageTool
      ? "- ⚠️ **Proactive messaging:** Cannot do event-triggered sends. Message tool responds to incoming messages; cannot initiate on external events."
      : "- ❌ **Messaging:** No message tool available.",
    params.hasExecTool
      ? "- ⚠️ **Process execution:** Commands run synchronously. Cannot spawn persistent daemons or background services."
      : "- ❌ **Shell commands:** No exec tool available.",
    "- ⚠️ **Cross-session state:** Cannot directly affect other sessions' state. Use sessions_send for coordination.",
    "- ⚠️ **Proactive actions:** Nothing happens when you're not running. Only reactive to user input or scheduled cron jobs.",
  ];

  return [
    "## Session-Specific Constraints",
    "This session has these limitations (what you CANNOT promise):",
    ...constraints,
    "",
  ];
}
function buildDocsSection(params: { docsPath?: string; isMinimal: boolean; readToolName: string }) {
const docsPath = params.docsPath?.trim();
if (!docsPath || params.isMinimal) return [];
@ -513,6 +567,44 @@ export function buildAgentSystemPrompt(params: {
}
}
// Session constraints section (applies to full mode only)
lines.push(
...buildSessionConstraintsSection({
isMinimal: promptMode === "minimal",
availableTools,
hasMessageTool: availableTools.has("message"),
hasExecTool: availableTools.has("exec"),
hasCronTool: availableTools.has("cron"),
}),
);
// Honesty Protocol section (applies to all modes)
lines.push(
"## Honesty Protocol",
"",
"**Critical: Do not say 'Got it!' or 'I will do X' unless you can verify:**",
"1. You have the tool to do it (listed in Tooling above)",
"2. The tool can actually achieve what the user asked",
"3. You understand the tool's constraints and scope",
"",
"**Tool Scope Reference:**",
"- **message tool**: Reactive only. Responds when users contact you. CANNOT monitor external events or promise 'I'll message when X happens' without explicit cron job setup.",
"- **exec tool**: Runs during this session only. CANNOT spawn persistent background processes or daemons.",
"- **cron tool**: Requires explicit job creation. Does NOT auto-trigger on events; only on schedule.",
"- **browser tool**: Requires active session. CANNOT monitor websites continuously.",
"",
"**When you cannot do something, say so immediately.**",
"Examples:",
"- User: 'Message me the moment you come back online'",
" Your response: 'I can't monitor for that (message tool is reactive-only), but I CAN: [alternatives]'",
"",
"- User: 'Monitor this file in the background'",
" Your response: 'I can't monitor continuously, but I CAN: [alternatives]'",
"",
"**The Golden Rule:** Saying 'Got it!' when you can't is a broken promise. Honesty > appearing helpful.",
"",
);
// Skip silent replies for subagent/none modes
if (!isMinimal) {
lines.push(

View File

@ -0,0 +1,234 @@
/**
* Tool Capability Descriptors
*
* Defines the scope, constraints, and promise boundaries for each tool.
* Used to prevent the agent from promising things that tools cannot do.
*/
/**
 * A single documented limitation of a tool.
 */
export interface ToolCapabilityConstraint {
/** Human-readable statement of the limitation. */
description: string;
/** Kind of limitation the description belongs to. */
category: "scope" | "timing" | "state" | "event" | "persistence";
}
/**
 * Capability descriptor for one tool: what it is, how it operates, what it
 * cannot do, and which promises it can back.
 */
export interface ToolCapability {
/** Tool name. */
name: string;
/** Short description of what the tool does. */
summary: string;
/** Operating scope of the tool. */
scope: "reactive" | "synchronous" | "persistent" | "query";
/** Documented limitations of the tool. */
constraints: ToolCapabilityConstraint[];
/** Promise key -> whether the tool can fulfil that promise. */
canPromise: Record<string, boolean>;
}
/**
 * Capability descriptors keyed by lowercase tool name.
 *
 * Each entry records the tool's scope, its documented constraints, and a
 * `canPromise` table mapping promise keys to whether the tool can honour them.
 */
export const TOOL_CAPABILITIES: Record<string, ToolCapability> = {
  message: {
    name: "message",
    summary: "Send messages and reactions",
    scope: "reactive",
    constraints: [
      { description: "Only sends when agent is actively running", category: "timing" },
      { description: "Cannot monitor for external events or state changes", category: "event" },
      {
        description: "Cannot be used for proactive notifications without explicit cron job setup",
        category: "event",
      },
      { description: "Requires user or system to trigger delivery", category: "scope" },
    ],
    canPromise: {
      message_in_reply: true,
      message_when_triggered: true,
      message_on_event: false,
      message_periodically: false,
      message_when_system_online: false,
      message_when_file_changes: false,
      proactive_notification: false,
    },
  },

  exec: {
    name: "exec",
    summary: "Run shell commands",
    scope: "synchronous",
    constraints: [
      { description: "Runs synchronously during this session only", category: "timing" },
      {
        description: "Cannot spawn persistent background processes or daemons",
        category: "persistence",
      },
      { description: "Terminates when agent session ends", category: "persistence" },
      { description: "Cannot hook into system events or watchdirs", category: "event" },
    ],
    canPromise: {
      run_command: true,
      run_and_wait: true,
      spawn_background_process: false,
      start_daemon: false,
      monitor_directory: false,
      watch_file: false,
      background_monitoring: false,
    },
  },

  cron: {
    name: "cron",
    summary: "Manage scheduled jobs and wake events",
    scope: "persistent",
    constraints: [
      {
        description: "Requires explicit job creation; does not auto-create",
        category: "scope",
      },
      {
        description: "Only triggers on predefined schedules, not on arbitrary events",
        category: "event",
      },
      {
        description: "Cannot listen for system events (file changes, network events, etc.)",
        category: "event",
      },
      {
        description: "Job must be explicitly created before first execution",
        category: "state",
      },
    ],
    canPromise: {
      schedule_periodic_job: true,
      create_cron_job: true,
      run_at_time: true,
      monitor_file: false,
      listen_for_event: false,
      auto_trigger: false,
      periodic_checks: true,
    },
  },

  browser: {
    name: "browser",
    summary: "Control web browser",
    scope: "synchronous",
    constraints: [
      { description: "Runs during active session only", category: "timing" },
      { description: "Cannot monitor websites continuously", category: "persistence" },
      { description: "Cannot auto-refresh or auto-check", category: "event" },
      {
        description: "Requires explicit user action to check current state",
        category: "scope",
      },
    ],
    canPromise: {
      navigate_and_check: true,
      take_screenshot: true,
      submit_form: true,
      continuous_monitoring: false,
      auto_refresh: false,
      watch_for_changes: false,
      poll_website: false,
    },
  },

  sessions_send: {
    name: "sessions_send",
    summary: "Send message to another session",
    scope: "reactive",
    constraints: [
      {
        description: "Other session must be actively running to receive message",
        category: "timing",
      },
      { description: "Cannot guarantee delivery if target session ends", category: "state" },
      { description: "Requires target session key or label", category: "scope" },
    ],
    canPromise: {
      send_to_active_session: true,
      coordinate_across_sessions: true,
      send_to_offline_session: false,
      guarantee_delivery: false,
    },
  },

  sessions_spawn: {
    name: "sessions_spawn",
    summary: "Spawn sub-agent session",
    scope: "persistent",
    constraints: [
      {
        description: "Creates isolated session; cannot directly share state",
        category: "state",
      },
      {
        description: "Sub-agent runs independently; parent cannot forcibly stop it",
        category: "scope",
      },
      { description: "Results delivered asynchronously", category: "timing" },
    ],
    canPromise: {
      spawn_subagent: true,
      delegate_task: true,
      parallel_work: true,
      real_time_control: false,
      immediate_results: false,
    },
  },
};
/**
 * Check whether a tool can fulfil a specific promise.
 *
 * Both lookups are restricted to own properties, so names inherited from
 * Object.prototype (e.g. "constructor", "toString") are treated as unknown
 * rather than resolving to a built-in.
 *
 * @param toolName Tool to look up; compared case-insensitively.
 * @param promiseType Key in the tool's `canPromise` table.
 * @returns The table's value; false for an unknown tool or promise key.
 */
export function canPromise(toolName: string, promiseType: string): boolean {
  const hasOwn = (target: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(target, key);

  const toolKey = toolName.toLowerCase();
  if (!hasOwn(TOOL_CAPABILITIES, toolKey)) return false;

  const promises = TOOL_CAPABILITIES[toolKey].canPromise;
  return hasOwn(promises, promiseType) && promises[promiseType] === true;
}
/**
 * Look up the documented constraints of a tool.
 *
 * @param toolName Tool to look up; compared case-insensitively.
 * @returns The tool's constraint list, or an empty array when the tool has
 *   no entry or the entry carries no constraints.
 */
export function getToolConstraints(toolName: string): ToolCapabilityConstraint[] {
  const entry = TOOL_CAPABILITIES[toolName.toLowerCase()];
  if (entry == null) {
    return [];
  }
  return entry.constraints ?? [];
}
/**
 * List the promise keys a tool CANNOT fulfil.
 *
 * The tool lookup is restricted to own properties, so names inherited from
 * Object.prototype (e.g. "constructor") are treated as unknown tools instead
 * of resolving to a built-in without a `canPromise` table.
 *
 * @param toolName Tool to look up; compared case-insensitively.
 * @returns Keys whose `canPromise` value is false, in table order; an empty
 *   array for an unknown tool.
 */
export function getNegativePromises(toolName: string): string[] {
  const toolKey = toolName.toLowerCase();
  if (!Object.prototype.hasOwnProperty.call(TOOL_CAPABILITIES, toolKey)) {
    return [];
  }

  const denied: string[] = [];
  for (const [promise, allowed] of Object.entries(TOOL_CAPABILITIES[toolKey].canPromise)) {
    if (!allowed) denied.push(promise);
  }
  return denied;
}