diff --git a/docs/cli/sessions.md b/docs/cli/sessions.md index d906bc1de..e283cd0b7 100644 --- a/docs/cli/sessions.md +++ b/docs/cli/sessions.md @@ -1,7 +1,8 @@ --- -summary: "CLI reference for `moltbot sessions` (list stored sessions + usage)" +summary: "CLI reference for `moltbot sessions` (list stored sessions + usage) and `moltbot sessions health` (diagnose tool pairing issues)" read_when: - You want to list stored sessions and see recent activity + - You encounter "tool id not found" errors --- # `moltbot sessions` @@ -14,3 +15,60 @@ moltbot sessions --active 120 moltbot sessions --json ``` +# `moltbot sessions health` + +Diagnose session health for tool call/result pairing issues. Use this when you encounter errors like: + +> `LLM request rejected: invalid params, tool result's tool id(call_function_xxx) not found` + +This command checks for: +- **Orphaned tool results** - tool results without matching tool calls +- **Unmatched tool calls** - tool calls without results +- **Duplicate tool results** - multiple results for the same tool call + +```bash +# Check all sessions for issues +moltbot sessions health + +# Show detailed diagnostics for all sessions +moltbot sessions health --verbose + +# Check a specific session by ID +moltbot sessions health --session-id d7ce8851-6c25-4244-b872-58690b546288 + +# Use a custom session store +moltbot sessions health --store /path/to/sessions.json +``` + +## Example output + +**Healthy session:** +``` +[agent:main:main] ✅ HEALTHY (22 messages) +``` + +**Unhealthy session (excerpt):** +``` +[session=agent:main:main] Status: UNHEALTHY ❌ +[session=agent:main:main] - Found 1 orphaned tool result(s) without matching tool call +[session=agent:main:main] Orphaned IDs: call_function_ynavyw1i6p3e_1 +``` + +## Troubleshooting + +If a session is unhealthy: + +1. Clear the session transcripts (warning: this glob deletes the transcripts of every session for every agent; to keep the others, delete only the affected session's `.jsonl` file): + ```bash + rm -f ~/.clawdbot/agents/*/sessions/*.jsonl + ``` + +2. Restart the gateway: + ```bash + pkill -HUP moltbot-gateway + ``` + +3. 
Verify health: + ```bash + moltbot sessions health --verbose + ``` \ No newline at end of file diff --git a/src/agents/pi-embedded-runner.limithistoryturns.test.ts b/src/agents/pi-embedded-runner.limithistoryturns.test.ts index e0340a69e..1052a76ff 100644 --- a/src/agents/pi-embedded-runner.limithistoryturns.test.ts +++ b/src/agents/pi-embedded-runner.limithistoryturns.test.ts @@ -154,7 +154,27 @@ describe("limitHistoryTurns", () => { { role: "assistant", content: [{ type: "text", text: "response" }] }, ]; const limited = limitHistoryTurns(messages, 1); - expect(limited[0].content).toEqual([{ type: "text", text: "second" }]); expect(limited[1].content).toEqual([{ type: "text", text: "response" }]); }); + + it("does not slice between tool use and tool result when limit cuts off tool use", () => { + const messages: AgentMessage[] = [ + { role: "user", content: [{ type: "text", text: "start" }] }, + { role: "assistant", content: [{ type: "text", text: "ack" }] }, + { role: "user", content: [{ type: "text", text: "do tool" }] }, + { role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "foo", input: {} }] }, + { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1", content: "res" }] }, + ]; + + // If we limit to 1 turn, we should get the full tool interaction chain (User -> Asst(Call) -> User(Result)) + const limited = limitHistoryTurns(messages, 1); + + expect(limited.length).toBe(3); + expect(limited[0].role).toBe("user"); + expect((limited[0].content as any)[0].text).toBe("do tool"); + expect(limited[1].role).toBe("assistant"); + expect((limited[1].content as any)[0].type).toBe("tool_use"); + expect(limited[2].role).toBe("user"); + expect((limited[2].content as any)[0].type).toBe("tool_result"); + }); }); diff --git a/src/agents/pi-embedded-runner/history.ts b/src/agents/pi-embedded-runner/history.ts index 8abdc7d1e..eadd38321 100644 --- a/src/agents/pi-embedded-runner/history.ts +++ b/src/agents/pi-embedded-runner/history.ts @@ -9,9 
/**
 * Returns true when `msg` is a user-role message whose content is a non-empty
 * array made up solely of `tool_result` blocks, i.e. the reply to an earlier
 * tool call rather than a new user turn. String content, empty arrays and
 * mixed content all yield false.
 */
function isToolResultMessage(msg: AgentMessage): boolean {
  if (msg.role !== "user") return false;
  const content = msg.content;
  if (!Array.isArray(content) || content.length === 0) return false;
  return content.every((block) => {
    if (!block || typeof block !== "object") return false;
    return (block as { type?: unknown }).type === "tool_result";
  });
}

/**
 * Collects the string ids of every tool-invocation block in an assistant
 * message. Both the camelCase tags (`toolCall`, `toolUse`, `functionCall`) and
 * the snake_case `tool_use` tag are recognised, so the blocks matched here
 * line up with the `tool_result` / `tool_use_id` blocks handled by the sibling
 * helpers. Non-assistant messages and non-array content yield `[]`.
 */
function extractToolUseIdsFromAssistant(msg: AgentMessage): string[] {
  if (msg.role !== "assistant") return [];
  const content = msg.content;
  if (!Array.isArray(content)) return [];

  const ids: string[] = [];
  for (const block of content) {
    if (!block || typeof block !== "object") continue;
    const rec = block as { type?: unknown; id?: unknown };
    const isToolUse =
      rec.type === "toolCall" ||
      rec.type === "toolUse" ||
      rec.type === "functionCall" ||
      rec.type === "tool_use";
    if (isToolUse && typeof rec.id === "string") ids.push(rec.id);
  }
  return ids;
}

/**
 * Collects the tool-call ids referenced by a user-role message, reading
 * `tool_use_id` (falling back to `toolCallId`) from every content block.
 * Every block is inspected because one message may answer several tool calls.
 * Null or non-object blocks are skipped instead of being dereferenced.
 */
function extractToolUseIdsFromResult(msg: AgentMessage): string[] {
  if (msg.role !== "user") return [];
  const content = msg.content;
  if (!Array.isArray(content)) return [];

  const ids: string[] = [];
  for (const block of content) {
    if (!block || typeof block !== "object") continue;
    const rec = block as { tool_use_id?: unknown; toolCallId?: unknown };
    const id = rec.tool_use_id ?? rec.toolCallId;
    if (typeof id === "string") ids.push(id);
  }
  return ids;
}

/**
 * Limits conversation history to the last N user turns (and their associated
 * assistant responses). This reduces token usage for long-running DM sessions.
 * Tool result messages are not counted as new user turns.
 *
 * When the history holds at most `limit` user turns it is returned untouched
 * (same array instance). Otherwise the result is the window starting at the
 * oldest kept user turn. If a tool result inside that window refers to a tool
 * call that only exists before the window, the nearest earlier assistant
 * message carrying that call is prepended so the pair is never split.
 *
 * @param messages Full history, oldest first. Never mutated.
 * @param limit Maximum number of user turns to keep; a falsy or non-positive
 *   value disables limiting.
 * @returns `messages` itself when nothing is trimmed, otherwise a new array.
 */
export function limitHistoryTurns(
  messages: AgentMessage[],
  limit: number | undefined,
): AgentMessage[] {
  // NOTE(review): the parameter list and this guard sit between the two diff
  // hunks and are reconstructed from the call sites — confirm against the file.
  if (!limit || limit <= 0 || messages.length === 0) return messages;

  // Walk backwards to find the start of the oldest turn that still fits.
  let userCount = 0;
  let lastUserIndex = messages.length;
  let limitExceeded = false;
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (msg.role !== "user" || isToolResultMessage(msg)) continue;
    userCount++;
    if (userCount > limit) {
      limitExceeded = true;
      break;
    }
    lastUserIndex = i;
  }

  // Under the limit there is nothing to trim; leading non-user messages must
  // not be dropped just because the first user turn is not at index 0.
  if (!limitExceeded) return messages;

  const kept = messages.slice(lastUserIndex);

  // Find tool results in the window whose call has not appeared earlier in the window.
  const callsInWindow = new Set<string>();
  const missingCallIds = new Set<string>();
  for (const msg of kept) {
    for (const id of extractToolUseIdsFromAssistant(msg)) callsInWindow.add(id);
    if (!isToolResultMessage(msg)) continue;
    for (const id of extractToolUseIdsFromResult(msg)) {
      if (!callsInWindow.has(id)) missingCallIds.add(id);
    }
  }
  if (missingCallIds.size === 0) return kept;

  // Search backwards from just before the window so each missing id resolves
  // to its nearest preceding assistant message; the window itself stays intact.
  const rescued: AgentMessage[] = [];
  for (let j = lastUserIndex - 1; j >= 0 && missingCallIds.size > 0; j--) {
    let needed = false;
    for (const id of extractToolUseIdsFromAssistant(messages[j])) {
      if (missingCallIds.delete(id)) needed = true;
    }
    if (needed) rescued.unshift(messages[j]);
  }

  return rescued.length === 0 ? kept : [...rescued, ...kept];
}
/** Outcome of a tool call/result pairing repair pass over a transcript. */
export type ToolUseRepairReport = {
  messages: AgentMessage[];
  added: Array<Extract<AgentMessage, { role: "toolResult" }>>;
  droppedDuplicateCount: number;
  droppedOrphanCount: number;
  moved: boolean;
};

/** Read-only summary of tool call/result pairing in a transcript. */
export type SessionDiagnostics = {
  totalMessages: number;
  assistantMessages: number;
  userMessages: number;
  toolResultMessages: number;
  toolCalls: Array<{ id: string; name?: string; messageIndex: number }>;
  toolResults: Array<{ id: string | null; messageIndex: number }>;
  orphanedToolResults: Array<{ id: string | null; messageIndex: number }>;
  unmatchedToolCallIds: string[];
  duplicateToolResultIds: string[];
  isHealthy: boolean;
  issues: string[];
};

/**
 * Scans a transcript and reports tool call/result pairing problems without
 * modifying it.
 *
 * Tool calls are taken from `assistant` messages and tool results from
 * messages whose role is `toolResult`. A result is orphaned when it has no id
 * or no call with the same id exists anywhere in the transcript. A call is
 * unmatched when no result carries its id. Every repeat occurrence of a result
 * id is recorded as a duplicate.
 *
 * @param messages Transcript to inspect; entries that are not objects are skipped.
 * @param _sessionId Unused; kept so existing callers keep compiling.
 * @returns Counts, the collected calls/results, and human-readable `issues`;
 *   `isHealthy` is true exactly when `issues` is empty.
 */
export function diagnoseSessionHealth(
  messages: AgentMessage[],
  _sessionId?: string,
): SessionDiagnostics {
  const toolCalls: SessionDiagnostics["toolCalls"] = [];
  const toolResults: SessionDiagnostics["toolResults"] = [];
  const seenToolResultIds = new Set<string>();
  const seenToolCallIds = new Set<string>();
  const duplicateToolResultIds: string[] = [];

  let assistantMessages = 0;
  let userMessages = 0;
  let toolResultMessages = 0;

  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i] as AgentMessage;
    if (!msg || typeof msg !== "object") continue;

    const role = (msg as { role?: unknown }).role;

    if (role === "assistant") {
      assistantMessages++;
      const calls = extractToolCallsFromAssistant(
        msg as Extract<AgentMessage, { role: "assistant" }>,
      );
      for (const call of calls) {
        toolCalls.push({ id: call.id, name: call.name, messageIndex: i });
        seenToolCallIds.add(call.id);
      }
    } else if (role === "user") {
      userMessages++;
    } else if (role === "toolResult") {
      toolResultMessages++;
      const id = extractToolResultId(msg as Extract<AgentMessage, { role: "toolResult" }>);
      toolResults.push({ id, messageIndex: i });
      if (id && seenToolResultIds.has(id)) {
        duplicateToolResultIds.push(id);
      } else if (id) {
        seenToolResultIds.add(id);
      }
    }
  }

  const orphanedToolResults = toolResults.filter(
    (r) => r.id === null || !seenToolCallIds.has(r.id),
  );
  // Every non-empty result id is already in `seenToolResultIds`, so a set
  // lookup replaces rescanning `toolResults` once per tool call.
  const unmatchedToolCallIds = Array.from(seenToolCallIds).filter(
    (id) => !seenToolResultIds.has(id),
  );

  const issues: string[] = [];
  if (orphanedToolResults.length > 0) {
    issues.push(
      `Found ${orphanedToolResults.length} orphaned tool result(s) without matching tool call`,
    );
  }
  if (unmatchedToolCallIds.length > 0) {
    issues.push(
      `Found ${unmatchedToolCallIds.length} tool call(s) without matching result: ${unmatchedToolCallIds.join(", ")}`,
    );
  }
  if (duplicateToolResultIds.length > 0) {
    issues.push(
      `Found ${duplicateToolResultIds.length} duplicate tool result(s): ${duplicateToolResultIds.join(", ")}`,
    );
  }

  return {
    totalMessages: messages.length,
    assistantMessages,
    userMessages,
    toolResultMessages,
    toolCalls,
    toolResults,
    orphanedToolResults,
    unmatchedToolCallIds,
    duplicateToolResultIds,
    isHealthy: issues.length === 0,
    issues,
  };
}

/**
 * Prints a human-readable report for one session's diagnostics.
 *
 * Every line is prefixed with `[session=<id>]`, or `[session]` when no id is
 * given. Unhealthy reports list each issue, followed by at most three example
 * orphaned / unmatched ids with a trailing ` ...` when more exist.
 *
 * @param diagnostics Result of `diagnoseSessionHealth`.
 * @param sessionId Optional label used in the line prefix.
 * @param log Line sink; defaults to `console.log`, so existing two-argument
 *   callers behave as before. Pass a runtime logger to redirect the output.
 */
export function logSessionDiagnostics(
  diagnostics: SessionDiagnostics,
  sessionId?: string,
  log: (line: string) => void = (line) => console.log(line),
): void {
  const prefix = sessionId ? `[session=${sessionId}]` : "[session]";
  // Show at most three example ids and mark truncation.
  const preview = (ids: string[]): string =>
    `${ids.slice(0, 3).join(", ")}${ids.length > 3 ? " ..." : ""}`;

  log(`${prefix} Session Diagnostics:`);
  log(
    `${prefix} Messages: ${diagnostics.totalMessages} (${diagnostics.assistantMessages} assistant, ${diagnostics.userMessages} user, ${diagnostics.toolResultMessages} toolResult)`,
  );
  log(`${prefix} Tool calls: ${diagnostics.toolCalls.length}`);
  log(`${prefix} Tool results: ${diagnostics.toolResults.length}`);

  if (diagnostics.isHealthy) {
    log(`${prefix} Status: HEALTHY ✅`);
    return;
  }

  log(`${prefix} Status: UNHEALTHY ❌`);
  for (const issue of diagnostics.issues) {
    log(`${prefix} - ${issue}`);
  }
  if (diagnostics.orphanedToolResults.length > 0) {
    const ids = diagnostics.orphanedToolResults.map((r) => r.id ?? "(null)");
    log(`${prefix} Orphaned IDs: ${preview(ids)}`);
  }
  if (diagnostics.unmatchedToolCallIds.length > 0) {
    log(`${prefix} Unmatched IDs: ${preview(diagnostics.unmatchedToolCallIds)}`);
  }
}
/**
 * Returns the message carried by one parsed transcript line.
 *
 * A line that already has a `role` is returned as-is. A line without `role`
 * whose `message` property is an object with a `role` is unwrapped to that
 * inner object. Anything else is returned unchanged.
 *
 * NOTE(review): the unwrap assumes transcript lines may be envelopes of the
 * form `{ ..., message: { role, ... } }` — confirm against the session writer.
 */
function unwrapTranscriptEntry(entry: unknown): unknown {
  if (!entry || typeof entry !== "object") return entry;
  if ("role" in entry) return entry;
  const inner = (entry as { message?: unknown }).message;
  if (inner && typeof inner === "object" && "role" in inner) return inner;
  return entry;
}

/**
 * Reads a JSONL transcript and returns one value per non-blank line.
 *
 * Lines are trimmed first so CRLF files and whitespace-only lines do not reach
 * `JSON.parse`. A malformed line throws an error naming its line number.
 */
async function readTranscriptMessages(sessionFilePath: string): Promise<unknown[]> {
  const content = await fs.readFile(sessionFilePath, "utf-8");
  const messages: unknown[] = [];
  const lines = content.split("\n");
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line) continue;
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`invalid JSON on line ${i + 1}: ${reason}`);
    }
    messages.push(unwrapTranscriptEntry(parsed));
  }
  return messages;
}

/**
 * `moltbot sessions health`: checks stored sessions for tool call/result
 * pairing problems.
 *
 * Loads the session store (`opts.store`, else `cfg.session?.store`), then
 * inspects either the single session whose `sessionId` equals
 * `opts.sessionId` or every stored session. Each session's transcript file is
 * parsed and passed to `diagnoseSessionHealth`. Unhealthy sessions, and all
 * sessions when `opts.verbose` is set, get the full report; healthy ones get
 * a one-line summary.
 *
 * Exits with code 1 when the requested session id is unknown or when at least
 * one checked session is unhealthy. Sessions without a transcript path, or
 * whose transcript cannot be read or parsed, are reported, skipped and counted
 * separately; they do not change the exit code.
 */
export async function sessionsHealthCommand(
  opts: { store?: string; sessionId?: string; verbose?: boolean },
  runtime: RuntimeEnv,
): Promise<void> {
  const cfg = loadConfig();
  const storePath = resolveStorePath(opts.store ?? cfg.session?.store);
  const store = loadSessionStore(storePath);

  let sessionsToCheck: Array<{ key: string; entry: SessionEntry | undefined }>;
  if (opts.sessionId) {
    const found = Object.entries(store).find(([, entry]) => entry?.sessionId === opts.sessionId);
    if (!found) {
      runtime.error(`Session not found: ${opts.sessionId}`);
      runtime.exit(1);
      return;
    }
    sessionsToCheck = [{ key: found[0], entry: found[1] }];
  } else {
    sessionsToCheck = Object.entries(store).map(([key, entry]) => ({ key, entry }));
  }

  let checkedCount = 0;
  let unhealthyCount = 0;
  let skippedCount = 0;

  for (const { key, entry } of sessionsToCheck) {
    const sessionFilePath = entry?.sessionFile;
    if (!sessionFilePath) {
      runtime.log(`[${key}] No session file`);
      skippedCount++;
      continue;
    }

    let messages: unknown[];
    try {
      messages = await readTranscriptMessages(sessionFilePath);
    } catch (err) {
      // Surface the reason (missing file, permissions, malformed line) rather than hiding it.
      const reason = err instanceof Error ? err.message : String(err);
      runtime.error(`[${key}] Failed to read session file: ${sessionFilePath} (${reason})`);
      skippedCount++;
      continue;
    }

    const diagnostics = diagnoseSessionHealth(messages as AgentMessage[], key);
    checkedCount++;
    if (!diagnostics.isHealthy) {
      unhealthyCount++;
    }

    if (opts.verbose || !diagnostics.isHealthy) {
      logSessionDiagnostics(diagnostics, key);
    } else {
      runtime.log(`[${key}] ✅ HEALTHY (${diagnostics.totalMessages} messages)`);
    }
  }

  if (skippedCount > 0) {
    // Keep the summary honest: skipped sessions were not verified.
    runtime.log(info(`\nSkipped ${skippedCount} session(s) without a readable transcript`));
  }

  if (unhealthyCount > 0) {
    runtime.log(
      info(`\nFound ${unhealthyCount} unhealthy session(s) out of ${checkedCount} checked`),
    );
    runtime.exit(1);
  } else {
    runtime.log(info(`\nAll ${checkedCount} session(s) are healthy`));
  }
}