Merge d3db8377f7 into 4583f88626

2026-01-29 20:33:48 +01:00 · 2026-01-29 20:33:48 +01:00 · 72e514bf33
commit 72e514bf33
parent 4583f88626 d3db8377f7
7 changed files with 431 additions and 15 deletions
--- a/docs/cli/sessions.md
+++ b/docs/cli/sessions.md
@ -1,7 +1,8 @@
 ---
-summary: "CLI reference for `moltbot sessions` (list stored sessions + usage)"
+summary: "CLI reference for `moltbot sessions` (list stored sessions + usage) + `moltbot sessions health` (diagnose tool pairing issues)"
 read_when:
  - You want to list stored sessions and see recent activity
+  - You encounter "tool id not found" errors
 ---

 # `moltbot sessions`
@ -14,3 +15,60 @@ moltbot sessions --active 120
 moltbot sessions --json
 ```

+# `moltbot sessions health`
+
+Diagnose session health for tool call/result pairing issues. Use this when you encounter errors like:
+
+> `LLM request rejected: invalid params, tool result's tool id(call_function_xxx) not found`
+
+This command checks for:
+- **Orphaned tool results** - tool results without matching tool calls
+- **Unmatched tool calls** - tool calls without results
+- **Duplicate tool results** - multiple results for the same tool call
+
+```bash
+# Check all sessions for issues
+moltbot sessions health
+
+# Show detailed diagnostics for all sessions
+moltbot sessions health --verbose
+
+# Check a specific session by ID
+moltbot sessions health --session-id d7ce8851-6c25-4244-b872-58690b546288
+
+# Use a custom session store
+moltbot sessions health --store /path/to/sessions.json
+```
+
+## Example output
+
+**Healthy session:**
+```
+[agent:main:main] ✅ HEALTHY (22 messages)
+```
+
+**Unhealthy session:**
+```
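+[session=agent:main:main] Session Diagnostics:
+[session=agent:main:main]   Messages: 22 (9 assistant, 8 user, 5 toolResult)
+[session=agent:main:main]   Tool calls: 4
+[session=agent:main:main]   Tool results: 5
+[session=agent:main:main]   Status: UNHEALTHY ❌
+[session=agent:main:main]   - Found 1 orphaned tool result(s) without matching tool call
+[session=agent:main:main]   Orphaned IDs: call_function_ynavyw1i6p3e_1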
+```
+
+## Troubleshooting
+
+If a session is unhealthy:
+
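+1. Clear the affected session transcript. **Warning:** the glob below deletes *every* stored session transcript for *all* agents; to keep your other sessions, delete only the unhealthy session's `.jsonl` file instead:
+   ```bash
+   rm -f ~/.clawdbot/agents/*/sessions/*.jsonl
+   ```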
+
+2. Restart the gateway:
+   ```bash
+   pkill -HUP moltbot-gateway
+   ```
+
+3. Verify health:
+   ```bash
+   moltbot sessions health --verbose
+   ```
--- a/src/agents/pi-embedded-runner.limithistoryturns.test.ts
+++ b/src/agents/pi-embedded-runner.limithistoryturns.test.ts
@ -154,7 +154,27 @@ describe("limitHistoryTurns", () => {
      { role: "assistant", content: [{ type: "text", text: "response" }] },
    ];
    const limited = limitHistoryTurns(messages, 1);
-    expect(limited[0].content).toEqual([{ type: "text", text: "second" }]);
    expect(limited[1].content).toEqual([{ type: "text", text: "response" }]);
  });
+
+  // Regression test: a user message that contains only tool_result blocks is a
+  // tool response, not a new user turn, so it must not consume the turn limit.
+  it("does not slice between tool use and tool result when limit cuts off tool use", () => {
+    const messages: AgentMessage[] = [
+      { role: "user", content: [{ type: "text", text: "start" }] },
+      { role: "assistant", content: [{ type: "text", text: "ack" }] },
+      { role: "user", content: [{ type: "text", text: "do tool" }] },
+      { role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "foo", input: {} }] },
+      { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1", content: "res" }] },
+    ];
+
+    // If we limit to 1 turn, we should get the full tool interaction chain (User -> Asst(Call) -> User(Result))
+    // i.e. the window is anchored on "do tool", not on the trailing tool_result message.
+    const limited = limitHistoryTurns(messages, 1);
+
+    expect(limited.length).toBe(3);
+    expect(limited[0].role).toBe("user");
+    expect((limited[0].content as any)[0].text).toBe("do tool");
+    expect(limited[1].role).toBe("assistant");
+    expect((limited[1].content as any)[0].type).toBe("tool_use");
+    expect(limited[2].role).toBe("user");
+    expect((limited[2].content as any)[0].type).toBe("tool_result");
+  });
 });
--- a/src/agents/pi-embedded-runner/history.ts
+++ b/src/agents/pi-embedded-runner/history.ts
@ -9,9 +9,61 @@ function stripThreadSuffix(value: string): string {
  return match?.[1] ?? value;
 }

+/**
+ * Reports whether `msg` is a user-role message made up solely of
+ * `tool_result` blocks, i.e. a tool response rather than a fresh user turn.
+ *
+ * @param msg - Message to inspect.
+ * @returns `true` only when the role is `"user"` and the content is a
+ *   non-empty array whose every entry is an object with
+ *   `type === "tool_result"`.
+ */
+function isToolResultMessage(msg: AgentMessage): boolean {
+  if (msg.role !== "user") {
+    return false;
+  }
+  const blocks = msg.content;
+  // Non-array (e.g. plain string) content and empty block lists never qualify.
+  if (!Array.isArray(blocks) || blocks.length === 0) {
+    return false;
+  }
+  // A single entry that is not a tool_result object disqualifies the message.
+  const hasOtherBlock = blocks.some((entry) => {
+    if (!entry || typeof entry !== "object") return true;
+    return (entry as { type?: unknown }).type !== "tool_result";
+  });
+  return !hasOtherBlock;
+}
+
+/**
+ * Collects the ids of the tool-call blocks carried by an assistant message.
+ *
+ * A block contributes its id when its `type` is `"toolCall"`, `"toolUse"` or
+ * `"functionCall"` and its `id` is a string.
+ *
+ * NOTE(review): the snake_case `"tool_use"` type is not matched here, although
+ * tool results are recognised by the snake_case `"tool_result"` type in this
+ * file — confirm which block shapes actually reach this function.
+ *
+ * @param msg - Message to inspect.
+ * @returns The matching ids in block order; empty for non-assistant messages
+ *   or non-array content.
+ */
+function extractToolUseIdsFromAssistant(msg: AgentMessage): string[] {
+  if (msg.role !== "assistant") {
+    return [];
+  }
+  const blocks = msg.content;
+  if (!Array.isArray(blocks)) {
+    return [];
+  }
+
+  return blocks.flatMap((entry): string[] => {
+    if (!entry || typeof entry !== "object") {
+      return [];
+    }
+    const { type, id } = entry as { type?: unknown; id?: unknown };
+    const isToolCall = type === "toolCall" || type === "toolUse" || type === "functionCall";
+    return isToolCall && typeof id === "string" ? [id] : [];
+  });
+}
+
+/**
+ * Reads the tool-call id referenced by a tool-result user message.
+ *
+ * Only the first content block is inspected; `tool_use_id` is preferred and
+ * `toolCallId` is used as a fallback. A message carrying several tool results
+ * therefore yields just the id of its first block.
+ *
+ * @param msg - Message to inspect.
+ * @returns The referenced id, or `null` when the message is not a user
+ *   message, has no array content, or its first block carries no string id.
+ */
+function extractToolUseIdFromResult(msg: AgentMessage): string | null {
+  if (msg.role !== "user") return null;
+  const content = msg.content;
+  if (!Array.isArray(content) || content.length === 0) return null;
+
+  // Guard before dereferencing: a null or primitive first entry would
+  // otherwise throw a TypeError on property access.
+  const first: unknown = content[0];
+  if (!first || typeof first !== "object") return null;
+
+  const { tool_use_id, toolCallId } = first as { tool_use_id?: unknown; toolCallId?: unknown };
+  const id = tool_use_id ?? toolCallId;
+  return typeof id === "string" ? id : null;
+}
+
 /**
 * Limits conversation history to the last N user turns (and their associated
 * assistant responses). This reduces token usage for long-running DM sessions.
+ * Tool result messages are not counted as new user turns.
+ *
+ * CRITICAL: When truncating, we must preserve tool_use + tool_result pairs.
+ * A tool_result that follows its tool_use belongs to the same logical turn,
+ * even if they're separated by assistant responses.
 */
 export function limitHistoryTurns(
  messages: AgentMessage[],
@ -23,15 +75,55 @@ export function limitHistoryTurns(
  let lastUserIndex = messages.length;

  for (let i = messages.length - 1; i >= 0; i--) {
-    if (messages[i].role === "user") {
+    const msg = messages[i];
+    if (msg.role === "user" && !isToolResultMessage(msg)) {
      userCount++;
      if (userCount > limit) {
-        return messages.slice(lastUserIndex);
+        break;
      }
      lastUserIndex = i;
    }
  }
-  return messages;
+
+  // The backwards scan above only breaks once the turn limit is exceeded. If it
+  // ran to completion, the whole history fits within the limit and is returned
+  // untouched, including any messages that precede the first user turn.
+  if (userCount <= limit || lastUserIndex === 0 || lastUserIndex === messages.length) {
+    return messages;
+  }
+
+  // Look for tool results inside the retained window whose tool call lies
+  // before the window; those assistant messages must be carried along so the
+  // result does not become an orphan.
+  const positionsToAdd = new Set<number>();
+  for (let i = lastUserIndex; i < messages.length; i++) {
+    const msg = messages[i];
+    if (!isToolResultMessage(msg)) continue;
+    const toolId = extractToolUseIdFromResult(msg);
+    if (!toolId) continue;
+
+    // Walk backwards to the nearest assistant message that issued this call.
+    for (let j = i - 1; j >= 0; j--) {
+      if (extractToolUseIdsFromAssistant(messages[j]).includes(toolId)) {
+        // A call that is already inside the window needs no extra handling;
+        // recording it must never cause earlier window messages to be dropped.
+        if (j < lastUserIndex) {
+          positionsToAdd.add(j);
+        }
+        break;
+      }
+    }
+  }
+
+  const slice = messages.slice(lastUserIndex);
+  if (positionsToAdd.size === 0) {
+    return slice;
+  }
+
+  // Prepend the out-of-window tool-call messages in their original order and
+  // keep the entire retained window after them.
+  const carried = [...positionsToAdd].sort((a, b) => a - b).map((index) => messages[index]);
+  return [...carried, ...slice];
 }

 /**
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@ -42,6 +42,7 @@ import { createMoltbotCodingTools } from "../../pi-tools.js";
 import { resolveSandboxContext } from "../../sandbox.js";
 import { guardSessionManager } from "../../session-tool-result-guard-wrapper.js";
 import { resolveTranscriptPolicy } from "../../transcript-policy.js";
+import { sanitizeToolUseResultPairing } from "../../session-transcript-repair.js";
 import { acquireSessionWriteLock } from "../../session-write-lock.js";
 import {
  applySkillEnvOverrides,
@ -531,8 +532,11 @@ export async function runEmbeddedAttempt(
        const validated = transcriptPolicy.validateAnthropicTurns
          ? validateAnthropicTurns(validatedGemini)
          : validatedGemini;
+        const repaired = transcriptPolicy.repairToolUseResultPairing
+          ? sanitizeToolUseResultPairing(validated)
+          : validated;
        const limited = limitHistoryTurns(
-          validated,
+          repaired,
          getDmHistoryLimitFromSessionKey(params.sessionKey, params.config),
        );
        cacheTrace?.recordStage("session:limited", { messages: limited });
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@ -5,6 +5,28 @@ type ToolCallLike = {
  name?: string;
 };

+/**
+ * Return shape of `repairToolUseResultPairing`.
+ *
+ * NOTE(review): the per-field descriptions are inferred from the field names;
+ * the repair routine's body is not visible here — confirm against it.
+ */
+export type ToolUseRepairReport = {
+  /** The transcript after repair; `sanitizeToolUseResultPairing` returns this field. */
+  messages: AgentMessage[];
+  /** Presumably the tool results synthesized for calls that had none — confirm. */
+  added: Array<Extract<AgentMessage, { role: "toolResult" }>>;
+  /** Presumably the number of repeated tool results that were removed — confirm. */
+  droppedDuplicateCount: number;
+  /** Presumably the number of tool results removed for lacking a matching call — confirm. */
+  droppedOrphanCount: number;
+  /** Presumably whether any tool result was relocated during repair — confirm. */
+  moved: boolean;
+};
+
+/** Summary produced by `diagnoseSessionHealth` for a single transcript. */
+export type SessionDiagnostics = {
+  /** Length of the inspected message array. */
+  totalMessages: number;
+  /** Number of messages whose role is `"assistant"`. */
+  assistantMessages: number;
+  /** Number of messages whose role is `"user"`. */
+  userMessages: number;
+  /** Number of messages whose role is `"toolResult"`. */
+  toolResultMessages: number;
+  /** Every tool call found in assistant messages, with the index of its message. */
+  toolCalls: Array<{ id: string; name?: string; messageIndex: number }>;
+  /** Every `toolResult` message, with the id it references (`null` when none was found). */
+  toolResults: Array<{ id: string | null; messageIndex: number }>;
+  /** Tool results whose id is `null` or matches no tool call in the transcript. */
+  orphanedToolResults: Array<{ id: string | null; messageIndex: number }>;
+  /** Ids of tool calls for which no tool result exists. */
+  unmatchedToolCallIds: string[];
+  /** Ids seen on more than one tool result; an id is listed once per repeat. */
+  duplicateToolResultIds: string[];
+  /** `true` when `issues` is empty. */
+  isHealthy: boolean;
+  /** Human-readable description of each detected problem category. */
+  issues: string[];
+};
+
 function extractToolCallsFromAssistant(
  msg: Extract<AgentMessage, { role: "assistant" }>,
 ): ToolCallLike[] {
@ -35,6 +57,119 @@ function extractToolResultId(msg: Extract<AgentMessage, { role: "toolResult" }>)
  return null;
 }

+/**
+ * Scans a transcript for tool call / tool result pairing problems.
+ *
+ * Three categories are reported: tool results with no matching call
+ * (orphans, including results without an id), tool calls with no result, and
+ * tool results that repeat an id already seen. Matching is by id across the
+ * whole transcript; message order is not taken into account.
+ *
+ * @param messages - Transcript to inspect. Entries that are not objects are
+ *   ignored for role counting but still contribute to `totalMessages`.
+ * @param _sessionId - Accepted for call-site symmetry; currently unused.
+ * @returns Counters, the extracted calls/results and the list of issues.
+ */
+export function diagnoseSessionHealth(
+  messages: AgentMessage[],
+  _sessionId?: string,
+): SessionDiagnostics {
+  const toolCalls: Array<{ id: string; name?: string; messageIndex: number }> = [];
+  const toolResults: Array<{ id: string | null; messageIndex: number }> = [];
+  const seenToolResultIds = new Set<string>();
+  const seenToolCallIds = new Set<string>();
+  const duplicateToolResultIds: string[] = [];
+
+  let assistantMessages = 0;
+  let userMessages = 0;
+  let toolResultMessages = 0;
+
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i];
+    if (!msg || typeof msg !== "object") continue;
+
+    const role = (msg as { role?: unknown }).role;
+
+    if (role === "assistant") {
+      assistantMessages++;
+      const calls = extractToolCallsFromAssistant(
+        msg as Extract<AgentMessage, { role: "assistant" }>,
+      );
+      for (const call of calls) {
+        toolCalls.push({ id: call.id, name: call.name, messageIndex: i });
+        seenToolCallIds.add(call.id);
+      }
+    } else if (role === "user") {
+      userMessages++;
+    } else if (role === "toolResult") {
+      toolResultMessages++;
+      const id = extractToolResultId(msg as Extract<AgentMessage, { role: "toolResult" }>);
+      toolResults.push({ id, messageIndex: i });
+      // Track every non-null id (not just truthy ones) so the set mirrors the
+      // ids present in `toolResults` and can be used for the lookups below.
+      if (id !== null) {
+        if (seenToolResultIds.has(id)) {
+          duplicateToolResultIds.push(id);
+        } else {
+          seenToolResultIds.add(id);
+        }
+      }
+    }
+  }
+
+  const orphanedToolResults = toolResults.filter(
+    (r) => r.id === null || !seenToolCallIds.has(r.id),
+  );
+  // Set lookup instead of rescanning `toolResults` for every call id.
+  const unmatchedToolCallIds = Array.from(seenToolCallIds).filter(
+    (id) => !seenToolResultIds.has(id),
+  );
+
+  const issues: string[] = [];
+  if (orphanedToolResults.length > 0) {
+    issues.push(
+      `Found ${orphanedToolResults.length} orphaned tool result(s) without matching tool call`,
+    );
+  }
+  if (unmatchedToolCallIds.length > 0) {
+    issues.push(
+      `Found ${unmatchedToolCallIds.length} tool call(s) without matching result: ${unmatchedToolCallIds.join(", ")}`,
+    );
+  }
+  if (duplicateToolResultIds.length > 0) {
+    issues.push(
+      `Found ${duplicateToolResultIds.length} duplicate tool result(s): ${duplicateToolResultIds.join(", ")}`,
+    );
+  }
+
+  return {
+    totalMessages: messages.length,
+    assistantMessages,
+    userMessages,
+    toolResultMessages,
+    toolCalls,
+    toolResults,
+    orphanedToolResults,
+    unmatchedToolCallIds,
+    duplicateToolResultIds,
+    isHealthy: issues.length === 0,
+    issues,
+  };
+}
+
+/**
+ * Prints a diagnostics report to the console, one `console.log` call per line.
+ *
+ * Every line is prefixed with `[session=<id>]`, or `[session]` when no id is
+ * given. Unhealthy reports list each issue followed by up to three example
+ * ids for orphaned results and for unmatched calls.
+ *
+ * @param diagnostics - Report to print.
+ * @param sessionId - Optional label used in the line prefix.
+ */
+export function logSessionDiagnostics(diagnostics: SessionDiagnostics, sessionId?: string): void {
+  const tag = sessionId ? `[session=${sessionId}]` : "[session]";
+  const { orphanedToolResults: orphans, unmatchedToolCallIds: unmatched } = diagnostics;
+
+  // Joins at most three example ids and flags truncation with a trailing " ...".
+  const preview = (ids: string[], total: number): string =>
+    `${ids.join(", ")}${total > 3 ? " ..." : ""}`;
+
+  const lines: string[] = [
+    `${tag} Session Diagnostics:`,
+    `${tag}   Messages: ${diagnostics.totalMessages} (${diagnostics.assistantMessages} assistant, ${diagnostics.userMessages} user, ${diagnostics.toolResultMessages} toolResult)`,
+    `${tag}   Tool calls: ${diagnostics.toolCalls.length}`,
+    `${tag}   Tool results: ${diagnostics.toolResults.length}`,
+  ];
+
+  if (diagnostics.isHealthy) {
+    lines.push(`${tag}   Status: HEALTHY ✅`);
+  } else {
+    lines.push(`${tag}   Status: UNHEALTHY ❌`);
+    diagnostics.issues.forEach((issue) => {
+      lines.push(`${tag}   - ${issue}`);
+    });
+    if (orphans.length > 0) {
+      const sample = orphans.slice(0, 3).map((r) => r.id ?? "(null)");
+      lines.push(`${tag}   Orphaned IDs: ${preview(sample, orphans.length)}`);
+    }
+    if (unmatched.length > 0) {
+      lines.push(`${tag}   Unmatched IDs: ${preview(unmatched.slice(0, 3), unmatched.length)}`);
+    }
+  }
+
+  for (const line of lines) {
+    console.log(line);
+  }
+}
+
 function makeMissingToolResult(params: {
  toolCallId: string;
  toolName?: string;
@ -60,14 +195,6 @@ export function sanitizeToolUseResultPairing(messages: AgentMessage[]): AgentMes
  return repairToolUseResultPairing(messages).messages;
 }

-export type ToolUseRepairReport = {
-  messages: AgentMessage[];
-  added: Array<Extract<AgentMessage, { role: "toolResult" }>>;
-  droppedDuplicateCount: number;
-  droppedOrphanCount: number;
-  moved: boolean;
-};
-
 export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
  // Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not
  // immediately followed by matching tool results. Session files can end up with results
--- a/src/cli/program/register.status-health-sessions.ts
+++ b/src/cli/program/register.status-health-sessions.ts
@ -1,6 +1,6 @@
 import type { Command } from "commander";
 import { healthCommand } from "../../commands/health.js";
-import { sessionsCommand } from "../../commands/sessions.js";
+import { sessionsCommand, sessionsHealthCommand } from "../../commands/sessions.js";
 import { statusCommand } from "../../commands/status.js";
 import { setVerbose } from "../../globals.js";
 import { defaultRuntime } from "../../runtime.js";
@ -143,4 +143,36 @@ export function registerStatusHealthSessionsCommands(program: Command) {
        defaultRuntime,
      );
    });
+
+  // Registers the session health check; see sessionsHealthCommand for behavior.
+  //
+  // NOTE(review): Commander takes the first word of the string passed to
+  // `.command()` as the command name and parses the remainder as argument
+  // declarations, so "sessions health" presumably registers another command
+  // named `sessions` with a required `health` argument rather than a `health`
+  // subcommand — confirm, and consider attaching `.command("health")` to the
+  // existing `sessions` command instead.
+  program
+    .command("sessions health")
+    .description("Check session health for tool call/result pairing issues")
+    .option("--verbose", "Show diagnostics for all sessions", false)
+    .option("--session-id <id>", "Check a specific session by ID")
+    .option("--store <path>", "Path to session store (default: resolved from config)")
+    .addHelpText(
+      "after",
+      () =>
+        `\n${theme.heading("Examples:")}\n${formatHelpExamples([
+          ["moltbot sessions health", "Check all sessions for tool pairing issues."],
+          ["moltbot sessions health --verbose", "Show detailed diagnostics."],
+          ["moltbot sessions health --session-id abc123", "Check specific session."],
+        ])}`,
+    )
+    .addHelpText(
+      "after",
+      // The health command is documented on the `sessions` page
+      // (docs/cli/sessions.md); there is no separate sessions-health page.
+      () =>
+        `\n${theme.muted("Docs:")} ${formatDocsLink("/cli/sessions", "docs.molt.bot/cli/sessions")}\n`,
+    )
+    .action(async (opts) => {
+      setVerbose(Boolean(opts.verbose));
+      await sessionsHealthCommand(
+        {
+          sessionId: opts.sessionId as string | undefined,
+          verbose: Boolean(opts.verbose),
+          store: opts.store as string | undefined,
+        },
+        defaultRuntime,
+      );
+    });
 }
--- a/src/commands/sessions.ts
+++ b/src/commands/sessions.ts
@ -1,11 +1,17 @@
 import { lookupContextTokens } from "../agents/context.js";
 import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import {
+  diagnoseSessionHealth,
+  logSessionDiagnostics,
+} from "../agents/session-transcript-repair.js";
 import { resolveConfiguredModelRef } from "../agents/model-selection.js";
 import { loadConfig } from "../config/config.js";
 import { loadSessionStore, resolveStorePath, type SessionEntry } from "../config/sessions.js";
 import { info } from "../globals.js";
 import type { RuntimeEnv } from "../runtime.js";
 import { isRich, theme } from "../terminal/theme.js";
+import fs from "node:fs/promises";

 type SessionRow = {
  key: string;
@ -247,3 +253,80 @@ export async function sessionsCommand(
    runtime.log(line.trimEnd());
  }
 }
+
+/**
+ * Implements `moltbot sessions health`.
+ *
+ * Loads the session store, reads each selected session's JSONL transcript,
+ * runs `diagnoseSessionHealth` on it and prints the outcome. The process exits
+ * with status 1 when the requested session id is unknown, when any session is
+ * unhealthy, or when any transcript could not be read or parsed.
+ *
+ * @param opts - `store` overrides the configured store path, `sessionId`
+ *   restricts the check to one session, `verbose` prints full diagnostics for
+ *   healthy sessions too.
+ * @param runtime - Output and exit hooks.
+ */
+export async function sessionsHealthCommand(
+  opts: { store?: string; sessionId?: string; verbose?: boolean },
+  runtime: RuntimeEnv,
+) {
+  const cfg = loadConfig();
+  const storePath = resolveStorePath(opts.store ?? cfg.session?.store);
+  const store = loadSessionStore(storePath);
+
+  let sessionsToCheck: Array<{ key: string; entry: SessionEntry | undefined }> = Object.entries(
+    store,
+  ).map(([key, entry]) => ({ key, entry }));
+
+  if (opts.sessionId) {
+    const found = sessionsToCheck.find(({ entry }) => entry?.sessionId === opts.sessionId);
+    if (!found) {
+      runtime.error(`Session not found: ${opts.sessionId}`);
+      runtime.exit(1);
+      return;
+    }
+    sessionsToCheck = [found];
+  }
+
+  let checkedCount = 0;
+  let unhealthyCount = 0;
+  let unreadableCount = 0;
+
+  for (const { key, entry } of sessionsToCheck) {
+    const sessionFilePath = entry?.sessionFile;
+    if (!sessionFilePath) {
+      runtime.log(`[${key}] No session file`);
+      continue;
+    }
+
+    let messages: AgentMessage[];
+    try {
+      const content = await fs.readFile(sessionFilePath, "utf-8");
+      messages = content
+        .split("\n")
+        .filter((line) => line.trim().length > 0)
+        .map((line) => {
+          const parsed: unknown = JSON.parse(line);
+          // NOTE(review): transcript lines may be envelopes of the form
+          // `{ message: { role, ... } }` rather than bare messages — confirm
+          // against the session writer. Records that already carry a
+          // top-level `role` are passed through unchanged.
+          if (parsed && typeof parsed === "object" && !("role" in parsed)) {
+            const inner = (parsed as { message?: unknown }).message;
+            if (inner && typeof inner === "object" && "role" in inner) {
+              return inner as AgentMessage;
+            }
+          }
+          return parsed as AgentMessage;
+        });
+    } catch (err) {
+      // Covers both I/O failures and malformed JSON lines; surface the cause
+      // and remember the failure so the summary cannot claim full health.
+      const reason = err instanceof Error ? err.message : String(err);
+      runtime.error(`[${key}] Failed to read session file: ${sessionFilePath} (${reason})`);
+      unreadableCount++;
+      continue;
+    }
+
+    const diagnostics = diagnoseSessionHealth(messages, key);
+    checkedCount++;
+
+    if (!diagnostics.isHealthy) {
+      unhealthyCount++;
+    }
+
+    if (opts.verbose || !diagnostics.isHealthy) {
+      logSessionDiagnostics(diagnostics, key);
+    } else {
+      runtime.log(`[${key}] ✅ HEALTHY (${diagnostics.totalMessages} messages)`);
+    }
+  }
+
+  if (unhealthyCount > 0) {
+    runtime.log(
+      info(`\nFound ${unhealthyCount} unhealthy session(s) out of ${checkedCount} checked`),
+    );
+  } else if (unreadableCount === 0) {
+    runtime.log(info(`\nAll ${checkedCount} session(s) are healthy`));
+  } else {
+    runtime.log(info(`\n${checkedCount} session(s) checked and healthy`));
+  }
+
+  if (unreadableCount > 0) {
+    runtime.error(`Could not read ${unreadableCount} session file(s)`);
+  }
+  if (unhealthyCount > 0 || unreadableCount > 0) {
+    runtime.exit(1);
+  }
+}