feat: add session health diagnostics for tool pair validation

- Add diagnoseSessionHealth() function to detect orphaned tool results
- Add logSessionDiagnostics() for human-readable diagnostic output
- Add 'sessions health' CLI command to check all sessions or specific ones
- Commands: moltbot sessions health [--verbose] [--session-id XYZ]
This commit is contained in:
Kastrah 2026-01-28 04:36:22 +01:00
parent b55447cb25
commit 085c411a26
2 changed files with 218 additions and 8 deletions

View File

@ -5,6 +5,28 @@ type ToolCallLike = {
name?: string;
};
/**
 * Return shape of `repairToolUseResultPairing`.
 *
 * `sanitizeToolUseResultPairing` returns only the `messages` field of this report.
 */
export type ToolUseRepairReport = {
  /** Transcript carried by the report (what `sanitizeToolUseResultPairing` hands back). */
  messages: AgentMessage[];
  /** `toolResult` messages listed as added — presumably synthesized for calls lacking a result; confirm against the repair routine. */
  added: Extract<AgentMessage, { role: "toolResult" }>[];
  /** Presumably the number of duplicate tool results discarded — confirm against the repair routine. */
  droppedDuplicateCount: number;
  /** Presumably the number of orphaned tool results discarded — confirm against the repair routine. */
  droppedOrphanCount: number;
  /** Presumably true when a tool result was relocated — confirm against the repair routine. */
  moved: boolean;
};
/**
 * Result of `diagnoseSessionHealth`: per-role counts, the tool calls and tool
 * results found in a transcript, and any pairing problems between them.
 */
export type SessionDiagnostics = {
  /** Length of the inspected array; every entry counts, whatever its role. */
  totalMessages: number;
  /** Entries whose `role` is `"assistant"`. */
  assistantMessages: number;
  /** Entries whose `role` is `"user"`. */
  userMessages: number;
  /** Entries whose `role` is `"toolResult"`. */
  toolResultMessages: number;
  /** One record per tool call found in an assistant message; `messageIndex` is that message's array index. */
  toolCalls: { id: string; name?: string; messageIndex: number }[];
  /** One record per `toolResult` message; `id` is `null` when no tool call id could be extracted. */
  toolResults: { id: string | null; messageIndex: number }[];
  /** Tool results whose id is `null` or does not match any tool call id in the transcript. */
  orphanedToolResults: { id: string | null; messageIndex: number }[];
  /** Distinct tool call ids for which no tool result carries the same id. */
  unmatchedToolCallIds: string[];
  /** A result id is recorded here each time it is seen again after its first occurrence. */
  duplicateToolResultIds: string[];
  /** True exactly when `issues` is empty. */
  isHealthy: boolean;
  /** Human-readable description of each category of problem that was found. */
  issues: string[];
};
function extractToolCallsFromAssistant(
msg: Extract<AgentMessage, { role: "assistant" }>,
): ToolCallLike[] {
@ -35,6 +57,119 @@ function extractToolResultId(msg: Extract<AgentMessage, { role: "toolResult" }>)
return null;
}
/**
 * Inspects a transcript and reports tool-call / tool-result pairing problems
 * without modifying it.
 *
 * Matching is done purely by id: a tool result counts as matched when any
 * assistant message anywhere in `messages` contains a call with the same id.
 * The relative position of a call and its result is not checked.
 *
 * @param messages Transcript entries to inspect. Entries that are not objects
 *   are skipped, but still count toward `totalMessages`.
 * @param _sessionId Accepted for call-site symmetry; not used by the analysis.
 * @returns Counts per role, every tool call and tool result found, and the
 *   detected orphaned results, unmatched calls and duplicate results.
 */
export function diagnoseSessionHealth(
  messages: AgentMessage[],
  _sessionId?: string,
): SessionDiagnostics {
  const toolCalls: SessionDiagnostics["toolCalls"] = [];
  const toolResults: SessionDiagnostics["toolResults"] = [];
  const seenToolCallIds = new Set<string>();
  const seenToolResultIds = new Set<string>();
  const duplicateToolResultIds: string[] = [];
  let assistantMessages = 0;
  let userMessages = 0;
  let toolResultMessages = 0;

  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i] as AgentMessage;
    // Transcripts come from parsed files, so tolerate holes and non-object entries.
    if (!msg || typeof msg !== "object") continue;

    const role = (msg as { role?: unknown }).role;
    switch (role) {
      case "assistant": {
        assistantMessages++;
        const calls = extractToolCallsFromAssistant(
          msg as Extract<AgentMessage, { role: "assistant" }>,
        );
        for (const call of calls) {
          toolCalls.push({ id: call.id, name: call.name, messageIndex: i });
          seenToolCallIds.add(call.id);
        }
        break;
      }
      case "user": {
        userMessages++;
        break;
      }
      case "toolResult": {
        toolResultMessages++;
        const id = extractToolResultId(msg as Extract<AgentMessage, { role: "toolResult" }>);
        toolResults.push({ id, messageIndex: i });
        if (id) {
          // Every repeat after the first occurrence is recorded, so an id with
          // three results appears twice in duplicateToolResultIds.
          if (seenToolResultIds.has(id)) {
            duplicateToolResultIds.push(id);
          } else {
            seenToolResultIds.add(id);
          }
        }
        break;
      }
      default:
        // Other roles only contribute to totalMessages.
        break;
    }
  }

  // A result is orphaned when it has no id or its id matches no tool call.
  const orphanedToolResults = toolResults.filter(
    (r) => r.id === null || !seenToolCallIds.has(r.id),
  );

  // Index every recorded result id once so each call id is a constant-time
  // lookup instead of a scan over all results.
  const resultIds = new Set<string | null>(toolResults.map((r) => r.id));
  const unmatchedToolCallIds = Array.from(seenToolCallIds).filter((id) => !resultIds.has(id));

  const issues: string[] = [];
  if (orphanedToolResults.length > 0) {
    issues.push(
      `Found ${orphanedToolResults.length} orphaned tool result(s) without matching tool call`,
    );
  }
  if (unmatchedToolCallIds.length > 0) {
    issues.push(
      `Found ${unmatchedToolCallIds.length} tool call(s) without matching result: ${unmatchedToolCallIds.join(", ")}`,
    );
  }
  if (duplicateToolResultIds.length > 0) {
    issues.push(
      `Found ${duplicateToolResultIds.length} duplicate tool result(s): ${duplicateToolResultIds.join(", ")}`,
    );
  }

  return {
    totalMessages: messages.length,
    assistantMessages,
    userMessages,
    toolResultMessages,
    toolCalls,
    toolResults,
    orphanedToolResults,
    unmatchedToolCallIds,
    duplicateToolResultIds,
    isHealthy: issues.length === 0,
    issues,
  };
}
/**
 * Writes a human-readable report for one session's diagnostics.
 *
 * Every line is prefixed with `[session=<id>]`, or `[session]` when no id is
 * given. Unhealthy sessions additionally list each issue and up to three
 * example ids for orphaned results and unmatched calls.
 *
 * @param diagnostics Result of `diagnoseSessionHealth`.
 * @param sessionId Optional label used in the line prefix.
 * @param log Sink that receives each output line. Defaults to `console.log`,
 *   so existing two-argument callers behave exactly as before; pass a custom
 *   sink to route the report through another logger or to capture it.
 */
export function logSessionDiagnostics(
  diagnostics: SessionDiagnostics,
  sessionId?: string,
  log: (line: string) => void = console.log,
): void {
  const prefix = sessionId ? `[session=${sessionId}]` : "[session]";
  const emit = (text: string): void => {
    log(`${prefix} ${text}`);
  };

  // Show at most this many example ids, followed by " ..." when more exist.
  const maxExamples = 3;
  const summarizeIds = (ids: readonly string[]): string => {
    const shown = ids.slice(0, maxExamples).join(", ");
    return ids.length > maxExamples ? `${shown} ...` : shown;
  };

  emit("Session Diagnostics:");
  emit(
    `Messages: ${diagnostics.totalMessages} (${diagnostics.assistantMessages} assistant, ${diagnostics.userMessages} user, ${diagnostics.toolResultMessages} toolResult)`,
  );
  emit(`Tool calls: ${diagnostics.toolCalls.length}`);
  emit(`Tool results: ${diagnostics.toolResults.length}`);

  if (diagnostics.isHealthy) {
    emit("Status: HEALTHY ✅");
    return;
  }

  emit("Status: UNHEALTHY ❌");
  for (const issue of diagnostics.issues) {
    emit(`- ${issue}`);
  }
  if (diagnostics.orphanedToolResults.length > 0) {
    // Results without an extractable id are shown as "(null)".
    const orphanIds = diagnostics.orphanedToolResults.map((r) => r.id ?? "(null)");
    emit(`Orphaned IDs: ${summarizeIds(orphanIds)}`);
  }
  if (diagnostics.unmatchedToolCallIds.length > 0) {
    emit(`Unmatched IDs: ${summarizeIds(diagnostics.unmatchedToolCallIds)}`);
  }
}
function makeMissingToolResult(params: {
toolCallId: string;
toolName?: string;
@ -60,14 +195,6 @@ export function sanitizeToolUseResultPairing(messages: AgentMessage[]): AgentMes
return repairToolUseResultPairing(messages).messages;
}
// NOTE(review): an identical ToolUseRepairReport declaration appears earlier in this
// view, and the surrounding hunk header (-60,14 +195,6) suggests these lines are the
// declaration's old, removed position — confirm only one copy remains in the file.
/**
 * Return shape of `repairToolUseResultPairing`.
 *
 * `sanitizeToolUseResultPairing` returns only the `messages` field of this report.
 */
export type ToolUseRepairReport = {
  /** Transcript carried by the report. */
  messages: AgentMessage[];
  /** `toolResult` messages listed as added — presumably synthesized ones; confirm against the repair routine. */
  added: Extract<AgentMessage, { role: "toolResult" }>[];
  /** Presumably the number of duplicate tool results discarded — confirm. */
  droppedDuplicateCount: number;
  /** Presumably the number of orphaned tool results discarded — confirm. */
  droppedOrphanCount: number;
  /** Presumably true when a tool result was relocated — confirm. */
  moved: boolean;
};
export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
// Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not
// immediately followed by matching tool results. Session files can end up with results

View File

@ -1,11 +1,17 @@
import { lookupContextTokens } from "../agents/context.js";
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import {
diagnoseSessionHealth,
logSessionDiagnostics,
} from "../agents/session-transcript-repair.js";
import { resolveConfiguredModelRef } from "../agents/model-selection.js";
import { loadConfig } from "../config/config.js";
import { loadSessionStore, resolveStorePath, type SessionEntry } from "../config/sessions.js";
import { info } from "../globals.js";
import type { RuntimeEnv } from "../runtime.js";
import { isRich, theme } from "../terminal/theme.js";
import fs from "node:fs/promises";
type SessionRow = {
key: string;
@ -247,3 +253,80 @@ export async function sessionsCommand(
runtime.log(line.trimEnd());
}
}
/**
 * CLI handler for `sessions health`: checks tool-call / tool-result pairing in
 * stored session transcripts.
 *
 * Loads the session store, selects either the single entry whose `sessionId`
 * matches `opts.sessionId` or every entry, parses each entry's session file as
 * JSON Lines, and runs `diagnoseSessionHealth` on the result.
 *
 * Exit behaviour: calls `runtime.exit(1)` when the requested session id is not
 * in the store, or when at least one checked session is unhealthy. Sessions
 * whose file is missing from the entry or cannot be read/parsed are reported
 * and skipped; they do not affect the exit code.
 *
 * @param opts.store Optional store location; falls back to `cfg.session?.store`.
 * @param opts.sessionId Restrict the check to the entry with this session id.
 * @param opts.verbose Print the full diagnostics for healthy sessions as well.
 * @param runtime Output and exit hooks.
 */
export async function sessionsHealthCommand(
  opts: { store?: string; sessionId?: string; verbose?: boolean },
  runtime: RuntimeEnv,
): Promise<void> {
  const cfg = loadConfig();
  const storePath = resolveStorePath(opts.store ?? cfg.session?.store);
  const store = loadSessionStore(storePath);

  // Entries are kept possibly-undefined; the loop below guards with `?.`.
  let sessionsToCheck: Array<{ key: string; entry: SessionEntry | undefined }>;
  if (opts.sessionId) {
    const found = Object.entries(store).find(([, entry]) => entry?.sessionId === opts.sessionId);
    if (!found) {
      runtime.error(`Session not found: ${opts.sessionId}`);
      runtime.exit(1);
      // Explicit return in case runtime.exit does not terminate the process.
      return;
    }
    sessionsToCheck = [{ key: found[0], entry: found[1] }];
  } else {
    sessionsToCheck = Object.entries(store).map(([key, entry]) => ({ key, entry }));
  }

  let checkedCount = 0;
  let unhealthyCount = 0;
  let unreadableCount = 0;

  for (const { key, entry } of sessionsToCheck) {
    const sessionFilePath = entry?.sessionFile;
    if (!sessionFilePath) {
      runtime.log(`[${key}] No session file`);
      continue;
    }

    // NOTE(review): every non-blank line is passed to diagnoseSessionHealth as if it
    // were itself a message with a top-level `role`. If the session file wraps
    // messages in envelope records, nothing will match — confirm the on-disk format.
    let messages: unknown[];
    try {
      const content = await fs.readFile(sessionFilePath, "utf-8");
      messages = content
        // Accept CRLF line endings and ignore blank / whitespace-only lines so a
        // stray "\r" or trailing whitespace does not fail the whole file.
        .split(/\r?\n/)
        .filter((line) => line.trim().length > 0)
        .map((line): unknown => JSON.parse(line));
    } catch (err) {
      // Covers both I/O errors and malformed JSON; surface which one it was.
      const reason = err instanceof Error ? err.message : String(err);
      runtime.error(`[${key}] Failed to read session file: ${sessionFilePath} (${reason})`);
      unreadableCount++;
      continue;
    }

    const diagnostics = diagnoseSessionHealth(messages as AgentMessage[], key);
    checkedCount++;
    if (!diagnostics.isHealthy) {
      unhealthyCount++;
    }

    if (opts.verbose || !diagnostics.isHealthy) {
      // logSessionDiagnostics prints via console.log by default, not runtime.log.
      logSessionDiagnostics(diagnostics, key);
    } else {
      runtime.log(`[${key}] ✅ HEALTHY (${diagnostics.totalMessages} messages)`);
    }
  }

  if (unreadableCount > 0) {
    // Make it visible that the totals below exclude these sessions.
    runtime.log(
      info(`\n${unreadableCount} session file(s) could not be read or parsed and were not checked`),
    );
  }

  if (unhealthyCount > 0) {
    runtime.log(
      info(`\nFound ${unhealthyCount} unhealthy session(s) out of ${checkedCount} checked`),
    );
    runtime.exit(1);
  } else {
    runtime.log(info(`\nAll ${checkedCount} session(s) are healthy`));
  }
}