feat: add session health diagnostics for tool pair validation
- Add diagnoseSessionHealth() function to detect orphaned tool results
- Add logSessionDiagnostics() for human-readable diagnostic output
- Add 'sessions health' CLI command to check all sessions or a specific one
- Command: moltbot sessions health [--verbose] [--session-id XYZ]
This commit is contained in:
parent
b55447cb25
commit
085c411a26
@ -5,6 +5,28 @@ type ToolCallLike = {
|
||||
name?: string;
|
||||
};
|
||||
|
||||
/**
 * Result object produced by the tool-use / tool-result pairing repair.
 *
 * NOTE(review): the per-field descriptions below are inferred from the field
 * names and types only; confirm against the repair implementation.
 */
export type ToolUseRepairReport = {
  /** Transcript after repair. */
  messages: AgentMessage[];
  /** Tool-result messages the repair added (presumably synthesized placeholders). */
  added: Extract<AgentMessage, { role: "toolResult" }>[];
  /** Count of tool results dropped as duplicates. */
  droppedDuplicateCount: number;
  /** Count of tool results dropped as orphans. */
  droppedOrphanCount: number;
  /** Presumably true when at least one message was relocated; verify. */
  moved: boolean;
};
|
||||
|
||||
/**
 * Summary of a transcript scan: message counts per role, every tool call and
 * tool result that was seen, and the pairing problems found between them.
 */
export type SessionDiagnostics = {
  /** Length of the scanned message array. */
  totalMessages: number;
  /** Number of entries whose role is "assistant". */
  assistantMessages: number;
  /** Number of entries whose role is "user". */
  userMessages: number;
  /** Number of entries whose role is "toolResult". */
  toolResultMessages: number;
  /** Every tool call found, with the index of the message that issued it. */
  toolCalls: {
    id: string;
    name?: string;
    messageIndex: number;
  }[];
  /** Every tool result found; `id` is null when no id could be extracted. */
  toolResults: {
    id: string | null;
    messageIndex: number;
  }[];
  /** Tool results with no id, or with an id no tool call uses. */
  orphanedToolResults: {
    id: string | null;
    messageIndex: number;
  }[];
  /** Tool call ids for which no tool result exists. */
  unmatchedToolCallIds: string[];
  /** Ids of tool results that repeat an id already seen earlier. */
  duplicateToolResultIds: string[];
  /** True when `issues` is empty. */
  isHealthy: boolean;
  /** Human-readable description of each problem category that was found. */
  issues: string[];
};
|
||||
|
||||
function extractToolCallsFromAssistant(
|
||||
msg: Extract<AgentMessage, { role: "assistant" }>,
|
||||
): ToolCallLike[] {
|
||||
@ -35,6 +57,119 @@ function extractToolResultId(msg: Extract<AgentMessage, { role: "toolResult" }>)
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Scans a transcript and reports tool-call / tool-result pairing problems.
 *
 * Three problem categories are detected:
 * - orphaned tool results: a result with no id, or whose id matches no tool call;
 * - unmatched tool calls: a tool call id for which no result exists;
 * - duplicate tool results: a result whose id was already seen on an earlier result
 *   (each repeat is listed, so an id seen three times appears twice).
 *
 * The function is read-only: `messages` is not modified.
 *
 * @param messages Transcript entries in order. Entries that are not objects are
 *   skipped, but still count toward `totalMessages`.
 * @param _sessionId Accepted for call-site convenience; currently unused.
 * @returns Counts, the collected calls/results, the problems found, and
 *   `isHealthy`, which is true only when no problem category was hit.
 */
export function diagnoseSessionHealth(
  messages: AgentMessage[],
  _sessionId?: string,
): SessionDiagnostics {
  const toolCalls: Array<{ id: string; name?: string; messageIndex: number }> = [];
  const toolResults: Array<{ id: string | null; messageIndex: number }> = [];
  const toolCallIds = new Set<string>();
  const firstSeenResultIds = new Set<string>();
  const duplicateToolResultIds: string[] = [];

  let assistantMessages = 0;
  let userMessages = 0;
  let toolResultMessages = 0;

  for (const [messageIndex, msg] of messages.entries()) {
    // Transcripts may come from loosely-validated JSON; ignore non-object entries.
    if (!msg || typeof msg !== "object") continue;

    const role = (msg as { role?: unknown }).role;

    if (role === "assistant") {
      assistantMessages++;
      const calls = extractToolCallsFromAssistant(
        msg as Extract<AgentMessage, { role: "assistant" }>,
      );
      for (const call of calls) {
        toolCalls.push({ id: call.id, name: call.name, messageIndex });
        toolCallIds.add(call.id);
      }
    } else if (role === "user") {
      userMessages++;
    } else if (role === "toolResult") {
      toolResultMessages++;
      const id = extractToolResultId(msg as Extract<AgentMessage, { role: "toolResult" }>);
      toolResults.push({ id, messageIndex });
      if (id) {
        if (firstSeenResultIds.has(id)) {
          duplicateToolResultIds.push(id);
        } else {
          firstSeenResultIds.add(id);
        }
      }
    }
  }

  // One Set over all result ids makes the call -> result match O(1) per call
  // instead of rescanning the whole result list for every call id.
  const resultIds = new Set<string | null>(toolResults.map((r) => r.id));

  const orphanedToolResults = toolResults.filter(
    (r) => r.id === null || !toolCallIds.has(r.id),
  );
  const unmatchedToolCallIds = [...toolCallIds].filter((id) => !resultIds.has(id));

  const issues: string[] = [];
  if (orphanedToolResults.length > 0) {
    issues.push(
      `Found ${orphanedToolResults.length} orphaned tool result(s) without matching tool call`,
    );
  }
  if (unmatchedToolCallIds.length > 0) {
    issues.push(
      `Found ${unmatchedToolCallIds.length} tool call(s) without matching result: ${unmatchedToolCallIds.join(", ")}`,
    );
  }
  if (duplicateToolResultIds.length > 0) {
    issues.push(
      `Found ${duplicateToolResultIds.length} duplicate tool result(s): ${duplicateToolResultIds.join(", ")}`,
    );
  }

  return {
    totalMessages: messages.length,
    assistantMessages,
    userMessages,
    toolResultMessages,
    toolCalls,
    toolResults,
    orphanedToolResults,
    unmatchedToolCallIds,
    duplicateToolResultIds,
    isHealthy: issues.length === 0,
    issues,
  };
}
|
||||
|
||||
/**
 * Writes a human-readable report for one diagnostics result.
 *
 * Every line is prefixed with `[session=<id>]` (or `[session]` when no id is
 * given). For an unhealthy result the issue list is printed, followed by up to
 * three example ids for orphaned results and for unmatched calls; a trailing
 * " ..." marks that more than three exist.
 *
 * @param diagnostics Result to report.
 * @param sessionId Optional identifier shown in the line prefix.
 * @param log Sink that receives each output line. Defaults to `console.log`,
 *   so existing two-argument callers behave exactly as before; callers that
 *   own a logger can pass it in.
 */
export function logSessionDiagnostics(
  diagnostics: SessionDiagnostics,
  sessionId?: string,
  log: (line: string) => void = (line) => console.log(line),
): void {
  const MAX_EXAMPLES = 3;
  const prefix = sessionId ? `[session=${sessionId}]` : "[session]";

  // Formats the first few ids and flags truncation.
  const preview = (ids: string[], total: number): string =>
    `${ids.join(", ")}${total > MAX_EXAMPLES ? " ..." : ""}`;

  log(`${prefix} Session Diagnostics:`);
  log(
    `${prefix} Messages: ${diagnostics.totalMessages} (${diagnostics.assistantMessages} assistant, ${diagnostics.userMessages} user, ${diagnostics.toolResultMessages} toolResult)`,
  );
  log(`${prefix} Tool calls: ${diagnostics.toolCalls.length}`);
  log(`${prefix} Tool results: ${diagnostics.toolResults.length}`);

  if (diagnostics.isHealthy) {
    log(`${prefix} Status: HEALTHY ✅`);
    return;
  }

  log(`${prefix} Status: UNHEALTHY ❌`);
  for (const issue of diagnostics.issues) {
    log(`${prefix} - ${issue}`);
  }

  const orphans = diagnostics.orphanedToolResults;
  if (orphans.length > 0) {
    const examples = orphans.slice(0, MAX_EXAMPLES).map((r) => r.id ?? "(null)");
    log(`${prefix} Orphaned IDs: ${preview(examples, orphans.length)}`);
  }

  const unmatched = diagnostics.unmatchedToolCallIds;
  if (unmatched.length > 0) {
    log(`${prefix} Unmatched IDs: ${preview(unmatched.slice(0, MAX_EXAMPLES), unmatched.length)}`);
  }
}
|
||||
|
||||
function makeMissingToolResult(params: {
|
||||
toolCallId: string;
|
||||
toolName?: string;
|
||||
@ -60,14 +195,6 @@ export function sanitizeToolUseResultPairing(messages: AgentMessage[]): AgentMes
|
||||
return repairToolUseResultPairing(messages).messages;
|
||||
}
|
||||
|
||||
/**
 * Result object produced by the tool-use / tool-result pairing repair.
 *
 * NOTE(review): the per-field descriptions below are inferred from the field
 * names and types only; confirm against the repair implementation.
 */
export type ToolUseRepairReport = {
  /** Transcript after repair. */
  messages: AgentMessage[];
  /** Tool-result messages the repair added (presumably synthesized placeholders). */
  added: Extract<AgentMessage, { role: "toolResult" }>[];
  /** Count of tool results dropped as duplicates. */
  droppedDuplicateCount: number;
  /** Count of tool results dropped as orphans. */
  droppedOrphanCount: number;
  /** Presumably true when at least one message was relocated; verify. */
  moved: boolean;
};
|
||||
|
||||
export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
|
||||
// Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not
|
||||
// immediately followed by matching tool results. Session files can end up with results
|
||||
|
||||
@ -1,11 +1,17 @@
|
||||
import { lookupContextTokens } from "../agents/context.js";
|
||||
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import {
|
||||
diagnoseSessionHealth,
|
||||
logSessionDiagnostics,
|
||||
} from "../agents/session-transcript-repair.js";
|
||||
import { resolveConfiguredModelRef } from "../agents/model-selection.js";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { loadSessionStore, resolveStorePath, type SessionEntry } from "../config/sessions.js";
|
||||
import { info } from "../globals.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import { isRich, theme } from "../terminal/theme.js";
|
||||
import fs from "node:fs/promises";
|
||||
|
||||
type SessionRow = {
|
||||
key: string;
|
||||
@ -247,3 +253,80 @@ export async function sessionsCommand(
|
||||
runtime.log(line.trimEnd());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the message carried by one parsed transcript record.
 *
 * Records that already expose a top-level `role` are returned unchanged. A
 * record without `role` but with an object-valued `message` that has a `role`
 * yields that inner message. Anything else is returned unchanged.
 *
 * NOTE(review): the `{ message: {...} }` wrapper is an assumption about how
 * transcript lines are persisted; confirm against the session writer. When no
 * record is wrapped this function is a no-op.
 */
function unwrapTranscriptRecord(record: unknown): unknown {
  if (!record || typeof record !== "object") return record;
  const candidate = record as { role?: unknown; message?: unknown };
  if (candidate.role !== undefined) return record;
  const inner = candidate.message;
  if (inner && typeof inner === "object" && "role" in inner) return inner;
  return record;
}

/**
 * Parses JSONL transcript text into one value per non-blank line.
 * Throws (from JSON.parse) when a non-blank line is not valid JSON.
 */
function parseSessionTranscript(content: string): unknown[] {
  return content
    .split("\n")
    // Whitespace-only lines (e.g. a bare "\r" in CRLF files) are not JSON; skip them.
    .filter((line) => line.trim().length > 0)
    .map((line) => unwrapTranscriptRecord(JSON.parse(line)));
}

/**
 * CLI handler for `sessions health`: runs tool-pairing diagnostics over the
 * stored sessions and prints the result.
 *
 * With `opts.sessionId`, only the store entry whose `sessionId` matches is
 * checked; when none matches an error is reported and the process exits with
 * code 1. Without it, every entry in the store is checked.
 *
 * Entries without a session file are reported and skipped. Session files that
 * cannot be read or parsed are reported (with the underlying reason), skipped,
 * and counted in the final summary. Healthy sessions print a one-line status
 * unless `opts.verbose` is set; unhealthy sessions always print the full report.
 *
 * Exits with code 1 when at least one checked session is unhealthy.
 *
 * @param opts.store Optional store path; falls back to the configured store.
 * @param opts.sessionId Restrict the check to this session id.
 * @param opts.verbose Print the full report for healthy sessions as well.
 * @param runtime Output and exit hooks.
 */
export async function sessionsHealthCommand(
  opts: { store?: string; sessionId?: string; verbose?: boolean },
  runtime: RuntimeEnv,
): Promise<void> {
  const cfg = loadConfig();
  const storePath = resolveStorePath(opts.store ?? cfg.session?.store);
  const store = loadSessionStore(storePath);

  let sessionsToCheck: Array<{ key: string; entry: SessionEntry | undefined }>;

  if (opts.sessionId) {
    const found = Object.entries(store).find(([, entry]) => entry?.sessionId === opts.sessionId);
    if (!found) {
      runtime.error(`Session not found: ${opts.sessionId}`);
      runtime.exit(1);
      return;
    }
    sessionsToCheck = [{ key: found[0], entry: found[1] }];
  } else {
    sessionsToCheck = Object.entries(store).map(([key, entry]) => ({ key, entry }));
  }

  let unhealthyCount = 0;
  let unreadableCount = 0;
  const allDiagnostics: Array<{
    key: string;
    diagnostics: ReturnType<typeof diagnoseSessionHealth>;
  }> = [];

  for (const { key, entry } of sessionsToCheck) {
    const sessionFilePath = entry?.sessionFile;
    if (!sessionFilePath) {
      runtime.log(`[${key}] No session file`);
      continue;
    }

    let messages: unknown[];
    try {
      const content = await fs.readFile(sessionFilePath, "utf-8");
      messages = parseSessionTranscript(content);
    } catch (err: unknown) {
      // Covers both I/O failures and malformed JSON; surface which one it was.
      const reason = err instanceof Error ? err.message : String(err);
      runtime.error(`[${key}] Failed to read session file: ${sessionFilePath} (${reason})`);
      unreadableCount++;
      continue;
    }

    const diagnostics = diagnoseSessionHealth(messages as AgentMessage[], key);
    allDiagnostics.push({ key, diagnostics });

    if (!diagnostics.isHealthy) {
      unhealthyCount++;
    }

    if (opts.verbose || !diagnostics.isHealthy) {
      logSessionDiagnostics(diagnostics, key);
    } else {
      runtime.log(`[${key}] ✅ HEALTHY (${diagnostics.totalMessages} messages)`);
    }
  }

  if (unreadableCount > 0) {
    // Unreadable sessions are excluded from the counts below; make that visible.
    runtime.log(info(`\n${unreadableCount} session file(s) could not be read and were skipped`));
  }

  if (unhealthyCount > 0) {
    runtime.log(
      info(
        `\nFound ${unhealthyCount} unhealthy session(s) out of ${allDiagnostics.length} checked`,
      ),
    );
    runtime.exit(1);
  } else {
    runtime.log(info(`\nAll ${allDiagnostics.length} session(s) are healthy`));
  }
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user