diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index e83c3ae4a..622bdb7f4 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -52,6 +52,7 @@ import { import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { resolveDefaultModelForAgent } from "../../model-selection.js"; +import { sanitizeToolUseResultPairing } from "../../session-transcript-repair.js"; import { isAbortError } from "../abort.js"; import { buildEmbeddedExtensionPaths } from "../extensions.js"; @@ -535,9 +536,11 @@ export async function runEmbeddedAttempt( validated, getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), ); - cacheTrace?.recordStage("session:limited", { messages: limited }); - if (limited.length > 0) { - activeSession.agent.replaceMessages(limited); + // Fix: Repair tool_use/tool_result pairings AFTER truncation (issue #4367) + const repaired = sanitizeToolUseResultPairing(limited); + cacheTrace?.recordStage("session:limited", { messages: repaired }); + if (repaired.length > 0) { + activeSession.agent.replaceMessages(repaired); } } catch (err) { sessionManager.flushPendingToolResults?.(); diff --git a/src/agents/subagent-announce.ts b/src/agents/subagent-announce.ts index 444726efc..46d902e80 100644 --- a/src/agents/subagent-announce.ts +++ b/src/agents/subagent-announce.ts @@ -192,6 +192,41 @@ async function maybeQueueSubagentAnnounce(params: { return "none"; } +/** + * Build a descriptive error status label from outcome data. + * Includes error type, message, and hint if available. + */ +function buildErrorStatusLabel(outcome: SubagentRunOutcome): string { + const parts: string[] = []; + + // Start with "failed" + parts.push("failed"); + + // Add error type context + if (outcome.errorType) { + const typeLabel: Record = { + model: "API error", + tool: "tool error", + network: "network error", + config: "configuration error", + timeout: "timeout", + }; + const label = typeLabel[outcome.errorType] || "error"; + parts.push(`(${label}):`); + } + + // Add error message + const errorMsg = outcome.error || "unknown error"; + parts.push(errorMsg); + + // Add hint if available + if (outcome.errorHint) { + parts.push(`— ${outcome.errorHint}`); + } + + return parts.join(" "); +} + async function buildSubagentStatsLine(params: { sessionKey: string; startedAt?: number; @@ -299,6 +334,8 @@ export function buildSubagentSystemPrompt(params: { export type SubagentRunOutcome = { status: "ok" | "error" | "timeout" | "unknown"; error?: string; + errorType?: "model" | "tool" | "network" | "config" | "timeout" | "unknown"; + errorHint?: string; }; export async function runSubagentAnnounceFlow(params: { @@ -380,7 +417,7 @@ export async function runSubagentAnnounceFlow(params: { : outcome.status === "timeout" ? "timed out" : outcome.status === "error" - ? `failed: ${outcome.error || "unknown error"}` + ? buildErrorStatusLabel(outcome) : "finished with unknown status"; // Build instructional message for main agent diff --git a/src/agents/subagent-registry.ts b/src/agents/subagent-registry.ts index d325e40e2..dca685b72 100644 --- a/src/agents/subagent-registry.ts +++ b/src/agents/subagent-registry.ts @@ -184,7 +184,16 @@ function ensureListener() { entry.endedAt = endedAt; if (phase === "error") { const error = typeof evt.data?.error === "string" ? (evt.data.error as string) : undefined; - entry.outcome = { status: "error", error }; + const errorType = + typeof evt.data?.errorType === "string" ? (evt.data.errorType as string) : undefined; + const errorHint = + typeof evt.data?.errorHint === "string" ? (evt.data.errorHint as string) : undefined; + entry.outcome = { + status: "error", + error, + errorType: errorType as SubagentRunOutcome["errorType"], + errorHint, + }; } else { entry.outcome = { status: "ok" }; } diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 21732f49f..266a483e8 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -51,6 +51,85 @@ export type AgentRunLoopResult = } | { kind: "final"; payload: ReplyPayload }; +/** + * Categorize errors to provide better error messages to users. + * Returns error message, type, and optional hint for remediation. + */ +export function categorizeError(err: unknown): { + message: string; + type: "model" | "tool" | "network" | "config" | "timeout" | "unknown"; + hint?: string; +} { + const message = err instanceof Error ? err.message : String(err); + + // File system errors + if (message.includes("ENOENT") || message.includes("ENOTDIR")) { + return { message, type: "tool", hint: "File or directory not found" }; + } + if (message.includes("EACCES") || message.includes("EPERM")) { + return { message, type: "tool", hint: "Permission denied" }; + } + if (message.includes("EISDIR")) { + return { message, type: "tool", hint: "Expected file but found directory" }; + } + + // API/Model errors + if (message.includes("rate limit") || message.includes("429")) { + return { message, type: "model", hint: "Rate limit exceeded - retry in a few moments" }; + } + if ( + message.includes("401") || + message.includes("unauthorized") || + message.includes("authentication") + ) { + return { message, type: "config", hint: "Check API credentials and permissions" }; + } + if (message.includes("403") || message.includes("forbidden")) { + return { message, type: "config", hint: "Access denied - check permissions" }; + } + if (message.includes("400") || message.includes("invalid request")) { + return { message, type: "model", hint: "Invalid request parameters" }; + } + if (message.includes("500") || message.includes("503")) { + return { message, type: "model", hint: "API service error - try again later" }; + } + if (message.includes("quota") || message.includes("billing")) { + return { message, type: "config", hint: "Check billing and API quota limits" }; + } + + // Network errors + if (message.includes("ECONNREFUSED") || message.includes("ETIMEDOUT")) { + return { message, type: "network", hint: "Connection failed - check network connectivity" }; + } + if (message.includes("ENOTFOUND") || message.includes("DNS") || message.includes("EAI_AGAIN")) { + return { message, type: "network", hint: "DNS resolution failed - check hostname" }; + } + if (message.includes("ENETUNREACH") || message.includes("EHOSTUNREACH")) { + return { message, type: "network", hint: "Network unreachable - check connection" }; + } + + // Timeout errors + if ( + message.toLowerCase().includes("timeout") || + message.toLowerCase().includes("timed out") || + message.includes("ETIMEDOUT") + ) { + return { message, type: "timeout", hint: "Operation took too long - try increasing timeout" }; + } + + // Context/memory errors + if (message.includes("context") && message.includes("too large")) { + return { message, type: "model", hint: "Conversation too long - try clearing history" }; + } + + // Missing environment/config + if (message.includes("missing") && (message.includes("key") || message.includes("token"))) { + return { message, type: "config", hint: "Missing required configuration or credentials" }; + } + + return { message, type: "unknown" }; +} + export async function runAgentTurnWithFallback(params: { commandBody: string; followupRun: FollowupRun; @@ -204,6 +283,7 @@ export async function runAgentTurnWithFallback(params: { return result; }) .catch((err) => { + const { message, type, hint } = categorizeError(err); emitAgentEvent({ runId, stream: "lifecycle", @@ -211,7 +291,9 @@ export async function runAgentTurnWithFallback(params: { phase: "error", startedAt, endedAt: Date.now(), - error: err instanceof Error ? err.message : String(err), + error: message, + errorType: type, + errorHint: hint, }, }); throw err; diff --git a/src/auto-reply/reply/categorize-error.test.ts b/src/auto-reply/reply/categorize-error.test.ts new file mode 100644 index 000000000..17c85c58a --- /dev/null +++ b/src/auto-reply/reply/categorize-error.test.ts @@ -0,0 +1,318 @@ +import { describe, expect, it } from "vitest"; + +import { categorizeError } from "./agent-runner-execution.js"; + +describe("categorizeError", () => { + describe("timeout errors", () => { + it("categorizes lowercase 'timeout' as timeout type", () => { + const error = new Error("Request timeout after 30s"); + const result = categorizeError(error); + + expect(result.type).toBe("timeout"); + expect(result.message).toBe("Request timeout after 30s"); + expect(result.hint).toBe("Operation took too long - try increasing timeout"); + }); + + it("categorizes 'timed out' as timeout type", () => { + const error = new Error("Connection timed out"); + const result = categorizeError(error); + + expect(result.type).toBe("timeout"); + expect(result.hint).toBe("Operation took too long - try increasing timeout"); + }); + + it("categorizes ETIMEDOUT as network type (network error code takes precedence)", () => { + const error = new Error("ETIMEDOUT: socket hang up"); + const result = categorizeError(error); + + // ETIMEDOUT is caught by network errors before timeout section + expect(result.type).toBe("network"); + expect(result.hint).toBe("Connection failed - check network connectivity"); + }); + + it("handles uppercase TIMEOUT", () => { + const error = new Error("TIMEOUT ERROR"); + const result = categorizeError(error); + + expect(result.type).toBe("timeout"); + }); + }); + + describe("authentication errors", () => { + it("categorizes 401 as config type", () => { + const error = new Error("HTTP 401: Unauthorized"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + expect(result.message).toBe("HTTP 401: Unauthorized"); + expect(result.hint).toBe("Check API credentials and permissions"); + }); + + it("categorizes 'unauthorized' as config type", () => { + const error = new Error("Request failed: unauthorized access"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + expect(result.hint).toBe("Check API credentials and permissions"); + }); + + it("categorizes 'authentication' errors as config type (case-sensitive)", () => { + const error = new Error("authentication failed for API key"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + expect(result.hint).toBe("Check API credentials and permissions"); + }); + + it("categorizes 403 forbidden as config type", () => { + const error = new Error("HTTP 403 forbidden"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + expect(result.hint).toBe("Access denied - check permissions"); + }); + + it("categorizes 'forbidden' keyword as config type", () => { + const error = new Error("Access forbidden to resource"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + }); + }); + + describe("rate limit errors", () => { + it("categorizes 'rate limit' as model type", () => { + const error = new Error("rate limit exceeded"); + const result = categorizeError(error); + + expect(result.type).toBe("model"); + expect(result.message).toBe("rate limit exceeded"); + expect(result.hint).toBe("Rate limit exceeded - retry in a few moments"); + }); + + it("categorizes HTTP 429 as model type", () => { + const error = new Error("HTTP 429: Too Many Requests"); + const result = categorizeError(error); + + expect(result.type).toBe("model"); + expect(result.hint).toBe("Rate limit exceeded - retry in a few moments"); + }); + + it("handles rate limit with mixed case", () => { + const error = new Error("rate limit exceeded"); + const result = categorizeError(error); + + expect(result.type).toBe("model"); + }); + }); + + describe("unknown errors", () => { + it("categorizes unrecognized error as unknown type", () => { + const error = new Error("Something weird happened"); + const result = categorizeError(error); + + expect(result.type).toBe("unknown"); + expect(result.message).toBe("Something weird happened"); + expect(result.hint).toBeUndefined(); + }); + + it("categorizes generic error message as unknown", () => { + const error = new Error("An unexpected error occurred"); + const result = categorizeError(error); + + expect(result.type).toBe("unknown"); + expect(result.hint).toBeUndefined(); + }); + + it("handles non-Error objects", () => { + const result = categorizeError("plain string error"); + + expect(result.type).toBe("unknown"); + expect(result.message).toBe("plain string error"); + }); + + it("handles null/undefined errors", () => { + const result = categorizeError(null); + + expect(result.type).toBe("unknown"); + expect(result.message).toBe("null"); + }); + }); + + describe("API/model errors", () => { + it("categorizes HTTP 400 as model type", () => { + const error = new Error("HTTP 400: Bad Request"); + const result = categorizeError(error); + + expect(result.type).toBe("model"); + expect(result.hint).toBe("Invalid request parameters"); + }); + + it("categorizes 'invalid request' as model type", () => { + const error = new Error("invalid request format"); + const result = categorizeError(error); + + expect(result.type).toBe("model"); + }); + + it("categorizes HTTP 500 as model type", () => { + const error = new Error("HTTP 500: Internal Server Error"); + const result = categorizeError(error); + + expect(result.type).toBe("model"); + expect(result.hint).toBe("API service error - try again later"); + }); + + it("categorizes HTTP 503 as model type", () => { + const error = new Error("HTTP 503: Service Unavailable"); + const result = categorizeError(error); + + expect(result.type).toBe("model"); + expect(result.hint).toBe("API service error - try again later"); + }); + + it("categorizes quota errors as config type", () => { + const error = new Error("quota exceeded for this account"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + expect(result.hint).toBe("Check billing and API quota limits"); + }); + + it("categorizes billing errors as config type", () => { + const error = new Error("billing issue detected"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + expect(result.hint).toBe("Check billing and API quota limits"); + }); + }); + + describe("network errors", () => { + it("categorizes ECONNREFUSED as network type", () => { + const error = new Error("ECONNREFUSED: Connection refused"); + const result = categorizeError(error); + + expect(result.type).toBe("network"); + expect(result.hint).toBe("Connection failed - check network connectivity"); + }); + + it("categorizes ENOTFOUND as network type", () => { + const error = new Error("ENOTFOUND: DNS lookup failed"); + const result = categorizeError(error); + + expect(result.type).toBe("network"); + expect(result.hint).toBe("DNS resolution failed - check hostname"); + }); + + it("categorizes DNS errors as network type", () => { + const error = new Error("DNS resolution error"); + const result = categorizeError(error); + + expect(result.type).toBe("network"); + }); + + it("categorizes EAI_AGAIN as network type", () => { + const error = new Error("EAI_AGAIN: temporary failure"); + const result = categorizeError(error); + + expect(result.type).toBe("network"); + expect(result.hint).toBe("DNS resolution failed - check hostname"); + }); + + it("categorizes ENETUNREACH as network type", () => { + const error = new Error("ENETUNREACH: Network is unreachable"); + const result = categorizeError(error); + + expect(result.type).toBe("network"); + expect(result.hint).toBe("Network unreachable - check connection"); + }); + + it("categorizes EHOSTUNREACH as network type", () => { + const error = new Error("EHOSTUNREACH: No route to host"); + const result = categorizeError(error); + + expect(result.type).toBe("network"); + expect(result.hint).toBe("Network unreachable - check connection"); + }); + }); + + describe("file system errors (tool type)", () => { + it("categorizes ENOENT as tool type", () => { + const error = new Error("ENOENT: no such file or directory"); + const result = categorizeError(error); + + expect(result.type).toBe("tool"); + expect(result.hint).toBe("File or directory not found"); + }); + + it("categorizes ENOTDIR as tool type", () => { + const error = new Error("ENOTDIR: not a directory"); + const result = categorizeError(error); + + expect(result.type).toBe("tool"); + expect(result.hint).toBe("File or directory not found"); + }); + + it("categorizes EACCES as tool type", () => { + const error = new Error("EACCES: permission denied"); + const result = categorizeError(error); + + expect(result.type).toBe("tool"); + expect(result.hint).toBe("Permission denied"); + }); + + it("categorizes EPERM as tool type", () => { + const error = new Error("EPERM: operation not permitted"); + const result = categorizeError(error); + + expect(result.type).toBe("tool"); + expect(result.hint).toBe("Permission denied"); + }); + + it("categorizes EISDIR as tool type", () => { + const error = new Error("EISDIR: illegal operation on a directory"); + const result = categorizeError(error); + + expect(result.type).toBe("tool"); + expect(result.hint).toBe("Expected file but found directory"); + }); + }); + + describe("configuration errors", () => { + it("categorizes missing API key as config type", () => { + const error = new Error("missing API key"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + expect(result.hint).toBe("Missing required configuration or credentials"); + }); + + it("categorizes missing token as config type", () => { + const error = new Error("missing authentication token"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + // "authentication" keyword triggers auth error hint first + expect(result.hint).toBe("Check API credentials and permissions"); + }); + + it("categorizes missing API token without authentication keyword", () => { + const error = new Error("missing API token for request"); + const result = categorizeError(error); + + expect(result.type).toBe("config"); + expect(result.hint).toBe("Missing required configuration or credentials"); + }); + }); + + describe("context/memory errors", () => { + it("categorizes context too large as model type", () => { + const error = new Error("context window too large"); + const result = categorizeError(error); + + expect(result.type).toBe("model"); + expect(result.hint).toBe("Conversation too long - try clearing history"); + }); + }); +}); diff --git a/src/hooks/llm-slug-generator.ts b/src/hooks/llm-slug-generator.ts index c52627176..024262a5d 100644 --- a/src/hooks/llm-slug-generator.ts +++ b/src/hooks/llm-slug-generator.ts @@ -12,6 +12,7 @@ import { resolveAgentWorkspaceDir, resolveAgentDir, } from "../agents/agent-scope.js"; +import { resolveDefaultModelForAgent } from "../agents/model-selection.js"; /** * Generate a short 1-2 word filename slug from session content using LLM @@ -38,6 +39,11 @@ ${params.sessionContent.slice(0, 2000)} Reply with ONLY the slug, nothing else. Examples: "vendor-pitch", "api-design", "bug-fix"`; + // Resolve user's configured default model instead of hardcoded Opus + const { provider, model } = resolveDefaultModelForAgent({ + cfg: params.cfg, + }); + const result = await runEmbeddedPiAgent({ sessionId: `slug-generator-${Date.now()}`, sessionKey: "temp:slug-generator", @@ -46,6 +52,8 @@ Reply with ONLY the slug, nothing else. Examples: "vendor-pitch", "api-design", agentDir, config: params.cfg, prompt, + provider, + model, timeoutMs: 15_000, // 15 second timeout runId: `slug-gen-${Date.now()}`, }); @@ -75,7 +83,10 @@ Reply with ONLY the slug, nothing else. Examples: "vendor-pitch", "api-design", // Clean up temporary session file if (tempSessionFile) { try { - await fs.rm(path.dirname(tempSessionFile), { recursive: true, force: true }); + await fs.rm(path.dirname(tempSessionFile), { + recursive: true, + force: true, + }); } catch { // Ignore cleanup errors }