diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index e83c3ae4a..622bdb7f4 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -52,6 +52,7 @@ import { import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { resolveDefaultModelForAgent } from "../../model-selection.js"; +import { sanitizeToolUseResultPairing } from "../../session-transcript-repair.js"; import { isAbortError } from "../abort.js"; import { buildEmbeddedExtensionPaths } from "../extensions.js"; @@ -535,9 +536,11 @@ export async function runEmbeddedAttempt( validated, getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), ); - cacheTrace?.recordStage("session:limited", { messages: limited }); - if (limited.length > 0) { - activeSession.agent.replaceMessages(limited); + // Fix: Repair tool_use/tool_result pairings AFTER truncation (issue #4367) + const repaired = sanitizeToolUseResultPairing(limited); + cacheTrace?.recordStage("session:limited", { messages: repaired }); + if (repaired.length > 0) { + activeSession.agent.replaceMessages(repaired); } } catch (err) { sessionManager.flushPendingToolResults?.(); diff --git a/src/agents/subagent-announce.ts b/src/agents/subagent-announce.ts index 444726efc..46d902e80 100644 --- a/src/agents/subagent-announce.ts +++ b/src/agents/subagent-announce.ts @@ -192,6 +192,41 @@ async function maybeQueueSubagentAnnounce(params: { return "none"; } +/** + * Build a descriptive error status label from outcome data. + * Includes error type, message, and hint if available. + */ +function buildErrorStatusLabel(outcome: SubagentRunOutcome): string { + const parts: string[] = []; + + // Start with "failed" + parts.push("failed"); + + // Add error type context + if (outcome.errorType) { + const typeLabel: Record = { + model: "API error", + tool: "tool error", + network: "network error", + config: "configuration error", + timeout: "timeout", + }; + const label = typeLabel[outcome.errorType] || "error"; + parts.push(`(${label}):`); + } + + // Add error message + const errorMsg = outcome.error || "unknown error"; + parts.push(errorMsg); + + // Add hint if available + if (outcome.errorHint) { + parts.push(`— ${outcome.errorHint}`); + } + + return parts.join(" "); +} + async function buildSubagentStatsLine(params: { sessionKey: string; startedAt?: number; @@ -299,6 +334,8 @@ export function buildSubagentSystemPrompt(params: { export type SubagentRunOutcome = { status: "ok" | "error" | "timeout" | "unknown"; error?: string; + errorType?: "model" | "tool" | "network" | "config" | "timeout" | "unknown"; + errorHint?: string; }; export async function runSubagentAnnounceFlow(params: { @@ -380,7 +417,7 @@ export async function runSubagentAnnounceFlow(params: { : outcome.status === "timeout" ? "timed out" : outcome.status === "error" - ? `failed: ${outcome.error || "unknown error"}` + ? buildErrorStatusLabel(outcome) : "finished with unknown status"; // Build instructional message for main agent diff --git a/src/agents/subagent-registry.ts b/src/agents/subagent-registry.ts index d325e40e2..dca685b72 100644 --- a/src/agents/subagent-registry.ts +++ b/src/agents/subagent-registry.ts @@ -184,7 +184,16 @@ function ensureListener() { entry.endedAt = endedAt; if (phase === "error") { const error = typeof evt.data?.error === "string" ? (evt.data.error as string) : undefined; - entry.outcome = { status: "error", error }; + const errorType = + typeof evt.data?.errorType === "string" ? (evt.data.errorType as string) : undefined; + const errorHint = + typeof evt.data?.errorHint === "string" ? (evt.data.errorHint as string) : undefined; + entry.outcome = { + status: "error", + error, + errorType: errorType as SubagentRunOutcome["errorType"], + errorHint, + }; } else { entry.outcome = { status: "ok" }; } diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 21732f49f..36305bc68 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -51,6 +51,85 @@ export type AgentRunLoopResult = } | { kind: "final"; payload: ReplyPayload }; +/** + * Categorize errors to provide better error messages to users. + * Returns error message, type, and optional hint for remediation. + */ +function categorizeError(err: unknown): { + message: string; + type: "model" | "tool" | "network" | "config" | "timeout" | "unknown"; + hint?: string; +} { + const message = err instanceof Error ? err.message : String(err); + + // File system errors + if (message.includes("ENOENT") || message.includes("ENOTDIR")) { + return { message, type: "tool", hint: "File or directory not found" }; + } + if (message.includes("EACCES") || message.includes("EPERM")) { + return { message, type: "tool", hint: "Permission denied" }; + } + if (message.includes("EISDIR")) { + return { message, type: "tool", hint: "Expected file but found directory" }; + } + + // API/Model errors + if (message.includes("rate limit") || message.includes("429")) { + return { message, type: "model", hint: "Rate limit exceeded - retry in a few moments" }; + } + if ( + message.includes("401") || + message.includes("unauthorized") || + message.includes("authentication") + ) { + return { message, type: "config", hint: "Check API credentials and permissions" }; + } + if (message.includes("403") || message.includes("forbidden")) { + return { message, type: "config", hint: "Access denied - check permissions" }; + } + if (message.includes("400") || message.includes("invalid request")) { + return { message, type: "model", hint: "Invalid request parameters" }; + } + if (message.includes("500") || message.includes("503")) { + return { message, type: "model", hint: "API service error - try again later" }; + } + if (message.includes("quota") || message.includes("billing")) { + return { message, type: "config", hint: "Check billing and API quota limits" }; + } + + // Network errors + if (message.includes("ECONNREFUSED") || message.includes("ETIMEDOUT")) { + return { message, type: "network", hint: "Connection failed - check network connectivity" }; + } + if (message.includes("ENOTFOUND") || message.includes("DNS") || message.includes("EAI_AGAIN")) { + return { message, type: "network", hint: "DNS resolution failed - check hostname" }; + } + if (message.includes("ENETUNREACH") || message.includes("EHOSTUNREACH")) { + return { message, type: "network", hint: "Network unreachable - check connection" }; + } + + // Timeout errors + if ( + message.toLowerCase().includes("timeout") || + message.toLowerCase().includes("timed out") || + message.includes("ETIMEDOUT") + ) { + return { message, type: "timeout", hint: "Operation took too long - try increasing timeout" }; + } + + // Context/memory errors + if (message.includes("context") && message.includes("too large")) { + return { message, type: "model", hint: "Conversation too long - try clearing history" }; + } + + // Missing environment/config + if (message.includes("missing") && (message.includes("key") || message.includes("token"))) { + return { message, type: "config", hint: "Missing required configuration or credentials" }; + } + + return { message, type: "unknown" }; +} + export async function runAgentTurnWithFallback(params: { commandBody: string; followupRun: FollowupRun; @@ -204,6 +283,7 @@ export async function runAgentTurnWithFallback(params: { return result; }) .catch((err) => { + const { message, type, hint } = categorizeError(err); emitAgentEvent({ runId, stream: "lifecycle", @@ -211,7 +291,9 @@ export async function runAgentTurnWithFallback(params: { phase: "error", startedAt, endedAt: Date.now(), - error: err instanceof Error ? err.message : String(err), + error: message, + errorType: type, + errorHint: hint, }, }); throw err; diff --git a/src/hooks/llm-slug-generator.test.ts b/src/hooks/llm-slug-generator.test.ts new file mode 100644 index 000000000..ba65ee6c3 --- /dev/null +++ b/src/hooks/llm-slug-generator.test.ts @@ -0,0 +1,313 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { generateSlugViaLLM } from "./llm-slug-generator.js"; +import type { OpenClawConfig } from "../config/config.js"; +import type { EmbeddedPiRunResult } from "../agents/pi-embedded.js"; + +const runEmbeddedPiAgentMock = vi.fn< + Parameters, + Promise +>(); + +vi.mock("../agents/pi-embedded.js", () => ({ + runEmbeddedPiAgent: (...args: unknown[]) => runEmbeddedPiAgentMock(...args), +})); + +describe("generateSlugViaLLM", () => { + beforeEach(() => { + runEmbeddedPiAgentMock.mockReset(); + }); + + describe("model resolution", () => { + it("uses configured default model from agents.defaults.model.primary", async () => { + // Arrange: Config with custom default model (Sonnet instead of Opus) + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { + primary: "anthropic/claude-sonnet-4-5", + }, + }, + }, + }; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "test-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + // Act + await generateSlugViaLLM({ + sessionContent: "Some conversation about testing", + cfg, + }); + + // Assert: Should use Sonnet, not hardcoded Opus + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + expect(callArgs).toMatchObject({ + provider: "anthropic", + model: "claude-sonnet-4-5", + config: cfg, + }); + }); + + it("uses configured default model with custom provider", async () => { + // Arrange: Config with OpenRouter model + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { + primary: "openrouter/anthropic/claude-3.5-sonnet", + }, + }, + }, + }; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "custom-provider-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + // Act + await generateSlugViaLLM({ + sessionContent: "Testing custom provider", + cfg, + }); + + // Assert: Should use OpenRouter provider + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + expect(callArgs).toMatchObject({ + provider: "openrouter", + model: "anthropic/claude-3.5-sonnet", + config: cfg, + }); + }); + + it("falls back to DEFAULT_MODEL when no model.primary is configured", async () => { + // Arrange: Empty config (no custom model) + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "default-model-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + // Act + await generateSlugViaLLM({ + sessionContent: "Testing default fallback", + cfg, + }); + + // Assert: Should fall back to DEFAULT_MODEL (claude-opus-4-5) + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + expect(callArgs).toMatchObject({ + provider: "anthropic", + model: "claude-opus-4-5", + config: cfg, + }); + }); + + it("uses Haiku when configured as default model", async () => { + // Arrange: Config with Haiku as default + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { + primary: "anthropic/claude-3-5-haiku-20241022", + }, + }, + }, + }; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "haiku-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + // Act + await generateSlugViaLLM({ + sessionContent: "Budget-friendly slug generation", + cfg, + }); + + // Assert: Should use Haiku + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + expect(callArgs).toMatchObject({ + provider: "anthropic", + model: "claude-3-5-haiku-20241022", + config: cfg, + }); + }); + }); + + describe("slug generation", () => { + it("generates valid slug from LLM response", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "api-design" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Discussion about API design patterns", + cfg, + }); + + expect(slug).toBe("api-design"); + }); + + it("cleans up LLM response with extra characters", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: " Bug Fix! " }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Bug fix discussion", + cfg, + }); + + expect(slug).toBe("bug-fix"); + }); + + it("truncates long slugs to 30 characters", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "this-is-a-very-long-slug-that-should-be-truncated-significantly" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Long discussion", + cfg, + }); + + // Verify length is enforced (implementation slices before final cleanup) + expect(slug?.length).toBeLessThanOrEqual(30); + expect(slug).toMatch(/^this-is-a-very-long-slug/); + }); + + it("returns null when LLM returns no payloads", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Test content", + cfg, + }); + + expect(slug).toBeNull(); + }); + + it("returns null when LLM returns empty text", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Test content", + cfg, + }); + + expect(slug).toBeNull(); + }); + + it("returns null when embedded agent throws error", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockRejectedValue(new Error("LLM timeout")); + + const slug = await generateSlugViaLLM({ + sessionContent: "Test content", + cfg, + }); + + expect(slug).toBeNull(); + }); + }); + + describe("embedded agent parameters", () => { + it("passes correct parameters to runEmbeddedPiAgent", async () => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { + primary: "anthropic/claude-sonnet-4-5", + }, + }, + }, + }; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "test-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + await generateSlugViaLLM({ + sessionContent: "Testing parameters", + cfg, + }); + + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + + // Verify all expected parameters are present + expect(callArgs).toMatchObject({ + config: cfg, + provider: "anthropic", + model: "claude-sonnet-4-5", + timeoutMs: 15_000, + }); + + // Verify prompt includes session content + expect(callArgs?.prompt).toContain("Testing parameters"); + expect(callArgs?.prompt).toContain("1-2 word filename slug"); + + // Verify session parameters + expect(callArgs?.sessionId).toMatch(/^slug-generator-\d+$/); + expect(callArgs?.sessionKey).toBe("temp:slug-generator"); + expect(callArgs?.runId).toMatch(/^slug-gen-\d+$/); + + // Verify temp session file path + expect(callArgs?.sessionFile).toContain("openclaw-slug-"); + expect(callArgs?.sessionFile).toContain("session.jsonl"); + }); + + it("truncates session content to 2000 characters in prompt", async () => { + const cfg: OpenClawConfig = {}; + const longContent = "a".repeat(3000); + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "truncated-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + await generateSlugViaLLM({ + sessionContent: longContent, + cfg, + }); + + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + const promptContentMatch = callArgs?.prompt.match( + /Conversation summary:\n([\s\S]+)\n\nReply/, + ); + const extractedContent = promptContentMatch?.[1] ?? ""; + + expect(extractedContent.length).toBeLessThanOrEqual(2000); + }); + }); +});