From 480fd42b2e7f03adbd2922e10152d40dc1f43345 Mon Sep 17 00:00:00 2001 From: spiceoogway Date: Fri, 30 Jan 2026 00:10:57 -0500 Subject: [PATCH 1/3] fix: repair tool_use/tool_result pairings after history truncation (fixes #4367) The message processing pipeline had a synchronization bug where limitHistoryTurns() truncated conversation history AFTER repairToolUseResultPairing() had already fixed tool_use/tool_result pairings. This could split assistant messages (with tool_use) from their corresponding tool_result blocks, creating orphaned tool_result blocks that the Anthropic API rejects. This fix calls sanitizeToolUseResultPairing() AFTER limitHistoryTurns() to repair any pairings broken by truncation, ensuring the transcript remains valid before being sent to the LLM API. Changes: - Added import for sanitizeToolUseResultPairing from session-transcript-repair.js - Call sanitizeToolUseResultPairing() on the limited message array - Updated variable name from 'limited' to 'repaired' for clarity --- src/agents/pi-embedded-runner/run/attempt.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index e83c3ae4a..622bdb7f4 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -52,6 +52,7 @@ import { import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { resolveDefaultModelForAgent } from "../../model-selection.js"; +import { sanitizeToolUseResultPairing } from "../../session-transcript-repair.js"; import { isAbortError } from "../abort.js"; import { buildEmbeddedExtensionPaths } from "../extensions.js"; @@ -535,9 +536,11 @@ export async function runEmbeddedAttempt( validated, getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), ); - cacheTrace?.recordStage("session:limited", { messages: limited }); - if (limited.length > 0) { - activeSession.agent.replaceMessages(limited); + // Fix: Repair tool_use/tool_result pairings AFTER truncation (issue #4367) + const repaired = sanitizeToolUseResultPairing(limited); + cacheTrace?.recordStage("session:limited", { messages: repaired }); + if (repaired.length > 0) { + activeSession.agent.replaceMessages(repaired); } } catch (err) { sessionManager.flushPendingToolResults?.(); From 080a4dbd86be207efaac17604879ff9209bb7e12 Mon Sep 17 00:00:00 2001 From: spiceoogway Date: Fri, 30 Jan 2026 00:26:18 -0500 Subject: [PATCH 2/3] Improve subagent error messages with categorization and hints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Enhanced SubagentRunOutcome type with errorType and errorHint fields - Added categorizeError() helper to classify common error patterns: * File system errors (ENOENT, EACCES, etc.) * API/model errors (rate limits, auth failures, invalid requests) * Network errors (connection refused, DNS failures) * Timeout errors * Configuration errors (missing credentials, quota limits) - Updated error emission in agent-runner-execution.ts to categorize errors - Updated subagent-registry.ts to capture and propagate new error fields - Added buildErrorStatusLabel() helper for user-friendly error messages - Error announcements now include error type and remediation hints Example improved messages: - Before: 'failed: unknown error' - After: 'failed (tool error): ENOENT — File or directory not found' This makes subagent failures much easier to understand and debug while maintaining backward compatibility. --- src/agents/subagent-announce.ts | 39 ++++++++- src/agents/subagent-registry.ts | 11 ++- .../reply/agent-runner-execution.ts | 84 ++++++++++++++++++- 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/src/agents/subagent-announce.ts b/src/agents/subagent-announce.ts index 444726efc..46d902e80 100644 --- a/src/agents/subagent-announce.ts +++ b/src/agents/subagent-announce.ts @@ -192,6 +192,41 @@ async function maybeQueueSubagentAnnounce(params: { return "none"; } +/** + * Build a descriptive error status label from outcome data. + * Includes error type, message, and hint if available. + */ +function buildErrorStatusLabel(outcome: SubagentRunOutcome): string { + const parts: string[] = []; + + // Start with "failed" + parts.push("failed"); + + // Add error type context + if (outcome.errorType) { + const typeLabel: Record = { + model: "API error", + tool: "tool error", + network: "network error", + config: "configuration error", + timeout: "timeout", + }; + const label = typeLabel[outcome.errorType] || "error"; + parts.push(`(${label}):`); + } + + // Add error message + const errorMsg = outcome.error || "unknown error"; + parts.push(errorMsg); + + // Add hint if available + if (outcome.errorHint) { + parts.push(`— ${outcome.errorHint}`); + } + + return parts.join(" "); +} + async function buildSubagentStatsLine(params: { sessionKey: string; startedAt?: number; @@ -299,6 +334,8 @@ export function buildSubagentSystemPrompt(params: { export type SubagentRunOutcome = { status: "ok" | "error" | "timeout" | "unknown"; error?: string; + errorType?: "model" | "tool" | "network" | "config" | "timeout" | "unknown"; + errorHint?: string; }; export async function runSubagentAnnounceFlow(params: { @@ -380,7 +417,7 @@ export async function runSubagentAnnounceFlow(params: { : outcome.status === "timeout" ? "timed out" : outcome.status === "error" - ? `failed: ${outcome.error || "unknown error"}` + ? buildErrorStatusLabel(outcome) : "finished with unknown status"; // Build instructional message for main agent diff --git a/src/agents/subagent-registry.ts b/src/agents/subagent-registry.ts index d325e40e2..dca685b72 100644 --- a/src/agents/subagent-registry.ts +++ b/src/agents/subagent-registry.ts @@ -184,7 +184,16 @@ function ensureListener() { entry.endedAt = endedAt; if (phase === "error") { const error = typeof evt.data?.error === "string" ? (evt.data.error as string) : undefined; - entry.outcome = { status: "error", error }; + const errorType = + typeof evt.data?.errorType === "string" ? (evt.data.errorType as string) : undefined; + const errorHint = + typeof evt.data?.errorHint === "string" ? (evt.data.errorHint as string) : undefined; + entry.outcome = { + status: "error", + error, + errorType: errorType as SubagentRunOutcome["errorType"], + errorHint, + }; } else { entry.outcome = { status: "ok" }; } diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 21732f49f..36305bc68 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -51,6 +51,85 @@ export type AgentRunLoopResult = } | { kind: "final"; payload: ReplyPayload }; +/** + * Categorize errors to provide better error messages to users. + * Returns error message, type, and optional hint for remediation. + */ +function categorizeError(err: unknown): { + message: string; + type: "model" | "tool" | "network" | "config" | "timeout" | "unknown"; + hint?: string; +} { + const message = err instanceof Error ? err.message : String(err); + + // File system errors + if (message.includes("ENOENT") || message.includes("ENOTDIR")) { + return { message, type: "tool", hint: "File or directory not found" }; + } + if (message.includes("EACCES") || message.includes("EPERM")) { + return { message, type: "tool", hint: "Permission denied" }; + } + if (message.includes("EISDIR")) { + return { message, type: "tool", hint: "Expected file but found directory" }; + } + + // API/Model errors + if (message.includes("rate limit") || message.includes("429")) { + return { message, type: "model", hint: "Rate limit exceeded - retry in a few moments" }; + } + if ( + message.includes("401") || + message.includes("unauthorized") || + message.includes("authentication") + ) { + return { message, type: "config", hint: "Check API credentials and permissions" }; + } + if (message.includes("403") || message.includes("forbidden")) { + return { message, type: "config", hint: "Access denied - check permissions" }; + } + if (message.includes("400") || message.includes("invalid request")) { + return { message, type: "model", hint: "Invalid request parameters" }; + } + if (message.includes("500") || message.includes("503")) { + return { message, type: "model", hint: "API service error - try again later" }; + } + if (message.includes("quota") || message.includes("billing")) { + return { message, type: "config", hint: "Check billing and API quota limits" }; + } + + // Network errors + if (message.includes("ECONNREFUSED") || message.includes("ETIMEDOUT")) { + return { message, type: "network", hint: "Connection failed - check network connectivity" }; + } + if (message.includes("ENOTFOUND") || message.includes("DNS") || message.includes("EAI_AGAIN")) { + return { message, type: "network", hint: "DNS resolution failed - check hostname" }; + } + if (message.includes("ENETUNREACH") || message.includes("EHOSTUNREACH")) { + return { message, type: "network", hint: "Network unreachable - check connection" }; + } + + // Timeout errors + if ( + message.toLowerCase().includes("timeout") || + message.toLowerCase().includes("timed out") || + message.includes("ETIMEDOUT") + ) { + return { message, type: "timeout", hint: "Operation took too long - try increasing timeout" }; + } + + // Context/memory errors + if (message.includes("context") && message.includes("too large")) { + return { message, type: "model", hint: "Conversation too long - try clearing history" }; + } + + // Missing environment/config + if (message.includes("missing") && (message.includes("key") || message.includes("token"))) { + return { message, type: "config", hint: "Missing required configuration or credentials" }; + } + + return { message, type: "unknown" }; +} + export async function runAgentTurnWithFallback(params: { commandBody: string; followupRun: FollowupRun; @@ -204,6 +283,7 @@ export async function runAgentTurnWithFallback(params: { return result; }) .catch((err) => { + const { message, type, hint } = categorizeError(err); emitAgentEvent({ runId, stream: "lifecycle", @@ -211,7 +291,9 @@ export async function runAgentTurnWithFallback(params: { phase: "error", startedAt, endedAt: Date.now(), - error: err instanceof Error ? err.message : String(err), + error: message, + errorType: type, + errorHint: hint, }, }); throw err; From d03adcc6a29f0392c54d1e54a07e6a11a89010c2 Mon Sep 17 00:00:00 2001 From: spiceoogway Date: Fri, 30 Jan 2026 02:10:09 -0500 Subject: [PATCH 3/3] test: add vitest tests for slug generator model resolution Verifies that generateSlugViaLLM respects the configured default model instead of using hardcoded Opus. Tests cover: - Model resolution from agents.defaults.model.primary - Custom provider support (e.g., OpenRouter) - Fallback to DEFAULT_MODEL when no config - Alternative models (Haiku, Sonnet) - Slug generation and cleanup logic - Parameter passing to runEmbeddedPiAgent All tests use mocked runEmbeddedPiAgent to avoid actual LLM calls. Relates to #4315 --- src/hooks/llm-slug-generator.test.ts | 313 +++++++++++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 src/hooks/llm-slug-generator.test.ts diff --git a/src/hooks/llm-slug-generator.test.ts b/src/hooks/llm-slug-generator.test.ts new file mode 100644 index 000000000..ba65ee6c3 --- /dev/null +++ b/src/hooks/llm-slug-generator.test.ts @@ -0,0 +1,313 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { generateSlugViaLLM } from "./llm-slug-generator.js"; +import type { OpenClawConfig } from "../config/config.js"; +import type { EmbeddedPiRunResult } from "../agents/pi-embedded.js"; + +const runEmbeddedPiAgentMock = vi.fn< + Parameters, + Promise +>(); + +vi.mock("../agents/pi-embedded.js", () => ({ + runEmbeddedPiAgent: (...args: unknown[]) => runEmbeddedPiAgentMock(...args), +})); + +describe("generateSlugViaLLM", () => { + beforeEach(() => { + runEmbeddedPiAgentMock.mockReset(); + }); + + describe("model resolution", () => { + it("uses configured default model from agents.defaults.model.primary", async () => { + // Arrange: Config with custom default model (Sonnet instead of Opus) + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { + primary: "anthropic/claude-sonnet-4-5", + }, + }, + }, + }; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "test-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + // Act + await generateSlugViaLLM({ + sessionContent: "Some conversation about testing", + cfg, + }); + + // Assert: Should use Sonnet, not hardcoded Opus + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + expect(callArgs).toMatchObject({ + provider: "anthropic", + model: "claude-sonnet-4-5", + config: cfg, + }); + }); + + it("uses configured default model with custom provider", async () => { + // Arrange: Config with OpenRouter model + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { + primary: "openrouter/anthropic/claude-3.5-sonnet", + }, + }, + }, + }; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "custom-provider-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + // Act + await generateSlugViaLLM({ + sessionContent: "Testing custom provider", + cfg, + }); + + // Assert: Should use OpenRouter provider + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + expect(callArgs).toMatchObject({ + provider: "openrouter", + model: "anthropic/claude-3.5-sonnet", + config: cfg, + }); + }); + + it("falls back to DEFAULT_MODEL when no model.primary is configured", async () => { + // Arrange: Empty config (no custom model) + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "default-model-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + // Act + await generateSlugViaLLM({ + sessionContent: "Testing default fallback", + cfg, + }); + + // Assert: Should fall back to DEFAULT_MODEL (claude-opus-4-5) + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + expect(callArgs).toMatchObject({ + provider: "anthropic", + model: "claude-opus-4-5", + config: cfg, + }); + }); + + it("uses Haiku when configured as default model", async () => { + // Arrange: Config with Haiku as default + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { + primary: "anthropic/claude-3-5-haiku-20241022", + }, + }, + }, + }; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "haiku-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + // Act + await generateSlugViaLLM({ + sessionContent: "Budget-friendly slug generation", + cfg, + }); + + // Assert: Should use Haiku + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + expect(callArgs).toMatchObject({ + provider: "anthropic", + model: "claude-3-5-haiku-20241022", + config: cfg, + }); + }); + }); + + describe("slug generation", () => { + it("generates valid slug from LLM response", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "api-design" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Discussion about API design patterns", + cfg, + }); + + expect(slug).toBe("api-design"); + }); + + it("cleans up LLM response with extra characters", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: " Bug Fix! " }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Bug fix discussion", + cfg, + }); + + expect(slug).toBe("bug-fix"); + }); + + it("truncates long slugs to 30 characters", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "this-is-a-very-long-slug-that-should-be-truncated-significantly" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Long discussion", + cfg, + }); + + // Verify length is enforced (implementation slices before final cleanup) + expect(slug?.length).toBeLessThanOrEqual(30); + expect(slug).toMatch(/^this-is-a-very-long-slug/); + }); + + it("returns null when LLM returns no payloads", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Test content", + cfg, + }); + + expect(slug).toBeNull(); + }); + + it("returns null when LLM returns empty text", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + const slug = await generateSlugViaLLM({ + sessionContent: "Test content", + cfg, + }); + + expect(slug).toBeNull(); + }); + + it("returns null when embedded agent throws error", async () => { + const cfg: OpenClawConfig = {}; + + runEmbeddedPiAgentMock.mockRejectedValue(new Error("LLM timeout")); + + const slug = await generateSlugViaLLM({ + sessionContent: "Test content", + cfg, + }); + + expect(slug).toBeNull(); + }); + }); + + describe("embedded agent parameters", () => { + it("passes correct parameters to runEmbeddedPiAgent", async () => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { + primary: "anthropic/claude-sonnet-4-5", + }, + }, + }, + }; + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "test-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + await generateSlugViaLLM({ + sessionContent: "Testing parameters", + cfg, + }); + + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + + // Verify all expected parameters are present + expect(callArgs).toMatchObject({ + config: cfg, + provider: "anthropic", + model: "claude-sonnet-4-5", + timeoutMs: 15_000, + }); + + // Verify prompt includes session content + expect(callArgs?.prompt).toContain("Testing parameters"); + expect(callArgs?.prompt).toContain("1-2 word filename slug"); + + // Verify session parameters + expect(callArgs?.sessionId).toMatch(/^slug-generator-\d+$/); + expect(callArgs?.sessionKey).toBe("temp:slug-generator"); + expect(callArgs?.runId).toMatch(/^slug-gen-\d+$/); + + // Verify temp session file path + expect(callArgs?.sessionFile).toContain("openclaw-slug-"); + expect(callArgs?.sessionFile).toContain("session.jsonl"); + }); + + it("truncates session content to 2000 characters in prompt", async () => { + const cfg: OpenClawConfig = {}; + const longContent = "a".repeat(3000); + + runEmbeddedPiAgentMock.mockResolvedValue({ + payloads: [{ text: "truncated-slug" }], + meta: { runId: "test-run", sessionId: "test-session" }, + }); + + await generateSlugViaLLM({ + sessionContent: longContent, + cfg, + }); + + const callArgs = runEmbeddedPiAgentMock.mock.calls[0]?.[0]; + const promptContentMatch = callArgs?.prompt.match( + /Conversation summary:\n([\s\S]+)\n\nReply/, + ); + const extractedContent = promptContentMatch?.[1] ?? ""; + + expect(extractedContent.length).toBeLessThanOrEqual(2000); + }); + }); +});