Compare commits

...

4 Commits

Author SHA1 Message Date
Peter Steinberger
adbc4d093e fix: handle repeated compaction failures (#1187) (thanks @fayrose) 2026-01-18 22:02:22 +00:00
Lauren Rosenberg
0c1ac41085 fix: add reserveTokensFloor suggestion to compaction error messages
When context limit is exceeded, the error message now suggests
setting agents.defaults.compaction.reserveTokensFloor to 4000
or higher to prevent future occurrences.
2026-01-18 21:54:04 +00:00
Lauren Rosenberg
a8c204e9fa style: apply prettier formatting 2026-01-18 21:54:04 +00:00
Lauren Rosenberg
1ac6f3ce92 fix: return user-facing error when session reset after compaction failure
Previously, when auto-compaction failed due to context overflow, the system
would reset the session and silently continue the execution loop without
sending any response to the user. This made it appear as if messages were
being ignored.

This change ensures users receive a clear error message explaining that
the context limit was exceeded and the conversation has been reset,
consistent with how role ordering conflicts are already handled.

Fixes the silent failure case where message + compaction exceeds context limits.
2026-01-18 21:54:04 +00:00
4 changed files with 164 additions and 42 deletions

View File

@ -10,6 +10,7 @@ Docs: https://docs.clawd.bot
### Fixes
- Configure: hide OpenRouter auto routing model from the model picker. (#1182) — thanks @zerone0x.
- Agents: retry compaction reset once, then surface a user-facing error on repeat failure. (#1187) — thanks @fayrose.
- macOS: load menu session previews asynchronously so items populate while the menu is open.
- macOS: use label colors for session preview text so previews render in menu subviews.

View File

@ -94,7 +94,7 @@ describe("buildAgentSystemPrompt", () => {
expect(prompt).toContain("- Read: Read file contents");
expect(prompt).toContain("- Exec: Run shell commands");
expect(prompt).toContain(
"Use `Read` to load the SKILL.md at the location listed for that skill.",
"- If exactly one skill clearly applies: read its SKILL.md at <location> with `Read`, then follow it.",
);
expect(prompt).toContain("Clawdbot docs: /tmp/clawd/docs");
expect(prompt).toContain(
@ -188,7 +188,7 @@ describe("buildAgentSystemPrompt", () => {
expect(prompt).toContain("## Skills");
expect(prompt).toContain(
"Use `read` to load the SKILL.md at the location listed for that skill.",
"- If exactly one skill clearly applies: read its SKILL.md at <location> with `read`, then follow it.",
);
});

View File

@ -18,7 +18,10 @@ import {
updateSessionStore,
} from "../../config/sessions.js";
import { logVerbose } from "../../globals.js";
import { emitAgentEvent, registerAgentRunContext } from "../../infra/agent-events.js";
import {
emitAgentEvent,
registerAgentRunContext,
} from "../../infra/agent-events.js";
import { defaultRuntime } from "../../runtime.js";
import {
isMarkdownCapableMessageChannel,
@ -29,11 +32,20 @@ import type { TemplateContext } from "../templating.js";
import type { VerboseLevel } from "../thinking.js";
import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import { buildThreadingToolContext, resolveEnforceFinalTag } from "./agent-runner-utils.js";
import { createBlockReplyPayloadKey, type BlockReplyPipeline } from "./block-reply-pipeline.js";
import {
buildThreadingToolContext,
resolveEnforceFinalTag,
} from "./agent-runner-utils.js";
import {
createBlockReplyPayloadKey,
type BlockReplyPipeline,
} from "./block-reply-pipeline.js";
import type { FollowupRun } from "./queue.js";
import { parseReplyDirectives } from "./reply-directives.js";
import { applyReplyTagsToPayload, isRenderablePayload } from "./reply-payloads.js";
import {
applyReplyTagsToPayload,
isRenderablePayload,
} from "./reply-payloads.js";
import type { TypingSignaler } from "./typing-mode.js";
export type AgentRunLoopResult =
@ -96,9 +108,12 @@ export async function runAgentTurnWithFallback(params: {
while (true) {
try {
const allowPartialStream = !(
params.followupRun.run.reasoningLevel === "stream" && params.opts?.onReasoningStream
params.followupRun.run.reasoningLevel === "stream" &&
params.opts?.onReasoningStream
);
const normalizeStreamingText = (payload: ReplyPayload): { text?: string; skip: boolean } => {
const normalizeStreamingText = (
payload: ReplyPayload,
): { text?: string; skip: boolean } => {
if (!allowPartialStream) return { skip: true };
let text = payload.text;
if (!params.isHeartbeat && text?.includes("HEARTBEAT_OK")) {
@ -122,7 +137,9 @@ export async function runAgentTurnWithFallback(params: {
if (!sanitized.trim()) return { skip: true };
return { text: sanitized, skip: false };
};
const handlePartialForTyping = async (payload: ReplyPayload): Promise<string | undefined> => {
const handlePartialForTyping = async (
payload: ReplyPayload,
): Promise<string | undefined> => {
const { text, skip } = normalizeStreamingText(payload);
if (skip || !text) return undefined;
await params.typingSignals.signalTextDelta(text);
@ -157,7 +174,10 @@ export async function runAgentTurnWithFallback(params: {
startedAt,
},
});
const cliSessionId = getCliSessionId(params.getActiveSessionEntry(), provider);
const cliSessionId = getCliSessionId(
params.getActiveSessionEntry(),
provider,
);
return runCliAgent({
sessionId: params.followupRun.run.sessionId,
sessionKey: params.sessionKey,
@ -207,7 +227,8 @@ export async function runAgentTurnWithFallback(params: {
return runEmbeddedPiAgent({
sessionId: params.followupRun.run.sessionId,
sessionKey: params.sessionKey,
messageProvider: params.sessionCtx.Provider?.trim().toLowerCase() || undefined,
messageProvider:
params.sessionCtx.Provider?.trim().toLowerCase() || undefined,
agentAccountId: params.sessionCtx.AccountId,
// Provider threading context for tool auto-injection
...buildThreadingToolContext({
@ -223,7 +244,10 @@ export async function runAgentTurnWithFallback(params: {
prompt: params.commandBody,
extraSystemPrompt: params.followupRun.run.extraSystemPrompt,
ownerNumbers: params.followupRun.run.ownerNumbers,
enforceFinalTag: resolveEnforceFinalTag(params.followupRun.run, provider),
enforceFinalTag: resolveEnforceFinalTag(
params.followupRun.run,
provider,
),
provider,
model,
authProfileId,
@ -240,7 +264,9 @@ export async function runAgentTurnWithFallback(params: {
params.sessionCtx.Provider,
);
if (!channel) return "markdown";
return isMarkdownCapableMessageChannel(channel) ? "markdown" : "plain";
return isMarkdownCapableMessageChannel(channel)
? "markdown"
: "plain";
})(),
bashElevated: params.followupRun.run.bashElevated,
timeoutMs: params.followupRun.run.timeoutMs,
@ -250,7 +276,11 @@ export async function runAgentTurnWithFallback(params: {
onPartialReply: allowPartialStream
? async (payload) => {
const textForTyping = await handlePartialForTyping(payload);
if (!params.opts?.onPartialReply || textForTyping === undefined) return;
if (
!params.opts?.onPartialReply ||
textForTyping === undefined
)
return;
await params.opts.onPartialReply({
text: textForTyping,
mediaUrls: payload.mediaUrls,
@ -261,7 +291,8 @@ export async function runAgentTurnWithFallback(params: {
await params.typingSignals.signalMessageStart();
},
onReasoningStream:
params.typingSignals.shouldStartOnReasoning || params.opts?.onReasoningStream
params.typingSignals.shouldStartOnReasoning ||
params.opts?.onReasoningStream
? async (payload) => {
await params.typingSignals.signalReasoningDelta();
await params.opts?.onReasoningStream?.({
@ -274,14 +305,16 @@ export async function runAgentTurnWithFallback(params: {
// Trigger typing when tools start executing.
// Must await to ensure typing indicator starts before tool summaries are emitted.
if (evt.stream === "tool") {
const phase = typeof evt.data.phase === "string" ? evt.data.phase : "";
const phase =
typeof evt.data.phase === "string" ? evt.data.phase : "";
if (phase === "start" || phase === "update") {
await params.typingSignals.signalToolStart();
}
}
// Track auto-compaction completion
if (evt.stream === "compaction") {
const phase = typeof evt.data.phase === "string" ? evt.data.phase : "";
const phase =
typeof evt.data.phase === "string" ? evt.data.phase : "";
const willRetry = Boolean(evt.data.willRetry);
if (phase === "end" && !willRetry) {
autoCompactionCompleted = true;
@ -305,14 +338,22 @@ export async function runAgentTurnWithFallback(params: {
params.sessionCtx.MessageSid,
);
// Let through payloads with audioAsVoice flag even if empty (need to track it)
if (!isRenderablePayload(taggedPayload) && !payload.audioAsVoice) return;
const parsed = parseReplyDirectives(taggedPayload.text ?? "", {
currentMessageId: params.sessionCtx.MessageSid,
silentToken: SILENT_REPLY_TOKEN,
});
if (
!isRenderablePayload(taggedPayload) &&
!payload.audioAsVoice
)
return;
const parsed = parseReplyDirectives(
taggedPayload.text ?? "",
{
currentMessageId: params.sessionCtx.MessageSid,
silentToken: SILENT_REPLY_TOKEN,
},
);
const cleaned = parsed.text || undefined;
const hasRenderableMedia =
Boolean(taggedPayload.mediaUrl) || (taggedPayload.mediaUrls?.length ?? 0) > 0;
Boolean(taggedPayload.mediaUrl) ||
(taggedPayload.mediaUrls?.length ?? 0) > 0;
// Skip empty payloads unless they have audioAsVoice flag (need to track it)
if (
!cleaned &&
@ -326,25 +367,35 @@ export async function runAgentTurnWithFallback(params: {
const blockPayload: ReplyPayload = params.applyReplyToMode({
...taggedPayload,
text: cleaned,
audioAsVoice: Boolean(parsed.audioAsVoice || payload.audioAsVoice),
audioAsVoice: Boolean(
parsed.audioAsVoice || payload.audioAsVoice,
),
replyToId: taggedPayload.replyToId ?? parsed.replyToId,
replyToTag: taggedPayload.replyToTag || parsed.replyToTag,
replyToCurrent: taggedPayload.replyToCurrent || parsed.replyToCurrent,
replyToCurrent:
taggedPayload.replyToCurrent || parsed.replyToCurrent,
});
void params.typingSignals
.signalTextDelta(cleaned ?? taggedPayload.text)
.catch((err) => {
logVerbose(`block reply typing signal failed: ${String(err)}`);
logVerbose(
`block reply typing signal failed: ${String(err)}`,
);
});
// Use pipeline if available (block streaming enabled), otherwise send directly
if (params.blockStreamingEnabled && params.blockReplyPipeline) {
if (
params.blockStreamingEnabled &&
params.blockReplyPipeline
) {
params.blockReplyPipeline.enqueue(blockPayload);
} else {
// Send directly when flushing before tool execution (no streaming).
// Track sent key to avoid duplicate in final payloads.
directlySentBlockKeys.add(createBlockReplyPayloadKey(blockPayload));
directlySentBlockKeys.add(
createBlockReplyPayloadKey(blockPayload),
);
await params.opts?.onBlockReply?.(blockPayload);
}
}
@ -390,17 +441,27 @@ export async function runAgentTurnWithFallback(params: {
// Some embedded runs surface context overflow as an error payload instead of throwing.
// Treat those as a session-level failure and auto-recover by starting a fresh session.
const embeddedError = runResult.meta?.error;
if (
embeddedError &&
isContextOverflowError(embeddedError.message) &&
!didResetAfterCompactionFailure &&
(await params.resetSessionAfterCompactionFailure(embeddedError.message))
) {
didResetAfterCompactionFailure = true;
continue;
if (embeddedError && isContextOverflowError(embeddedError.message)) {
if (
!didResetAfterCompactionFailure &&
(await params.resetSessionAfterCompactionFailure(embeddedError.message))
) {
didResetAfterCompactionFailure = true;
continue;
}
if (didResetAfterCompactionFailure) {
return {
kind: "final",
payload: {
text: "⚠️ Context limit exceeded. I've reset our conversation to start fresh - please try again.\n\nTo prevent this, increase your compaction buffer by setting `agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.",
},
};
}
}
if (embeddedError?.kind === "role_ordering") {
const didReset = await params.resetSessionAfterRoleOrderingConflict(embeddedError.message);
const didReset = await params.resetSessionAfterRoleOrderingConflict(
embeddedError.message,
);
if (didReset) {
return {
kind: "final",
@ -418,8 +479,10 @@ export async function runAgentTurnWithFallback(params: {
isContextOverflowError(message) ||
/context.*overflow|too large|context window/i.test(message);
const isCompactionFailure = isCompactionFailureError(message);
const isSessionCorruption = /function call turn comes immediately after/i.test(message);
const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message);
const isSessionCorruption =
/function call turn comes immediately after/i.test(message);
const isRoleOrderingError =
/incorrect role information|roles must alternate/i.test(message);
if (
isCompactionFailure &&
@ -429,8 +492,17 @@ export async function runAgentTurnWithFallback(params: {
didResetAfterCompactionFailure = true;
continue;
}
if (isCompactionFailure && didResetAfterCompactionFailure) {
return {
kind: "final",
payload: {
text: "⚠️ Context limit exceeded during compaction. I've reset our conversation to start fresh - please try again.\n\nTo prevent this, increase your compaction buffer by setting `agents.defaults.compaction.reserveTokensFloor` to 20000 or higher in your config.",
},
};
}
if (isRoleOrderingError) {
const didReset = await params.resetSessionAfterRoleOrderingConflict(message);
const didReset =
await params.resetSessionAfterRoleOrderingConflict(message);
if (didReset) {
return {
kind: "final",
@ -457,7 +529,8 @@ export async function runAgentTurnWithFallback(params: {
try {
// Delete transcript file if it exists
if (corruptedSessionId) {
const transcriptPath = resolveSessionTranscriptPath(corruptedSessionId);
const transcriptPath =
resolveSessionTranscriptPath(corruptedSessionId);
try {
fs.unlinkSync(transcriptPath);
} catch {
@ -507,6 +580,7 @@ export async function runAgentTurnWithFallback(params: {
fallbackModel,
didLogHeartbeatStrip,
autoCompactionCompleted,
directlySentBlockKeys: directlySentBlockKeys.size > 0 ? directlySentBlockKeys : undefined,
directlySentBlockKeys:
directlySentBlockKeys.size > 0 ? directlySentBlockKeys : undefined,
};
}

View File

@ -172,6 +172,53 @@ describe("runReplyAgent typing (heartbeat)", () => {
}
}
});
// Regression test: when the embedded agent keeps throwing a compaction
// failure, the run must stop after a single retry and hand the user an
// explicit "context limit exceeded / reset" message instead of looping or
// finishing silently.
it("returns a user-facing error after repeated compaction failure", async () => {
  // Point the state directory at a throwaway temp dir for this test only.
  const prevStateDir = process.env.CLAWDBOT_STATE_DIR;
  const stateDir = await fs.mkdtemp(path.join(tmpdir(), "clawdbot-session-compaction-repeat-"));
  process.env.CLAWDBOT_STATE_DIR = stateDir;
  try {
    const sessionId = "session";
    const storePath = path.join(stateDir, "sessions", "sessions.json");
    // NOTE(review): presumably resolves under CLAWDBOT_STATE_DIR, which is why
    // the env var is set before this call — confirm against sessions module.
    const transcriptPath = sessions.resolveSessionTranscriptPath(sessionId);
    const sessionEntry = { sessionId, updatedAt: Date.now(), sessionFile: transcriptPath };
    const sessionStore = { main: sessionEntry };

    // Seed an on-disk session store plus a transcript file for the session.
    await fs.mkdir(path.dirname(storePath), { recursive: true });
    await fs.writeFile(storePath, JSON.stringify(sessionStore), "utf-8");
    await fs.mkdir(path.dirname(transcriptPath), { recursive: true });
    await fs.writeFile(transcriptPath, "ok", "utf-8");

    // Every agent invocation fails with the same compaction error, so the
    // retry after the session reset fails as well.
    runEmbeddedPiAgentMock.mockImplementation(async () => {
      throw new Error(
        'Context overflow: Summarization failed: 400 {"message":"prompt is too long"}',
      );
    });

    // The mock is shared, so measure calls relative to the count before run().
    const callsBefore = runEmbeddedPiAgentMock.mock.calls.length;
    const { run } = createMinimalRun({
      sessionEntry,
      sessionStore,
      sessionKey: "main",
      storePath,
    });
    const res = await run();

    // Exactly two attempts: the original call and one retry.
    expect(runEmbeddedPiAgentMock.mock.calls.length - callsBefore).toBe(2);
    const payload = Array.isArray(res) ? res[0] : res;
    expect(payload.text).toContain("Context limit exceeded");
    expect(payload.text?.toLowerCase()).toContain("reset");

    // The session id was replaced in memory and the same id was persisted.
    expect(sessionStore.main.sessionId).not.toBe(sessionId);
    const persisted = JSON.parse(await fs.readFile(storePath, "utf-8"));
    expect(persisted.main.sessionId).toBe(sessionStore.main.sessionId);
  } finally {
    // Compare against undefined so a pre-existing empty-string value is
    // restored rather than deleted.
    if (prevStateDir !== undefined) {
      process.env.CLAWDBOT_STATE_DIR = prevStateDir;
    } else {
      delete process.env.CLAWDBOT_STATE_DIR;
    }
    // Remove the temp state dir so repeated runs do not accumulate
    // directories; `force` keeps this a no-op if it is already gone.
    await fs.rm(stateDir, { recursive: true, force: true });
  }
});
it("retries after context overflow payload by resetting the session", async () => {
const prevStateDir = process.env.CLAWDBOT_STATE_DIR;
const stateDir = await fs.mkdtemp(path.join(tmpdir(), "clawdbot-session-overflow-reset-"));