From 97805e63bed4669cc1b254eb168483a6745e450e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 23 Jan 2026 00:10:05 +0000 Subject: [PATCH] fix: keep session token totals in sync (#1440) (thanks @robbyczgw-cla) --- CHANGELOG.md | 1 + src/agents/pi-embedded-runner/compact.ts | 21 +++++++----- src/agents/subagent-registry.ts | 3 +- src/auto-reply/reply.block-streaming.test.ts | 2 +- src/auto-reply/reply/session-updates.test.ts | 36 +++++++++++++++++++- src/auto-reply/reply/session-updates.ts | 9 ++--- 6 files changed, 55 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18b20f346..033c2c25b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Docs: https://docs.clawd.bot ### Fixes - BlueBubbles: stop typing indicator on idle/no-reply. (#1439) Thanks @Nicell. - Auto-reply: only report a model switch when session state is available. (#1465) Thanks @robbyczgw-cla. +- Auto-reply: keep cached context token count in sync after compaction. (#1440) Thanks @robbyczgw-cla. - Control UI: resolve local avatar URLs with basePath across injection + identity RPC. (#1457) Thanks @dlauer. - Agents: surface concrete API error details instead of generic AI service errors. - Docs: fix gog auth services example to include docs scope. (#1454) Thanks @zerone0x. diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 53b0ae8c0..e310ffb01 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -375,19 +375,24 @@ export async function compactEmbeddedPiSession(params: { session.agent.replaceMessages(limited); } const result = await session.compact(params.customInstructions); - // Estimate tokens after compaction by summing token estimates for remaining messages + // Estimate tokens after compaction with the same context-usage heuristics. let tokensAfter: number | undefined; try { - tokensAfter = 0; - for (const message of session.messages) { - tokensAfter += estimateTokens(message); + const usage = + typeof session.getContextUsage === "function" + ? session.getContextUsage() + : undefined; + let estimate = usage?.tokens; + if (!Number.isFinite(estimate) || !estimate || estimate <= 0) { + estimate = 0; + for (const message of session.messages) { + estimate += estimateTokens(message); + } } - // Sanity check: tokensAfter should be less than tokensBefore - if (tokensAfter > result.tokensBefore) { - tokensAfter = undefined; // Don't trust the estimate + if (Number.isFinite(estimate) && estimate > 0 && estimate <= result.tokensBefore) { + tokensAfter = estimate; } } catch { - // If estimation fails, leave tokensAfter undefined tokensAfter = undefined; } return { diff --git a/src/agents/subagent-registry.ts b/src/agents/subagent-registry.ts index d39bb5fe4..f038ae973 100644 --- a/src/agents/subagent-registry.ts +++ b/src/agents/subagent-registry.ts @@ -31,7 +31,8 @@ const subagentRuns = new Map(); let sweeper: NodeJS.Timeout | null = null; let listenerStarted = false; let listenerStop: (() => void) | null = null; -let restoreAttempted = false; +// Use var to avoid TDZ on circular init paths that can call restoreSubagentRunsOnce early. +var restoreAttempted = false; function persistSubagentRuns() { try { diff --git a/src/auto-reply/reply.block-streaming.test.ts b/src/auto-reply/reply.block-streaming.test.ts index 21b892b46..887512e16 100644 --- a/src/auto-reply/reply.block-streaming.test.ts +++ b/src/auto-reply/reply.block-streaming.test.ts @@ -42,7 +42,7 @@ describe("block streaming", () => { }); async function waitForCalls(fn: () => number, calls: number) { - const deadline = Date.now() + 1500; + const deadline = Date.now() + 15000; while (fn() < calls) { if (Date.now() > deadline) { throw new Error(`Expected ${calls} call(s), got ${fn()}`); diff --git a/src/auto-reply/reply/session-updates.test.ts b/src/auto-reply/reply/session-updates.test.ts index d673e2b4f..57fb737f4 100644 --- a/src/auto-reply/reply/session-updates.test.ts +++ b/src/auto-reply/reply/session-updates.test.ts @@ -2,7 +2,7 @@ import { describe, expect, it, vi } from "vitest"; import type { ClawdbotConfig } from "../../config/config.js"; import { enqueueSystemEvent, resetSystemEventsForTest } from "../../infra/system-events.js"; -import { prependSystemEvents } from "./session-updates.js"; +import { incrementCompactionCount, prependSystemEvents } from "./session-updates.js"; describe("prependSystemEvents", () => { it("adds a local timestamp to queued system events by default", async () => { @@ -29,3 +29,37 @@ describe("prependSystemEvents", () => { vi.useRealTimers(); }); }); + +describe("incrementCompactionCount", () => { + it("updates cached total tokens after compaction without clearing input/output", async () => { + const sessionKey = "agent:main:main"; + const sessionStore = { + [sessionKey]: { + sessionId: "s1", + updatedAt: 10, + compactionCount: 1, + totalTokens: 9_000, + inputTokens: 111, + outputTokens: 222, + }, + }; + const now = 1234; + + const nextCount = await incrementCompactionCount({ + sessionEntry: sessionStore[sessionKey], + sessionStore, + sessionKey, + now, + tokensAfter: 2_000, + }); + + expect(nextCount).toBe(2); + expect(sessionStore[sessionKey]).toMatchObject({ + compactionCount: 2, + totalTokens: 2_000, + inputTokens: 111, + outputTokens: 222, + updatedAt: now, + }); + }); +}); diff --git a/src/auto-reply/reply/session-updates.ts b/src/auto-reply/reply/session-updates.ts index 970a714d0..f525fd7e6 100644 --- a/src/auto-reply/reply/session-updates.ts +++ b/src/auto-reply/reply/session-updates.ts @@ -237,7 +237,7 @@ export async function incrementCompactionCount(params: { sessionKey?: string; storePath?: string; now?: number; - /** Token count after compaction - if provided, updates session token counts */ + /** Token count after compaction - if provided, updates cached context usage */ tokensAfter?: number; }): Promise { const { @@ -252,17 +252,14 @@ export async function incrementCompactionCount(params: { const entry = sessionStore[sessionKey] ?? sessionEntry; if (!entry) return undefined; const nextCount = (entry.compactionCount ?? 0) + 1; - // Build update payload with compaction count and optionally updated token counts + // Build update payload with compaction count and optionally updated context usage. const updates: Partial = { compactionCount: nextCount, updatedAt: now, }; - // If tokensAfter is provided, update the cached token counts to reflect post-compaction state + // If tokensAfter is provided, update the cached total to reflect post-compaction context size. if (tokensAfter != null && tokensAfter > 0) { updates.totalTokens = tokensAfter; - // Clear input/output breakdown since we only have the total estimate after compaction - updates.inputTokens = undefined; - updates.outputTokens = undefined; } sessionStore[sessionKey] = { ...entry,