feat(compaction): add recency buffer to preserve recent messages during compaction
Adds a configurable recency buffer that preserves the last N messages (or N tokens) in raw form during context compaction, preventing loss of immediate conversational context.

Config options (under `agents.defaults.compaction.recencyBuffer`):
- `enabled`: boolean (default: false)
- `keepMessages`: number (default: 10)
- `keepTokens`: number (default: 2000)

When both limits are set, whichever threshold is reached first wins.

Closes #3772
This commit is contained in:
parent
5f4715acfc
commit
02b04f7352
@ -79,6 +79,7 @@ export function buildEmbeddedExtensionPaths(params: {
|
|||||||
const compactionCfg = params.cfg?.agents?.defaults?.compaction;
|
const compactionCfg = params.cfg?.agents?.defaults?.compaction;
|
||||||
setCompactionSafeguardRuntime(params.sessionManager, {
|
setCompactionSafeguardRuntime(params.sessionManager, {
|
||||||
maxHistoryShare: compactionCfg?.maxHistoryShare,
|
maxHistoryShare: compactionCfg?.maxHistoryShare,
|
||||||
|
recencyBuffer: compactionCfg?.recencyBuffer,
|
||||||
});
|
});
|
||||||
paths.push(resolvePiExtensionPath("compaction-safeguard"));
|
paths.push(resolvePiExtensionPath("compaction-safeguard"));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,12 @@
|
|||||||
|
/**
 * Configuration for the compaction recency buffer: the most recent messages
 * that are preserved verbatim instead of being summarized.
 */
export type RecencyBufferConfig = {
  /** Enable the recency buffer (default: false — buffer is off when unset). */
  enabled?: boolean;
  /** Maximum number of most-recent messages to keep raw (default: 10). */
  keepMessages?: number;
  /** Token budget for the raw messages; whichever limit is hit first wins (default: 2000). */
  keepTokens?: number;
};
|
||||||
|
|
||||||
/**
 * Per-session runtime settings consumed by the compaction-safeguard extension.
 */
export type CompactionSafeguardRuntimeValue = {
  /** Maximum share of the context window the history summary may occupy (default: 0.5). */
  maxHistoryShare?: number;
  /** Optional recency buffer settings; recent messages are kept raw when enabled. */
  recencyBuffer?: RecencyBufferConfig;
};
|
||||||
|
|
||||||
// Session-scoped runtime registry keyed by object identity.
|
// Session-scoped runtime registry keyed by object identity.
|
||||||
|
|||||||
@ -12,6 +12,10 @@ const {
|
|||||||
formatToolFailuresSection,
|
formatToolFailuresSection,
|
||||||
computeAdaptiveChunkRatio,
|
computeAdaptiveChunkRatio,
|
||||||
isOversizedForSummary,
|
isOversizedForSummary,
|
||||||
|
computeRecencyBufferSlice,
|
||||||
|
formatRecencyBufferSection,
|
||||||
|
formatMessageForRecencyBuffer,
|
||||||
|
extractMessageText,
|
||||||
BASE_CHUNK_RATIO,
|
BASE_CHUNK_RATIO,
|
||||||
MIN_CHUNK_RATIO,
|
MIN_CHUNK_RATIO,
|
||||||
SAFETY_MARGIN,
|
SAFETY_MARGIN,
|
||||||
@ -213,6 +217,149 @@ describe("isOversizedForSummary", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Unit tests for extractMessageText: pulling plain text out of AgentMessage content.
describe("extractMessageText", () => {
  it("extracts text from string content", () => {
    const msg: AgentMessage = {
      role: "user",
      content: "Hello, world!",
      timestamp: Date.now(),
    };
    expect(extractMessageText(msg)).toBe("Hello, world!");
  });

  it("extracts text from array content blocks", () => {
    const msg: AgentMessage = {
      role: "assistant",
      content: [
        { type: "text", text: "First part." },
        { type: "text", text: "Second part." },
      ],
      timestamp: Date.now(),
    };
    // Multiple text blocks are joined with a single newline.
    expect(extractMessageText(msg)).toBe("First part.\nSecond part.");
  });

  it("returns empty string for non-text content", () => {
    const msg: AgentMessage = {
      role: "user",
      // Non-text blocks (e.g. images) contribute nothing to the extracted text.
      content: [{ type: "image", source: "data:..." }],
      timestamp: Date.now(),
    };
    expect(extractMessageText(msg)).toBe("");
  });
});
|
||||||
|
|
||||||
|
// Tests for the role-labelled raw formatting of a single preserved message.
describe("formatMessageForRecencyBuffer", () => {
  it("formats user message with role label", () => {
    const msg: AgentMessage = {
      role: "user",
      content: "What is 2+2?",
      timestamp: Date.now(),
    };
    expect(formatMessageForRecencyBuffer(msg)).toBe("**User:** What is 2+2?");
  });

  it("formats assistant message with role label", () => {
    const msg: AgentMessage = {
      role: "assistant",
      // Array content goes through extractMessageText before labelling.
      content: [{ type: "text", text: "The answer is 4." }],
      timestamp: Date.now(),
    };
    expect(formatMessageForRecencyBuffer(msg)).toBe("**Assistant:** The answer is 4.");
  });
});
|
||||||
|
|
||||||
|
// Tests for the slice computation: which trailing messages are kept raw
// versus handed to summarization.
describe("computeRecencyBufferSlice", () => {
  // Minimal AgentMessage factory for these tests.
  const makeMessage = (role: string, content: string): AgentMessage => ({
    role: role as "user" | "assistant",
    content,
    timestamp: Date.now(),
  });

  it("returns all messages as older when disabled", () => {
    const messages = [makeMessage("user", "hello"), makeMessage("assistant", "hi")];
    const result = computeRecencyBufferSlice(messages, { enabled: false });
    expect(result.recentMessages).toHaveLength(0);
    expect(result.olderMessages).toHaveLength(2);
  });

  it("returns all messages as older when config is empty", () => {
    // enabled defaults to falsy, so an empty config disables the buffer.
    const messages = [makeMessage("user", "hello"), makeMessage("assistant", "hi")];
    const result = computeRecencyBufferSlice(messages, {});
    expect(result.recentMessages).toHaveLength(0);
    expect(result.olderMessages).toHaveLength(2);
  });

  it("keeps last N messages when keepMessages is set", () => {
    const messages = [
      makeMessage("user", "msg1"),
      makeMessage("assistant", "msg2"),
      makeMessage("user", "msg3"),
      makeMessage("assistant", "msg4"),
      makeMessage("user", "msg5"),
    ];
    const result = computeRecencyBufferSlice(messages, { enabled: true, keepMessages: 2 });
    expect(result.recentMessages).toHaveLength(2);
    expect(result.olderMessages).toHaveLength(3);
    // Original ordering is preserved within the recent slice.
    expect((result.recentMessages[0] as { content: string }).content).toBe("msg4");
    expect((result.recentMessages[1] as { content: string }).content).toBe("msg5");
  });

  it("respects keepTokens limit", () => {
    // Short messages that fit within token limit
    const messages = [
      makeMessage("user", "short"),
      makeMessage("assistant", "tiny"),
      makeMessage("user", "x".repeat(10000)), // Large message
    ];
    // With a small token limit, should only keep the last message if it's under limit
    const result = computeRecencyBufferSlice(messages, {
      enabled: true,
      keepMessages: 10,
      keepTokens: 100, // Very small token limit
    });
    // The large message exceeds the token limit on its own, but we keep at least 1
    expect(result.recentMessages.length).toBeGreaterThanOrEqual(1);
  });

  it("handles empty messages array", () => {
    const result = computeRecencyBufferSlice([], { enabled: true, keepMessages: 5 });
    expect(result.recentMessages).toHaveLength(0);
    expect(result.olderMessages).toHaveLength(0);
  });

  it("handles case where all messages fit in buffer", () => {
    // Generous limits: everything lands in the recent slice, nothing is summarized.
    const messages = [makeMessage("user", "a"), makeMessage("assistant", "b")];
    const result = computeRecencyBufferSlice(messages, {
      enabled: true,
      keepMessages: 10,
      keepTokens: 10000,
    });
    expect(result.recentMessages).toHaveLength(2);
    expect(result.olderMessages).toHaveLength(0);
  });
});
|
||||||
|
|
||||||
|
// Tests for rendering the preserved-verbatim section appended to the summary.
describe("formatRecencyBufferSection", () => {
  // Minimal AgentMessage factory for these tests.
  const makeMessage = (role: string, content: string): AgentMessage => ({
    role: role as "user" | "assistant",
    content,
    timestamp: Date.now(),
  });

  it("returns empty string for empty messages", () => {
    // No section header is emitted when there is nothing to preserve.
    expect(formatRecencyBufferSection([])).toBe("");
  });

  it("formats multiple messages with header", () => {
    const messages = [makeMessage("user", "hello"), makeMessage("assistant", "hi there")];
    const result = formatRecencyBufferSection(messages);
    expect(result).toContain("## Recent Context (preserved verbatim)");
    expect(result).toContain("**User:** hello");
    expect(result).toContain("**Assistant:** hi there");
  });
});
|
||||||
|
|
||||||
describe("compaction-safeguard runtime registry", () => {
|
describe("compaction-safeguard runtime registry", () => {
|
||||||
it("stores and retrieves config by session manager identity", () => {
|
it("stores and retrieves config by session manager identity", () => {
|
||||||
const sm = {};
|
const sm = {};
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
import type { ExtensionAPI, FileOperations } from "@mariozechner/pi-coding-agent";
|
import type { ExtensionAPI, FileOperations } from "@mariozechner/pi-coding-agent";
|
||||||
|
import { estimateTokens } from "@mariozechner/pi-coding-agent";
|
||||||
import {
|
import {
|
||||||
BASE_CHUNK_RATIO,
|
BASE_CHUNK_RATIO,
|
||||||
MIN_CHUNK_RATIO,
|
MIN_CHUNK_RATIO,
|
||||||
@ -11,7 +12,10 @@ import {
|
|||||||
resolveContextWindowTokens,
|
resolveContextWindowTokens,
|
||||||
summarizeInStages,
|
summarizeInStages,
|
||||||
} from "../compaction.js";
|
} from "../compaction.js";
|
||||||
import { getCompactionSafeguardRuntime } from "./compaction-safeguard-runtime.js";
|
import {
|
||||||
|
getCompactionSafeguardRuntime,
|
||||||
|
type RecencyBufferConfig,
|
||||||
|
} from "./compaction-safeguard-runtime.js";
|
||||||
const FALLBACK_SUMMARY =
|
const FALLBACK_SUMMARY =
|
||||||
"Summary unavailable due to context limits. Older messages were truncated.";
|
"Summary unavailable due to context limits. Older messages were truncated.";
|
||||||
const TURN_PREFIX_INSTRUCTIONS =
|
const TURN_PREFIX_INSTRUCTIONS =
|
||||||
@ -134,6 +138,86 @@ function formatFileOperations(readFiles: string[], modifiedFiles: string[]): str
|
|||||||
return `\n\n${sections.join("\n\n")}`;
|
return `\n\n${sections.join("\n\n")}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract text content from a message for raw preservation in recency buffer.
|
||||||
|
*/
|
||||||
|
function extractMessageText(message: AgentMessage): string {
|
||||||
|
const content = (message as { content?: unknown }).content;
|
||||||
|
if (typeof content === "string") return content;
|
||||||
|
if (Array.isArray(content)) {
|
||||||
|
const parts: string[] = [];
|
||||||
|
for (const block of content) {
|
||||||
|
if (!block || typeof block !== "object") continue;
|
||||||
|
const rec = block as { type?: unknown; text?: unknown };
|
||||||
|
if (rec.type === "text" && typeof rec.text === "string") {
|
||||||
|
parts.push(rec.text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return parts.join("\n");
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format a single message as raw text for the recency buffer section.
|
||||||
|
*/
|
||||||
|
function formatMessageForRecencyBuffer(message: AgentMessage): string {
|
||||||
|
const role = (message as { role?: string }).role ?? "unknown";
|
||||||
|
const text = extractMessageText(message);
|
||||||
|
const roleLabel = role === "user" ? "User" : role === "assistant" ? "Assistant" : role;
|
||||||
|
return `**${roleLabel}:** ${text}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the recency buffer slice: messages to keep raw at the end.
|
||||||
|
* Returns { recentMessages, olderMessages } where recentMessages are kept raw.
|
||||||
|
*/
|
||||||
|
function computeRecencyBufferSlice(
|
||||||
|
messages: AgentMessage[],
|
||||||
|
config: RecencyBufferConfig,
|
||||||
|
): { recentMessages: AgentMessage[]; olderMessages: AgentMessage[] } {
|
||||||
|
if (!config.enabled || messages.length === 0) {
|
||||||
|
return { recentMessages: [], olderMessages: messages };
|
||||||
|
}
|
||||||
|
|
||||||
|
const keepMessages = config.keepMessages ?? 10;
|
||||||
|
const keepTokens = config.keepTokens ?? 2000;
|
||||||
|
|
||||||
|
// Walk backwards from the end, counting messages and tokens
|
||||||
|
let recentCount = 0;
|
||||||
|
let recentTokens = 0;
|
||||||
|
|
||||||
|
for (let i = messages.length - 1; i >= 0; i--) {
|
||||||
|
const msgTokens = estimateTokens(messages[i]);
|
||||||
|
|
||||||
|
// Check if adding this message would exceed either limit
|
||||||
|
if (recentCount >= keepMessages) break;
|
||||||
|
if (recentTokens + msgTokens > keepTokens && recentCount > 0) break;
|
||||||
|
|
||||||
|
recentCount++;
|
||||||
|
recentTokens += msgTokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (recentCount === 0) {
|
||||||
|
return { recentMessages: [], olderMessages: messages };
|
||||||
|
}
|
||||||
|
|
||||||
|
const splitIndex = messages.length - recentCount;
|
||||||
|
return {
|
||||||
|
recentMessages: messages.slice(splitIndex),
|
||||||
|
olderMessages: messages.slice(0, splitIndex),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format the recency buffer section to append to the summary.
|
||||||
|
*/
|
||||||
|
function formatRecencyBufferSection(messages: AgentMessage[]): string {
|
||||||
|
if (messages.length === 0) return "";
|
||||||
|
const formatted = messages.map(formatMessageForRecencyBuffer).join("\n\n");
|
||||||
|
return `\n\n---\n\n## Recent Context (preserved verbatim)\n\n${formatted}`;
|
||||||
|
}
|
||||||
|
|
||||||
export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||||
api.on("session_before_compact", async (event, ctx) => {
|
api.on("session_before_compact", async (event, ctx) => {
|
||||||
const { preparation, customInstructions, signal } = event;
|
const { preparation, customInstructions, signal } = event;
|
||||||
@ -177,6 +261,18 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
|
|
||||||
const runtime = getCompactionSafeguardRuntime(ctx.sessionManager);
|
const runtime = getCompactionSafeguardRuntime(ctx.sessionManager);
|
||||||
const maxHistoryShare = runtime?.maxHistoryShare ?? 0.5;
|
const maxHistoryShare = runtime?.maxHistoryShare ?? 0.5;
|
||||||
|
const recencyBufferConfig = runtime?.recencyBuffer;
|
||||||
|
|
||||||
|
// Apply recency buffer: slice off recent messages to keep raw
|
||||||
|
let recencyBufferMessages: AgentMessage[] = [];
|
||||||
|
if (recencyBufferConfig?.enabled) {
|
||||||
|
const { recentMessages, olderMessages } = computeRecencyBufferSlice(
|
||||||
|
messagesToSummarize,
|
||||||
|
recencyBufferConfig,
|
||||||
|
);
|
||||||
|
recencyBufferMessages = recentMessages;
|
||||||
|
messagesToSummarize = olderMessages;
|
||||||
|
}
|
||||||
|
|
||||||
const tokensBefore =
|
const tokensBefore =
|
||||||
typeof preparation.tokensBefore === "number" && Number.isFinite(preparation.tokensBefore)
|
typeof preparation.tokensBefore === "number" && Number.isFinite(preparation.tokensBefore)
|
||||||
@ -281,6 +377,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
|
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Append recency buffer (raw recent messages) before tool failures and file ops
|
||||||
|
summary += formatRecencyBufferSection(recencyBufferMessages);
|
||||||
summary += toolFailureSection;
|
summary += toolFailureSection;
|
||||||
summary += fileOpsSummary;
|
summary += fileOpsSummary;
|
||||||
|
|
||||||
@ -315,6 +413,10 @@ export const __testing = {
|
|||||||
formatToolFailuresSection,
|
formatToolFailuresSection,
|
||||||
computeAdaptiveChunkRatio,
|
computeAdaptiveChunkRatio,
|
||||||
isOversizedForSummary,
|
isOversizedForSummary,
|
||||||
|
computeRecencyBufferSlice,
|
||||||
|
formatRecencyBufferSection,
|
||||||
|
formatMessageForRecencyBuffer,
|
||||||
|
extractMessageText,
|
||||||
BASE_CHUNK_RATIO,
|
BASE_CHUNK_RATIO,
|
||||||
MIN_CHUNK_RATIO,
|
MIN_CHUNK_RATIO,
|
||||||
SAFETY_MARGIN,
|
SAFETY_MARGIN,
|
||||||
|
|||||||
@ -76,4 +76,41 @@ describe("config compaction settings", () => {
|
|||||||
expect(cfg.agents?.defaults?.compaction?.reserveTokensFloor).toBe(9000);
|
expect(cfg.agents?.defaults?.compaction?.reserveTokensFloor).toBe(9000);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Round-trip test: recencyBuffer settings written to the on-disk config file
// must come back unchanged from loadConfig().
it("preserves recency buffer config values", async () => {
  await withTempHome(async (home) => {
    const configDir = path.join(home, ".clawdbot");
    await fs.mkdir(configDir, { recursive: true });
    await fs.writeFile(
      path.join(configDir, "moltbot.json"),
      JSON.stringify(
        {
          agents: {
            defaults: {
              compaction: {
                mode: "safeguard",
                recencyBuffer: {
                  enabled: true,
                  keepMessages: 15,
                  keepTokens: 3000,
                },
              },
            },
          },
        },
        null,
        2,
      ),
      "utf-8",
    );

    // Fresh module registry so loadConfig re-reads the file we just wrote.
    vi.resetModules();
    const { loadConfig } = await import("./config.js");
    const cfg = loadConfig();

    expect(cfg.agents?.defaults?.compaction?.recencyBuffer?.enabled).toBe(true);
    expect(cfg.agents?.defaults?.compaction?.recencyBuffer?.keepMessages).toBe(15);
    expect(cfg.agents?.defaults?.compaction?.recencyBuffer?.keepTokens).toBe(3000);
  });
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -239,6 +239,15 @@ export type AgentDefaultsConfig = {
|
|||||||
|
|
||||||
export type AgentCompactionMode = "default" | "safeguard";
|
export type AgentCompactionMode = "default" | "safeguard";
|
||||||
|
|
||||||
|
/** User-facing settings for the compaction recency buffer (recent messages kept raw). */
export type AgentCompactionRecencyBufferConfig = {
  /** Enable the recency buffer (default: false). */
  enabled?: boolean;
  /** Max messages to keep raw (default: 10). */
  keepMessages?: number;
  /** Max tokens to keep raw; whichever limit hits first wins (default: 2000). */
  keepTokens?: number;
};
||||||
|
|
||||||
export type AgentCompactionConfig = {
|
export type AgentCompactionConfig = {
|
||||||
/** Compaction summarization mode. */
|
/** Compaction summarization mode. */
|
||||||
mode?: AgentCompactionMode;
|
mode?: AgentCompactionMode;
|
||||||
@ -248,6 +257,8 @@ export type AgentCompactionConfig = {
|
|||||||
maxHistoryShare?: number;
|
maxHistoryShare?: number;
|
||||||
/** Pre-compaction memory flush (agentic turn). Default: enabled. */
|
/** Pre-compaction memory flush (agentic turn). Default: enabled. */
|
||||||
memoryFlush?: AgentCompactionMemoryFlushConfig;
|
memoryFlush?: AgentCompactionMemoryFlushConfig;
|
||||||
|
/** Recency buffer: preserve recent messages as raw text instead of summarizing them. */
|
||||||
|
recencyBuffer?: AgentCompactionRecencyBufferConfig;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type AgentCompactionMemoryFlushConfig = {
|
export type AgentCompactionMemoryFlushConfig = {
|
||||||
|
|||||||
@ -100,6 +100,14 @@ export const AgentDefaultsSchema = z
|
|||||||
})
|
})
|
||||||
.strict()
|
.strict()
|
||||||
.optional(),
|
.optional(),
|
||||||
|
// Recency buffer: preserve the last N messages / N tokens raw during compaction.
// keepMessages/keepTokens must be positive integers when provided.
recencyBuffer: z
  .object({
    enabled: z.boolean().optional(),
    keepMessages: z.number().int().positive().optional(),
    keepTokens: z.number().int().positive().optional(),
  })
  .strict()
  .optional(),
|
||||||
})
|
})
|
||||||
.strict()
|
.strict()
|
||||||
.optional(),
|
.optional(),
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user