import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ExtensionAPI, ExtensionContext, FileOperations } from "@mariozechner/pi-coding-agent";
import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent";

import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js";

/** Default fraction of the context window used as the per-chunk token budget. */
const BASE_CHUNK_RATIO = 0.4;
/** Smallest fraction the adaptive chunk ratio is allowed to shrink to. */
const MIN_CHUNK_RATIO = 0.15;
const SAFETY_MARGIN = 1.2; // 20% buffer for estimateTokens() inaccuracy
/** Summary text used whenever no model-generated summary can be produced. */
const FALLBACK_SUMMARY =
  "Summary unavailable due to context limits. Older messages were truncated.";
/** Custom instructions used when summarizing the prefix of a split turn. */
const TURN_PREFIX_INSTRUCTIONS =
  "This summary covers the prefix of a split turn. Focus on the original request," +
  " early progress, and any details needed to understand the retained suffix.";
/** Maximum number of tool failures listed individually in the summary. */
const MAX_TOOL_FAILURES = 8;
/** Maximum length (in characters) of a single tool-failure summary line. */
const MAX_TOOL_FAILURE_CHARS = 240;

/**
 * Reports whether `err` looks like an abort/cancellation error.
 *
 * @param err - Any thrown value.
 * @returns `true` when `err` is an object whose `name` is `"AbortError"` or whose
 *   string `message` contains "aborted" (case-insensitive); otherwise `false`.
 */
function isAbortError(err: unknown): boolean {
  if (!err || typeof err !== "object") return false;
  const name = "name" in err ? String(err.name) : "";
  if (name === "AbortError") return true;
  const message =
    "message" in err && typeof err.message === "string" ? err.message.toLowerCase() : "";
  return message.includes("aborted");
}

/** A failed tool call reduced to the fields needed for the summary section. */
type ToolFailure = {
  toolCallId: string;
  toolName: string;
  summary: string;
  meta?: string;
};

/** Collapses every run of whitespace to a single space and trims both ends. */
function normalizeFailureText(text: string): string {
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Shortens `text` to at most `maxChars` characters, ending with `...` when cut.
 *
 * @param text - Text to shorten.
 * @param maxChars - Maximum allowed length, including the trailing `...`.
 * @returns `text` unchanged when it already fits, otherwise a truncated copy.
 */
function truncateFailureText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  return `${text.slice(0, Math.max(0, maxChars - 3))}...`;
}

/**
 * Builds a short `status=… exitCode=…` string from a tool result's `details`.
 *
 * @param details - Untyped details payload; only a string `status` and a finite
 *   numeric `exitCode` are read from it.
 * @returns The space-joined parts, or `undefined` when neither field is usable.
 */
function formatToolFailureMeta(details: unknown): string | undefined {
  if (!details || typeof details !== "object") return undefined;
  const record = details as Record<string, unknown>;
  const status = typeof record.status === "string" ? record.status : undefined;
  const exitCode =
    typeof record.exitCode === "number" && Number.isFinite(record.exitCode)
      ? record.exitCode
      : undefined;
  const parts: string[] = [];
  if (status) parts.push(`status=${status}`);
  if (exitCode !== undefined) parts.push(`exitCode=${exitCode}`);
  return parts.length > 0 ? parts.join(" ") : undefined;
}

/**
 * Joins the text of all `{ type: "text", text: string }` blocks in `content`.
 *
 * @param content - Untyped tool-result content; anything but an array yields `""`.
 * @returns The text blocks joined with newlines (other block types are skipped).
 */
function extractToolResultText(content: unknown): string {
  if (!Array.isArray(content)) return "";
  const parts: string[] = [];
  for (const block of content) {
    if (!block || typeof block !== "object") continue;
    const rec = block as { type?: unknown; text?: unknown };
    if (rec.type === "text" && typeof rec.text === "string") {
      parts.push(rec.text);
    }
  }
  return parts.join("\n");
}

/**
 * Collects one {@link ToolFailure} per distinct failed tool call in `messages`.
 *
 * Only messages with `role === "toolResult"` and `isError === true` are considered.
 * Results without a string `toolCallId`, or repeating an id already seen, are skipped.
 *
 * @param messages - Messages to scan, in order.
 * @returns Failures in first-seen order.
 */
function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
  const failures: ToolFailure[] = [];
  const seen = new Set<string>();

  for (const message of messages) {
    if (!message || typeof message !== "object") continue;
    const role = (message as { role?: unknown }).role;
    if (role !== "toolResult") continue;
    const toolResult = message as {
      toolCallId?: unknown;
      toolName?: unknown;
      content?: unknown;
      details?: unknown;
      isError?: unknown;
    };
    if (toolResult.isError !== true) continue;
    const toolCallId = typeof toolResult.toolCallId === "string" ? toolResult.toolCallId : "";
    if (!toolCallId || seen.has(toolCallId)) continue;
    seen.add(toolCallId);

    const toolName =
      typeof toolResult.toolName === "string" && toolResult.toolName.trim()
        ? toolResult.toolName
        : "tool";
    const rawText = extractToolResultText(toolResult.content);
    const meta = formatToolFailureMeta(toolResult.details);
    const normalized = normalizeFailureText(rawText);
    // With no output text, fall back to a placeholder; the wording depends on
    // whether meta (status/exit code) is available to carry the detail instead.
    const summary = truncateFailureText(
      normalized || (meta ? "failed" : "failed (no output)"),
      MAX_TOOL_FAILURE_CHARS,
    );
    failures.push({ toolCallId, toolName, summary, meta });
  }

  return failures;
}

/**
 * Renders failures as a markdown "Tool Failures" section.
 *
 * @param failures - Failures to list; at most {@link MAX_TOOL_FAILURES} are shown
 *   individually, followed by an "...and N more" line for the remainder.
 * @returns The section text (prefixed with a blank line), or `""` when empty.
 */
function formatToolFailuresSection(failures: ToolFailure[]): string {
  if (failures.length === 0) return "";
  const lines = failures.slice(0, MAX_TOOL_FAILURES).map((failure) => {
    const meta = failure.meta ? ` (${failure.meta})` : "";
    return `- ${failure.toolName}${meta}: ${failure.summary}`;
  });
  if (failures.length > MAX_TOOL_FAILURES) {
    lines.push(`- ...and ${failures.length - MAX_TOOL_FAILURES} more`);
  }
  return `\n\n## Tool Failures\n${lines.join("\n")}`;
}

/**
 * Splits recorded file operations into read-only and modified path lists.
 *
 * @param fileOps - File operations with `read`, `edited` and `written` collections.
 * @returns `modifiedFiles` (edited or written) and `readFiles` (read but never
 *   modified), each sorted.
 */
function computeFileLists(fileOps: FileOperations): {
  readFiles: string[];
  modifiedFiles: string[];
} {
  const modified = new Set([...fileOps.edited, ...fileOps.written]);
  const readFiles = [...fileOps.read].filter((f) => !modified.has(f)).sort();
  const modifiedFiles = [...modified].sort();
  return { readFiles, modifiedFiles };
}

/**
 * Renders the read/modified file lists as tagged sections appended to a summary.
 *
 * Each non-empty list is wrapped in its own tag so the two lists can be told
 * apart in the resulting text.
 *
 * @param readFiles - Paths that were only read.
 * @param modifiedFiles - Paths that were edited or written.
 * @returns The sections prefixed with a blank line, or `""` when both are empty.
 */
function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string {
  const sections: string[] = [];
  if (readFiles.length > 0) {
    sections.push(`<read-files>\n${readFiles.join("\n")}\n</read-files>`);
  }
  if (modifiedFiles.length > 0) {
    sections.push(`<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`);
  }
  if (sections.length === 0) return "";
  return `\n\n${sections.join("\n\n")}`;
}

/**
 * Groups messages, in order, into chunks whose estimated size fits `maxTokens`.
 *
 * @param messages - Messages to partition.
 * @param maxTokens - Token budget per chunk, measured with `estimateTokens`.
 * @returns Non-empty chunks; a single message larger than the budget ends up in
 *   a chunk of its own.
 */
function chunkMessages(messages: AgentMessage[], maxTokens: number): AgentMessage[][] {
  if (messages.length === 0) return [];

  const chunks: AgentMessage[][] = [];
  let currentChunk: AgentMessage[] = [];
  let currentTokens = 0;

  for (const message of messages) {
    const messageTokens = estimateTokens(message);
    if (currentChunk.length > 0 && currentTokens + messageTokens > maxTokens) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentTokens = 0;
    }

    currentChunk.push(message);
    currentTokens += messageTokens;

    if (messageTokens > maxTokens) {
      // Split oversized messages to avoid unbounded chunk growth.
      chunks.push(currentChunk);
      currentChunk = [];
      currentTokens = 0;
    }
  }

  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }

  return chunks;
}

/**
 * Compute adaptive chunk ratio based on average message size.
 * When messages are large, we use smaller chunks to avoid exceeding model limits.
 *
 * @param messages - Messages whose average estimated size drives the ratio.
 * @param contextWindow - Context window size in tokens.
 * @returns A ratio between {@link MIN_CHUNK_RATIO} and {@link BASE_CHUNK_RATIO}.
 */
function computeAdaptiveChunkRatio(messages: AgentMessage[], contextWindow: number): number {
  if (messages.length === 0) return BASE_CHUNK_RATIO;

  const totalTokens = messages.reduce((sum, m) => sum + estimateTokens(m), 0);
  const avgTokens = totalTokens / messages.length;

  // Apply safety margin to account for estimation inaccuracy
  const safeAvgTokens = avgTokens * SAFETY_MARGIN;
  const avgRatio = safeAvgTokens / contextWindow;

  // If average message is > 10% of context, reduce chunk ratio
  if (avgRatio > 0.1) {
    const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO);
    return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction);
  }

  return BASE_CHUNK_RATIO;
}

/**
 * Check if a single message is too large to summarize.
 * If single message > 50% of context, it can't be summarized safely.
 *
 * @param msg - Message to measure (estimate is scaled by {@link SAFETY_MARGIN}).
 * @param contextWindow - Context window size in tokens.
 * @returns `true` when the padded estimate exceeds half the context window.
 */
function isOversizedForSummary(msg: AgentMessage, contextWindow: number): boolean {
  const tokens = estimateTokens(msg) * SAFETY_MARGIN;
  return tokens > contextWindow * 0.5;
}

/**
 * Summarizes `messages` chunk by chunk, feeding each chunk's summary into the next.
 *
 * @param params - Messages plus the model, API key, abort signal and token
 *   settings forwarded to `generateSummary`; `previousSummary` seeds the chain.
 * @returns The summary after the last chunk, or `previousSummary` /
 *   `"No prior history."` when there is nothing to summarize.
 * @throws Whatever `generateSummary` throws; errors are not caught here.
 */
async function summarizeChunks(params: {
  messages: AgentMessage[];
  model: NonNullable<ExtensionContext["model"]>;
  apiKey: string;
  signal: AbortSignal;
  reserveTokens: number;
  maxChunkTokens: number;
  customInstructions?: string;
  previousSummary?: string;
}): Promise<string> {
  if (params.messages.length === 0) {
    return params.previousSummary ?? "No prior history.";
  }

  const chunks = chunkMessages(params.messages, params.maxChunkTokens);
  let summary = params.previousSummary;

  // Chunks are processed sequentially because each call receives the running summary.
  for (const chunk of chunks) {
    summary = await generateSummary(
      chunk,
      params.model,
      params.reserveTokens,
      params.apiKey,
      params.signal,
      params.customInstructions,
      summary,
    );
  }

  return summary ?? "No prior history.";
}

/**
 * Summarize with progressive fallback for handling oversized messages.
 * If full summarization fails, tries partial summarization excluding oversized messages.
 *
 * @param params - Same inputs as `summarizeChunks`, plus `contextWindow` used to
 *   decide which messages count as oversized.
 * @returns The full summary, a partial summary with notes about omitted messages,
 *   or a plain description of what the context contained.
 * @throws Rethrows the underlying error when the signal is aborted or the error
 *   looks like an abort; other failures fall through to the next fallback.
 */
async function summarizeWithFallback(params: {
  messages: AgentMessage[];
  model: NonNullable<ExtensionContext["model"]>;
  apiKey: string;
  signal: AbortSignal;
  reserveTokens: number;
  maxChunkTokens: number;
  contextWindow: number;
  customInstructions?: string;
  previousSummary?: string;
}): Promise<string> {
  const { messages, contextWindow } = params;

  if (messages.length === 0) {
    return params.previousSummary ?? "No prior history.";
  }

  // Try full summarization first
  try {
    return await summarizeChunks(params);
  } catch (fullError) {
    if (params.signal.aborted || isAbortError(fullError)) {
      throw fullError;
    }
    console.warn(
      `Full summarization failed, trying partial: ${
        fullError instanceof Error ? fullError.message : String(fullError)
      }`,
    );
  }

  // Fallback 1: Summarize only small messages, note oversized ones
  const smallMessages: AgentMessage[] = [];
  const oversizedNotes: string[] = [];

  for (const msg of messages) {
    if (isOversizedForSummary(msg, contextWindow)) {
      const role = (msg as { role?: string }).role ?? "message";
      const tokens = estimateTokens(msg);
      oversizedNotes.push(
        `[Large ${role} (~${Math.round(tokens / 1000)}K tokens) omitted from summary]`,
      );
    } else {
      smallMessages.push(msg);
    }
  }

  if (smallMessages.length > 0) {
    try {
      const partialSummary = await summarizeChunks({
        ...params,
        messages: smallMessages,
      });
      const notes = oversizedNotes.length > 0 ? `\n\n${oversizedNotes.join("\n")}` : "";
      return partialSummary + notes;
    } catch (partialError) {
      if (params.signal.aborted || isAbortError(partialError)) {
        throw partialError;
      }
      console.warn(
        `Partial summarization also failed: ${
          partialError instanceof Error ? partialError.message : String(partialError)
        }`,
      );
    }
  }

  // Final fallback: Just note what was there
  return (
    `Context contained ${messages.length} messages (${oversizedNotes.length} oversized). ` +
    `Summary unavailable due to size limits.`
  );
}

/**
 * Registers a `session_before_compact` handler that produces the compaction summary.
 *
 * The handler summarizes the messages being compacted (and, for a split turn, the
 * turn prefix), then appends a tool-failure section and the file-operation lists.
 * When no model or API key is available, or summarization throws, it returns
 * {@link FALLBACK_SUMMARY} with the same appended sections instead.
 *
 * @param api - Extension API used to subscribe to the event.
 */
export default function compactionSafeguardExtension(api: ExtensionAPI): void {
  api.on("session_before_compact", async (event, ctx) => {
    const { preparation, customInstructions, signal } = event;
    const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
    const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
    const allMessages = [...preparation.messagesToSummarize, ...preparation.turnPrefixMessages];
    const toolFailures = collectToolFailures(allMessages);
    const toolFailureSection = formatToolFailuresSection(toolFailures);
    const fallbackSummary = `${FALLBACK_SUMMARY}${toolFailureSection}${fileOpsSummary}`;

    // Every exit path returns the same envelope; only the summary text differs.
    const compactionResult = (summary: string) => ({
      compaction: {
        summary,
        firstKeptEntryId: preparation.firstKeptEntryId,
        tokensBefore: preparation.tokensBefore,
        details: { readFiles, modifiedFiles },
      },
    });

    const model = ctx.model;
    if (!model) {
      return compactionResult(fallbackSummary);
    }

    const apiKey = await ctx.modelRegistry.getApiKey(model);
    if (!apiKey) {
      return compactionResult(fallbackSummary);
    }

    try {
      const contextWindowTokens = Math.max(
        1,
        Math.floor(model.contextWindow ?? DEFAULT_CONTEXT_TOKENS),
      );

      // Use adaptive chunk ratio based on message sizes
      const adaptiveRatio = computeAdaptiveChunkRatio(allMessages, contextWindowTokens);
      const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * adaptiveRatio));
      const reserveTokens = Math.max(1, Math.floor(preparation.settings.reserveTokens));

      const historySummary = await summarizeWithFallback({
        messages: preparation.messagesToSummarize,
        model,
        apiKey,
        signal,
        reserveTokens,
        maxChunkTokens,
        contextWindow: contextWindowTokens,
        customInstructions,
        previousSummary: preparation.previousSummary,
      });

      let summary = historySummary;
      if (preparation.isSplitTurn && preparation.turnPrefixMessages.length > 0) {
        const prefixSummary = await summarizeWithFallback({
          messages: preparation.turnPrefixMessages,
          model,
          apiKey,
          signal,
          reserveTokens,
          maxChunkTokens,
          contextWindow: contextWindowTokens,
          customInstructions: TURN_PREFIX_INSTRUCTIONS,
        });
        summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
      }

      summary += toolFailureSection;
      summary += fileOpsSummary;

      return compactionResult(summary);
    } catch (error) {
      console.warn(
        `Compaction summarization failed; truncating history: ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return compactionResult(fallbackSummary);
    }
  });
}

/** Internal helpers and constants exposed for unit tests only. */
export const __testing = {
  collectToolFailures,
  formatToolFailuresSection,
  computeAdaptiveChunkRatio,
  isOversizedForSummary,
  BASE_CHUNK_RATIO,
  MIN_CHUNK_RATIO,
  SAFETY_MARGIN,
} as const;