Two fixes for autoCapture reliability:

1. Strip injected memory context before capture filtering
   - autoRecall prepends <relevant-memories> to user messages
   - This was causing shouldCapture() to skip all user messages
   - Now strips the context before evaluating capture criteria

2. Process only the current turn instead of the full history
   - Previously scanned all messages and picked an arbitrary 3
   - In long sessions (100+ messages), recent content was missed
   - Now captures only the last user + last assistant message
   - Previous turns were already captured when they occurred

These fixes ensure that autoCapture reliably stores each conversation turn as it happens, rather than missing recent exchanges.
610 lines
20 KiB
TypeScript
610 lines
20 KiB
TypeScript
/**
 * Moltbot Memory (LanceDB) Plugin
 *
 * Long-term memory with vector search for AI conversations.
 * Uses LanceDB for storage and OpenAI for embeddings.
 * Provides seamless auto-recall and auto-capture via lifecycle hooks.
 */
|
|
|
|
import { Type } from "@sinclair/typebox";
|
|
import * as lancedb from "@lancedb/lancedb";
|
|
import OpenAI from "openai";
|
|
import { randomUUID } from "node:crypto";
|
|
import type { MoltbotPluginApi } from "clawdbot/plugin-sdk";
|
|
import { stringEnum } from "clawdbot/plugin-sdk";
|
|
|
|
import {
|
|
MEMORY_CATEGORIES,
|
|
type MemoryCategory,
|
|
memoryConfigSchema,
|
|
vectorDimsForModel,
|
|
} from "./config.js";
|
|
|
|
// ============================================================================
// Types
// ============================================================================
|
|
|
|
/** A single memory row as it is written to and read back from the memories table. */
type MemoryEntry = {
  /** Unique identifier; generated with `randomUUID()` when the entry is stored. */
  id: string;
  /** The remembered text itself. */
  text: string;
  /** Embedding of `text`, used for vector similarity search. */
  vector: number[];
  /** Importance weight supplied by the caller (the tools describe it as 0-1). */
  importance: number;
  /** Coarse classification of the memory. */
  category: MemoryCategory;
  /** Creation time as returned by `Date.now()` (epoch milliseconds). */
  createdAt: number;
};
|
|
|
|
/** One hit returned by a vector search: the stored entry plus its similarity score. */
type MemorySearchResult = {
  /** The matching memory. */
  entry: MemoryEntry;
  /** Similarity derived from the search distance as `1 / (1 + distance)`; higher is closer. */
  score: number;
};
|
|
|
|
// ============================================================================
// LanceDB Provider
// ============================================================================
|
|
|
|
/** Name of the LanceDB table that holds every stored memory. */
const TABLE_NAME = "memories";

/**
 * Thin wrapper around a LanceDB table of {@link MemoryEntry} rows.
 *
 * The connection is opened lazily on first use. Concurrent callers share a
 * single in-flight initialization, and a failed initialization is forgotten so
 * that the next call retries instead of replaying the cached rejection.
 */
class MemoryDB {
  /** Canonical 8-4-4-4-12 hex UUID; IDs are validated before being put into a filter string. */
  private static readonly UUID_PATTERN =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

  /** Result count used when a caller passes an unusable `limit`. */
  private static readonly DEFAULT_LIMIT = 5;

  private db: lancedb.Connection | null = null;
  private table: lancedb.Table | null = null;
  private initPromise: Promise<lancedb.Table> | null = null;

  /**
   * @param dbPath    Location passed to `lancedb.connect`.
   * @param vectorDim Length of the embedding vectors stored in the table.
   */
  constructor(
    private readonly dbPath: string,
    private readonly vectorDim: number,
  ) {}

  /**
   * Returns the opened table, initializing the connection on first use.
   * A rejected initialization clears the cached promise so later calls retry.
   */
  private async ensureInitialized(): Promise<lancedb.Table> {
    if (this.table) return this.table;

    if (!this.initPromise) {
      this.initPromise = this.doInitialize().catch((err: unknown) => {
        // Do not cache the failure: a transient error must not disable the store forever.
        this.initPromise = null;
        throw err;
      });
    }
    return this.initPromise;
  }

  /** Connects to the database and opens the memories table, creating it if it is missing. */
  private async doInitialize(): Promise<lancedb.Table> {
    const db = await lancedb.connect(this.dbPath);
    const existingTables = await db.tableNames();

    let table: lancedb.Table;
    if (existingTables.includes(TABLE_NAME)) {
      table = await db.openTable(TABLE_NAME);
    } else {
      // The table is created from a placeholder row (which fixes the column
      // layout and vector length); the placeholder is removed right away.
      table = await db.createTable(TABLE_NAME, [
        {
          id: "__schema__",
          text: "",
          vector: new Array(this.vectorDim).fill(0),
          importance: 0,
          category: "other",
          createdAt: 0,
        },
      ]);
      await table.delete('id = "__schema__"');
    }

    // Publish the handles only after every step succeeded.
    this.db = db;
    this.table = table;
    return table;
  }

  /**
   * Persists a new memory.
   *
   * @param entry Memory fields without `id` and `createdAt`, which are generated here.
   * @returns The stored entry including the generated `id` and `createdAt`.
   */
  async store(entry: Omit<MemoryEntry, "id" | "createdAt">): Promise<MemoryEntry> {
    const table = await this.ensureInitialized();

    const fullEntry: MemoryEntry = {
      ...entry,
      id: randomUUID(),
      createdAt: Date.now(),
    };

    await table.add([fullEntry]);
    return fullEntry;
  }

  /**
   * Finds the memories closest to `vector`.
   *
   * @param vector   Query embedding.
   * @param limit    Maximum number of rows to fetch (default 5). Non-finite
   *                 values or values below 1 fall back to 5; fractions are floored.
   * @param minScore Minimum similarity (default 0.5) a row needs to be returned.
   * @returns Matching entries with their scores, in the order LanceDB returned them.
   */
  async search(vector: number[], limit = 5, minScore = 0.5): Promise<MemorySearchResult[]> {
    const table = await this.ensureInitialized();

    const maxRows =
      Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : MemoryDB.DEFAULT_LIMIT;
    const rows = await table.vectorSearch(vector).limit(maxRows).toArray();

    // LanceDB uses L2 distance by default; convert it to a similarity score.
    const hits: MemorySearchResult[] = rows.map((row) => {
      const distance = row._distance ?? 0;
      return {
        entry: {
          id: row.id as string,
          text: row.text as string,
          vector: row.vector as number[],
          importance: row.importance as number,
          category: row.category as MemoryEntry["category"],
          createdAt: row.createdAt as number,
        },
        // Inverse distance keeps the score in (0, 1]: sim = 1 / (1 + d).
        score: 1 / (1 + distance),
      };
    });

    return hits.filter((hit) => hit.score >= minScore);
  }

  /**
   * Deletes the memory with the given ID.
   *
   * @param id Full UUID of the memory.
   * @returns `true` once the delete statement has run.
   * @throws Error when `id` is not a UUID; the check runs before the database
   *         is touched and keeps arbitrary text out of the filter string.
   */
  async delete(id: string): Promise<boolean> {
    if (!MemoryDB.UUID_PATTERN.test(id)) {
      throw new Error(`Invalid memory ID format: ${id}`);
    }
    const table = await this.ensureInitialized();
    await table.delete(`id = '${id}'`);
    return true;
  }

  /** @returns The number of rows currently in the memories table. */
  async count(): Promise<number> {
    const table = await this.ensureInitialized();
    return table.countRows();
  }
}
|
|
|
|
// ============================================================================
// OpenAI Embeddings
// ============================================================================
|
|
|
|
/** Creates text embeddings through the OpenAI embeddings endpoint. */
class Embeddings {
  private readonly client: OpenAI;

  /**
   * @param apiKey API key handed to the OpenAI client.
   * @param model  Embedding model name sent with every request.
   */
  constructor(
    apiKey: string,
    private readonly model: string,
  ) {
    this.client = new OpenAI({ apiKey });
  }

  /**
   * Embeds a single piece of text.
   *
   * @param text Text to embed.
   * @returns The embedding vector of the first (only) result.
   * @throws Error when the response contains no embedding, instead of failing
   *         later with an opaque property-access error.
   */
  async embed(text: string): Promise<number[]> {
    const response = await this.client.embeddings.create({
      model: this.model,
      input: text,
    });

    const embedding = response.data[0]?.embedding;
    if (!embedding) {
      throw new Error(`Embedding request for model "${this.model}" returned no embedding`);
    }
    return embedding;
  }
}
|
|
|
|
// ============================================================================
// Rule-based capture filter
// ============================================================================
|
|
|
|
/**
 * Patterns (Czech and English) that mark a message as worth remembering.
 * A message qualifies when at least one pattern matches.
 */
const MEMORY_TRIGGERS: readonly RegExp[] = [
  // Explicit requests to remember something.
  /zapamatuj si|pamatuj|remember/i,
  // Stated preferences.
  /preferuji|radši|nechci|prefer/i,
  // Decisions ("we decided", "we will use").
  /rozhodli jsme|budeme používat/i,
  // Phone numbers written with a leading "+".
  /\+\d{10,}/,
  // E-mail addresses.
  /[\w.-]+@[\w.-]+\.\w+/,
  // Czech "můj X je" / "je můj". The middle word is matched with Unicode
  // letter classes because ASCII-only \w stops at diacritics ("oblíbený").
  /můj\s+[\p{L}\p{N}_]+\s+je|je\s+můj/iu,
  // English "my X is" / "is my".
  /my\s+\w+\s+is|is\s+my/i,
  // First-person likes, dislikes and needs.
  /i (like|prefer|hate|love|want|need)/i,
  // Emphasis words.
  /always|never|important/i,
];
|
|
|
|
/**
 * Removes an injected memory-context block from the front of a message.
 *
 * Everything up to and including the first `</relevant-memories>` closing tag
 * is dropped and the remainder is trimmed. Text without that closing tag is
 * returned unchanged (not trimmed).
 *
 * @param text Raw message text, possibly prefixed with injected context.
 * @returns The message text without the injected block.
 */
function stripMemoryContext(text: string): string {
  const closingTag = "</relevant-memories>";
  const closingAt = text.indexOf(closingTag);

  if (closingAt === -1) return text;

  const afterBlock = text.slice(closingAt + closingTag.length);
  return afterBlock.trim();
}
|
|
|
|
/**
 * Decides whether a message is worth storing as a memory.
 *
 * The injected memory context is stripped first; the remaining text must be
 * 10-500 characters long, must not look like system/agent output, and must
 * match at least one of the `MEMORY_TRIGGERS` patterns.
 *
 * @param text Raw message text.
 * @returns `true` when the message should be captured.
 */
function shouldCapture(text: string): boolean {
  // Strip any injected memory context first.
  const cleanText = stripMemoryContext(text);

  if (cleanText.length < 10 || cleanText.length > 500) return false;

  // Skip system-generated content (pure XML).
  if (cleanText.startsWith("<") && cleanText.includes("</")) return false;

  // Skip agent summary responses (markdown bold plus a bullet list).
  if (cleanText.includes("**") && cleanText.includes("\n-")) return false;

  // Skip emoji-heavy responses (likely agent output). The Unicode property
  // also covers pictographs outside U+1F300-1F9FF such as U+2705 (check mark),
  // U+274C (cross mark) and U+2728 (sparkles).
  const emojiCount = cleanText.match(/\p{Extended_Pictographic}/gu)?.length ?? 0;
  if (emojiCount > 3) return false;

  return MEMORY_TRIGGERS.some((trigger) => trigger.test(cleanText));
}
|
|
|
|
/**
 * Assigns a coarse category to a memory text using keyword heuristics.
 *
 * Checks run in priority order: preference, decision, entity, fact; the first
 * match wins and anything else is "other". Matching is case-insensitive.
 *
 * @param text Memory text to classify.
 * @returns The detected category.
 */
function detectCategory(text: string): MemoryCategory {
  // Stems on purpose: "prefer" also covers "preferuji", "likes", "wanted", ...
  if (/prefer|radši|like|love|hate|want/i.test(text)) return "preference";

  if (/rozhodli|decided|will use|budeme/i.test(text)) return "decision";

  // Phone numbers, e-mail domains, or explicit naming phrases.
  if (/\+\d{10,}|@[\w.-]+\.\w+|is called|jmenuje se/i.test(text)) return "entity";

  // Short copulas/auxiliaries must match as whole words; as bare substrings
  // they hit nearly every text ("is" in "Paris", "je" in "project"). The
  // boundaries are Unicode-aware because \b does not treat "á" as a word char.
  if (/(?<![\p{L}\p{N}_])(?:is|are|has|have|je|má|jsou)(?![\p{L}\p{N}_])/iu.test(text)) {
    return "fact";
  }

  return "other";
}
|
|
|
|
// ============================================================================
// Plugin Definition
// ============================================================================
|
|
|
|
/**
 * LanceDB-backed long-term memory plugin.
 *
 * `register` wires up:
 * - three tools (`memory_recall`, `memory_store`, `memory_forget`),
 * - the `ltm` CLI command group,
 * - the optional auto-recall (`before_agent_start`) and auto-capture
 *   (`agent_end`) lifecycle hooks, and
 * - a service entry that logs start/stop.
 */
const memoryPlugin = {
  id: "memory-lancedb",
  name: "Memory (LanceDB)",
  description: "LanceDB-backed long-term memory with auto-recall/capture",
  kind: "memory" as const,
  configSchema: memoryConfigSchema,

  /**
   * Registers tools, CLI commands, hooks and the service with the host.
   *
   * @param api Plugin API handle provided by the host.
   */
  register(api: MoltbotPluginApi) {
    const cfg = memoryConfigSchema.parse(api.pluginConfig);
    const resolvedDbPath = api.resolvePath(cfg.dbPath!);
    // Resolve the model once so the vector size and the embedding requests
    // can never disagree about which model is in use.
    const embeddingModel = cfg.embedding.model ?? "text-embedding-3-small";
    const vectorDim = vectorDimsForModel(embeddingModel);
    const db = new MemoryDB(resolvedDbPath, vectorDim);
    const embeddings = new Embeddings(cfg.embedding.apiKey, embeddingModel);

    const DEFAULT_LIMIT = 5;

    /** Turns a tool/CLI supplied limit into a positive integer, defaulting to 5. */
    const toLimit = (value: unknown): number => {
      const parsed = typeof value === "string" ? Number.parseInt(value, 10) : Number(value);
      return Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : DEFAULT_LIMIT;
    };

    /** Shortens `text` to `max` characters, adding "..." only when something was cut. */
    const preview = (text: string, max: number): string =>
      text.length > max ? `${text.slice(0, max)}...` : text;

    /** Returns a string content as-is, or the first text block of an array content. */
    const extractText = (content: unknown): string | null => {
      if (typeof content === "string") return content;
      if (!Array.isArray(content)) return null;

      for (const block of content) {
        if (!block || typeof block !== "object") continue;
        const candidate = block as Record<string, unknown>;
        if (candidate.type === "text" && typeof candidate.text === "string") {
          return candidate.text;
        }
      }
      return null;
    };

    api.logger.info(
      `memory-lancedb: plugin registered (db: ${resolvedDbPath}, lazy init)`,
    );

    // ========================================================================
    // Tools
    // ========================================================================

    api.registerTool(
      {
        name: "memory_recall",
        label: "Memory Recall",
        description:
          "Search through long-term memories. Use when you need context about user preferences, past decisions, or previously discussed topics.",
        parameters: Type.Object({
          query: Type.String({ description: "Search query" }),
          limit: Type.Optional(Type.Number({ description: "Max results (default: 5)" })),
        }),
        async execute(_toolCallId, params) {
          const { query, limit } = params as { query: string; limit?: number };

          const vector = await embeddings.embed(query);
          const results = await db.search(vector, toLimit(limit), 0.1);

          if (results.length === 0) {
            return {
              content: [{ type: "text", text: "No relevant memories found." }],
              details: { count: 0 },
            };
          }

          const text = results
            .map(
              (r, i) =>
                `${i + 1}. [${r.entry.category}] ${r.entry.text} (${(r.score * 100).toFixed(0)}%)`,
            )
            .join("\n");

          // Strip vector data for serialization (typed arrays can't be cloned).
          const sanitizedResults = results.map((r) => ({
            id: r.entry.id,
            text: r.entry.text,
            category: r.entry.category,
            importance: r.entry.importance,
            score: r.score,
          }));

          return {
            content: [
              { type: "text", text: `Found ${results.length} memories:\n\n${text}` },
            ],
            details: { count: results.length, memories: sanitizedResults },
          };
        },
      },
      { name: "memory_recall" },
    );

    api.registerTool(
      {
        name: "memory_store",
        label: "Memory Store",
        description:
          "Save important information in long-term memory. Use for preferences, facts, decisions.",
        parameters: Type.Object({
          text: Type.String({ description: "Information to remember" }),
          importance: Type.Optional(
            Type.Number({ description: "Importance 0-1 (default: 0.7)" }),
          ),
          category: Type.Optional(stringEnum(MEMORY_CATEGORIES)),
        }),
        async execute(_toolCallId, params) {
          const {
            text,
            importance = 0.7,
            category = "other",
          } = params as {
            text: string;
            importance?: number;
            category?: MemoryEntry["category"];
          };

          const vector = await embeddings.embed(text);

          // Check for duplicates: a near-identical memory is reported, not stored again.
          const existing = await db.search(vector, 1, 0.95);
          if (existing.length > 0) {
            return {
              content: [
                { type: "text", text: `Similar memory already exists: "${existing[0].entry.text}"` },
              ],
              details: {
                action: "duplicate",
                existingId: existing[0].entry.id,
                existingText: existing[0].entry.text,
              },
            };
          }

          const entry = await db.store({
            text,
            vector,
            importance,
            category,
          });

          return {
            content: [{ type: "text", text: `Stored: "${preview(text, 100)}"` }],
            details: { action: "created", id: entry.id },
          };
        },
      },
      { name: "memory_store" },
    );

    api.registerTool(
      {
        name: "memory_forget",
        label: "Memory Forget",
        description: "Delete specific memories. GDPR-compliant.",
        parameters: Type.Object({
          query: Type.Optional(Type.String({ description: "Search to find memory" })),
          memoryId: Type.Optional(Type.String({ description: "Specific memory ID" })),
        }),
        async execute(_toolCallId, params) {
          const { query, memoryId } = params as { query?: string; memoryId?: string };

          if (memoryId) {
            await db.delete(memoryId);
            return {
              content: [{ type: "text", text: `Memory ${memoryId} forgotten.` }],
              details: { action: "deleted", id: memoryId },
            };
          }

          if (query) {
            const vector = await embeddings.embed(query);
            const results = await db.search(vector, 5, 0.7);

            if (results.length === 0) {
              return {
                content: [{ type: "text", text: "No matching memories found." }],
                details: { found: 0 },
              };
            }

            // A single, very close match is deleted directly.
            if (results.length === 1 && results[0].score > 0.9) {
              await db.delete(results[0].entry.id);
              return {
                content: [
                  { type: "text", text: `Forgotten: "${results[0].entry.text}"` },
                ],
                details: { action: "deleted", id: results[0].entry.id },
              };
            }

            // Show full IDs: deletion by memoryId only accepts a complete UUID,
            // so a shortened ID in this list could not be used for the follow-up.
            const list = results
              .map((r) => `- [${r.entry.id}] ${preview(r.entry.text, 60)}`)
              .join("\n");

            // Strip vector data for serialization.
            const sanitizedCandidates = results.map((r) => ({
              id: r.entry.id,
              text: r.entry.text,
              category: r.entry.category,
              score: r.score,
            }));

            return {
              content: [
                {
                  type: "text",
                  text: `Found ${results.length} candidates. Specify memoryId:\n${list}`,
                },
              ],
              details: { action: "candidates", candidates: sanitizedCandidates },
            };
          }

          return {
            content: [{ type: "text", text: "Provide query or memoryId." }],
            details: { error: "missing_param" },
          };
        },
      },
      { name: "memory_forget" },
    );

    // ========================================================================
    // CLI Commands
    // ========================================================================

    api.registerCli(
      ({ program }) => {
        const memory = program
          .command("ltm")
          .description("LanceDB memory plugin commands");

        memory
          .command("list")
          .description("List memories")
          .action(async () => {
            const count = await db.count();
            console.log(`Total memories: ${count}`);
          });

        memory
          .command("search")
          .description("Search memories")
          .argument("<query>", "Search query")
          .option("--limit <n>", "Max results", "5")
          .action(async (query, opts) => {
            const vector = await embeddings.embed(query);
            // A non-numeric --limit falls back to the default instead of passing NaN on.
            const results = await db.search(vector, toLimit(opts.limit), 0.3);
            // Strip vectors for output.
            const output = results.map((r) => ({
              id: r.entry.id,
              text: r.entry.text,
              category: r.entry.category,
              importance: r.entry.importance,
              score: r.score,
            }));
            console.log(JSON.stringify(output, null, 2));
          });

        memory
          .command("stats")
          .description("Show memory statistics")
          .action(async () => {
            const count = await db.count();
            console.log(`Total memories: ${count}`);
          });
      },
      { commands: ["ltm"] },
    );

    // ========================================================================
    // Lifecycle Hooks
    // ========================================================================

    // Auto-recall: inject relevant memories before the agent starts.
    if (cfg.autoRecall) {
      api.on("before_agent_start", async (event) => {
        if (!event.prompt || event.prompt.length < 5) return;

        try {
          const vector = await embeddings.embed(event.prompt);
          const results = await db.search(vector, 3, 0.3);

          if (results.length === 0) return;

          const memoryContext = results
            .map((r) => `- [${r.entry.category}] ${r.entry.text}`)
            .join("\n");

          api.logger.info?.(
            `memory-lancedb: injecting ${results.length} memories into context`,
          );

          return {
            prependContext: `<relevant-memories>\nThe following memories may be relevant to this conversation:\n${memoryContext}\n</relevant-memories>`,
          };
        } catch (err) {
          // Recall is best-effort: a failure is logged and the agent starts without it.
          api.logger.warn(`memory-lancedb: recall failed: ${String(err)}`);
        }
      });
    }

    // Auto-capture: store important information from the current turn.
    // Only the last user message and the last assistant message are examined;
    // earlier turns were already captured when they happened.
    if (cfg.autoCapture) {
      api.on("agent_end", async (event) => {
        if (!event.success || !event.messages || event.messages.length === 0) {
          return;
        }

        try {
          let lastUserText: string | null = null;
          let lastAssistantText: string | null = null;

          // Walk backwards so long histories are not scanned in full.
          for (let i = event.messages.length - 1; i >= 0; i--) {
            if (lastUserText !== null && lastAssistantText !== null) break;

            const msg = event.messages[i];
            if (!msg || typeof msg !== "object") continue;

            const { role, content } = msg as Record<string, unknown>;
            if (role !== "user" && role !== "assistant") continue;

            const text = extractText(content);
            if (!text) continue;

            if (role === "user") {
              if (lastUserText === null) lastUserText = text;
            } else if (lastAssistantText === null) {
              lastAssistantText = text;
            }
          }

          // Collect texts from this turn only (user first, then assistant).
          const turnTexts: string[] = [];
          if (lastUserText) turnTexts.push(lastUserText);
          if (lastAssistantText) turnTexts.push(lastAssistantText);

          // Filter for capturable content.
          const toCapture = turnTexts.filter((text) => shouldCapture(text));
          if (toCapture.length === 0) return;

          // Store each capturable piece from this turn.
          let stored = 0;
          for (const rawText of toCapture) {
            // Clean the text before storing (strip injected memory context).
            const text = stripMemoryContext(rawText);
            if (text.length < 10) continue; // Re-check length after cleaning.

            const category = detectCategory(text);
            const vector = await embeddings.embed(text);

            // Check for duplicates (high similarity threshold).
            const existing = await db.search(vector, 1, 0.95);
            if (existing.length > 0) continue;

            await db.store({
              text,
              vector,
              importance: 0.7,
              category,
            });
            stored++;
          }

          if (stored > 0) {
            api.logger.info(`memory-lancedb: auto-captured ${stored} memories`);
          }
        } catch (err) {
          // Capture is best-effort: a failure is logged and never propagated to the host.
          api.logger.warn(`memory-lancedb: capture failed: ${String(err)}`);
        }
      });
    }

    // ========================================================================
    // Service
    // ========================================================================

    api.registerService({
      id: "memory-lancedb",
      start: () => {
        api.logger.info(
          `memory-lancedb: initialized (db: ${resolvedDbPath}, model: ${embeddingModel})`,
        );
      },
      stop: () => {
        api.logger.info("memory-lancedb: stopped");
      },
    });
  },
};
|
|
|
|
export default memoryPlugin;
|