This commit is contained in:
solofberlin 2026-01-30 10:50:04 -05:00 committed by GitHub
commit fe0c6a4b2e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 298 additions and 31 deletions

View File

@ -3,11 +3,18 @@ import fs from "node:fs";
import { homedir } from "node:os"; import { homedir } from "node:os";
import { join } from "node:path"; import { join } from "node:path";
export type EmbeddingProvider = "openai" | "local";
export type MemoryConfig = { export type MemoryConfig = {
embedding: { embedding: {
provider: "openai"; provider: EmbeddingProvider;
model?: string; model?: string;
apiKey: string; apiKey?: string;
// Local embedding options
local?: {
modelPath?: string;
modelCacheDir?: string;
};
}; };
dbPath?: string; dbPath?: string;
autoCapture?: boolean; autoCapture?: boolean;
@ -17,7 +24,8 @@ export type MemoryConfig = {
export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const; export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number]; export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];
const DEFAULT_MODEL = "text-embedding-3-small"; const DEFAULT_OPENAI_MODEL = "text-embedding-3-small";
const DEFAULT_LOCAL_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
const LEGACY_STATE_DIRS: string[] = []; const LEGACY_STATE_DIRS: string[] = [];
function resolveDefaultDbPath(): string { function resolveDefaultDbPath(): string {
@ -43,11 +51,15 @@ function resolveDefaultDbPath(): string {
const DEFAULT_DB_PATH = resolveDefaultDbPath(); const DEFAULT_DB_PATH = resolveDefaultDbPath();
const EMBEDDING_DIMENSIONS: Record<string, number> = { // OpenAI embedding dimensions
const OPENAI_EMBEDDING_DIMENSIONS: Record<string, number> = {
"text-embedding-3-small": 1536, "text-embedding-3-small": 1536,
"text-embedding-3-large": 3072, "text-embedding-3-large": 3072,
}; };
// Default dimension for local models (embeddinggemma-300M outputs 768-dim vectors)
const DEFAULT_LOCAL_EMBEDDING_DIM = 768;
function assertAllowedKeys( function assertAllowedKeys(
value: Record<string, unknown>, value: Record<string, unknown>,
allowed: string[], allowed: string[],
@ -58,10 +70,16 @@ function assertAllowedKeys(
throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`); throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`);
} }
export function vectorDimsForModel(model: string): number { export function vectorDimsForModel(model: string, provider: EmbeddingProvider): number {
const dims = EMBEDDING_DIMENSIONS[model]; if (provider === "local") {
// Local models have varying dimensions; default to embeddinggemma's 768
// TODO: Could detect from model metadata in the future
return DEFAULT_LOCAL_EMBEDDING_DIM;
}
const dims = OPENAI_EMBEDDING_DIMENSIONS[model];
if (!dims) { if (!dims) {
throw new Error(`Unsupported embedding model: ${model}`); throw new Error(`Unsupported OpenAI embedding model: ${model}. Supported: ${Object.keys(OPENAI_EMBEDDING_DIMENSIONS).join(", ")}`);
} }
return dims; return dims;
} }
@ -76,10 +94,17 @@ function resolveEnvVars(value: string): string {
}); });
} }
function resolveEmbeddingModel(embedding: Record<string, unknown>): string { function resolveEmbeddingModel(embedding: Record<string, unknown>, provider: EmbeddingProvider): string {
const model = typeof embedding.model === "string" ? embedding.model : DEFAULT_MODEL; const model = typeof embedding.model === "string" ? embedding.model : undefined;
vectorDimsForModel(model);
return model; if (provider === "local") {
return model || DEFAULT_LOCAL_MODEL;
}
// OpenAI provider
const resolvedModel = model || DEFAULT_OPENAI_MODEL;
vectorDimsForModel(resolvedModel, provider); // Validate
return resolvedModel;
} }
export const memoryConfigSchema = { export const memoryConfigSchema = {
@ -91,18 +116,38 @@ export const memoryConfigSchema = {
assertAllowedKeys(cfg, ["embedding", "dbPath", "autoCapture", "autoRecall"], "memory config"); assertAllowedKeys(cfg, ["embedding", "dbPath", "autoCapture", "autoRecall"], "memory config");
const embedding = cfg.embedding as Record<string, unknown> | undefined; const embedding = cfg.embedding as Record<string, unknown> | undefined;
if (!embedding || typeof embedding.apiKey !== "string") { if (!embedding) {
throw new Error("embedding.apiKey is required"); throw new Error("embedding config is required");
} }
assertAllowedKeys(embedding, ["apiKey", "model"], "embedding config"); assertAllowedKeys(embedding, ["provider", "apiKey", "model", "local"], "embedding config");
const model = resolveEmbeddingModel(embedding); // Determine provider (default to "openai" for backwards compatibility)
const provider: EmbeddingProvider = embedding.provider === "local" ? "local" : "openai";
// Validate apiKey requirement based on provider
if (provider === "openai" && typeof embedding.apiKey !== "string") {
throw new Error("embedding.apiKey is required when using OpenAI provider");
}
const model = resolveEmbeddingModel(embedding, provider);
// Parse local config if present
let localConfig: MemoryConfig["embedding"]["local"] | undefined;
if (embedding.local && typeof embedding.local === "object") {
const local = embedding.local as Record<string, unknown>;
assertAllowedKeys(local, ["modelPath", "modelCacheDir"], "embedding.local config");
localConfig = {
modelPath: typeof local.modelPath === "string" ? local.modelPath : undefined,
modelCacheDir: typeof local.modelCacheDir === "string" ? local.modelCacheDir : undefined,
};
}
return { return {
embedding: { embedding: {
provider: "openai", provider,
model, model,
apiKey: resolveEnvVars(embedding.apiKey), apiKey: typeof embedding.apiKey === "string" ? resolveEnvVars(embedding.apiKey) : undefined,
local: localConfig,
}, },
dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH, dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH,
autoCapture: cfg.autoCapture !== false, autoCapture: cfg.autoCapture !== false,
@ -110,16 +155,33 @@ export const memoryConfigSchema = {
}; };
}, },
uiHints: { uiHints: {
"embedding.provider": {
label: "Embedding Provider",
help: "Choose 'openai' for remote embeddings or 'local' for on-device embeddings using node-llama-cpp",
options: ["openai", "local"],
},
"embedding.apiKey": { "embedding.apiKey": {
label: "OpenAI API Key", label: "OpenAI API Key",
sensitive: true, sensitive: true,
placeholder: "sk-proj-...", placeholder: "sk-proj-...",
help: "API key for OpenAI embeddings (or use ${OPENAI_API_KEY})", help: "API key for OpenAI embeddings (required if provider is 'openai', or use ${OPENAI_API_KEY})",
}, },
"embedding.model": { "embedding.model": {
label: "Embedding Model", label: "Embedding Model",
placeholder: DEFAULT_MODEL, placeholder: DEFAULT_OPENAI_MODEL,
help: "OpenAI embedding model to use", help: "Model to use for embeddings. For OpenAI: text-embedding-3-small/large. For local: HuggingFace GGUF path.",
},
"embedding.local.modelPath": {
label: "Local Model Path",
placeholder: DEFAULT_LOCAL_MODEL,
help: "Path to local GGUF embedding model (e.g., hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf)",
advanced: true,
},
"embedding.local.modelCacheDir": {
label: "Model Cache Directory",
placeholder: "~/.cache/node-llama-cpp",
help: "Directory to cache downloaded models",
advanced: true,
}, },
dbPath: { dbPath: {
label: "Database Path", label: "Database Path",

View File

@ -81,7 +81,7 @@ describe("memory plugin e2e", () => {
delete process.env.TEST_MEMORY_API_KEY; delete process.env.TEST_MEMORY_API_KEY;
}); });
test("config schema rejects missing apiKey", async () => { test("config schema rejects missing apiKey for OpenAI provider", async () => {
const { default: memoryPlugin } = await import("./index.js"); const { default: memoryPlugin } = await import("./index.js");
expect(() => { expect(() => {
@ -89,7 +89,58 @@ describe("memory plugin e2e", () => {
embedding: {}, embedding: {},
dbPath, dbPath,
}); });
}).toThrow("embedding.apiKey is required"); }).toThrow("embedding.apiKey is required when using OpenAI provider");
});
// The local provider must parse without an apiKey and fall back to the default local model.
test("config schema accepts local provider without apiKey", async () => {
  const plugin = (await import("./index.js")).default;

  const parsed = plugin.configSchema?.parse?.({
    embedding: { provider: "local" },
    dbPath,
  });

  expect(parsed).toBeDefined();
  const embedding = parsed?.embedding;
  expect(embedding?.provider).toBe("local");
  expect(embedding?.apiKey).toBeUndefined();
  // No model was supplied, so the default local model name is expected here.
  expect(embedding?.model).toContain("embeddinggemma");
});
// The nested `embedding.local` options must be carried through parsing unchanged.
test("config schema accepts local provider with custom model path", async () => {
  const plugin = (await import("./index.js")).default;
  const localOptions = {
    modelPath: "hf:custom/model.gguf",
    modelCacheDir: "/tmp/models",
  };

  const parsed = plugin.configSchema?.parse?.({
    embedding: { provider: "local", local: localOptions },
    dbPath,
  });

  expect(parsed).toBeDefined();
  const embedding = parsed?.embedding;
  expect(embedding?.provider).toBe("local");
  expect(embedding?.local?.modelPath).toBe("hf:custom/model.gguf");
  expect(embedding?.local?.modelCacheDir).toBe("/tmp/models");
});
// A config that omits `provider` must still resolve to the OpenAI provider.
test("config schema defaults to OpenAI provider for backwards compatibility", async () => {
  const plugin = (await import("./index.js")).default;

  const parsed = plugin.configSchema?.parse?.({
    embedding: { apiKey: OPENAI_API_KEY },
    dbPath,
  });

  expect(parsed).toBeDefined();
  expect(parsed?.embedding?.provider).toBe("openai");
});
test("shouldCapture filters correctly", async () => { test("shouldCapture filters correctly", async () => {

View File

@ -2,7 +2,7 @@
* OpenClaw Memory (LanceDB) Plugin * OpenClaw Memory (LanceDB) Plugin
* *
* Long-term memory with vector search for AI conversations. * Long-term memory with vector search for AI conversations.
* Uses LanceDB for storage and OpenAI for embeddings. * Uses LanceDB for storage and supports both OpenAI and local embeddings.
* Provides seamless auto-recall and auto-capture via lifecycle hooks. * Provides seamless auto-recall and auto-capture via lifecycle hooks.
*/ */
@ -16,6 +16,8 @@ import { stringEnum } from "openclaw/plugin-sdk";
import { import {
MEMORY_CATEGORIES, MEMORY_CATEGORIES,
type MemoryCategory, type MemoryCategory,
type MemoryConfig,
type EmbeddingProvider as EmbeddingProviderType,
memoryConfigSchema, memoryConfigSchema,
vectorDimsForModel, vectorDimsForModel,
} from "./config.js"; } from "./config.js";
@ -38,6 +40,12 @@ type MemorySearchResult = {
score: number; score: number;
}; };
/**
 * Contract shared by the embedding backends in this file
 * (OpenAIEmbeddings and LocalEmbeddings both implement it).
 */
interface EmbeddingProviderInterface {
/** Resolves to the embedding vector for a single piece of text. */
embed(text: string): Promise<number[]>;
/**
 * Optional batch variant: resolves to one vector per entry of `texts`.
 * Declared optional, so callers must check that it exists before calling it.
 */
embedBatch?(texts: string[]): Promise<number[][]>;
}
// ============================================================================ // ============================================================================
// LanceDB Provider // LanceDB Provider
// ============================================================================ // ============================================================================
@ -147,10 +155,10 @@ class MemoryDB {
} }
// ============================================================================ // ============================================================================
// OpenAI Embeddings // OpenAI Embeddings Provider
// ============================================================================ // ============================================================================
class Embeddings { class OpenAIEmbeddings implements EmbeddingProviderInterface {
private client: OpenAI; private client: OpenAI;
constructor( constructor(
@ -167,6 +175,137 @@ class Embeddings {
}); });
return response.data[0].embedding; return response.data[0].embedding;
} }
/**
 * Embeds several texts with a single embeddings request.
 *
 * @param texts - Inputs to embed; an empty array resolves to `[]` without calling the API.
 * @returns One embedding vector per entry of the response's `data` array, in response order.
 */
async embedBatch(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) {
    return [];
  }

  const { data } = await this.client.embeddings.create({
    model: this.model,
    input: texts,
  });
  return data.map((item) => item.embedding);
}
}
// ============================================================================
// Local Embeddings Provider (node-llama-cpp)
// ============================================================================
// Lazy-load node-llama-cpp types
type Llama = import("node-llama-cpp").Llama;
type LlamaModel = import("node-llama-cpp").LlamaModel;
type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
/**
 * Dynamically imports node-llama-cpp, so the module is only loaded when this
 * function is called rather than when this file is first evaluated.
 */
async function importNodeLlamaCpp(): Promise<typeof import("node-llama-cpp")> {
  const nodeLlamaCpp = await import("node-llama-cpp");
  return nodeLlamaCpp;
}
/**
 * Embedding provider backed by a model loaded through node-llama-cpp.
 *
 * Nothing is loaded in the constructor: the llama runtime, the model and the
 * embedding context are created on the first `embed`/`embedBatch` call and
 * reused afterwards. Concurrent first calls share one initialization.
 *
 * A failed initialization is not cached: the next call retries, so a transient
 * failure does not disable the provider for the lifetime of the process.
 */
class LocalEmbeddings implements EmbeddingProviderInterface {
  private llama: Llama | null = null;
  private embeddingModel: LlamaModel | null = null;
  private embeddingContext: LlamaEmbeddingContext | null = null;
  // In-flight initialization shared by concurrent callers; cleared on failure.
  private initPromise: Promise<LlamaEmbeddingContext> | null = null;

  /**
   * @param modelPath - Model location handed to node-llama-cpp's `resolveModelFile`.
   * @param modelCacheDir - Optional second argument for `resolveModelFile`; omitted when empty.
   */
  constructor(
    private modelPath: string,
    private modelCacheDir?: string,
  ) {}

  /**
   * Returns the embedding context, creating it on first use.
   *
   * @throws Error with the formatted setup message when initialization fails.
   */
  private async ensureInitialized(): Promise<LlamaEmbeddingContext> {
    if (this.embeddingContext) return this.embeddingContext;

    if (!this.initPromise) {
      this.initPromise = this.doInitialize().catch((err: unknown) => {
        // Drop the rejected promise so a later call can retry instead of
        // re-throwing the same cached failure forever.
        this.initPromise = null;
        throw err;
      });
    }
    return this.initPromise;
  }

  /**
   * Loads the runtime, model and embedding context.
   *
   * Handles that were already created by an earlier, partially successful
   * attempt are reused rather than loaded a second time.
   */
  private async doInitialize(): Promise<LlamaEmbeddingContext> {
    try {
      const { getLlama, resolveModelFile, LlamaLogLevel } = await importNodeLlamaCpp();

      if (!this.llama) {
        this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
      }
      if (!this.embeddingModel) {
        const resolvedPath = await resolveModelFile(this.modelPath, this.modelCacheDir || undefined);
        this.embeddingModel = await this.llama.loadModel({ modelPath: resolvedPath });
      }

      const context = await this.embeddingModel.createEmbeddingContext();
      this.embeddingContext = context;
      return context;
    } catch (err) {
      // Replace the low-level failure with the user-facing setup instructions.
      throw new Error(formatLocalSetupError(err));
    }
  }

  /** Resolves to the embedding vector for `text` as a plain number array. */
  async embed(text: string): Promise<number[]> {
    const ctx = await this.ensureInitialized();
    const embedding = await ctx.getEmbeddingFor(text);
    return Array.from(embedding.vector);
  }

  /**
   * Resolves to one embedding vector per input, in input order.
   * An empty input resolves to `[]` without triggering model initialization.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];

    const ctx = await this.ensureInitialized();
    return Promise.all(
      texts.map(async (text) => {
        const embedding = await ctx.getEmbeddingFor(text);
        return Array.from(embedding.vector);
      }),
    );
  }
}
/**
 * Reports whether `err` is a module-resolution failure for node-llama-cpp.
 *
 * @param err - Any thrown value.
 * @returns `true` only for an `Error` whose `code` is `"ERR_MODULE_NOT_FOUND"`
 *   and whose message mentions `node-llama-cpp`; `false` for everything else.
 */
function isNodeLlamaCppMissing(err: unknown): boolean {
  if (err instanceof Error) {
    const { code } = err as Error & { code?: unknown };
    return code === "ERR_MODULE_NOT_FOUND" && err.message.includes("node-llama-cpp");
  }
  return false;
}
/**
 * Turns any thrown value into display text.
 *
 * @param err - Any thrown value.
 * @returns The `message` of an `Error`, otherwise `String(err)`.
 */
function formatError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
/**
 * Builds the multi-line, user-facing message shown when local embeddings
 * cannot be initialized.
 *
 * The message gives the reason, then numbered remediation steps, then the
 * OpenAI fallback hint. The reinstall step is included only when the failure
 * is a missing node-llama-cpp module. Steps are numbered sequentially, so the
 * list reads "1), 2)" when that step is omitted rather than "1), 3)".
 *
 * @param err - The value thrown during local embedding setup.
 * @returns The message, with lines joined by `\n`.
 */
function formatLocalSetupError(err: unknown): string {
  const detail = formatError(err);
  const missing = isNodeLlamaCppMissing(err);

  const lines: string[] = ["Local embeddings unavailable."];
  if (missing) {
    lines.push("Reason: optional dependency node-llama-cpp is missing (or failed to install).");
    // Keep the raw module-resolution message for debugging.
    if (detail) lines.push(`Detail: ${detail}`);
  } else if (detail) {
    lines.push(`Reason: ${detail}`);
  }

  const steps: string[] = ["Use Node 22 LTS (recommended for installs/updates)"];
  if (missing) {
    steps.push("Reinstall OpenClaw (this should install node-llama-cpp): npm i -g openclaw@latest");
  }
  steps.push("If you use pnpm: pnpm approve-builds (select node-llama-cpp), then pnpm rebuild node-llama-cpp");

  lines.push("To enable local embeddings:");
  steps.forEach((step, index) => {
    lines.push(`${index + 1}) ${step}`);
  });
  lines.push('Or set embedding.provider = "openai" (remote) in the plugin config.');

  return lines.join("\n");
}
// ============================================================================
// Factory function to create embedding provider
// ============================================================================
/**
 * Builds the embedding backend selected by `cfg.embedding.provider`.
 *
 * - `"local"`: a LocalEmbeddings using `embedding.local.modelPath` when it is
 *   set and non-empty, otherwise `embedding.model`.
 * - anything else: an OpenAIEmbeddings, which requires `embedding.apiKey`.
 *
 * @throws Error when the OpenAI provider is selected without an API key.
 */
function createEmbeddingProvider(cfg: MemoryConfig): EmbeddingProviderInterface {
  const { embedding } = cfg;

  if (embedding.provider !== "local") {
    // OpenAI provider (default)
    if (!embedding.apiKey) {
      throw new Error("embedding.apiKey is required for OpenAI provider");
    }
    return new OpenAIEmbeddings(embedding.apiKey, embedding.model!);
  }

  const localOptions = embedding.local;
  const modelPath = localOptions?.modelPath || embedding.model!;
  return new LocalEmbeddings(modelPath, localOptions?.modelCacheDir);
}
// ============================================================================ // ============================================================================
@ -216,19 +355,23 @@ function detectCategory(text: string): MemoryCategory {
const memoryPlugin = { const memoryPlugin = {
id: "memory-lancedb", id: "memory-lancedb",
name: "Memory (LanceDB)", name: "Memory (LanceDB)",
description: "LanceDB-backed long-term memory with auto-recall/capture", description: "LanceDB-backed long-term memory with auto-recall/capture. Supports OpenAI and local embeddings.",
kind: "memory" as const, kind: "memory" as const,
configSchema: memoryConfigSchema, configSchema: memoryConfigSchema,
register(api: OpenClawPluginApi) { register(api: OpenClawPluginApi) {
const cfg = memoryConfigSchema.parse(api.pluginConfig); const cfg = memoryConfigSchema.parse(api.pluginConfig);
const resolvedDbPath = api.resolvePath(cfg.dbPath!); const resolvedDbPath = api.resolvePath(cfg.dbPath!);
const vectorDim = vectorDimsForModel(cfg.embedding.model ?? "text-embedding-3-small"); const vectorDim = vectorDimsForModel(cfg.embedding.model!, cfg.embedding.provider);
const db = new MemoryDB(resolvedDbPath, vectorDim); const db = new MemoryDB(resolvedDbPath, vectorDim);
const embeddings = new Embeddings(cfg.embedding.apiKey, cfg.embedding.model!); const embeddings = createEmbeddingProvider(cfg);
const providerInfo = cfg.embedding.provider === "local"
? `local (model: ${cfg.embedding.local?.modelPath || cfg.embedding.model})`
: `openai (model: ${cfg.embedding.model})`;
api.logger.info( api.logger.info(
`memory-lancedb: plugin registered (db: ${resolvedDbPath}, lazy init)`, `memory-lancedb: plugin registered (db: ${resolvedDbPath}, embeddings: ${providerInfo}, lazy init)`,
); );
// ======================================================================== // ========================================================================
@ -454,6 +597,9 @@ const memoryPlugin = {
.action(async () => { .action(async () => {
const count = await db.count(); const count = await db.count();
console.log(`Total memories: ${count}`); console.log(`Total memories: ${count}`);
console.log(`Embedding provider: ${cfg.embedding.provider}`);
console.log(`Embedding model: ${cfg.embedding.model}`);
console.log(`Vector dimensions: ${vectorDim}`);
}); });
}, },
{ commands: ["ltm"] }, { commands: ["ltm"] },
@ -577,7 +723,7 @@ const memoryPlugin = {
id: "memory-lancedb", id: "memory-lancedb",
start: () => { start: () => {
api.logger.info( api.logger.info(
`memory-lancedb: initialized (db: ${resolvedDbPath}, model: ${cfg.embedding.model})`, `memory-lancedb: initialized (db: ${resolvedDbPath}, embeddings: ${providerInfo})`,
); );
}, },
stop: () => { stop: () => {

View File

@ -2,12 +2,20 @@
"name": "@openclaw/memory-lancedb", "name": "@openclaw/memory-lancedb",
"version": "2026.1.29", "version": "2026.1.29",
"type": "module", "type": "module",
"description": "OpenClaw LanceDB-backed long-term memory plugin with auto-recall/capture", "description": "OpenClaw LanceDB-backed long-term memory plugin with auto-recall/capture. Supports OpenAI and local embeddings.",
"dependencies": { "dependencies": {
"@lancedb/lancedb": "^0.23.0", "@lancedb/lancedb": "^0.23.0",
"@sinclair/typebox": "0.34.47", "@sinclair/typebox": "0.34.47",
"openai": "^6.16.0" "openai": "^6.16.0"
}, },
"peerDependencies": {
"node-llama-cpp": ">=3.0.0"
},
"peerDependenciesMeta": {
"node-llama-cpp": {
"optional": true
}
},
"openclaw": { "openclaw": {
"extensions": [ "extensions": [
"./index.ts" "./index.ts"