Merge d395e40baf into 09be5d45d5
This commit is contained in:
commit
fe0c6a4b2e
@ -3,11 +3,18 @@ import fs from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
export type EmbeddingProvider = "openai" | "local";
|
||||
|
||||
export type MemoryConfig = {
|
||||
embedding: {
|
||||
provider: "openai";
|
||||
provider: EmbeddingProvider;
|
||||
model?: string;
|
||||
apiKey: string;
|
||||
apiKey?: string;
|
||||
// Local embedding options
|
||||
local?: {
|
||||
modelPath?: string;
|
||||
modelCacheDir?: string;
|
||||
};
|
||||
};
|
||||
dbPath?: string;
|
||||
autoCapture?: boolean;
|
||||
@ -17,7 +24,8 @@ export type MemoryConfig = {
|
||||
export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
|
||||
export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];
|
||||
|
||||
const DEFAULT_MODEL = "text-embedding-3-small";
|
||||
const DEFAULT_OPENAI_MODEL = "text-embedding-3-small";
|
||||
const DEFAULT_LOCAL_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
||||
const LEGACY_STATE_DIRS: string[] = [];
|
||||
|
||||
function resolveDefaultDbPath(): string {
|
||||
@ -43,11 +51,15 @@ function resolveDefaultDbPath(): string {
|
||||
|
||||
const DEFAULT_DB_PATH = resolveDefaultDbPath();
|
||||
|
||||
const EMBEDDING_DIMENSIONS: Record<string, number> = {
|
||||
// OpenAI embedding dimensions
|
||||
const OPENAI_EMBEDDING_DIMENSIONS: Record<string, number> = {
|
||||
"text-embedding-3-small": 1536,
|
||||
"text-embedding-3-large": 3072,
|
||||
};
|
||||
|
||||
// Default dimension for local models (embeddinggemma-300M outputs 768-dim vectors)
|
||||
const DEFAULT_LOCAL_EMBEDDING_DIM = 768;
|
||||
|
||||
function assertAllowedKeys(
|
||||
value: Record<string, unknown>,
|
||||
allowed: string[],
|
||||
@ -58,10 +70,16 @@ function assertAllowedKeys(
|
||||
throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`);
|
||||
}
|
||||
|
||||
/**
 * Returns the embedding vector dimensionality for a model/provider pair.
 *
 * @param model - Embedding model identifier. Only consulted for the "openai" provider.
 * @param provider - Embedding backend. Defaults to "openai" so callers that pass
 *   only a model name keep working.
 * @returns The number of dimensions in vectors produced by the model.
 * @throws Error when the provider is "openai" and the model is not listed in
 *   OPENAI_EMBEDDING_DIMENSIONS.
 */
export function vectorDimsForModel(model: string, provider: EmbeddingProvider = "openai"): number {
  if (provider === "local") {
    // Local models have varying dimensions; default to embeddinggemma's 768
    // TODO: Could detect from model metadata in the future
    return DEFAULT_LOCAL_EMBEDDING_DIM;
  }

  // Own-property check: a bare index lookup also resolves inherited keys such as
  // "constructor" or "toString", which are truthy and would be returned as a dimension.
  const dims = Object.prototype.hasOwnProperty.call(OPENAI_EMBEDDING_DIMENSIONS, model)
    ? OPENAI_EMBEDDING_DIMENSIONS[model]
    : undefined;
  if (!dims) {
    throw new Error(
      `Unsupported OpenAI embedding model: ${model}. Supported: ${Object.keys(OPENAI_EMBEDDING_DIMENSIONS).join(", ")}`,
    );
  }
  return dims;
}
|
||||
@ -76,10 +94,17 @@ function resolveEnvVars(value: string): string {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Picks the embedding model name from the raw `embedding` config section.
 *
 * @param embedding - Raw (unvalidated) embedding config object.
 * @param provider - Embedding backend. Defaults to "openai" so callers that pass
 *   only the config object keep working.
 * @returns The configured model when it is a non-empty string, otherwise the
 *   provider's default model.
 * @throws Error when the provider is "openai" and the resolved model is not a
 *   supported OpenAI embedding model (raised by vectorDimsForModel).
 */
function resolveEmbeddingModel(
  embedding: Record<string, unknown>,
  provider: EmbeddingProvider = "openai",
): string {
  const model = typeof embedding.model === "string" ? embedding.model : undefined;

  if (provider === "local") {
    // Local model identifiers are not validated against a table here.
    return model || DEFAULT_LOCAL_MODEL;
  }

  // OpenAI provider
  const resolvedModel = model || DEFAULT_OPENAI_MODEL;
  vectorDimsForModel(resolvedModel, provider); // Validate: throws on unsupported models
  return resolvedModel;
}
|
||||
|
||||
export const memoryConfigSchema = {
|
||||
@ -91,18 +116,38 @@ export const memoryConfigSchema = {
|
||||
assertAllowedKeys(cfg, ["embedding", "dbPath", "autoCapture", "autoRecall"], "memory config");
|
||||
|
||||
const embedding = cfg.embedding as Record<string, unknown> | undefined;
|
||||
if (!embedding || typeof embedding.apiKey !== "string") {
|
||||
throw new Error("embedding.apiKey is required");
|
||||
if (!embedding) {
|
||||
throw new Error("embedding config is required");
|
||||
}
|
||||
assertAllowedKeys(embedding, ["apiKey", "model"], "embedding config");
|
||||
assertAllowedKeys(embedding, ["provider", "apiKey", "model", "local"], "embedding config");
|
||||
|
||||
const model = resolveEmbeddingModel(embedding);
|
||||
// Determine provider (default to "openai" for backwards compatibility)
|
||||
const provider: EmbeddingProvider = embedding.provider === "local" ? "local" : "openai";
|
||||
|
||||
// Validate apiKey requirement based on provider
|
||||
if (provider === "openai" && typeof embedding.apiKey !== "string") {
|
||||
throw new Error("embedding.apiKey is required when using OpenAI provider");
|
||||
}
|
||||
|
||||
const model = resolveEmbeddingModel(embedding, provider);
|
||||
|
||||
// Parse local config if present
|
||||
let localConfig: MemoryConfig["embedding"]["local"] | undefined;
|
||||
if (embedding.local && typeof embedding.local === "object") {
|
||||
const local = embedding.local as Record<string, unknown>;
|
||||
assertAllowedKeys(local, ["modelPath", "modelCacheDir"], "embedding.local config");
|
||||
localConfig = {
|
||||
modelPath: typeof local.modelPath === "string" ? local.modelPath : undefined,
|
||||
modelCacheDir: typeof local.modelCacheDir === "string" ? local.modelCacheDir : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
embedding: {
|
||||
provider: "openai",
|
||||
provider,
|
||||
model,
|
||||
apiKey: resolveEnvVars(embedding.apiKey),
|
||||
apiKey: typeof embedding.apiKey === "string" ? resolveEnvVars(embedding.apiKey) : undefined,
|
||||
local: localConfig,
|
||||
},
|
||||
dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH,
|
||||
autoCapture: cfg.autoCapture !== false,
|
||||
@ -110,16 +155,33 @@ export const memoryConfigSchema = {
|
||||
};
|
||||
},
|
||||
uiHints: {
|
||||
"embedding.provider": {
|
||||
label: "Embedding Provider",
|
||||
help: "Choose 'openai' for remote embeddings or 'local' for on-device embeddings using node-llama-cpp",
|
||||
options: ["openai", "local"],
|
||||
},
|
||||
"embedding.apiKey": {
|
||||
label: "OpenAI API Key",
|
||||
sensitive: true,
|
||||
placeholder: "sk-proj-...",
|
||||
help: "API key for OpenAI embeddings (or use ${OPENAI_API_KEY})",
|
||||
help: "API key for OpenAI embeddings (required if provider is 'openai', or use ${OPENAI_API_KEY})",
|
||||
},
|
||||
"embedding.model": {
|
||||
label: "Embedding Model",
|
||||
placeholder: DEFAULT_MODEL,
|
||||
help: "OpenAI embedding model to use",
|
||||
placeholder: DEFAULT_OPENAI_MODEL,
|
||||
help: "Model to use for embeddings. For OpenAI: text-embedding-3-small/large. For local: HuggingFace GGUF path.",
|
||||
},
|
||||
"embedding.local.modelPath": {
|
||||
label: "Local Model Path",
|
||||
placeholder: DEFAULT_LOCAL_MODEL,
|
||||
help: "Path to local GGUF embedding model (e.g., hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf)",
|
||||
advanced: true,
|
||||
},
|
||||
"embedding.local.modelCacheDir": {
|
||||
label: "Model Cache Directory",
|
||||
placeholder: "~/.cache/node-llama-cpp",
|
||||
help: "Directory to cache downloaded models",
|
||||
advanced: true,
|
||||
},
|
||||
dbPath: {
|
||||
label: "Database Path",
|
||||
|
||||
@ -81,7 +81,7 @@ describe("memory plugin e2e", () => {
|
||||
delete process.env.TEST_MEMORY_API_KEY;
|
||||
});
|
||||
|
||||
test("config schema rejects missing apiKey", async () => {
|
||||
test("config schema rejects missing apiKey for OpenAI provider", async () => {
|
||||
const { default: memoryPlugin } = await import("./index.js");
|
||||
|
||||
expect(() => {
|
||||
@ -89,7 +89,58 @@ describe("memory plugin e2e", () => {
|
||||
embedding: {},
|
||||
dbPath,
|
||||
});
|
||||
}).toThrow("embedding.apiKey is required");
|
||||
}).toThrow("embedding.apiKey is required when using OpenAI provider");
|
||||
});
|
||||
|
||||
test("config schema accepts local provider without apiKey", async () => {
|
||||
const { default: memoryPlugin } = await import("./index.js");
|
||||
|
||||
const config = memoryPlugin.configSchema?.parse?.({
|
||||
embedding: {
|
||||
provider: "local",
|
||||
},
|
||||
dbPath,
|
||||
});
|
||||
|
||||
expect(config).toBeDefined();
|
||||
expect(config?.embedding?.provider).toBe("local");
|
||||
expect(config?.embedding?.apiKey).toBeUndefined();
|
||||
// Should use default local model
|
||||
expect(config?.embedding?.model).toContain("embeddinggemma");
|
||||
});
|
||||
|
||||
test("config schema accepts local provider with custom model path", async () => {
|
||||
const { default: memoryPlugin } = await import("./index.js");
|
||||
|
||||
const config = memoryPlugin.configSchema?.parse?.({
|
||||
embedding: {
|
||||
provider: "local",
|
||||
local: {
|
||||
modelPath: "hf:custom/model.gguf",
|
||||
modelCacheDir: "/tmp/models",
|
||||
},
|
||||
},
|
||||
dbPath,
|
||||
});
|
||||
|
||||
expect(config).toBeDefined();
|
||||
expect(config?.embedding?.provider).toBe("local");
|
||||
expect(config?.embedding?.local?.modelPath).toBe("hf:custom/model.gguf");
|
||||
expect(config?.embedding?.local?.modelCacheDir).toBe("/tmp/models");
|
||||
});
|
||||
|
||||
test("config schema defaults to OpenAI provider for backwards compatibility", async () => {
|
||||
const { default: memoryPlugin } = await import("./index.js");
|
||||
|
||||
const config = memoryPlugin.configSchema?.parse?.({
|
||||
embedding: {
|
||||
apiKey: OPENAI_API_KEY,
|
||||
},
|
||||
dbPath,
|
||||
});
|
||||
|
||||
expect(config).toBeDefined();
|
||||
expect(config?.embedding?.provider).toBe("openai");
|
||||
});
|
||||
|
||||
test("shouldCapture filters correctly", async () => {
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
* OpenClaw Memory (LanceDB) Plugin
|
||||
*
|
||||
* Long-term memory with vector search for AI conversations.
|
||||
* Uses LanceDB for storage and OpenAI for embeddings.
|
||||
* Uses LanceDB for storage and supports both OpenAI and local embeddings.
|
||||
* Provides seamless auto-recall and auto-capture via lifecycle hooks.
|
||||
*/
|
||||
|
||||
@ -16,6 +16,8 @@ import { stringEnum } from "openclaw/plugin-sdk";
|
||||
import {
|
||||
MEMORY_CATEGORIES,
|
||||
type MemoryCategory,
|
||||
type MemoryConfig,
|
||||
type EmbeddingProvider as EmbeddingProviderType,
|
||||
memoryConfigSchema,
|
||||
vectorDimsForModel,
|
||||
} from "./config.js";
|
||||
@ -38,6 +40,12 @@ type MemorySearchResult = {
|
||||
score: number;
|
||||
};
|
||||
|
||||
/**
 * Contract implemented by every embedding backend in this file.
 */
interface EmbeddingProviderInterface {
  /** Produces the embedding vector for a single piece of text. */
  embed(text: string): Promise<number[]>;
  /**
   * Optional bulk variant: produces one embedding vector per input text.
   * Implementations may omit it, so callers must check for its presence.
   */
  embedBatch?(texts: string[]): Promise<number[][]>;
}
|
||||
|
||||
// ============================================================================
|
||||
// LanceDB Provider
|
||||
// ============================================================================
|
||||
@ -147,10 +155,10 @@ class MemoryDB {
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// OpenAI Embeddings
|
||||
// OpenAI Embeddings Provider
|
||||
// ============================================================================
|
||||
|
||||
class Embeddings {
|
||||
class OpenAIEmbeddings implements EmbeddingProviderInterface {
|
||||
private client: OpenAI;
|
||||
|
||||
constructor(
|
||||
@ -167,6 +175,137 @@ class Embeddings {
|
||||
});
|
||||
return response.data[0].embedding;
|
||||
}
|
||||
|
||||
/**
 * Embeds several texts with a single embeddings request.
 *
 * @param texts - Texts to embed; an empty array short-circuits without a request.
 * @returns One embedding per input text, in the same order as `texts`.
 * @throws Error when the response does not contain exactly one embedding per input.
 */
async embedBatch(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return [];

  const response = await this.client.embeddings.create({
    model: this.model,
    input: texts,
  });

  // A short or long response would silently pair vectors with the wrong texts.
  if (response.data.length !== texts.length) {
    throw new Error(
      `Embedding count mismatch: requested ${texts.length}, received ${response.data.length}`,
    );
  }

  // Each item carries the index of the input it belongs to; order by it rather
  // than relying on the array order of the response.
  return [...response.data].sort((a, b) => a.index - b.index).map((d) => d.embedding);
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Local Embeddings Provider (node-llama-cpp)
|
||||
// ============================================================================
|
||||
|
||||
// Lazy-load node-llama-cpp types
|
||||
type Llama = import("node-llama-cpp").Llama;
|
||||
type LlamaModel = import("node-llama-cpp").LlamaModel;
|
||||
type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
|
||||
|
||||
/**
 * Loads node-llama-cpp through a dynamic import, so the module is resolved
 * when this function is called rather than when this file is loaded.
 *
 * @returns The node-llama-cpp module namespace.
 */
async function importNodeLlamaCpp(): Promise<typeof import("node-llama-cpp")> {
  return import("node-llama-cpp");
}
|
||||
|
||||
/**
 * Embedding provider backed by a local GGUF model loaded through node-llama-cpp.
 *
 * The model is loaded lazily on the first embed call. Concurrent first calls
 * share one in-flight initialization, and a failed initialization is not
 * cached, so a later call retries.
 */
class LocalEmbeddings implements EmbeddingProviderInterface {
  private llama: Llama | null = null;
  private embeddingModel: LlamaModel | null = null;
  private embeddingContext: LlamaEmbeddingContext | null = null;
  // In-flight initialization, shared by concurrent callers.
  private initPromise: Promise<LlamaEmbeddingContext> | null = null;

  /**
   * @param modelPath - Model identifier or path handed to node-llama-cpp's resolveModelFile.
   * @param modelCacheDir - Optional second argument for resolveModelFile.
   */
  constructor(
    private modelPath: string,
    private modelCacheDir?: string,
  ) {}

  /** Returns the ready embedding context, initializing it on first use. */
  private async ensureInitialized(): Promise<LlamaEmbeddingContext> {
    if (this.embeddingContext) return this.embeddingContext;

    const pending = this.initPromise ?? (this.initPromise = this.doInitialize());
    try {
      return await pending;
    } catch (err) {
      // Drop the rejected promise so the next call can retry. Only clear it if
      // no newer attempt has replaced it in the meantime.
      if (this.initPromise === pending) this.initPromise = null;
      throw err;
    }
  }

  /**
   * Loads node-llama-cpp, resolves and loads the model, and creates the
   * embedding context. Instance fields are assigned only after every step
   * succeeded, so a failure never leaves the object half-initialized.
   *
   * @throws Error carrying the setup guidance built by formatLocalSetupError.
   */
  private async doInitialize(): Promise<LlamaEmbeddingContext> {
    try {
      const { getLlama, resolveModelFile, LlamaLogLevel } = await importNodeLlamaCpp();

      const llama = await getLlama({ logLevel: LlamaLogLevel.error });
      const resolvedPath = await resolveModelFile(this.modelPath, this.modelCacheDir || undefined);
      const model = await llama.loadModel({ modelPath: resolvedPath });
      const context = await model.createEmbeddingContext();

      this.llama = llama;
      this.embeddingModel = model;
      this.embeddingContext = context;
      return context;
    } catch (err) {
      throw new Error(formatLocalSetupError(err));
    }
  }

  /** Embeds a single text and returns its vector as a plain number array. */
  async embed(text: string): Promise<number[]> {
    const ctx = await this.ensureInitialized();
    const embedding = await ctx.getEmbeddingFor(text);
    return Array.from(embedding.vector) as number[];
  }

  /**
   * Embeds several texts; the result order matches `texts`.
   * An empty input returns immediately without loading the model.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];

    const ctx = await this.ensureInitialized();
    return Promise.all(
      texts.map(async (text) => {
        const embedding = await ctx.getEmbeddingFor(text);
        return Array.from(embedding.vector) as number[];
      }),
    );
  }
}
|
||||
|
||||
/**
 * Tells whether an error is a module-resolution failure for node-llama-cpp.
 *
 * Matches both the ESM loader code (`ERR_MODULE_NOT_FOUND`) and the CommonJS
 * one (`MODULE_NOT_FOUND`), and additionally requires the package name to
 * appear in the message so unrelated missing modules are not misreported.
 *
 * @param err - Any thrown value.
 * @returns true only for an Error with a module-not-found code that mentions node-llama-cpp.
 */
function isNodeLlamaCppMissing(err: unknown): boolean {
  if (!(err instanceof Error)) return false;

  const code = (err as Error & { code?: unknown }).code;
  const isModuleNotFound = code === "ERR_MODULE_NOT_FOUND" || code === "MODULE_NOT_FOUND";
  return isModuleNotFound && err.message.includes("node-llama-cpp");
}
|
||||
|
||||
/**
 * Converts any thrown value into a human-readable message.
 *
 * @param err - Any thrown value.
 * @returns The message of an Error; the JSON form of other objects when they
 *   can be serialized; otherwise the plain string conversion of the value.
 */
function formatError(err: unknown): string {
  if (err instanceof Error) return err.message;

  if (typeof err === "object" && err !== null) {
    // String() on a plain object yields "[object Object]", which hides the payload.
    try {
      const json = JSON.stringify(err);
      if (typeof json === "string") return json;
    } catch {
      // Circular references or BigInt fields: fall through to String().
    }
  }

  return String(err);
}
|
||||
|
||||
/**
 * Builds the multi-line message shown when local embeddings cannot be set up.
 *
 * The message states the reason, then lists remediation steps. The reinstall
 * step is included only when node-llama-cpp itself is missing, and steps are
 * numbered consecutively in both cases.
 *
 * @param err - The value thrown during local embedding initialization.
 * @returns Newline-joined message text.
 */
function formatLocalSetupError(err: unknown): string {
  const detail = formatError(err);
  const missing = isNodeLlamaCppMissing(err);

  const lines: string[] = ["Local embeddings unavailable."];
  if (missing) {
    lines.push("Reason: optional dependency node-llama-cpp is missing (or failed to install).");
    if (detail) lines.push(`Detail: ${detail}`);
  } else if (detail) {
    lines.push(`Reason: ${detail}`);
  }

  const steps: string[] = ["Use Node 22 LTS (recommended for installs/updates)"];
  if (missing) {
    steps.push("Reinstall OpenClaw (this should install node-llama-cpp): npm i -g openclaw@latest");
  }
  steps.push(
    "If you use pnpm: pnpm approve-builds (select node-llama-cpp), then pnpm rebuild node-llama-cpp",
  );

  lines.push("To enable local embeddings:");
  // Number from the actual list so omitting the reinstall step leaves no gap.
  steps.forEach((step, i) => lines.push(`${i + 1}) ${step}`));
  lines.push('Or set embedding.provider = "openai" (remote) in the plugin config.');

  return lines.join("\n");
}
|
||||
|
||||
// ============================================================================
|
||||
// Factory function to create embedding provider
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Creates the embedding backend selected by the parsed plugin config.
 *
 * @param cfg - Parsed memory config.
 * @returns A LocalEmbeddings instance for provider "local", otherwise an
 *   OpenAIEmbeddings instance.
 * @throws Error when no model can be determined, or when the OpenAI provider
 *   is selected without an API key.
 */
function createEmbeddingProvider(cfg: MemoryConfig): EmbeddingProviderInterface {
  const { provider, model, apiKey, local } = cfg.embedding;

  if (provider === "local") {
    // An explicit local.modelPath takes precedence over embedding.model.
    const modelPath = local?.modelPath || model;
    if (!modelPath) {
      throw new Error("embedding.model or embedding.local.modelPath is required for local provider");
    }
    return new LocalEmbeddings(modelPath, local?.modelCacheDir);
  }

  // OpenAI provider (default)
  if (!apiKey) {
    throw new Error("embedding.apiKey is required for OpenAI provider");
  }
  if (!model) {
    throw new Error("embedding.model is required for OpenAI provider");
  }
  return new OpenAIEmbeddings(apiKey, model);
}
|
||||
|
||||
// ============================================================================
|
||||
@ -216,19 +355,23 @@ function detectCategory(text: string): MemoryCategory {
|
||||
const memoryPlugin = {
|
||||
id: "memory-lancedb",
|
||||
name: "Memory (LanceDB)",
|
||||
description: "LanceDB-backed long-term memory with auto-recall/capture",
|
||||
description: "LanceDB-backed long-term memory with auto-recall/capture. Supports OpenAI and local embeddings.",
|
||||
kind: "memory" as const,
|
||||
configSchema: memoryConfigSchema,
|
||||
|
||||
register(api: OpenClawPluginApi) {
|
||||
const cfg = memoryConfigSchema.parse(api.pluginConfig);
|
||||
const resolvedDbPath = api.resolvePath(cfg.dbPath!);
|
||||
const vectorDim = vectorDimsForModel(cfg.embedding.model ?? "text-embedding-3-small");
|
||||
const vectorDim = vectorDimsForModel(cfg.embedding.model!, cfg.embedding.provider);
|
||||
const db = new MemoryDB(resolvedDbPath, vectorDim);
|
||||
const embeddings = new Embeddings(cfg.embedding.apiKey, cfg.embedding.model!);
|
||||
const embeddings = createEmbeddingProvider(cfg);
|
||||
|
||||
const providerInfo = cfg.embedding.provider === "local"
|
||||
? `local (model: ${cfg.embedding.local?.modelPath || cfg.embedding.model})`
|
||||
: `openai (model: ${cfg.embedding.model})`;
|
||||
|
||||
api.logger.info(
|
||||
`memory-lancedb: plugin registered (db: ${resolvedDbPath}, lazy init)`,
|
||||
`memory-lancedb: plugin registered (db: ${resolvedDbPath}, embeddings: ${providerInfo}, lazy init)`,
|
||||
);
|
||||
|
||||
// ========================================================================
|
||||
@ -454,6 +597,9 @@ const memoryPlugin = {
|
||||
.action(async () => {
|
||||
const count = await db.count();
|
||||
console.log(`Total memories: ${count}`);
|
||||
console.log(`Embedding provider: ${cfg.embedding.provider}`);
|
||||
console.log(`Embedding model: ${cfg.embedding.model}`);
|
||||
console.log(`Vector dimensions: ${vectorDim}`);
|
||||
});
|
||||
},
|
||||
{ commands: ["ltm"] },
|
||||
@ -577,7 +723,7 @@ const memoryPlugin = {
|
||||
id: "memory-lancedb",
|
||||
start: () => {
|
||||
api.logger.info(
|
||||
`memory-lancedb: initialized (db: ${resolvedDbPath}, model: ${cfg.embedding.model})`,
|
||||
`memory-lancedb: initialized (db: ${resolvedDbPath}, embeddings: ${providerInfo})`,
|
||||
);
|
||||
},
|
||||
stop: () => {
|
||||
|
||||
@ -2,12 +2,20 @@
|
||||
"name": "@openclaw/memory-lancedb",
|
||||
"version": "2026.1.29",
|
||||
"type": "module",
|
||||
"description": "OpenClaw LanceDB-backed long-term memory plugin with auto-recall/capture",
|
||||
"description": "OpenClaw LanceDB-backed long-term memory plugin with auto-recall/capture. Supports OpenAI and local embeddings.",
|
||||
"dependencies": {
|
||||
"@lancedb/lancedb": "^0.23.0",
|
||||
"@sinclair/typebox": "0.34.47",
|
||||
"openai": "^6.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"node-llama-cpp": ">=3.0.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"node-llama-cpp": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user