Merge d395e40baf into 09be5d45d5

2026-01-30 10:50:04 -05:00 · 2026-01-30 10:50:04 -05:00 · fe0c6a4b2e
commit fe0c6a4b2e
parent 09be5d45d5 d395e40baf
4 changed files with 298 additions and 31 deletions
--- a/extensions/memory-lancedb/config.ts
+++ b/extensions/memory-lancedb/config.ts
@ -3,11 +3,18 @@ import fs from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";
 export type EmbeddingProvider = "openai" | "local";
 export type MemoryConfig = {
  embedding: {
-    provider: "openai";
+    provider: EmbeddingProvider;
    model?: string;
-    apiKey: string;
+    apiKey?: string;
    // Local embedding options
    local?: {
      modelPath?: string;
      modelCacheDir?: string;
    };
  };
  dbPath?: string;
  autoCapture?: boolean;
@ -17,7 +24,8 @@ export type MemoryConfig = {
 export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
 export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];
-const DEFAULT_MODEL = "text-embedding-3-small";
+const DEFAULT_OPENAI_MODEL = "text-embedding-3-small";
 const DEFAULT_LOCAL_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
 const LEGACY_STATE_DIRS: string[] = [];
 function resolveDefaultDbPath(): string {
@ -43,11 +51,15 @@ function resolveDefaultDbPath(): string {
 const DEFAULT_DB_PATH = resolveDefaultDbPath();
-const EMBEDDING_DIMENSIONS: Record<string, number> = {
+// OpenAI embedding dimensions
 const OPENAI_EMBEDDING_DIMENSIONS: Record<string, number> = {
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
 };
 // Default dimension for local models (embeddinggemma-300M outputs 768-dim vectors)
 const DEFAULT_LOCAL_EMBEDDING_DIM = 768;
 function assertAllowedKeys(
  value: Record<string, unknown>,
  allowed: string[],
@ -58,10 +70,16 @@ function assertAllowedKeys(
  throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`);
 }
-export function vectorDimsForModel(model: string): number {
+export function vectorDimsForModel(model: string, provider: EmbeddingProvider): number {
  // Returns the embedding vector width to use for the given model/provider pair.
  // Throws for OpenAI models that are not listed in OPENAI_EMBEDDING_DIMENSIONS.
-  const dims = EMBEDDING_DIMENSIONS[model];
+  if (provider === "local") {
    // Local models have varying dimensions; default to embeddinggemma's 768
    // TODO: Could detect from model metadata in the future
    // NOTE(review): `model` is ignored on this path, so a custom local model whose
    // vectors are not 768 wide would presumably disagree with the dimension handed
    // to the vector store by the caller — confirm before allowing other local models.
    return DEFAULT_LOCAL_EMBEDDING_DIM;
  }
  // OpenAI path: the width comes from the static lookup table.
  const dims = OPENAI_EMBEDDING_DIMENSIONS[model];
  if (!dims) {
-    throw new Error(`Unsupported embedding model: ${model}`);
+    throw new Error(`Unsupported OpenAI embedding model: ${model}. Supported: ${Object.keys(OPENAI_EMBEDDING_DIMENSIONS).join(", ")}`);
  }
  return dims;
 }
@ -76,10 +94,17 @@ function resolveEnvVars(value: string): string {
  });
 }
-function resolveEmbeddingModel(embedding: Record<string, unknown>): string {
+function resolveEmbeddingModel(embedding: Record<string, unknown>, provider: EmbeddingProvider): string {
  // Picks the model name: the configured `embedding.model` when it is a string,
  // otherwise the default for the selected provider. Because `||` is used below,
  // an empty string also falls back to the default.
-  const model = typeof embedding.model === "string" ? embedding.model : DEFAULT_MODEL;
+  const model = typeof embedding.model === "string" ? embedding.model : undefined;
-  vectorDimsForModel(model);
+
-  return model;
+  if (provider === "local") {
    // Local model names are returned as-is; only the OpenAI path below validates the name.
    return model || DEFAULT_LOCAL_MODEL;
  }
  // OpenAI provider
  const resolvedModel = model || DEFAULT_OPENAI_MODEL;
  // Called only for its side effect: it throws when the OpenAI model is unsupported.
  vectorDimsForModel(resolvedModel, provider); // Validate
  return resolvedModel;
 }
 export const memoryConfigSchema = {
@ -91,18 +116,38 @@ export const memoryConfigSchema = {
    assertAllowedKeys(cfg, ["embedding", "dbPath", "autoCapture", "autoRecall"], "memory config");
    const embedding = cfg.embedding as Record<string, unknown> | undefined;
-    if (!embedding || typeof embedding.apiKey !== "string") {
+    if (!embedding) {
-      throw new Error("embedding.apiKey is required");
+      throw new Error("embedding config is required");
    }
-    assertAllowedKeys(embedding, ["apiKey", "model"], "embedding config");
+    assertAllowedKeys(embedding, ["provider", "apiKey", "model", "local"], "embedding config");
-    const model = resolveEmbeddingModel(embedding);
+    // Determine provider (default to "openai" for backwards compatibility)
    const provider: EmbeddingProvider = embedding.provider === "local" ? "local" : "openai";
    // Validate apiKey requirement based on provider
    if (provider === "openai" && typeof embedding.apiKey !== "string") {
      throw new Error("embedding.apiKey is required when using OpenAI provider");
    }
    const model = resolveEmbeddingModel(embedding, provider);
    // Parse local config if present
    let localConfig: MemoryConfig["embedding"]["local"] | undefined;
    if (embedding.local && typeof embedding.local === "object") {
      const local = embedding.local as Record<string, unknown>;
      assertAllowedKeys(local, ["modelPath", "modelCacheDir"], "embedding.local config");
      localConfig = {
        modelPath: typeof local.modelPath === "string" ? local.modelPath : undefined,
        modelCacheDir: typeof local.modelCacheDir === "string" ? local.modelCacheDir : undefined,
      };
    }
    return {
      embedding: {
-        provider: "openai",
+        provider,
        model,
-        apiKey: resolveEnvVars(embedding.apiKey),
+        apiKey: typeof embedding.apiKey === "string" ? resolveEnvVars(embedding.apiKey) : undefined,
        local: localConfig,
      },
      dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH,
      autoCapture: cfg.autoCapture !== false,
@ -110,16 +155,33 @@ export const memoryConfigSchema = {
    };
  },
  uiHints: {
    "embedding.provider": {
      label: "Embedding Provider",
      help: "Choose 'openai' for remote embeddings or 'local' for on-device embeddings using node-llama-cpp",
      options: ["openai", "local"],
    },
    "embedding.apiKey": {
      label: "OpenAI API Key",
      sensitive: true,
      placeholder: "sk-proj-...",
-      help: "API key for OpenAI embeddings (or use ${OPENAI_API_KEY})",
+      help: "API key for OpenAI embeddings (required if provider is 'openai', or use ${OPENAI_API_KEY})",
    },
    "embedding.model": {
      label: "Embedding Model",
-      placeholder: DEFAULT_MODEL,
+      placeholder: DEFAULT_OPENAI_MODEL,
-      help: "OpenAI embedding model to use",
+      help: "Model to use for embeddings. For OpenAI: text-embedding-3-small/large. For local: HuggingFace GGUF path.",
    },
    "embedding.local.modelPath": {
      label: "Local Model Path",
      placeholder: DEFAULT_LOCAL_MODEL,
      help: "Path to local GGUF embedding model (e.g., hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf)",
      advanced: true,
    },
    "embedding.local.modelCacheDir": {
      label: "Model Cache Directory",
      placeholder: "~/.cache/node-llama-cpp",
      help: "Directory to cache downloaded models",
      advanced: true,
    },
    dbPath: {
      label: "Database Path",
--- a/extensions/memory-lancedb/index.test.ts
+++ b/extensions/memory-lancedb/index.test.ts
@ -81,7 +81,7 @@ describe("memory plugin e2e", () => {
    delete process.env.TEST_MEMORY_API_KEY;
  });
-  test("config schema rejects missing apiKey", async () => {
+  test("config schema rejects missing apiKey for OpenAI provider", async () => {
    const { default: memoryPlugin } = await import("./index.js");
    expect(() => {
@ -89,7 +89,58 @@ describe("memory plugin e2e", () => {
        embedding: {},
        dbPath,
      });
-    }).toThrow("embedding.apiKey is required");
+    }).toThrow("embedding.apiKey is required when using OpenAI provider");
  });
  // The local provider must parse successfully even though no apiKey is supplied.
  test("config schema accepts local provider without apiKey", async () => {
    const plugin = (await import("./index.js")).default;
    const parsed = plugin.configSchema?.parse?.({
      embedding: { provider: "local" },
      dbPath,
    });
    const embedding = parsed?.embedding;

    expect(parsed).toBeDefined();
    expect(embedding?.provider).toBe("local");
    expect(embedding?.apiKey).toBeUndefined();
    // No model was configured, so the default local model name is expected.
    expect(embedding?.model).toContain("embeddinggemma");
  });
  // Values given under embedding.local must come back unchanged from the parser.
  test("config schema accepts local provider with custom model path", async () => {
    const plugin = (await import("./index.js")).default;
    const parsed = plugin.configSchema?.parse?.({
      embedding: {
        provider: "local",
        local: { modelPath: "hf:custom/model.gguf", modelCacheDir: "/tmp/models" },
      },
      dbPath,
    });
    const embedding = parsed?.embedding;

    expect(parsed).toBeDefined();
    expect(embedding?.provider).toBe("local");
    expect(embedding?.local?.modelPath).toBe("hf:custom/model.gguf");
    expect(embedding?.local?.modelCacheDir).toBe("/tmp/models");
  });
  // A config written before `provider` existed (apiKey only) must resolve to "openai".
  test("config schema defaults to OpenAI provider for backwards compatibility", async () => {
    const plugin = (await import("./index.js")).default;
    const parsed = plugin.configSchema?.parse?.({
      embedding: { apiKey: OPENAI_API_KEY },
      dbPath,
    });

    expect(parsed).toBeDefined();
    expect(parsed?.embedding?.provider).toBe("openai");
  });
  test("shouldCapture filters correctly", async () => {
--- a/extensions/memory-lancedb/index.ts
+++ b/extensions/memory-lancedb/index.ts
@ -2,7 +2,7 @@
 * OpenClaw Memory (LanceDB) Plugin
 *
 * Long-term memory with vector search for AI conversations.
- * Uses LanceDB for storage and OpenAI for embeddings.
+ * Uses LanceDB for storage and supports both OpenAI and local embeddings.
 * Provides seamless auto-recall and auto-capture via lifecycle hooks.
 */
@ -16,6 +16,8 @@ import { stringEnum } from "openclaw/plugin-sdk";
 import {
  MEMORY_CATEGORIES,
  type MemoryCategory,
  type MemoryConfig,
  type EmbeddingProvider as EmbeddingProviderType,
  memoryConfigSchema,
  vectorDimsForModel,
 } from "./config.js";
@ -38,6 +40,12 @@ type MemorySearchResult = {
  score: number;
 };
 // Embedding provider interface
 /**
  * Contract implemented by the embedding backends in this file
  * (OpenAIEmbeddings and LocalEmbeddings).
  */
 interface EmbeddingProviderInterface {
  /** Resolves to the embedding vector for a single piece of text. */
  embed(text: string): Promise<number[]>;
  /** Optional bulk variant; resolves to one vector per entry of `texts`. */
  embedBatch?(texts: string[]): Promise<number[][]>;
 }
 // ============================================================================
 // LanceDB Provider
 // ============================================================================
@ -147,10 +155,10 @@ class MemoryDB {
 }
 // ============================================================================
-// OpenAI Embeddings
+// OpenAI Embeddings Provider
 // ============================================================================
-class Embeddings {
+class OpenAIEmbeddings implements EmbeddingProviderInterface {
  private client: OpenAI;
  constructor(
@ -167,6 +175,137 @@ class Embeddings {
    });
    return response.data[0].embedding;
  }
  /**
   * Embeds several texts with a single embeddings request.
   *
   * @param texts Texts to embed; an empty list returns `[]` without calling the API.
   * @returns The `embedding` of each entry in the response's `data`, in response order.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    // Nothing to embed: skip the request entirely.
    if (texts.length === 0) {
      return [];
    }

    const { data } = await this.client.embeddings.create({
      model: this.model,
      input: texts,
    });

    const vectors: number[][] = [];
    for (const item of data) {
      vectors.push(item.embedding);
    }
    return vectors;
  }
 }
 // ============================================================================
 // Local Embeddings Provider (node-llama-cpp)
 // ============================================================================
 // Lazy-load node-llama-cpp types
 type Llama = import("node-llama-cpp").Llama;
 type LlamaModel = import("node-llama-cpp").LlamaModel;
 type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
 /**
  * Loads node-llama-cpp through a dynamic import, so the package is only
  * resolved when this function is actually called.
  */
 async function importNodeLlamaCpp() {
  const nodeLlamaCpp = await import("node-llama-cpp");
  return nodeLlamaCpp;
 }
 /**
  * Embedding provider backed by a local model loaded through node-llama-cpp.
  *
  * The model is loaded lazily on the first embed call and the resulting
  * embedding context is reused afterwards. Concurrent first calls share one
  * in-flight initialization. If initialization fails, the failure is not
  * cached: the next call starts a fresh attempt, so a transient problem does
  * not disable local embeddings for the lifetime of the process.
  */
 class LocalEmbeddings implements EmbeddingProviderInterface {
  // References are kept so the loaded runtime, model and context stay alive together.
  private llama: Llama | null = null;
  private embeddingModel: LlamaModel | null = null;
  private embeddingContext: LlamaEmbeddingContext | null = null;
  // In-flight (or completed) initialization; reset to null when an attempt fails.
  private initPromise: Promise<LlamaEmbeddingContext> | null = null;

  /**
   * @param modelPath Model path/URI handed to node-llama-cpp's `resolveModelFile`.
   * @param modelCacheDir Optional second argument for `resolveModelFile`.
   */
  constructor(
    private modelPath: string,
    private modelCacheDir?: string,
  ) {}

  /**
   * Returns the ready embedding context, initializing it on first use.
   *
   * @throws Error with setup guidance when initialization fails.
   */
  private async ensureInitialized(): Promise<LlamaEmbeddingContext> {
    if (this.embeddingContext) return this.embeddingContext;

    let pending = this.initPromise;
    if (!pending) {
      pending = this.doInitialize();
      this.initPromise = pending;
    }

    try {
      return await pending;
    } catch (err) {
      // Forget only the attempt that failed; a newer attempt started by another
      // caller in the meantime must not be discarded.
      if (this.initPromise === pending) {
        this.initPromise = null;
      }
      throw err;
    }
  }

  /**
   * Loads node-llama-cpp, resolves and loads the model, and creates the
   * embedding context. Instance fields are only assigned once every step
   * has succeeded.
   *
   * @throws Error whose message is produced by `formatLocalSetupError`.
   */
  private async doInitialize(): Promise<LlamaEmbeddingContext> {
    try {
      const { getLlama, resolveModelFile, LlamaLogLevel } = await importNodeLlamaCpp();
      const llama = await getLlama({ logLevel: LlamaLogLevel.error });
      const resolvedPath = await resolveModelFile(this.modelPath, this.modelCacheDir || undefined);
      const model = await llama.loadModel({ modelPath: resolvedPath });
      const context = await model.createEmbeddingContext();

      this.llama = llama;
      this.embeddingModel = model;
      this.embeddingContext = context;
      return context;
    } catch (err) {
      throw new Error(formatLocalSetupError(err));
    }
  }

  /** Embeds one text with an already-initialized context. */
  private async embedWith(ctx: LlamaEmbeddingContext, text: string): Promise<number[]> {
    const embedding = await ctx.getEmbeddingFor(text);
    return Array.from(embedding.vector) as number[];
  }

  /**
   * Embeds a single text.
   *
   * @throws Error with setup guidance when the local model cannot be initialized.
   */
  async embed(text: string): Promise<number[]> {
    const ctx = await this.ensureInitialized();
    return this.embedWith(ctx, text);
  }

  /**
   * Embeds several texts; the result has one vector per input, in input order.
   * An empty input returns `[]` without loading the model, matching
   * `OpenAIEmbeddings.embedBatch`.
   *
   * @throws Error with setup guidance when the local model cannot be initialized.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];
    const ctx = await this.ensureInitialized();
    return Promise.all(texts.map((text) => this.embedWith(ctx, text)));
  }
 }
 /**
  * Reports whether `err` indicates that the node-llama-cpp package could not be found.
  *
  * True only when `err` is an Error whose `code` is "ERR_MODULE_NOT_FOUND" and
  * whose message mentions "node-llama-cpp".
  */
 function isNodeLlamaCppMissing(err: unknown): boolean {
  if (err instanceof Error) {
    const { code } = err as Error & { code?: unknown };
    return code === "ERR_MODULE_NOT_FOUND" && err.message.includes("node-llama-cpp");
  }
  return false;
 }
 /**
  * Converts any thrown value to display text: the `message` of an Error,
  * otherwise the result of `String(err)`.
  */
 function formatError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
 }
 /**
  * Builds the multi-line message shown when local embeddings cannot be set up.
  *
  * The message states the reason (a missing node-llama-cpp dependency, or the
  * underlying error text), then lists remediation steps. Steps are numbered
  * sequentially, so the list never skips a number when the reinstall step is
  * left out because the dependency is present.
  *
  * @param err The value thrown during local embedding initialization.
  * @returns Newline-joined, human-readable guidance.
  */
 function formatLocalSetupError(err: unknown): string {
  const detail = formatError(err);
  const missing = isNodeLlamaCppMissing(err);

  const lines: string[] = ["Local embeddings unavailable."];
  if (missing) {
    lines.push("Reason: optional dependency node-llama-cpp is missing (or failed to install).");
    if (detail) lines.push(`Detail: ${detail}`);
  } else if (detail) {
    lines.push(`Reason: ${detail}`);
  }

  // The reinstall step only helps when the package itself is absent.
  const steps: string[] = ["Use Node 22 LTS (recommended for installs/updates)"];
  if (missing) {
    steps.push("Reinstall OpenClaw (this should install node-llama-cpp): npm i -g openclaw@latest");
  }
  steps.push(
    "If you use pnpm: pnpm approve-builds (select node-llama-cpp), then pnpm rebuild node-llama-cpp",
  );

  lines.push("To enable local embeddings:");
  steps.forEach((step, index) => {
    lines.push(`${index + 1}) ${step}`);
  });
  lines.push('Or set embedding.provider = "openai" (remote) in the plugin config.');

  return lines.join("\n");
 }
 // ============================================================================
 // Factory function to create embedding provider
 // ============================================================================
 /**
  * Builds the embedding backend selected by `cfg.embedding.provider`.
  *
  * For "local", `embedding.local.modelPath` takes precedence over
  * `embedding.model` as the model to load. Any other provider value is
  * handled as OpenAI.
  *
  * @throws Error when the OpenAI provider is selected and no apiKey is set.
  */
 function createEmbeddingProvider(cfg: MemoryConfig): EmbeddingProviderInterface {
  const { provider, model, apiKey, local } = cfg.embedding;

  if (provider !== "local") {
    // OpenAI provider (default)
    if (!apiKey) {
      throw new Error("embedding.apiKey is required for OpenAI provider");
    }
    return new OpenAIEmbeddings(apiKey, model!);
  }

  return new LocalEmbeddings(local?.modelPath || model!, local?.modelCacheDir);
 }
 // ============================================================================
@ -216,19 +355,23 @@ function detectCategory(text: string): MemoryCategory {
 const memoryPlugin = {
  id: "memory-lancedb",
  name: "Memory (LanceDB)",
-  description: "LanceDB-backed long-term memory with auto-recall/capture",
+  description: "LanceDB-backed long-term memory with auto-recall/capture. Supports OpenAI and local embeddings.",
  kind: "memory" as const,
  configSchema: memoryConfigSchema,
  register(api: OpenClawPluginApi) {
    const cfg = memoryConfigSchema.parse(api.pluginConfig);
    const resolvedDbPath = api.resolvePath(cfg.dbPath!);
-    const vectorDim = vectorDimsForModel(cfg.embedding.model ?? "text-embedding-3-small");
+    const vectorDim = vectorDimsForModel(cfg.embedding.model!, cfg.embedding.provider);
    const db = new MemoryDB(resolvedDbPath, vectorDim);
-    const embeddings = new Embeddings(cfg.embedding.apiKey, cfg.embedding.model!);
+    const embeddings = createEmbeddingProvider(cfg);
    const providerInfo = cfg.embedding.provider === "local"
      ? `local (model: ${cfg.embedding.local?.modelPath || cfg.embedding.model})`
      : `openai (model: ${cfg.embedding.model})`;
    api.logger.info(
-      `memory-lancedb: plugin registered (db: ${resolvedDbPath}, lazy init)`,
+      `memory-lancedb: plugin registered (db: ${resolvedDbPath}, embeddings: ${providerInfo}, lazy init)`,
    );
    // ========================================================================
@ -454,6 +597,9 @@ const memoryPlugin = {
          .action(async () => {
            const count = await db.count();
            console.log(`Total memories: ${count}`);
            console.log(`Embedding provider: ${cfg.embedding.provider}`);
            console.log(`Embedding model: ${cfg.embedding.model}`);
            console.log(`Vector dimensions: ${vectorDim}`);
          });
      },
      { commands: ["ltm"] },
@ -577,7 +723,7 @@ const memoryPlugin = {
      id: "memory-lancedb",
      start: () => {
        api.logger.info(
-          `memory-lancedb: initialized (db: ${resolvedDbPath}, model: ${cfg.embedding.model})`,
+          `memory-lancedb: initialized (db: ${resolvedDbPath}, embeddings: ${providerInfo})`,
        );
      },
      stop: () => {
--- a/extensions/memory-lancedb/package.json
+++ b/extensions/memory-lancedb/package.json
@ -2,12 +2,20 @@
  "name": "@openclaw/memory-lancedb",
  "version": "2026.1.29",
  "type": "module",
-  "description": "OpenClaw LanceDB-backed long-term memory plugin with auto-recall/capture",
+  "description": "OpenClaw LanceDB-backed long-term memory plugin with auto-recall/capture. Supports OpenAI and local embeddings.",
  "dependencies": {
    "@lancedb/lancedb": "^0.23.0",
    "@sinclair/typebox": "0.34.47",
    "openai": "^6.16.0"
  },
  "peerDependencies": {
    "node-llama-cpp": ">=3.0.0"
  },
  "peerDependenciesMeta": {
    "node-llama-cpp": {
      "optional": true
    }
  },
  "openclaw": {
    "extensions": [
      "./index.ts"