openclaw/extensions/memory-lancedb/config.ts
Mike Nott d2b1dde73b feat(memory-lancedb): support custom embedding endpoints
Add support for self-hosted OpenAI-compatible embedding servers:

- Add `embedding.baseUrl` config option for custom endpoint URL
- Add `embedding.dimensions` config option to override vector dimensions
- Remove model enum restriction to allow any model name
- Update Embeddings class to pass baseURL to OpenAI client

This enables users to run local embedding models (e.g., via llama.cpp,
text-embeddings-inference, or other OpenAI-compatible servers) instead
of requiring the OpenAI API.

Example config:
```json
{
  "embedding": {
    "apiKey": "not-needed",
    "baseUrl": "http://localhost:8080/v1",
    "model": "my-local-model",
    "dimensions": 4096
  }
}
```
2026-01-28 15:06:17 +00:00

140 lines
4.4 KiB
TypeScript

import { Type } from "@sinclair/typebox";
import { homedir } from "node:os";
import { join } from "node:path";
/**
 * Resolved configuration for the memory extension, in the shape returned by
 * `memoryConfigSchema.parse`.
 */
export type MemoryConfig = {
  /** Settings for the embedding backend. */
  embedding: {
    /** Provider tag; `"openai"` is the only value the type admits. */
    provider: "openai";
    /** Embedding model name. */
    model?: string;
    /** API key for embeddings. */
    apiKey: string;
    /** Custom endpoint URL (for local/self-hosted embeddings). */
    baseUrl?: string;
    /** Override for the vector dimensions. */
    dimensions?: number;
  };
  /** Database path. */
  dbPath?: string;
  /** Automatically capture important information from conversations. */
  autoCapture?: boolean;
  /** Automatically inject relevant memories into context. */
  autoRecall?: boolean;
};
/** Every category label a memory entry may carry, as a readonly tuple. */
export const MEMORY_CATEGORIES = [
  "preference",
  "fact",
  "decision",
  "entity",
  "other",
] as const;

/** Union of the literal strings listed in `MEMORY_CATEGORIES`. */
export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];

/** Model name used when the config does not supply a string `embedding.model`. */
const DEFAULT_MODEL = "text-embedding-3-small";

/** Database path used when the config does not supply a string `dbPath`. */
const DEFAULT_DB_PATH = join(homedir(), ".clawdbot", "memory", "lancedb");

/** Built-in vector sizes, keyed by embedding model name. */
const EMBEDDING_DIMENSIONS: Record<string, number> = {
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
};
/**
 * Rejects an object that carries keys outside an allow-list.
 *
 * @param value - Object whose own enumerable keys are inspected.
 * @param allowed - Key names that are permitted on `value`.
 * @param label - Name of the object being checked; used as the error prefix.
 * @throws Error naming every disallowed key, in the order `Object.keys` yields them.
 */
function assertAllowedKeys(
  value: Record<string, unknown>,
  allowed: string[],
  label: string,
) {
  const rejected: string[] = [];
  for (const key of Object.keys(value)) {
    if (allowed.includes(key)) continue;
    rejected.push(key);
  }
  if (rejected.length > 0) {
    throw new Error(`${label} has unknown keys: ${rejected.join(", ")}`);
  }
}
/**
 * Resolves the embedding vector size to use for a model.
 *
 * @param model - Embedding model name; looked up in the built-in table when
 *   no override is given.
 * @param customDims - Optional override. When supplied (not `undefined`/`null`)
 *   it takes precedence over the built-in table.
 * @returns The vector size as a positive integer.
 * @throws Error when `customDims` is supplied but is not a positive integer.
 * @throws Error when no override is given and `model` is not in the built-in table.
 */
export function vectorDimsForModel(model: string, customDims?: number): number {
  // Custom dimensions override built-in model lookup. Validate instead of
  // relying on truthiness, which let negative/fractional values through and
  // silently ignored 0 and NaN.
  if (customDims != null) {
    if (!Number.isInteger(customDims) || customDims <= 0) {
      throw new Error(
        `Invalid embedding dimensions: ${customDims}. embedding.dimensions must be a positive integer.`,
      );
    }
    return customDims;
  }
  // Own-property lookup only: a plain index would also resolve inherited
  // Object.prototype members (e.g. "constructor"), returning a non-number.
  const dims = Object.prototype.hasOwnProperty.call(EMBEDDING_DIMENSIONS, model)
    ? EMBEDDING_DIMENSIONS[model]
    : undefined;
  if (!dims) {
    throw new Error(
      `Unsupported embedding model: ${model}. Specify dimensions manually via embedding.dimensions.`
    );
  }
  return dims;
}
/**
 * Expands every `${NAME}` placeholder in a string with the value of the
 * matching environment variable.
 *
 * @param value - Text that may contain `${NAME}` placeholders.
 * @returns `value` with each placeholder replaced; text without placeholders
 *   is returned unchanged.
 * @throws Error when a referenced variable is unset or empty.
 */
function resolveEnvVars(value: string): string {
  const lookup = (name: string): string => {
    const resolved = process.env[name];
    if (resolved) return resolved;
    // Unset and empty-string variables are both treated as missing.
    throw new Error(`Environment variable ${name} is not set`);
  };
  return value.replace(/\$\{([^}]+)\}/g, (_match, name: string) => lookup(name));
}
/**
 * Picks the embedding model name out of a raw embedding config object.
 *
 * @param embedding - Raw embedding config as supplied by the caller.
 * @returns `embedding.model` when it is a string, otherwise `DEFAULT_MODEL`.
 * @throws Whatever `vectorDimsForModel` throws for the chosen model when
 *   `embedding.dimensions` is not a number.
 */
function resolveEmbeddingModel(embedding: Record<string, unknown>): string {
  const { model: requested, dimensions } = embedding;
  const chosen = typeof requested === "string" ? requested : DEFAULT_MODEL;
  const hasCustomDims = typeof dimensions === "number";
  if (!hasCustomDims) {
    // Called only for its validation side effect; the result is discarded.
    vectorDimsForModel(chosen);
  }
  return chosen;
}
/**
 * Validates a raw `embedding.dimensions` value.
 *
 * @param raw - Value read from the config; `undefined`/`null` mean "not set".
 * @returns The dimensions as a positive integer, or `undefined` when not set.
 * @throws Error when a value is present but is not a positive integer.
 */
function parseEmbeddingDimensions(raw: unknown): number | undefined {
  if (raw === undefined || raw === null) return undefined;
  if (typeof raw !== "number" || !Number.isInteger(raw) || raw <= 0) {
    throw new Error("embedding.dimensions must be a positive integer");
  }
  return raw;
}

/**
 * Parser and UI metadata for the memory extension's configuration.
 *
 * - `parse` validates an untyped config value and returns a `MemoryConfig`
 *   with defaults filled in.
 * - `uiHints` maps config paths to display metadata (label, placeholder, help
 *   text, flags); presumably consumed by a settings UI — verify against caller.
 */
export const memoryConfigSchema = {
  /**
   * Validates `value` and converts it into a `MemoryConfig`.
   *
   * @param value - Untyped config, expected to be a plain object.
   * @returns The config with `provider` fixed to `"openai"`, `dbPath`
   *   defaulted, `${NAME}` placeholders in the API key expanded, and
   *   `autoCapture` / `autoRecall` enabled unless explicitly `false`.
   * @throws Error when `value` is not a non-array object, contains unknown
   *   keys, lacks a string `embedding.apiKey`, has an invalid
   *   `embedding.dimensions`, names a model that cannot be sized, or
   *   references an unset environment variable in the API key.
   */
  parse(value: unknown): MemoryConfig {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
      throw new Error("memory config required");
    }
    const cfg = value as Record<string, unknown>;
    assertAllowedKeys(cfg, ["embedding", "dbPath", "autoCapture", "autoRecall"], "memory config");

    const embedding = cfg.embedding as Record<string, unknown> | undefined;
    if (!embedding || typeof embedding.apiKey !== "string") {
      throw new Error("embedding.apiKey is required");
    }
    assertAllowedKeys(embedding, ["apiKey", "model", "baseUrl", "dimensions"], "embedding config");

    // Validate the override before resolving the model, so a bad value fails
    // here with a specific message instead of being accepted or silently ignored.
    const dimensions = parseEmbeddingDimensions(embedding.dimensions);
    const model = resolveEmbeddingModel(embedding);
    const baseUrl = typeof embedding.baseUrl === "string" ? embedding.baseUrl : undefined;

    return {
      embedding: {
        provider: "openai",
        model,
        apiKey: resolveEnvVars(embedding.apiKey),
        baseUrl,
        dimensions,
      },
      dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH,
      // Both flags are opt-out: anything other than an explicit `false` enables them.
      autoCapture: cfg.autoCapture !== false,
      autoRecall: cfg.autoRecall !== false,
    };
  },
  uiHints: {
    "embedding.apiKey": {
      label: "OpenAI API Key",
      sensitive: true,
      placeholder: "sk-proj-...",
      help: "API key for embeddings (use 'not-needed' for local servers)",
    },
    "embedding.model": {
      label: "Embedding Model",
      placeholder: DEFAULT_MODEL,
      help: "Embedding model name",
    },
    "embedding.baseUrl": {
      label: "Custom Endpoint URL",
      placeholder: "http://localhost:8080/v1",
      help: "Custom OpenAI-compatible embedding endpoint (for local/self-hosted servers)",
      advanced: true,
    },
    "embedding.dimensions": {
      label: "Vector Dimensions",
      placeholder: "1536",
      help: "Override vector dimensions (required for custom models not in built-in list)",
      advanced: true,
    },
    dbPath: {
      label: "Database Path",
      placeholder: "~/.clawdbot/memory/lancedb",
      advanced: true,
    },
    autoCapture: {
      label: "Auto-Capture",
      help: "Automatically capture important information from conversations",
    },
    autoRecall: {
      label: "Auto-Recall",
      help: "Automatically inject relevant memories into context",
    },
  },
};