import { Type } from "@sinclair/typebox";
import fs from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";

/** Embedding backends accepted by the memory config. */
export type EmbeddingProvider = "openai" | "local";

/** Shape produced by `memoryConfigSchema.parse`. */
export type MemoryConfig = {
  embedding: {
    provider: EmbeddingProvider;
    model?: string;
    apiKey?: string;
    // Local embedding options
    local?: {
      modelPath?: string;
      modelCacheDir?: string;
    };
  };
  dbPath?: string;
  autoCapture?: boolean;
  autoRecall?: boolean;
};

export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];

const DEFAULT_OPENAI_MODEL = "text-embedding-3-small";
const DEFAULT_LOCAL_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
const LEGACY_STATE_DIRS: string[] = [];

/**
 * Picks the default LanceDB directory under the user's home directory.
 *
 * Returns `~/.openclaw/memory/lancedb` when it exists; otherwise the first
 * existing `~/<legacy>/memory/lancedb` for each entry of `LEGACY_STATE_DIRS`;
 * otherwise the preferred path (even though it does not exist yet).
 */
function resolveDefaultDbPath(): string {
  const home = homedir();
  const preferred = join(home, ".openclaw", "memory", "lancedb");
  try {
    if (fs.existsSync(preferred)) return preferred;
  } catch {
    // best-effort
  }
  for (const legacy of LEGACY_STATE_DIRS) {
    const candidate = join(home, legacy, "memory", "lancedb");
    try {
      if (fs.existsSync(candidate)) return candidate;
    } catch {
      // best-effort
    }
  }
  return preferred;
}

const DEFAULT_DB_PATH = resolveDefaultDbPath();

// OpenAI embedding dimensions
const OPENAI_EMBEDDING_DIMENSIONS: Record<string, number> = {
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
};

// Default dimension for local models (embeddinggemma-300M outputs 768-dim vectors)
const DEFAULT_LOCAL_EMBEDDING_DIM = 768;

/**
 * Throws when `value` contains a key that is not listed in `allowed`.
 *
 * @param value - Object whose own enumerable keys are checked.
 * @param allowed - Keys that are permitted.
 * @param label - Prefix used in the error message.
 * @throws Error listing every unknown key.
 */
function assertAllowedKeys(value: Record<string, unknown>, allowed: string[], label: string) {
  const unknown = Object.keys(value).filter((key) => !allowed.includes(key));
  if (unknown.length === 0) return;
  throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`);
}

/**
 * Returns the embedding vector dimension for a model.
 *
 * @param model - Model identifier; only consulted for the "openai" provider.
 * @param provider - Embedding provider.
 * @returns `DEFAULT_LOCAL_EMBEDDING_DIM` for "local", otherwise the entry of
 *   `OPENAI_EMBEDDING_DIMENSIONS` for `model`.
 * @throws Error when the provider is "openai" and `model` is not a supported model.
 */
export function vectorDimsForModel(model: string, provider: EmbeddingProvider): number {
  if (provider === "local") {
    // Local models have varying dimensions; default to embeddinggemma's 768
    // TODO: Could detect from model metadata in the future
    return DEFAULT_LOCAL_EMBEDDING_DIM;
  }
  // Own-property check: a plain index lookup would resolve names such as
  // "constructor" or "toString" through Object.prototype and accept them.
  const dims = Object.prototype.hasOwnProperty.call(OPENAI_EMBEDDING_DIMENSIONS, model)
    ? OPENAI_EMBEDDING_DIMENSIONS[model]
    : undefined;
  if (!dims) {
    throw new Error(
      `Unsupported OpenAI embedding model: ${model}. Supported: ${Object.keys(OPENAI_EMBEDDING_DIMENSIONS).join(", ")}`,
    );
  }
  return dims;
}

/**
 * Replaces every `${NAME}` placeholder in `value` with `process.env.NAME`.
 *
 * @throws Error when a referenced variable is unset or empty.
 */
function resolveEnvVars(value: string): string {
  return value.replace(/\$\{([^}]+)\}/g, (_, envVar: string) => {
    const envValue = process.env[envVar];
    if (!envValue) {
      throw new Error(`Environment variable ${envVar} is not set`);
    }
    return envValue;
  });
}

/**
 * Resolves the embedding model name, falling back to the provider's default.
 *
 * For the "openai" provider the resolved model is validated against the known
 * dimension table, so an unsupported model throws here.
 */
function resolveEmbeddingModel(embedding: Record<string, unknown>, provider: EmbeddingProvider): string {
  const model = typeof embedding.model === "string" ? embedding.model : undefined;
  if (provider === "local") {
    return model || DEFAULT_LOCAL_MODEL;
  }
  // OpenAI provider
  const resolvedModel = model || DEFAULT_OPENAI_MODEL;
  vectorDimsForModel(resolvedModel, provider); // Validate
  return resolvedModel;
}

/**
 * Determines the embedding provider from the raw `provider` value.
 *
 * An absent value defaults to "openai" for backwards compatibility. Any other
 * unrecognized value is rejected instead of silently being treated as "openai".
 */
function resolveProvider(raw: unknown): EmbeddingProvider {
  if (raw == null) return "openai";
  if (raw === "openai" || raw === "local") return raw;
  throw new Error(`embedding.provider must be "openai" or "local"`);
}

/**
 * Validator and UI metadata for the memory configuration.
 *
 * `parse` validates an untyped value and returns a `MemoryConfig` with defaults
 * applied; `uiHints` maps config paths to labels/help text for a settings UI.
 */
export const memoryConfigSchema = {
  /**
   * Validates `value` and returns a normalized `MemoryConfig`.
   *
   * Defaults: provider "openai", the provider's default model, `DEFAULT_DB_PATH`
   * for `dbPath`, and `true` for `autoCapture` / `autoRecall` unless they are
   * exactly `false`. `${NAME}` placeholders in `embedding.apiKey` are expanded
   * from the environment.
   *
   * @throws Error when the config or `embedding` is missing or not an object,
   *   contains unknown keys, names an unknown provider or unsupported OpenAI
   *   model, lacks an apiKey for the OpenAI provider, or references an unset
   *   environment variable.
   */
  parse(value: unknown): MemoryConfig {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
      throw new Error("memory config required");
    }
    const cfg = value as Record<string, unknown>;
    assertAllowedKeys(cfg, ["embedding", "dbPath", "autoCapture", "autoRecall"], "memory config");

    const rawEmbedding = cfg.embedding;
    if (!rawEmbedding) {
      throw new Error("embedding config is required");
    }
    // Reject strings, numbers and arrays up front so they do not surface as
    // confusing "unknown keys" or apiKey errors further down.
    if (typeof rawEmbedding !== "object" || Array.isArray(rawEmbedding)) {
      throw new Error("embedding config must be an object");
    }
    const embedding = rawEmbedding as Record<string, unknown>;
    assertAllowedKeys(embedding, ["provider", "apiKey", "model", "local"], "embedding config");

    // Determine provider (default to "openai" for backwards compatibility)
    const provider = resolveProvider(embedding.provider);

    // Validate apiKey requirement based on provider
    if (provider === "openai" && typeof embedding.apiKey !== "string") {
      throw new Error("embedding.apiKey is required when using OpenAI provider");
    }

    const model = resolveEmbeddingModel(embedding, provider);

    // Parse local config if present
    let localConfig: MemoryConfig["embedding"]["local"] | undefined;
    if (embedding.local && typeof embedding.local === "object") {
      const local = embedding.local as Record<string, unknown>;
      assertAllowedKeys(local, ["modelPath", "modelCacheDir"], "embedding.local config");
      localConfig = {
        modelPath: typeof local.modelPath === "string" ? local.modelPath : undefined,
        modelCacheDir: typeof local.modelCacheDir === "string" ? local.modelCacheDir : undefined,
      };
    }

    return {
      embedding: {
        provider,
        model,
        apiKey: typeof embedding.apiKey === "string" ? resolveEnvVars(embedding.apiKey) : undefined,
        local: localConfig,
      },
      dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH,
      autoCapture: cfg.autoCapture !== false,
      autoRecall: cfg.autoRecall !== false,
    };
  },
  uiHints: {
    "embedding.provider": {
      label: "Embedding Provider",
      help: "Choose 'openai' for remote embeddings or 'local' for on-device embeddings using node-llama-cpp",
      options: ["openai", "local"],
    },
    "embedding.apiKey": {
      label: "OpenAI API Key",
      sensitive: true,
      placeholder: "sk-proj-...",
      help: "API key for OpenAI embeddings (required if provider is 'openai', or use ${OPENAI_API_KEY})",
    },
    "embedding.model": {
      label: "Embedding Model",
      placeholder: DEFAULT_OPENAI_MODEL,
      help: "Model to use for embeddings. For OpenAI: text-embedding-3-small/large. For local: HuggingFace GGUF path.",
    },
    "embedding.local.modelPath": {
      label: "Local Model Path",
      placeholder: DEFAULT_LOCAL_MODEL,
      help: "Path to local GGUF embedding model (e.g., hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf)",
      advanced: true,
    },
    "embedding.local.modelCacheDir": {
      label: "Model Cache Directory",
      placeholder: "~/.cache/node-llama-cpp",
      help: "Directory to cache downloaded models",
      advanced: true,
    },
    dbPath: {
      label: "Database Path",
      placeholder: "~/.openclaw/memory/lancedb",
      advanced: true,
    },
    autoCapture: {
      label: "Auto-Capture",
      help: "Automatically capture important information from conversations",
    },
    autoRecall: {
      label: "Auto-Recall",
      help: "Automatically inject relevant memories into context",
    },
  },
};