diff --git a/docs/memory-cognee.md b/docs/memory-cognee.md new file mode 100644 index 000000000..cc5a1e943 --- /dev/null +++ b/docs/memory-cognee.md @@ -0,0 +1,86 @@ +--- +summary: "Cognee memory quick setup and usage" +read_when: + - Setting up Cognee memory provider + - Configuring knowledge graph memory +--- + +# Cognee Memory Provider + +Moltbot supports [Cognee](https://www.cognee.ai/) - [open source AI memory](https://github.com/topoteretes/cognee) - as an optional memory provider. Cognee builds a knowledge-graph memory, backed by embeddings, from any data you give it, and can run locally with Docker. Learn more from [Cognee Documentation](https://docs.cognee.ai/). + +## Quickstart with Docker + +Export your LLM key (`export LLM_API_KEY=...`), then run the example compose file: + +```bash +docker compose -f examples/cognee-docker-compose.yaml up -d +``` +Verify: + +```bash +curl http://localhost:8000/health +``` + +## Configuration + +Put both tokens in `~/.clawdbot/.env`: + +```bash +COGNEE_API_KEY="your-cognee-access-token" +CLAWDBOT_GATEWAY_TOKEN="your-random-gateway-token" +``` + +Configure `~/.clawdbot/moltbot.json` (JSON5): + +```json5 +{ + agents: { + defaults: { + memorySearch: { + enabled: true, + provider: "cognee", + sources: ["memory", "sessions"], + experimental: { sessionMemory: true }, + cognee: { + baseUrl: "http://localhost:8000", + apiKey: "${COGNEE_API_KEY}", + datasetName: "clawdbot", + searchType: "GRAPH_COMPLETION", + maxResults: 6, + autoCognify: true, + timeoutSeconds: 180 + } + } + } + } +} +``` + +Start the gateway with the environment variables loaded: + +```zsh +set -a; source "$HOME/.clawdbot/.env"; set +a +pnpm moltbot gateway --port 18789 --token "$CLAWDBOT_GATEWAY_TOKEN" --verbose +``` + +## Usage + +Cognee indexes `MEMORY.md` in the workspace root, `memory/*.md`, and session transcripts when `sources` includes `"sessions"`. + +1. Initial index and status: + +```bash +pnpm moltbot memory status --index --json +``` + +2. 
Memory updates: + +```bash +pnpm moltbot memory status --index --update-cognee --json +``` + +## Troubleshooting + +- Connection test: `curl http://localhost:8000/health` +- Reset cached values that Moltbot reuses: `mv "$HOME/.clawdbot/memory/cognee/main.json" "$HOME/.clawdbot/memory/cognee/main.json.bak"` \ No newline at end of file diff --git a/examples/.env.template b/examples/.env.template new file mode 100644 index 000000000..65c1dc759 --- /dev/null +++ b/examples/.env.template @@ -0,0 +1,9 @@ +# example .env file for cognee docker integration +LLM_API_KEY= +ENABLE_BACKEND_ACCESS_CONTROL="false" + + +# ~/.clawdbot/.env file for clawdbot gateway +COGNEE_API_KEY= + +CLAWDBOT_GATEWAY_TOKEN= \ No newline at end of file diff --git a/examples/cognee-clawdbot-config.yaml b/examples/cognee-clawdbot-config.yaml new file mode 100644 index 000000000..69aa763d3 --- /dev/null +++ b/examples/cognee-clawdbot-config.yaml @@ -0,0 +1,35 @@ +# Sample Clawdbot config for testing Cognee integration +# Copy this to ~/.clawdbot/config.yaml inside the dev container +# +# Inside container: +# mkdir -p ~/.clawdbot +# cp /app/examples/cognee-clawdbot-config.yaml ~/.clawdbot/config.yaml + +agents: + defaults: + # Enable Cognee as the memory provider + memorySearch: + enabled: true + provider: cognee + sources: [memory] # Start with just memory files + + cognee: + # Inside Docker network, Cognee is at this address + baseUrl: http://cognee:8000 + # No API key needed for local Docker setup + # apiKey: "" + + # Dataset name for organizing memories + datasetName: clawdbot-test + + # Search mode: "GRAPH_COMPLETION" (recommended), "CHUNKS", or "SUMMARIES" + searchType: GRAPH_COMPLETION + + # Max results per search + maxResults: 6 + + # Auto-process documents after adding + autoCognify: true + + # Request timeout in seconds + timeoutSeconds: 30 diff --git a/examples/cognee-config.yaml b/examples/cognee-config.yaml new file mode 100644 index 000000000..411e83ca9 --- /dev/null +++ b/examples/cognee-config.yaml @@ -0,0 
+1,48 @@ +# Example Clawdbot configuration with Cognee memory provider +# Copy to ~/.clawdbot/config.yaml and customize + +agents: + defaults: + # Use Cognee for knowledge graph memory + memorySearch: + enabled: true + provider: cognee + sources: [memory, sessions] # Index both memory files and conversation history + + # Cognee-specific configuration + cognee: + # Local Docker setup (default) + baseUrl: http://localhost:8000 + + # For Cognee Cloud, use: + # baseUrl: https://cognee--cognee-saas-backend-serve.modal.run + # Cognee API key is stored in the .env file + apiKey: ${COGNEE_API_KEY} + + # Dataset name for organizing memories + datasetName: clawdbot + + # Search mode: "GRAPH_COMPLETION" (recommended), "CHUNKS", or "SUMMARIES" + searchType: GRAPH_COMPLETION + + # Maximum search results to return + maxResults: 6 + + # Automatically process documents after adding (recommended) + autoCognify: true + + # Batch size for processing multiple documents + cognifyBatchSize: 100 + + # Request timeout in seconds + timeoutSeconds: 180 + + # Enable experimental session memory indexing + experimental: + sessionMemory: true + +# To use default SQLite memory instead, set: +# agents: +# defaults: +# memorySearch: +# provider: openai # or "gemini", "local"; omit provider to auto-select diff --git a/examples/cognee-docker-compose.yaml b/examples/cognee-docker-compose.yaml new file mode 100644 index 000000000..d272378af --- /dev/null +++ b/examples/cognee-docker-compose.yaml @@ -0,0 +1,35 @@ +# Minimal Docker Compose for Cognee (local-only) +# +# Usage: +# 1. Export your LLM key: export LLM_API_KEY="your-openai-api-key" +# 2. Run: docker compose -f examples/cognee-docker-compose.yaml up -d +# 3. Verify: curl http://localhost:8000/health +# 4. 
Configure Clawdbot with baseUrl: http://localhost:8000 +# +# Defaults (no extra DB setup): +# - Relational DB: SQLite (file-based) +# - Vector DB: LanceDB (file-based) +# - Graph DB: Kuzu (file-based) + +services: + cognee: + image: cognee/cognee:latest + container_name: cognee + ports: + - "127.0.0.1:8000:8000" + environment: + - LLM_API_KEY=${LLM_API_KEY} + - ENABLE_BACKEND_ACCESS_CONTROL=false + volumes: + - cognee_data:/app/cognee/.cognee_system + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + +volumes: + cognee_data: + driver: local diff --git a/examples/cognee-test-compose.yaml b/examples/cognee-test-compose.yaml new file mode 100644 index 000000000..865b334c8 --- /dev/null +++ b/examples/cognee-test-compose.yaml @@ -0,0 +1,117 @@ +# Docker Compose for testing Clawdbot + Cognee integration +# +# Usage: +# 1. cd examples +# 2. docker-compose -f cognee-test-compose.yaml up -d +# 3. docker-compose -f cognee-test-compose.yaml exec dev bash +# 4. 
Inside container: +# - pnpm install # Install dependencies +# - pnpm build # Build Clawdbot +# - pnpm test src/memory/cognee-* # Run unit tests +# - pnpm clawdbot agent --message "test" # Run Clawdbot CLI +# +# To tear down: docker-compose -f cognee-test-compose.yaml down -v + +version: '3.8' + +services: + # Cognee API server + cognee: + image: topoteretes/cognee:latest + container_name: cognee-test + ports: + - "127.0.0.1:8000:8000" + environment: + # No API key for local testing + - COGNEE_API_KEY= + # Database connection + - DATABASE_URL=postgresql://cognee:cognee@postgres:5432/cognee + volumes: + - cognee_data:/app/data + depends_on: + postgres: + condition: service_healthy + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + networks: + - cognee-test-network + + # PostgreSQL database for Cognee + postgres: + image: postgres:15-alpine + container_name: cognee-test-postgres + environment: + - POSTGRES_USER=cognee + - POSTGRES_PASSWORD=cognee + - POSTGRES_DB=cognee + volumes: + - postgres_data:/var/lib/postgresql/data + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "pg_isready -U cognee"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - cognee-test-network + + # Development container for running Clawdbot + tests + dev: + image: node:22-bookworm + container_name: clawdbot-cognee-dev + working_dir: /app + volumes: + # Mount the entire clawdbot repo + - ..:/app + # Use a separate node_modules volume to avoid conflicts + - node_modules:/app/node_modules + # Clawdbot config directory (persisted) + - clawdbot_config:/root/.clawdbot + environment: + - HOME=/root + - TERM=xterm-256color + # Point to Cognee running in Docker network + - COGNEE_BASE_URL=http://cognee:8000 + # Optional: Add your API keys here for full testing + # - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} + # - OPENAI_API_KEY=${OPENAI_API_KEY:-} + depends_on: + cognee: 
condition: service_healthy + stdin_open: true + tty: true + # Install pnpm and keep container running + command: > + bash -c " + corepack enable && + echo '=== Clawdbot + Cognee Test Environment ===' && + echo 'Cognee is available at: http://cognee:8000' && + echo '' && + echo 'Quick start:' && + echo ' pnpm install && pnpm build' && + echo ' pnpm test src/memory/cognee-client.test.ts' && + echo ' pnpm clawdbot --help' && + echo '' && + sleep infinity + " + networks: + - cognee-test-network + +volumes: + cognee_data: + driver: local + postgres_data: + driver: local + node_modules: + driver: local + clawdbot_config: + driver: local + +networks: + cognee-test-network: + driver: bridge diff --git a/examples/test-cognee.sh b/examples/test-cognee.sh new file mode 100644 index 000000000..09a9f1f3c --- /dev/null +++ b/examples/test-cognee.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Test script for Cognee integration +# Run this inside the dev container after starting docker-compose + +set -e + +echo "=== Clawdbot + Cognee Integration Test ===" +echo "" + +# 1. Check if Cognee is reachable +echo "1. Testing Cognee connection..." +if curl -sf http://cognee:8000/health > /dev/null 2>&1; then + echo " ✓ Cognee is healthy at http://cognee:8000" + curl -s http://cognee:8000/health | head -c 200 + echo "" +else + echo " ✗ Cannot reach Cognee at http://cognee:8000" + echo " Make sure Cognee container is running and healthy" + exit 1 +fi +echo "" + +# 2. Install dependencies if needed +if [ ! -d "node_modules/.pnpm" ]; then + echo "2. Installing dependencies..." + pnpm install +else + echo "2. Dependencies already installed" +fi +echo "" + +# 3. Run unit tests +echo "3. Running Cognee unit tests..." +pnpm test src/memory/cognee-client.test.ts src/memory/cognee-provider.test.ts +echo "" + +# 4. Quick integration check (manual API test) +echo "4. Quick API smoke test..." +echo " Adding test data to Cognee..." 
+ +# Add test data +RESULT=$(curl -sf -X POST http://cognee:8000/add \ + -H "Content-Type: application/json" \ + -d '{ + "data": "Test memory entry: The capital of France is Paris. This is a test document for Clawdbot integration.", + "dataset_name": "clawdbot-test" + }' 2>&1) || true + +if echo "$RESULT" | grep -q "dataset"; then + echo " ✓ Data added successfully" + echo " Response: $RESULT" +else + echo " ⚠ Add may have failed (this is OK if Cognee API differs)" + echo " Response: $RESULT" +fi +echo "" + +echo "=== Test Complete ===" +echo "" +echo "Next steps:" +echo " - The unit tests verify the client/provider logic" +echo " - For full integration, configure Clawdbot with:" +echo " memorySearch:" +echo " provider: cognee" +echo " cognee:" +echo " baseUrl: http://cognee:8000 # or http://localhost:8000 from host" diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts index 3d3f3c324..8cd1fdb73 100644 --- a/src/agents/memory-search.ts +++ b/src/agents/memory-search.ts @@ -10,7 +10,7 @@ export type ResolvedMemorySearchConfig = { enabled: boolean; sources: Array<"memory" | "sessions">; extraPaths: string[]; - provider: "openai" | "local" | "gemini" | "auto"; + provider: "openai" | "local" | "gemini" | "auto" | "cognee"; remote?: { baseUrl?: string; apiKey?: string; @@ -26,12 +26,22 @@ export type ResolvedMemorySearchConfig = { experimental: { sessionMemory: boolean; }; - fallback: "openai" | "gemini" | "local" | "none"; + fallback: "openai" | "gemini" | "local" | "cognee" | "none"; model: string; local: { modelPath?: string; modelCacheDir?: string; }; + cognee?: { + baseUrl?: string; + apiKey?: string; + datasetName?: string; + searchType?: "GRAPH_COMPLETION" | "CHUNKS" | "SUMMARIES"; + maxResults?: number; + timeoutSeconds?: number; + autoCognify?: boolean; + cognifyBatchSize?: number; + }; store: { driver: "sqlite"; path: string; @@ -227,6 +237,20 @@ function mergeConfig( enabled: overrides?.cache?.enabled ?? defaults?.cache?.enabled ?? 
DEFAULT_CACHE_ENABLED, maxEntries: overrides?.cache?.maxEntries ?? defaults?.cache?.maxEntries, }; + const cognee = + provider === "cognee" + ? { + baseUrl: overrides?.cognee?.baseUrl ?? defaults?.cognee?.baseUrl, + apiKey: overrides?.cognee?.apiKey ?? defaults?.cognee?.apiKey, + datasetName: overrides?.cognee?.datasetName ?? defaults?.cognee?.datasetName, + searchType: overrides?.cognee?.searchType ?? defaults?.cognee?.searchType, + maxResults: overrides?.cognee?.maxResults ?? defaults?.cognee?.maxResults, + timeoutSeconds: overrides?.cognee?.timeoutSeconds ?? defaults?.cognee?.timeoutSeconds, + autoCognify: overrides?.cognee?.autoCognify ?? defaults?.cognee?.autoCognify, + cognifyBatchSize: + overrides?.cognee?.cognifyBatchSize ?? defaults?.cognee?.cognifyBatchSize, + } + : undefined; const overlap = clampNumber(chunking.overlap, 0, Math.max(0, chunking.tokens - 1)); const minScore = clampNumber(query.minScore, 0, 1); @@ -250,6 +274,7 @@ function mergeConfig( fallback, model, local, + cognee, store, chunking: { tokens: Math.max(1, chunking.tokens), overlap }, sync: { diff --git a/src/cli/memory-cli.ts b/src/cli/memory-cli.ts index b34da11a3..48733405f 100644 --- a/src/cli/memory-cli.ts +++ b/src/cli/memory-cli.ts @@ -24,6 +24,7 @@ type MemoryCommandOptions = { json?: boolean; deep?: boolean; index?: boolean; + updateCognee?: boolean; verbose?: boolean; }; @@ -225,6 +226,7 @@ async function scanMemorySources(params: { export async function runMemoryStatus(opts: MemoryCommandOptions) { setVerbose(Boolean(opts.verbose)); + const updateCognee = opts.updateCognee ?? 
process.argv.includes("--update-cognee"); const cfg = loadConfig(); const agentIds = resolveAgentIds(cfg, opts.agent); const allResults: Array<{ @@ -233,6 +235,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) { embeddingProbe?: Awaited>; indexError?: string; scan?: MemorySourceScan; + cogneeUpdate?: boolean; }> = []; for (const agentId of agentIds) { @@ -268,6 +271,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) { try { await manager.sync({ reason: "cli", + update: updateCognee, progress: (syncUpdate) => { update({ completed: syncUpdate.completed, @@ -298,7 +302,14 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) { sources, extraPaths: status.extraPaths, }); - allResults.push({ agentId, status, embeddingProbe, indexError, scan }); + allResults.push({ + agentId, + status, + embeddingProbe, + indexError, + scan, + cogneeUpdate: Boolean(updateCognee), + }); }, }); } @@ -465,6 +476,7 @@ export function registerMemoryCli(program: Command) { .option("--json", "Print JSON") .option("--deep", "Probe embedding provider availability") .option("--index", "Reindex if dirty (implies --deep)") + .option("--update-cognee", "Use Cognee update when file data ids are known", false) .option("--verbose", "Verbose logging", false) .action(async (opts: MemoryCommandOptions) => { await runMemoryStatus(opts); diff --git a/src/config/schema.ts b/src/config/schema.ts index 1401b0574..5ccdbd212 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -504,7 +504,8 @@ const FIELD_HELP: Record = { "Extra paths to include in memory search (directories or .md files; relative paths resolved from workspace).", "agents.defaults.memorySearch.experimental.sessionMemory": "Enable experimental session transcript indexing for memory search (default: false).", - "agents.defaults.memorySearch.provider": 'Embedding provider ("openai", "gemini", or "local").', + "agents.defaults.memorySearch.provider": + 'Embedding provider ("openai", 
"gemini", "local", or "cognee").', "agents.defaults.memorySearch.remote.baseUrl": "Custom base URL for remote embeddings (OpenAI-compatible proxies or Gemini overrides).", "agents.defaults.memorySearch.remote.apiKey": "Custom API key for the remote embedding provider.", @@ -520,10 +521,12 @@ const FIELD_HELP: Record = { "Polling interval in ms for batch status (default: 2000).", "agents.defaults.memorySearch.remote.batch.timeoutMinutes": "Timeout in minutes for batch indexing (default: 60).", + "agents.defaults.memorySearch.cognee": + "Cognee provider configuration (baseUrl, apiKey, datasetName, searchType, maxResults, autoCognify).", "agents.defaults.memorySearch.local.modelPath": "Local GGUF model path or hf: URI (node-llama-cpp).", "agents.defaults.memorySearch.fallback": - 'Fallback provider when embeddings fail ("openai", "gemini", "local", or "none").', + 'Fallback provider when embeddings fail ("openai", "gemini", "local", "cognee", or "none").', "agents.defaults.memorySearch.store.path": "SQLite index path (default: ~/.openclaw/memory/{agentId}.sqlite).", "agents.defaults.memorySearch.store.vector.enabled": diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index db32cb59d..fcf93d976 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -234,7 +234,7 @@ export type MemorySearchConfig = { sessionMemory?: boolean; }; /** Embedding provider mode. */ - provider?: "openai" | "gemini" | "local"; + provider?: "openai" | "gemini" | "local" | "cognee"; remote?: { baseUrl?: string; apiKey?: string; @@ -253,7 +253,7 @@ export type MemorySearchConfig = { }; }; /** Fallback behavior when embeddings fail. */ - fallback?: "openai" | "gemini" | "local" | "none"; + fallback?: "openai" | "gemini" | "local" | "cognee" | "none"; /** Embedding model id (remote) or alias (local). */ model?: string; /** Local embedding settings (node-llama-cpp). 
*/ @@ -263,6 +263,25 @@ export type MemorySearchConfig = { /** Optional cache directory for local models. */ modelCacheDir?: string; }; + /** Cognee knowledge graph memory settings. */ + cognee?: { + /** Cognee API endpoint (default: http://localhost:8000). */ + baseUrl?: string; + /** Cognee API key (required for cloud, optional for local). */ + apiKey?: string; + /** Dataset name for organizing memories (default: "clawdbot"). */ + datasetName?: string; + /** Search type: "GRAPH_COMPLETION", "CHUNKS", or "SUMMARIES" (default: "GRAPH_COMPLETION"). */ + searchType?: "GRAPH_COMPLETION" | "CHUNKS" | "SUMMARIES"; + /** Max results per search query (default: 6). */ + maxResults?: number; + /** Timeout for API requests in seconds (default: 30). */ + timeoutSeconds?: number; + /** Enable automatic cognify after adding documents (default: true). */ + autoCognify?: boolean; + /** Cognify batch size for processing (default: 100). */ + cognifyBatchSize?: number; + }; /** Index storage configuration. 
*/ store?: { driver?: "sqlite"; diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 7e95c3538..0f421297a 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -311,7 +311,9 @@ export const MemorySearchSchema = z }) .strict() .optional(), - provider: z.union([z.literal("openai"), z.literal("local"), z.literal("gemini")]).optional(), + provider: z + .union([z.literal("openai"), z.literal("local"), z.literal("gemini"), z.literal("cognee")]) + .optional(), remote: z .object({ baseUrl: z.string().optional(), @@ -331,9 +333,30 @@ export const MemorySearchSchema = z .strict() .optional(), fallback: z - .union([z.literal("openai"), z.literal("gemini"), z.literal("local"), z.literal("none")]) + .union([ + z.literal("openai"), + z.literal("gemini"), + z.literal("local"), + z.literal("cognee"), + z.literal("none"), + ]) .optional(), model: z.string().optional(), + cognee: z + .object({ + baseUrl: z.string().optional(), + apiKey: z.string().optional(), + datasetName: z.string().optional(), + searchType: z + .union([z.literal("GRAPH_COMPLETION"), z.literal("CHUNKS"), z.literal("SUMMARIES")]) + .optional(), + maxResults: z.number().int().positive().optional(), + autoCognify: z.boolean().optional(), + cognifyBatchSize: z.number().int().positive().optional(), + timeoutSeconds: z.number().int().positive().optional(), + }) + .strict() + .optional(), local: z .object({ modelPath: z.string().optional(), diff --git a/src/memory/cognee-client.ts b/src/memory/cognee-client.ts new file mode 100644 index 000000000..0b351ca2e --- /dev/null +++ b/src/memory/cognee-client.ts @@ -0,0 +1,440 @@ +import { Blob } from "buffer"; +import { FormData, request } from "undici"; +import { createSubsystemLogger } from "../logging/subsystem.js"; + +const log = createSubsystemLogger("cognee"); + +const DEFAULT_BASE_URL = "http://localhost:8000"; +const DEFAULT_TIMEOUT_MS = 30_000; +const API_PREFIX = "/api/v1"; + 
+export type CogneeClientConfig = { + baseUrl?: string; + apiKey?: string; + timeoutMs?: number; +}; + +export type CogneeAddRequest = { + data: string; + datasetName?: string; + datasetId?: string; +}; + +export type CogneeAddResponse = { + datasetId: string; + datasetName: string; + message: string; + dataId?: string; +}; + +export type CogneeUpdateRequest = { + dataId: string; + datasetId: string; + data: string; +}; + +export type CogneeUpdateResponse = { + datasetId?: string; + datasetName?: string; + message?: string; + status?: string; + dataId?: string; +}; + +export type CogneeCognifyRequest = { + datasetIds?: string[]; +}; + +export type CogneeCognifyResponse = { + status: string; + message: string; +}; + +export type CogneeSearchRequest = { + queryText: string; + searchType?: "GRAPH_COMPLETION" | "CHUNKS" | "SUMMARIES"; + datasetIds?: string[]; +}; + +export type CogneeSearchResult = { + id: string; + text: string; + score: number; + metadata?: Record; +}; + +export type CogneeSearchResponse = { + results: CogneeSearchResult[]; + query: string; + searchType: string; +}; + +export type CogneeStatusResponse = { + status: string; + version?: string; + datasets?: Array<{ + id: string; + name: string; + documentCount?: number; + }>; +}; + +type CogneeSearchApiType = "SUMMARIES" | "CHUNKS" | "GRAPH_COMPLETION"; + +export class CogneeClient { + private readonly baseUrl: string; + private readonly apiKey?: string; + private readonly timeoutMs: number; + + constructor(config: CogneeClientConfig = {}) { + this.baseUrl = config.baseUrl?.replace(/\/$/, "") || DEFAULT_BASE_URL; + this.apiKey = config.apiKey; + this.timeoutMs = config.timeoutMs || DEFAULT_TIMEOUT_MS; + } + + async add(req: CogneeAddRequest): Promise { + const url = `${this.baseUrl}${API_PREFIX}/add`; + const headers: Record = {}; + if (this.apiKey) { + headers.Authorization = `Bearer ${this.apiKey}`; + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Adding data to Cognee", { + url, + 
datasetName: req.datasetName, + dataLength: req.data.length, + }); + + try { + const formData = new FormData(); + const blob = new Blob([req.data], { type: "text/plain" }); + formData.append("data", blob, "clawdbot-memory.txt"); + if (req.datasetName) { + formData.append("datasetName", req.datasetName); + } + if (req.datasetId) { + formData.append("datasetId", req.datasetId); + } + + const response = await request(url, { + method: "POST", + headers, + body: formData, + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error(`Cognee add failed with status ${response.statusCode}: ${errorText}`); + } + + const data = (await response.body.json()) as { + dataset_id: string; + dataset_name: string; + message: string; + data_id?: unknown; + data_ingestion_info?: unknown; + }; + + return { + datasetId: data.dataset_id, + datasetName: data.dataset_name, + message: data.message, + dataId: this.extractDataId(data.data_id ?? data.data_ingestion_info), + }; + } catch (error) { + log.error("Failed to add data to Cognee", { error }); + throw new Error( + `Cognee add request failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + } + + async update(req: CogneeUpdateRequest): Promise { + const url = new URL(`${this.baseUrl}${API_PREFIX}/update`); + url.searchParams.set("data_id", req.dataId); + url.searchParams.set("dataset_id", req.datasetId); + const headers: Record = {}; + if (this.apiKey) { + headers.Authorization = `Bearer ${this.apiKey}`; + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Updating data in Cognee", { + url: url.toString(), + dataLength: req.data.length, + }); + + try { + const formData = new FormData(); + const blob = new Blob([req.data], { type: "text/plain" }); + formData.append("data", blob, "clawdbot-memory.txt"); + + const response = await request(url.toString(), { + method: "PATCH", + headers, + body: formData, + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error(`Cognee update failed with status ${response.statusCode}: ${errorText}`); + } + + const data = (await response.body.json()) as { + status?: string; + message?: string; + dataset_id?: string; + dataset_name?: string; + data_id?: unknown; + data_ingestion_info?: unknown; + }; + + return { + status: data.status, + message: data.message, + datasetId: data.dataset_id ?? req.datasetId, + datasetName: data.dataset_name, + dataId: this.extractDataId(data.data_id ?? data.data_ingestion_info) ?? req.dataId, + }; + } catch (error) { + log.error("Failed to update data in Cognee", { error }); + throw new Error( + `Cognee update request failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + } + + async cognify(req: CogneeCognifyRequest = {}): Promise { + const url = `${this.baseUrl}${API_PREFIX}/cognify`; + const headers: Record = { + "Content-Type": "application/json", + }; + if (this.apiKey) { + headers.Authorization = `Bearer ${this.apiKey}`; + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Running cognify", { url, datasetIds: req.datasetIds }); + + try { + const response = await request(url, { + method: "POST", + headers, + body: JSON.stringify({ + datasetIds: req.datasetIds, + }), + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error(`Cognee cognify failed with status ${response.statusCode}: ${errorText}`); + } + + const data = (await response.body.json()) as { + status: string; + message: string; + }; + + return { + status: data.status, + message: data.message, + }; + } catch (error) { + log.error("Failed to cognify", { error }); + throw new Error( + `Cognee cognify request failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + } + + async search(req: CogneeSearchRequest): Promise { + const url = `${this.baseUrl}${API_PREFIX}/search`; + const headers: Record = { + "Content-Type": "application/json", + }; + if (this.apiKey) { + headers.Authorization = `Bearer ${this.apiKey}`; + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Searching Cognee", { + url, + query: req.queryText, + searchType: req.searchType, + }); + + try { + const response = await request(url, { + method: "POST", + headers, + body: JSON.stringify({ + query: req.queryText, + searchType: this.mapSearchType(req.searchType), + datasetIds: req.datasetIds, + }), + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error(`Cognee search failed with status ${response.statusCode}: ${errorText}`); + } + + const data = (await response.body.json()) as unknown; + const results = this.normalizeSearchResults(data); + + return { + results, + query: req.queryText, + searchType: req.searchType || "GRAPH_COMPLETION", + }; + } catch (error) { + log.error("Failed to search Cognee", { error }); + throw new Error( + `Cognee search request failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + } + + async status(): Promise { + const url = `${this.baseUrl}/health`; + const headers: Record = {}; + if (this.apiKey) { + headers.Authorization = `Bearer ${this.apiKey}`; + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Checking Cognee status", { url }); + + try { + const response = await request(url, { + method: "GET", + headers, + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error(`Cognee status failed with status ${response.statusCode}: ${errorText}`); + } + + const data = (await response.body.json()) as { status?: string }; + + return { + status: data.status || "healthy", + }; + } catch (error) { + log.error("Failed to get Cognee status", { error }); + throw new Error( + `Cognee status request failed: ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + + private extractDataId(value: unknown): string | undefined { + if (!value) return undefined; + if (typeof value === "string") return value; + if (Array.isArray(value)) { + for (const entry of value) { + const id = this.extractDataId(entry); + if (id) return id; + } + return undefined; + } + if (typeof value !== "object") return undefined; + const record = value as { data_id?: unknown; data_ingestion_info?: unknown }; + if (typeof record.data_id === "string") return record.data_id; + return this.extractDataId(record.data_ingestion_info); + } + + async healthCheck(): Promise { + try { + await this.status(); + return true; + } catch { + return false; + } + } + + private mapSearchType(type?: CogneeSearchRequest["searchType"]): CogneeSearchApiType { + switch (type) { + case "CHUNKS": + return "CHUNKS"; + case "SUMMARIES": + return "SUMMARIES"; + case "GRAPH_COMPLETION": + default: + return "GRAPH_COMPLETION"; + } + } + + private normalizeSearchResults(data: unknown): CogneeSearchResult[] { + if (Array.isArray(data)) { + 
return data.map((item, index) => { + if (typeof item === "string") { + return { id: `result-${index}`, text: item, score: 0 }; + } + + if (item && typeof item === "object") { + const record = item as Record; + const hasStructuredFields = + "id" in record || "text" in record || "score" in record || "metadata" in record; + const raw = + record.search_result ?? record.result ?? record.context ?? record.text ?? record; + const text = typeof raw === "string" ? raw : JSON.stringify(raw, null, 2); + const datasetMetadata = + record.dataset_name || record.dataset_id + ? { + datasetName: record.dataset_name, + datasetId: record.dataset_id, + } + : undefined; + const recordMetadata = + record.metadata && typeof record.metadata === "object" + ? (record.metadata as Record) + : undefined; + const metadata = recordMetadata + ? datasetMetadata + ? { ...datasetMetadata, ...recordMetadata } + : recordMetadata + : datasetMetadata; + + if (hasStructuredFields) { + return { + id: typeof record.id === "string" ? record.id : `result-${index}`, + text, + score: typeof record.score === "number" ? 
record.score : 0, + metadata, + }; + } + + return { id: `result-${index}`, text, score: 0, metadata }; + } + + return { + id: `result-${index}`, + text: String(item), + score: 0, + }; + }); + } + + if (data && typeof data === "object" && "results" in data) { + const results = (data as { results?: unknown }).results; + if (Array.isArray(results)) { + return this.normalizeSearchResults(results); + } + } + + return []; + } +} diff --git a/src/memory/cognee-provider.test.ts b/src/memory/cognee-provider.test.ts new file mode 100644 index 000000000..500bdc356 --- /dev/null +++ b/src/memory/cognee-provider.test.ts @@ -0,0 +1,86 @@ +import { describe, expect, it, vi } from "vitest"; +import { CogneeMemoryProvider } from "./cognee-provider.js"; +import type { ClawdbotConfig } from "../config/config.js"; + +const searchMock = vi.fn(); + +vi.mock("./cognee-client.js", () => { + class CogneeClient { + search = searchMock; + } + + return { CogneeClient }; +}); + +describe("CogneeMemoryProvider", () => { + it("maps search results into memory snippets", async () => { + const mockConfig: ClawdbotConfig = { + agents: { + defaults: { + workspace: "/tmp/test-workspace", + }, + }, + }; + searchMock.mockResolvedValue({ + results: [ + { + id: "result-1", + text: "A".repeat(800), + score: 0.85, + metadata: { + path: "memory/test.md", + source: "memory", + }, + }, + ], + query: "test query", + searchType: "GRAPH_COMPLETION", + }); + + const provider = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"]); + + const results = await provider.search("test query"); + + expect(results).toHaveLength(1); + expect(results[0]).toMatchObject({ + path: "memory/test.md", + source: "memory", + score: 0.85, + }); + expect(results[0].snippet.length).toBeGreaterThan(700); + expect(results[0].snippet.endsWith("...")).toBe(true); + }); + + it("defaults missing metadata to safe values", async () => { + searchMock.mockResolvedValue({ + results: [ + { + id: "result-2", + text: "Short result", + 
score: 0.2, + }, + ], + query: "missing metadata", + searchType: "GRAPH_COMPLETION", + }); + + const mockConfig: ClawdbotConfig = { + agents: { + defaults: { + workspace: "/tmp/test-workspace", + }, + }, + }; + const provider = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"]); + + const results = await provider.search("missing metadata"); + + expect(results).toHaveLength(1); + expect(results[0]).toMatchObject({ + path: "unknown", + source: "memory", + score: 0.2, + snippet: "Short result", + }); + }); +}); diff --git a/src/memory/cognee-provider.ts b/src/memory/cognee-provider.ts new file mode 100644 index 000000000..d9f3028c7 --- /dev/null +++ b/src/memory/cognee-provider.ts @@ -0,0 +1,557 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import type { OpenClawConfig } from "../config/config.js";
+import { resolveAgentWorkspaceDir } from "../agents/agent-scope.js";
+import { resolveStateDir } from "../config/paths.js";
+import { resolveSessionTranscriptsDirForAgent } from "../config/sessions/paths.js";
+import { createSubsystemLogger } from "../logging/subsystem.js";
+import type { MemorySearchResult } from "./index.js";
+import { CogneeClient, type CogneeClientConfig, type CogneeSearchResult } from "./cognee-client.js";
+import {
+  buildFileEntry,
+  hashText,
+  listMemoryFiles,
+  isMemoryPath,
+  normalizeRelPath,
+  type MemoryFileEntry,
+} from "./internal.js";
+
+const log = createSubsystemLogger("cognee-provider");
+
+const DEFAULT_DATASET_NAME = "clawdbot";
+const DEFAULT_SEARCH_TYPE = "GRAPH_COMPLETION";
+const DEFAULT_MAX_RESULTS = 6;
+const DEFAULT_TIMEOUT_SECONDS = 30;
+const DEFAULT_AUTO_COGNIFY = true;
+const DEFAULT_COGNIFY_BATCH_SIZE = 100;
+const SNIPPET_MAX_CHARS = 700;
+/** Prefix given to session transcript entries; also used to split per-source counts. */
+const SESSION_PATH_PREFIX = "sessions/";
+
+/**
+ * On-disk record of what has already been pushed to Cognee.
+ * `files` maps a file's logical path to the content hash that was uploaded and,
+ * when the server returned one, the Cognee data id used for in-place updates.
+ */
+type CogneeSyncIndex = {
+  datasetId?: string;
+  datasetName?: string;
+  files: Record<string, { hash: string; dataId?: string }>;
+};
+
+/** User-facing Cognee settings; every field is optional and falls back to a module default. */
+export type CogneeProviderConfig = {
+  baseUrl?: string;
+  apiKey?: string;
+  datasetName?: string;
+  searchType?: "GRAPH_COMPLETION" | "CHUNKS" | "SUMMARIES";
+  maxResults?: number;
+  timeoutSeconds?: number;
+  autoCognify?: boolean;
+  cognifyBatchSize?: number;
+};
+
+export type CogneeMemorySource = "memory" | "sessions";
+
+/**
+ * Memory provider backed by a Cognee server.
+ *
+ * Uploads workspace memory files and session transcripts to a Cognee dataset,
+ * optionally triggers `cognify` afterwards, and maps Cognee search results into
+ * `MemorySearchResult` snippets. Upload state (hashes, data ids, dataset id) is
+ * persisted in a per-agent JSON sync index under the state directory.
+ */
+export class CogneeMemoryProvider {
+  private readonly client: CogneeClient;
+  private readonly cfg: OpenClawConfig;
+  private readonly agentId: string;
+  private readonly workspaceDir: string;
+  private readonly datasetName: string;
+  private readonly searchType: "GRAPH_COMPLETION" | "CHUNKS" | "SUMMARIES";
+  private readonly maxResults: number;
+  private readonly autoCognify: boolean;
+  private readonly cognifyBatchSize: number;
+  private readonly sources: Set<CogneeMemorySource>;
+  private datasetId?: string;
+  private syncedFiles = new Map<string, string>(); // path -> hash
+  private readonly syncIndexPath: string;
+  private syncIndexLoaded = false;
+  private syncIndex: CogneeSyncIndex = { files: {} };
+  private syncIndexDirty = false;
+
+  /**
+   * @param cfg Application config, used to resolve the agent workspace directory.
+   * @param agentId Agent whose memory is managed; also names the sync index file.
+   * @param sources Which content kinds to upload ("memory" files, "sessions" transcripts).
+   * @param config Cognee-specific overrides.
+   */
+  constructor(
+    cfg: OpenClawConfig,
+    agentId: string,
+    sources: Array<CogneeMemorySource>,
+    config: CogneeProviderConfig = {},
+  ) {
+    // NOTE: the `||` fallbacks below mean falsy values (0, "") also select the default.
+    const timeoutMs = (config.timeoutSeconds || DEFAULT_TIMEOUT_SECONDS) * 1000;
+    const clientConfig: CogneeClientConfig = {
+      baseUrl: config.baseUrl,
+      apiKey: config.apiKey,
+      timeoutMs,
+    };
+
+    this.client = new CogneeClient(clientConfig);
+    this.cfg = cfg;
+    this.agentId = agentId;
+    this.workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
+    this.datasetName = config.datasetName || DEFAULT_DATASET_NAME;
+    this.searchType = config.searchType || DEFAULT_SEARCH_TYPE;
+    this.maxResults = config.maxResults || DEFAULT_MAX_RESULTS;
+    this.autoCognify = config.autoCognify ?? DEFAULT_AUTO_COGNIFY;
+    this.cognifyBatchSize = config.cognifyBatchSize || DEFAULT_COGNIFY_BATCH_SIZE;
+    this.sources = new Set(sources);
+    this.syncIndexPath = path.join(
+      resolveStateDir(process.env, os.homedir),
+      "memory",
+      "cognee",
+      `${agentId}.json`,
+    );
+
+    log.info("Cognee memory provider initialized", {
+      agentId,
+      datasetName: this.datasetName,
+      searchType: this.searchType,
+      sources: Array.from(this.sources),
+    });
+  }
+
+  /** Delegates to the client's health check; the result is treated as a boolean by callers. */
+  async healthCheck(): Promise<boolean> {
+    return await this.client.healthCheck();
+  }
+
+  /**
+   * Uploads new or changed files for every enabled source, then optionally runs cognify.
+   *
+   * @param params.reason Free-form label echoed in the final progress update.
+   * @param params.force Run cognify even when nothing was uploaded (requires `autoCognify`).
+   * @param params.update Prefer updating an existing Cognee data item over adding a new one.
+   * @param params.progress Called once, after the sync, with the number of files uploaded.
+   * @throws Whatever `cognify()` or writing the sync index throws; per-file upload
+   *   failures are logged and skipped instead.
+   */
+  async sync(params?: {
+    reason?: string;
+    force?: boolean;
+    update?: boolean;
+    progress?: (update: { completed: number; total: number; label?: string }) => void;
+  }): Promise<void> {
+    log.info("Starting Cognee memory sync", { agentId: this.agentId });
+
+    let addedCount = 0;
+    await this.loadSyncIndex();
+    const force = Boolean(params?.force);
+    const update = Boolean(params?.update);
+
+    // Sync memory files
+    if (this.sources.has("memory")) {
+      const memoryFiles = await this.collectMemoryFiles();
+      addedCount += await this.syncFiles(memoryFiles, "memory", { update });
+    }
+
+    // Sync session transcripts
+    if (this.sources.has("sessions")) {
+      const sessionFiles = await this.collectSessionFiles();
+      addedCount += await this.syncFiles(sessionFiles, "sessions", { update });
+    }
+
+    // Run cognify if auto-enabled and either something was uploaded or the caller forced it
+    if (this.autoCognify && (addedCount > 0 || force)) {
+      log.info("Running cognify after sync", { addedCount });
+      await this.cognify();
+    }
+
+    if (this.syncIndexDirty) {
+      await this.saveSyncIndex();
+    }
+
+    log.info("Cognee memory sync completed", {
+      agentId: this.agentId,
+      addedCount,
+    });
+
+    if (params?.progress) {
+      params.progress({
+        completed: addedCount,
+        total: addedCount,
+        label: params.reason ? `Synced (${params.reason})` : "Synced",
+      });
+    }
+  }
+
+  /**
+   * Queries Cognee and returns at most `maxResults` snippets scoring at least `minScore`.
+   *
+   * @param query Natural-language query text.
+   * @param opts.maxResults Overrides the provider-level limit.
+   * @param opts.minScore Minimum score to keep (default 0).
+   * @throws Re-throws any client error after logging it.
+   */
+  async search(
+    query: string,
+    opts?: { maxResults?: number; minScore?: number; sessionKey?: string },
+  ): Promise<MemorySearchResult[]> {
+    log.debug("Searching Cognee memory", { query, searchType: this.searchType });
+
+    try {
+      const response = await this.client.search({
+        queryText: query,
+        searchType: this.searchType,
+        // datasetId is only known after a sync has loaded or created it in this process
+        datasetIds: this.datasetId ? [this.datasetId] : undefined,
+      });
+
+      const maxResults = opts?.maxResults ?? this.maxResults;
+      const minScore = opts?.minScore ?? 0;
+      // NOTE(review): if the client reports score 0 for unscored results, any positive
+      // minScore will drop them all — confirm what callers pass here.
+      const results: MemorySearchResult[] = response.results
+        .map((r) => this.transformResult(r))
+        .filter((r) => r.score >= minScore)
+        .slice(0, maxResults);
+
+      log.debug("Cognee search completed", { query, resultCount: results.length });
+      return results;
+    } catch (error) {
+      log.error("Cognee search failed", { error, query });
+      throw error;
+    }
+  }
+
+  /**
+   * Asks Cognee to process the current dataset (or sends no dataset filter when the
+   * dataset id is not known yet).
+   * @throws Re-throws any client error after logging it.
+   */
+  async cognify(): Promise<void> {
+    try {
+      const response = await this.client.cognify({
+        datasetIds: this.datasetId ? [this.datasetId] : undefined,
+      });
+      log.info("Cognify completed", { status: response.status });
+    } catch (error) {
+      log.error("Cognify failed", { error });
+      throw error;
+    }
+  }
+
+  /**
+   * Fetches live server status. Never throws: on failure it returns `connected: false`
+   * together with the locally known dataset name and file count.
+   */
+  async getStatus(): Promise<{
+    connected: boolean;
+    datasetId?: string;
+    datasetName: string;
+    syncedFileCount: number;
+    version?: string;
+  }> {
+    try {
+      const status = await this.client.status();
+      const dataset = status.datasets?.find((d) => d.name === this.datasetName);
+
+      return {
+        connected: true,
+        datasetId: this.datasetId || dataset?.id,
+        datasetName: this.datasetName,
+        syncedFileCount: this.syncedFiles.size,
+        version: status.version,
+      };
+    } catch (error) {
+      log.error("Failed to get Cognee status", { error });
+      return {
+        connected: false,
+        datasetName: this.datasetName,
+        syncedFileCount: this.syncedFiles.size,
+      };
+    }
+  }
+
+  /**
+   * Synchronous, local-only status snapshot. Chunk, vector and FTS fields are reported
+   * as zero/disabled because this provider tracks none of them locally.
+   * `sourceCounts` splits the tracked files by source using the `sessions/` path prefix.
+   */
+  status(): {
+    files: number;
+    chunks: number;
+    dirty: boolean;
+    workspaceDir: string;
+    dbPath: string;
+    provider: string;
+    model: string;
+    requestedProvider: string;
+    sources: Array<CogneeMemorySource>;
+    extraPaths: string[];
+    sourceCounts: Array<{ source: CogneeMemorySource; files: number; chunks: number }>;
+    cache?: { enabled: boolean; entries?: number; maxEntries?: number };
+    fts?: { enabled: boolean; available: boolean; error?: string };
+    fallback?: { from: string; reason?: string };
+    vector?: {
+      enabled: boolean;
+      available?: boolean;
+      extensionPath?: string;
+      loadError?: string;
+      dims?: number;
+    };
+    batch?: {
+      enabled: boolean;
+      failures: number;
+      limit: number;
+      wait: boolean;
+      concurrency: number;
+      pollIntervalMs: number;
+      timeoutMs: number;
+      lastError?: string;
+      lastProvider?: string;
+    };
+  } {
+    const sources = Array.from(this.sources);
+    const files = this.syncedFiles.size;
+
+    // Session transcripts are stored under "sessions/<name>"; everything else is a memory file.
+    let sessionFiles = 0;
+    for (const filePath of this.syncedFiles.keys()) {
+      if (filePath.startsWith(SESSION_PATH_PREFIX)) sessionFiles++;
+    }
+    const filesBySource: Record<CogneeMemorySource, number> = {
+      memory: files - sessionFiles,
+      sessions: sessionFiles,
+    };
+
+    return {
+      files,
+      chunks: 0,
+      dirty: false,
+      workspaceDir: this.workspaceDir,
+      dbPath: "cognee",
+      provider: "cognee",
+      model: this.searchType,
+      requestedProvider: "cognee",
+      sources,
+      extraPaths: [],
+      sourceCounts: sources.map((source) => ({
+        source,
+        files: filesBySource[source],
+        chunks: 0,
+      })),
+      vector: {
+        enabled: false,
+        available: false,
+      },
+      fts: {
+        enabled: false,
+        available: false,
+      },
+    };
+  }
+
+  /**
+   * Reads a memory file from the workspace, optionally returning only a line window.
+   *
+   * @param params.relPath Workspace-relative path; must pass `isMemoryPath`.
+   * @param params.from 1-based first line (values below 1 are clamped to 1).
+   * @param params.lines Number of lines to return (at least 1).
+   * @throws Error("path required") when the path is empty or not a memory path.
+   * @throws Error("path escapes workspace") when the resolved path leaves the workspace.
+   */
+  async readFile(params: {
+    relPath: string;
+    from?: number;
+    lines?: number;
+  }): Promise<{ text: string; path: string }> {
+    const relPath = normalizeRelPath(params.relPath);
+    if (!relPath || !isMemoryPath(relPath)) {
+      throw new Error("path required");
+    }
+    const absPath = path.resolve(this.workspaceDir, relPath);
+    // Containment must be decided on path segments: a plain string-prefix test would
+    // accept sibling directories such as "<workspace>-other/…".
+    const relToWorkspace = path.relative(this.workspaceDir, absPath);
+    const escapesWorkspace =
+      relToWorkspace === ".." ||
+      relToWorkspace.startsWith(`..${path.sep}`) ||
+      path.isAbsolute(relToWorkspace);
+    if (escapesWorkspace) {
+      throw new Error("path escapes workspace");
+    }
+    const content = await fs.readFile(absPath, "utf-8");
+    if (!params.from && !params.lines) {
+      return { text: content, path: relPath };
+    }
+    const lines = content.split("\n");
+    const start = Math.max(1, params.from ?? 1);
+    const count = Math.max(1, params.lines ?? lines.length);
+    const slice = lines.slice(start - 1, start - 1 + count);
+    return { text: slice.join("\n"), path: relPath };
+  }
+
+  /** Always reports unavailable: this provider performs no local embedding. */
+  async probeEmbeddingAvailability(): Promise<{ ok: boolean; error?: string }> {
+    return { ok: false, error: "Cognee provider does not use embeddings." };
+  }
+
+  /** Always false: there is no local vector store. */
+  async probeVectorAvailability(): Promise<boolean> {
+    return false;
+  }
+
+  /** No-op; the provider holds no resources that need releasing. */
+  async close(): Promise<void> {}
+
+  /**
+   * Loads the persisted sync index once per instance. A missing file is fine; other
+   * read/parse errors are logged and an empty index is used. The index is reset when
+   * it belongs to a different dataset. Finally the in-memory hash map is seeded from
+   * the index so files uploaded by a previous process are skipped while unchanged.
+   */
+  private async loadSyncIndex(): Promise<void> {
+    if (this.syncIndexLoaded) return;
+    this.syncIndexLoaded = true;
+    try {
+      const raw = await fs.readFile(this.syncIndexPath, "utf-8");
+      const parsed = JSON.parse(raw) as CogneeSyncIndex;
+      if (!parsed || typeof parsed !== "object") return;
+      this.syncIndex = {
+        datasetId: parsed.datasetId,
+        datasetName: parsed.datasetName,
+        files: parsed.files && typeof parsed.files === "object" ? parsed.files : {},
+      };
+    } catch (error) {
+      const code = (error as NodeJS.ErrnoException).code;
+      if (code !== "ENOENT") {
+        log.warn("Failed to load Cognee sync index", { error });
+      }
+    }
+
+    if (this.syncIndex.datasetName && this.syncIndex.datasetName !== this.datasetName) {
+      log.info("Resetting Cognee sync index (dataset name changed)", {
+        from: this.syncIndex.datasetName,
+        to: this.datasetName,
+      });
+      this.syncIndex = { files: {} };
+      this.syncIndexDirty = true;
+    }
+
+    if (this.syncIndex.datasetId && this.datasetId && this.syncIndex.datasetId !== this.datasetId) {
+      log.info("Resetting Cognee sync index (dataset id changed)", {
+        from: this.syncIndex.datasetId,
+        to: this.datasetId,
+      });
+      this.syncIndex = { files: {} };
+      this.syncIndexDirty = true;
+    }
+
+    if (!this.datasetId && this.syncIndex.datasetId) {
+      this.datasetId = this.syncIndex.datasetId;
+    }
+
+    // Seed the unchanged-file check from disk (after the resets above, so a reset index
+    // seeds nothing). Without this every process start re-uploads every file.
+    for (const [filePath, record] of Object.entries(this.syncIndex.files)) {
+      if (record && typeof record.hash === "string" && !this.syncedFiles.has(filePath)) {
+        this.syncedFiles.set(filePath, record.hash);
+      }
+    }
+  }
+
+  /** Writes the sync index as pretty-printed JSON, creating its directory if needed. */
+  private async saveSyncIndex(): Promise<void> {
+    const dir = path.dirname(this.syncIndexPath);
+    await fs.mkdir(dir, { recursive: true });
+    const payload: CogneeSyncIndex = {
+      datasetId: this.datasetId ?? this.syncIndex.datasetId,
+      datasetName: this.datasetName,
+      files: this.syncIndex.files,
+    };
+    await fs.writeFile(this.syncIndexPath, JSON.stringify(payload, null, 2), "utf-8");
+    this.syncIndexDirty = false;
+  }
+
+  /** Builds entries for every workspace memory file; unreadable files are logged and skipped. */
+  private async collectMemoryFiles(): Promise<MemoryFileEntry[]> {
+    const files: MemoryFileEntry[] = [];
+    const memoryPaths = await listMemoryFiles(this.workspaceDir);
+
+    for (const absPath of memoryPaths) {
+      try {
+        const entry = await buildFileEntry(absPath, this.workspaceDir);
+        files.push(entry);
+      } catch (error) {
+        log.warn("Failed to process memory file", { absPath, error });
+      }
+    }
+
+    return files;
+  }
+
+  /**
+   * Builds entries for every `*.jsonl` transcript of this agent, keyed as
+   * `sessions/<file name>`. A failure to list the directory yields an empty list.
+   */
+  private async collectSessionFiles(): Promise<MemoryFileEntry[]> {
+    const files: MemoryFileEntry[] = [];
+    const transcriptsDir = resolveSessionTranscriptsDirForAgent(this.agentId);
+
+    try {
+      const entries = await fs.readdir(transcriptsDir, { withFileTypes: true });
+      for (const entry of entries) {
+        if (!entry.isFile() || !entry.name.endsWith(".jsonl")) continue;
+
+        const absPath = path.join(transcriptsDir, entry.name);
+        try {
+          const stat = await fs.stat(absPath);
+          const content = await fs.readFile(absPath, "utf-8");
+          const hash = hashText(content);
+
+          files.push({
+            path: `${SESSION_PATH_PREFIX}${entry.name}`,
+            absPath,
+            mtimeMs: stat.mtimeMs,
+            size: stat.size,
+            hash,
+          });
+        } catch (error) {
+          log.warn("Failed to process session file", { absPath, error });
+        }
+      }
+    } catch (error) {
+      log.debug("No session transcripts directory", { transcriptsDir, error });
+    }
+
+    return files;
+  }
+
+  /**
+   * Uploads each file whose hash differs from the last uploaded hash.
+   *
+   * With `opts.update` and a known data id + dataset id the existing Cognee item is
+   * updated; otherwise the file is added. Per-file failures are logged and do not
+   * abort the run.
+   *
+   * @returns Number of files added or updated.
+   */
+  private async syncFiles(
+    files: MemoryFileEntry[],
+    source: CogneeMemorySource,
+    opts?: { update?: boolean },
+  ): Promise<number> {
+    let addedCount = 0;
+    const batchSize = this.cognifyBatchSize;
+    const update = Boolean(opts?.update);
+
+    for (let i = 0; i < files.length; i += batchSize) {
+      const batch = files.slice(i, i + batchSize);
+
+      for (const file of batch) {
+        const existingHash = this.syncedFiles.get(file.path);
+        if (existingHash === file.hash) {
+          log.debug("Skipping unchanged file", { path: file.path });
+          continue;
+        }
+
+        try {
+          const content = await fs.readFile(file.absPath, "utf-8");
+          const metadata = {
+            path: file.path,
+            source,
+            agentId: this.agentId,
+            size: file.size,
+            mtimeMs: file.mtimeMs,
+          };
+
+          // Metadata is embedded in the uploaded text itself, as a heading and a trailer.
+          const dataWithMetadata = `# ${file.path}\n\n${content}\n\n---\nMetadata: ${JSON.stringify(metadata)}`;
+
+          const record = this.syncIndex.files[file.path];
+          const datasetId = this.datasetId ?? this.syncIndex.datasetId;
+          const canUpdate = update && record?.dataId && datasetId;
+
+          if (canUpdate && datasetId && record?.dataId) {
+            await this.client.update({
+              dataId: record.dataId,
+              datasetId,
+              data: dataWithMetadata,
+            });
+            addedCount++;
+            log.debug("Updated file in Cognee", {
+              path: file.path,
+              datasetId,
+              dataId: record.dataId,
+            });
+          } else {
+            const response = await this.client.add({
+              data: dataWithMetadata,
+              datasetName: this.datasetName,
+              datasetId,
+            });
+
+            // The first successful add tells us which dataset the server is using.
+            if (!this.datasetId) {
+              this.datasetId = response.datasetId;
+            }
+            if (response.dataId) {
+              this.syncIndex.files[file.path] = {
+                hash: file.hash,
+                dataId: response.dataId,
+              };
+            } else {
+              this.syncIndex.files[file.path] = { hash: file.hash };
+            }
+            this.syncIndex.datasetId = this.datasetId ?? this.syncIndex.datasetId;
+            this.syncIndex.datasetName = this.datasetName;
+            this.syncIndexDirty = true;
+
+            this.syncedFiles.set(file.path, file.hash);
+            addedCount++;
+
+            log.debug("Added file to Cognee", {
+              path: file.path,
+              datasetId: response.datasetId,
+            });
+            continue;
+          }
+
+          // Update path only: keep the existing data id and record the new hash.
+          const dataId = record?.dataId;
+          this.syncIndex.files[file.path] = { hash: file.hash, dataId };
+          this.syncIndex.datasetId = datasetId ?? this.syncIndex.datasetId;
+          this.syncIndex.datasetName = this.datasetName;
+          this.syncIndexDirty = true;
+          this.syncedFiles.set(file.path, file.hash);
+        } catch (error) {
+          log.error("Failed to sync file to Cognee", { path: file.path, error });
+        }
+      }
+    }
+
+    return addedCount;
+  }
+
+  /**
+   * Maps one Cognee result to a `MemorySearchResult`. Missing metadata falls back to
+   * path "unknown" and source "memory"; text beyond `SNIPPET_MAX_CHARS` is cut and
+   * suffixed with "...".
+   */
+  private transformResult(result: CogneeSearchResult): MemorySearchResult {
+    // Extract path from metadata or text
+    const metadata = result.metadata || {};
+    const path = (metadata.path as string) || "unknown";
+    const source = (metadata.source as "memory" | "sessions") || "memory";
+
+    // Truncate snippet to max chars
+    let snippet = result.text;
+    if (snippet.length > SNIPPET_MAX_CHARS) {
+      snippet = snippet.slice(0, SNIPPET_MAX_CHARS) + "...";
+    }
+
+    return {
+      path,
+      startLine: 0, // Cognee doesn't provide line numbers
+      endLine: 0,
+      score: result.score,
+      snippet,
+      source,
+    };
+  }
+}
+
+/**
+ * Creates a provider and verifies the Cognee server is reachable.
+ *
+ * @throws Error when the health check reports the server as unhealthy.
+ */
+export async function createCogneeProvider(
+  cfg: OpenClawConfig,
+  agentId: string,
+  sources: Array<CogneeMemorySource>,
+  config: CogneeProviderConfig = {},
+): Promise<CogneeMemoryProvider> {
+  const provider = new CogneeMemoryProvider(cfg, agentId, sources, config);
+
+  // Verify connection
+  const healthy = await provider.healthCheck();
+  if (!healthy) {
+    throw new Error(
+      `Failed to connect to Cognee at ${config.baseUrl || "http://localhost:8000"}. ` +
+        `Ensure Cognee is running (see docs/memory-cognee.md for setup).`,
+    );
+  }
+
+  return provider;
+}
diff --git a/src/memory/manager.ts b/src/memory/manager.ts index aa3cb317d..6782091ad 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -184,13 +184,15 @@ export class MemoryIndexManager { const key = `${agentId}:${workspaceDir}:${JSON.stringify(settings)}`; const existing = INDEX_CACHE.get(key); if (existing) return existing; + const provider = settings.provider === "cognee" ? "auto" : settings.provider; + const fallback = settings.fallback === "cognee" ? 
"none" : settings.fallback; const providerResult = await createEmbeddingProvider({ config: cfg, agentDir: resolveAgentDir(cfg, agentId), - provider: settings.provider, + provider, remote: settings.remote, model: settings.model, - fallback: settings.fallback, + fallback, local: settings.local, }); const manager = new MemoryIndexManager({ @@ -198,7 +200,7 @@ export class MemoryIndexManager { cfg, agentId, workspaceDir, - settings, + settings: { ...settings, provider, fallback }, providerResult, }); INDEX_CACHE.set(key, manager); @@ -383,6 +385,7 @@ export class MemoryIndexManager { async sync(params?: { reason?: string; force?: boolean; + update?: boolean; progress?: (update: MemorySyncProgressUpdate) => void; }): Promise { if (this.syncing) return this.syncing; @@ -1315,7 +1318,7 @@ export class MemoryIndexManager { } private async activateFallbackProvider(reason: string): Promise { - const fallback = this.settings.fallback; + const fallback = this.settings.fallback === "cognee" ? "none" : this.settings.fallback; if (!fallback || fallback === "none" || fallback === this.provider.id) return false; if (this.fallbackFrom) return false; const fallbackFrom = this.provider.id as "openai" | "gemini" | "local"; diff --git a/src/memory/search-manager.ts b/src/memory/search-manager.ts index c4eed3229..77801292e 100644 --- a/src/memory/search-manager.ts +++ b/src/memory/search-manager.ts @@ -1,8 +1,10 @@ import type { OpenClawConfig } from "../config/config.js"; import type { MemoryIndexManager } from "./manager.js"; +import type { CogneeMemoryProvider } from "./cognee-provider.js"; +import { resolveMemorySearchConfig } from "../agents/memory-search.js"; export type MemorySearchManagerResult = { - manager: MemoryIndexManager | null; + manager: MemoryIndexManager | CogneeMemoryProvider | null; error?: string; }; @@ -11,6 +13,24 @@ export async function getMemorySearchManager(params: { agentId: string; }): Promise { try { + const config = resolveMemorySearchConfig(params.cfg, 
params.agentId); + if (!config) { + return { manager: null, error: "Memory search is disabled" }; + } + + // Route to Cognee provider if configured + if (config.provider === "cognee") { + const { createCogneeProvider } = await import("./cognee-provider.js"); + const manager = await createCogneeProvider( + params.cfg, + params.agentId, + config.sources as Array<"memory" | "sessions">, + config.cognee || {}, + ); + return { manager }; + } + + // Default to SQLite-based memory manager const { MemoryIndexManager } = await import("./manager.js"); const manager = await MemoryIndexManager.get(params); return { manager };