From c1aabbb7ee227fec51832c8a1fb6100aed16aab5 Mon Sep 17 00:00:00 2001 From: vasilije Date: Mon, 26 Jan 2026 22:09:00 +0100 Subject: [PATCH] added cognee draft --- docs/memory-cognee.md | 340 ++++++++++++++++++++++++++++ examples/cognee-config.yaml | 47 ++++ examples/cognee-docker-compose.yaml | 77 +++++++ src/agents/memory-search.ts | 29 ++- src/config/types.tools.ts | 23 +- src/memory/cognee-client.test.ts | 239 +++++++++++++++++++ src/memory/cognee-client.ts | 299 ++++++++++++++++++++++++ src/memory/cognee-provider.test.ts | 153 +++++++++++++ src/memory/cognee-provider.ts | 331 +++++++++++++++++++++++++++ src/memory/search-manager.ts | 22 +- 10 files changed, 1555 insertions(+), 5 deletions(-) create mode 100644 docs/memory-cognee.md create mode 100644 examples/cognee-config.yaml create mode 100644 examples/cognee-docker-compose.yaml create mode 100644 src/memory/cognee-client.test.ts create mode 100644 src/memory/cognee-client.ts create mode 100644 src/memory/cognee-provider.test.ts create mode 100644 src/memory/cognee-provider.ts diff --git a/docs/memory-cognee.md b/docs/memory-cognee.md new file mode 100644 index 000000000..6980513f5 --- /dev/null +++ b/docs/memory-cognee.md @@ -0,0 +1,340 @@ +--- +summary: "Cognee knowledge graph memory: setup, Docker config, and usage" +read_when: + - Setting up Cognee memory provider + - Configuring knowledge graph memory + - Running Cognee with Docker +--- + +# Cognee Memory Provider + +Clawdbot supports **Cognee** as an optional memory provider. Unlike the default SQLite-based vector memory, Cognee builds a knowledge graph with entity extraction and semantic relationships, providing richer contextual memory for your AI agent. + +## What is Cognee? 
+ +Cognee is an AI memory framework that: +- Extracts entities (people, places, concepts) from documents +- Builds a knowledge graph of relationships +- Enables semantic search with LLM-powered reasoning +- Supports multiple search modes (insights, chunks, summaries) + +Learn more at [docs.cognee.ai](https://docs.cognee.ai/). + +## Setup Options + +### Option 1: Local Docker (Recommended) + +Run Cognee locally using Docker Compose: + +**Step 1: Create docker-compose.yml** + +```yaml +version: '3.8' + +services: + cognee: + image: topoteretes/cognee:latest + container_name: cognee + ports: + - "8000:8000" + environment: + # Optional: Set API key for authentication + - COGNEE_API_KEY=your-local-api-key + # Database configuration + - DATABASE_URL=postgresql://cognee:cognee@postgres:5432/cognee + volumes: + - cognee_data:/app/data + depends_on: + - postgres + restart: unless-stopped + + postgres: + image: postgres:15-alpine + container_name: cognee-postgres + environment: + - POSTGRES_USER=cognee + - POSTGRES_PASSWORD=cognee + - POSTGRES_DB=cognee + volumes: + - postgres_data:/var/lib/postgresql/data + restart: unless-stopped + +volumes: + cognee_data: + postgres_data: +``` + +**Step 2: Start Cognee** + +```bash +docker-compose up -d +``` + +**Step 3: Verify** + +```bash +curl http://localhost:8000/status +# Should return: {"status":"healthy"} +``` + +### Option 2: Cognee Cloud + +Use the hosted Cognee service: + +1. Sign up at [platform.cognee.ai](https://platform.cognee.ai/) +2. Get your API key from the dashboard +3. 
Use base URL: `https://cognee--cognee-saas-backend-serve.modal.run` + +## Configuration + +Add Cognee memory configuration to your `~/.clawdbot/config.yaml`: + +### Basic Configuration (Docker Local) + +```yaml +agents: + defaults: + memorySearch: + enabled: true + provider: cognee + sources: [memory] # or [memory, sessions] + cognee: + baseUrl: http://localhost:8000 + datasetName: clawdbot + searchType: insights # or "chunks", "summaries" + maxResults: 6 + autoCognify: true +``` + +### Cloud Configuration + +```yaml +agents: + defaults: + memorySearch: + enabled: true + provider: cognee + sources: [memory, sessions] + cognee: + baseUrl: https://cognee--cognee-saas-backend-serve.modal.run + apiKey: your-api-key-here # Required for cloud + datasetName: clawdbot + searchType: insights + maxResults: 8 + autoCognify: true + timeoutSeconds: 60 +``` + +## Configuration Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `baseUrl` | string | `http://localhost:8000` | Cognee API endpoint | +| `apiKey` | string | - | API key (required for cloud, optional for local) | +| `datasetName` | string | `"clawdbot"` | Dataset name for organizing memories | +| `searchType` | string | `"insights"` | Search mode: `insights`, `chunks`, or `summaries` | +| `maxResults` | number | `6` | Maximum search results returned | +| `autoCognify` | boolean | `true` | Auto-process documents after adding | +| `cognifyBatchSize` | number | `100` | Batch size for processing | +| `timeoutSeconds` | number | `30` | Request timeout in seconds | + +## Search Types + +Cognee offers three search modes: + +### Insights (Recommended) +Best for: **High-level understanding and reasoning** +- Returns AI-generated insights from knowledge graph +- Combines multiple related facts +- Good for: "What projects am I working on?" 
or "Summarize my notes about X" + +### Chunks +Best for: **Specific text matching** +- Returns raw document chunks +- Similar to traditional vector search +- Good for: Finding exact quotes or specific information + +### Summaries +Best for: **Document overviews** +- Returns condensed summaries +- Good for: Quick scanning of content + +## Usage + +### Memory Files + +Cognee automatically syncs your memory files: +- `MEMORY.md` or `memory.md` in workspace root +- All `*.md` files in `memory/` directory + +### Session Transcripts (Optional) + +Enable session memory to index conversation history: + +```yaml +agents: + defaults: + memorySearch: + provider: cognee + sources: [memory, sessions] # Include sessions + experimental: + sessionMemory: true +``` + +### Manual Sync + +Planned: force a memory sync from the CLI (this command is not implemented yet; see Roadmap): + +```bash +# Not yet implemented - coming soon +clawdbot memory sync --provider cognee +``` + +### Check Status + +```bash +# Not yet implemented - coming soon +clawdbot memory status --provider cognee +``` + +## How It Works + +1. **Add**: Memory files are sent to Cognee with metadata +2. **Cognify**: Cognee processes documents: + - Extracts entities (people, places, concepts) + - Identifies relationships + - Builds knowledge graph +3. 
**Search**: Agent queries use semantic search: + - Searches knowledge graph + - Returns relevant insights/chunks/summaries + - Includes metadata and scores + +## Comparison: Cognee vs SQLite Memory + +| Feature | Cognee | SQLite (Default) | +|---------|--------|------------------| +| **Setup** | Requires Docker/cloud | Built-in, no setup | +| **Offline** | No (needs service) | Yes (fully local) | +| **Search** | Knowledge graph + LLM | Vector + BM25 hybrid | +| **Entities** | Extracted automatically | Not available | +| **Relationships** | Yes (graph-based) | No | +| **Speed** | Slower (API calls) | Faster (local DB) | +| **Memory** | Stored externally | SQLite file | +| **Best for** | Rich context, reasoning | Fast lookup, privacy | + +## Troubleshooting + +### Connection Failed + +**Error**: `Failed to connect to Cognee at http://localhost:8000` + +**Solutions**: +1. Verify Docker is running: `docker ps | grep cognee` +2. Check Cognee logs: `docker logs cognee` +3. Test manually: `curl http://localhost:8000/status` +4. Ensure port 8000 is not blocked + +### Slow Performance + +**Solutions**: +1. Reduce `maxResults` (try 3-5 instead of 10+) +2. Use `searchType: "chunks"` for faster results +3. Set `autoCognify: false` and cognify manually +4. Check Docker resource limits + +### Out of Memory + +**Solutions**: +1. Increase Docker memory limit (Docker Desktop settings) +2. Reduce `cognifyBatchSize` (try 50 instead of 100) +3. Process fewer files at once +4. 
Clear old datasets via Cognee API + +## Advanced Configuration + +### Per-Agent Override + +```yaml +agents: + defaults: + memorySearch: + provider: openai # Default for all agents + + agents: + research-bot: + memorySearch: + provider: cognee # Override for this agent + cognee: + searchType: insights + maxResults: 10 +``` + +### Hybrid Setup (Not Yet Supported) + +Future versions may support using both Cognee and SQLite: +- Cognee for semantic understanding +- SQLite for fast local lookup + +## Docker Production Tips + +### Health Checks + +Add health checks to docker-compose.yml: + +```yaml +services: + cognee: + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s +``` + +### Resource Limits + +```yaml +services: + cognee: + deploy: + resources: + limits: + cpus: '2' + memory: 4G + reservations: + cpus: '1' + memory: 2G +``` + +### Persistent Storage + +Mount volumes for persistence: + +```yaml +volumes: + - ./cognee_data:/app/data + - ./cognee_logs:/app/logs +``` + +## Roadmap + +Planned features: +- [ ] `clawdbot memory status --provider cognee` command +- [ ] `clawdbot memory sync --provider cognee` command +- [ ] Hybrid mode (Cognee + SQLite) +- [ ] Graph visualization export +- [ ] Manual entity management + +## Resources + +- [Cognee Documentation](https://docs.cognee.ai/) +- [Cognee GitHub](https://github.com/topoteretes/cognee) +- [Clawdbot Memory Guide](/memory) +- [Docker Setup Guide](/install/docker) + +## Feedback + +Cognee integration is new. 
Report issues at: +- Clawdbot: [github.com/clawdbot/clawdbot/issues](https://github.com/clawdbot/clawdbot/issues) +- Cognee: [github.com/topoteretes/cognee/issues](https://github.com/topoteretes/cognee/issues) diff --git a/examples/cognee-config.yaml b/examples/cognee-config.yaml new file mode 100644 index 000000000..b8abc922b --- /dev/null +++ b/examples/cognee-config.yaml @@ -0,0 +1,47 @@ +# Example Clawdbot configuration with Cognee memory provider +# Copy to ~/.clawdbot/config.yaml and customize + +agents: + defaults: + # Use Cognee for knowledge graph memory + memorySearch: + enabled: true + provider: cognee + sources: [memory, sessions] # Index both memory files and conversation history + + # Cognee-specific configuration + cognee: + # Local Docker setup (default) + baseUrl: http://localhost:8000 + + # For Cognee Cloud, use: + # baseUrl: https://cognee--cognee-saas-backend-serve.modal.run + # apiKey: your-api-key-here + + # Dataset name for organizing memories + datasetName: clawdbot + + # Search mode: "insights" (recommended), "chunks", or "summaries" + searchType: insights + + # Maximum search results to return + maxResults: 6 + + # Automatically process documents after adding (recommended) + autoCognify: true + + # Batch size for processing multiple documents + cognifyBatchSize: 100 + + # Request timeout in seconds + timeoutSeconds: 30 + + # Enable experimental session memory indexing + experimental: + sessionMemory: true + +# To use default SQLite memory instead, set: +# agents: +# defaults: +# memorySearch: +# provider: auto # or "openai", "gemini", "local" diff --git a/examples/cognee-docker-compose.yaml b/examples/cognee-docker-compose.yaml new file mode 100644 index 000000000..045f8a7dc --- /dev/null +++ b/examples/cognee-docker-compose.yaml @@ -0,0 +1,77 @@ +# Docker Compose configuration for running Cognee locally with Clawdbot +# +# Usage: +# 1. Copy this file to your preferred location +# 2. 
Run: docker-compose -f cognee-docker-compose.yaml up -d +# 3. Verify: curl http://localhost:8000/status +# 4. Configure Clawdbot with baseUrl: http://localhost:8000 +# +# For production, see docs/memory-cognee.md for additional configuration + +version: '3.8' + +services: + # Cognee API server + cognee: + image: topoteretes/cognee:latest + container_name: cognee + ports: + - "8000:8000" + environment: + # Optional: Set API key for authentication + # Remove or comment out for local development without auth + - COGNEE_API_KEY=${COGNEE_API_KEY:-} + + # Database connection + - DATABASE_URL=postgresql://cognee:cognee@postgres:5432/cognee + + # Optional: Configure LLM provider for entity extraction + # Uncomment and set if you want to use specific providers + # - OPENAI_API_KEY=${OPENAI_API_KEY:-} + # - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} + volumes: + - cognee_data:/app/data + - cognee_logs:/app/logs + depends_on: + postgres: + condition: service_healthy + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + networks: + - cognee-network + + # PostgreSQL database for Cognee + postgres: + image: postgres:15-alpine + container_name: cognee-postgres + environment: + - POSTGRES_USER=cognee + - POSTGRES_PASSWORD=cognee + - POSTGRES_DB=cognee + volumes: + - postgres_data:/var/lib/postgresql/data + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "pg_isready -U cognee"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - cognee-network + +volumes: + cognee_data: + driver: local + cognee_logs: + driver: local + postgres_data: + driver: local + +networks: + cognee-network: + driver: bridge diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts index c08161d4f..8fbdb2614 100644 --- a/src/agents/memory-search.ts +++ b/src/agents/memory-search.ts @@ -9,7 +9,7 @@ import { resolveAgentConfig } from "./agent-scope.js"; export type 
ResolvedMemorySearchConfig = { enabled: boolean; sources: Array<"memory" | "sessions">; - provider: "openai" | "local" | "gemini" | "auto"; + provider: "openai" | "local" | "gemini" | "auto" | "cognee"; remote?: { baseUrl?: string; apiKey?: string; @@ -25,12 +25,22 @@ export type ResolvedMemorySearchConfig = { experimental: { sessionMemory: boolean; }; - fallback: "openai" | "gemini" | "local" | "none"; + fallback: "openai" | "gemini" | "local" | "cognee" | "none"; model: string; local: { modelPath?: string; modelCacheDir?: string; }; + cognee?: { + baseUrl?: string; + apiKey?: string; + datasetName?: string; + searchType?: "insights" | "chunks" | "summaries"; + maxResults?: number; + timeoutSeconds?: number; + autoCognify?: boolean; + cognifyBatchSize?: number; + }; store: { driver: "sqlite"; path: string; @@ -222,6 +232,20 @@ function mergeConfig( enabled: overrides?.cache?.enabled ?? defaults?.cache?.enabled ?? DEFAULT_CACHE_ENABLED, maxEntries: overrides?.cache?.maxEntries ?? defaults?.cache?.maxEntries, }; + const cognee = + provider === "cognee" + ? { + baseUrl: overrides?.cognee?.baseUrl ?? defaults?.cognee?.baseUrl, + apiKey: overrides?.cognee?.apiKey ?? defaults?.cognee?.apiKey, + datasetName: overrides?.cognee?.datasetName ?? defaults?.cognee?.datasetName, + searchType: overrides?.cognee?.searchType ?? defaults?.cognee?.searchType, + maxResults: overrides?.cognee?.maxResults ?? defaults?.cognee?.maxResults, + timeoutSeconds: overrides?.cognee?.timeoutSeconds ?? defaults?.cognee?.timeoutSeconds, + autoCognify: overrides?.cognee?.autoCognify ?? defaults?.cognee?.autoCognify, + cognifyBatchSize: + overrides?.cognee?.cognifyBatchSize ?? 
defaults?.cognee?.cognifyBatchSize, + } + : undefined; const overlap = clampNumber(chunking.overlap, 0, Math.max(0, chunking.tokens - 1)); const minScore = clampNumber(query.minScore, 0, 1); @@ -244,6 +268,7 @@ function mergeConfig( fallback, model, local, + cognee, store, chunking: { tokens: Math.max(1, chunking.tokens), overlap }, sync: { diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index bb1d45bf0..7506204b9 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -232,7 +232,7 @@ export type MemorySearchConfig = { sessionMemory?: boolean; }; /** Embedding provider mode. */ - provider?: "openai" | "gemini" | "local"; + provider?: "openai" | "gemini" | "local" | "cognee"; remote?: { baseUrl?: string; apiKey?: string; @@ -251,7 +251,7 @@ export type MemorySearchConfig = { }; }; /** Fallback behavior when embeddings fail. */ - fallback?: "openai" | "gemini" | "local" | "none"; + fallback?: "openai" | "gemini" | "local" | "cognee" | "none"; /** Embedding model id (remote) or alias (local). */ model?: string; /** Local embedding settings (node-llama-cpp). */ @@ -261,6 +261,25 @@ export type MemorySearchConfig = { /** Optional cache directory for local models. */ modelCacheDir?: string; }; + /** Cognee knowledge graph memory settings. */ + cognee?: { + /** Cognee API endpoint (default: http://localhost:8000). */ + baseUrl?: string; + /** Cognee API key (required for cloud, optional for local). */ + apiKey?: string; + /** Dataset name for organizing memories (default: "clawdbot"). */ + datasetName?: string; + /** Search type: "insights", "chunks", or "summaries" (default: "insights"). */ + searchType?: "insights" | "chunks" | "summaries"; + /** Max results per search query (default: 6). */ + maxResults?: number; + /** Timeout for API requests in seconds (default: 30). */ + timeoutSeconds?: number; + /** Enable automatic cognify after adding documents (default: true). 
*/ + autoCognify?: boolean; + /** Cognify batch size for processing (default: 100). */ + cognifyBatchSize?: number; + }; /** Index storage configuration. */ store?: { driver?: "sqlite"; diff --git a/src/memory/cognee-client.test.ts b/src/memory/cognee-client.test.ts new file mode 100644 index 000000000..6bbdb9a3a --- /dev/null +++ b/src/memory/cognee-client.test.ts @@ -0,0 +1,239 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; +import { CogneeClient } from "./cognee-client.js"; +import { request } from "undici"; + +vi.mock("undici", () => ({ + request: vi.fn(), +})); + +describe("CogneeClient", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe("add", () => { + it("should add data successfully", async () => { + const mockResponse = { + statusCode: 200, + body: { + json: vi.fn().mockResolvedValue({ + dataset_id: "test-dataset-id", + dataset_name: "test-dataset", + message: "Data added successfully", + }), + text: vi.fn(), + }, + }; + vi.mocked(request).mockResolvedValue(mockResponse as any); + + const client = new CogneeClient({ + baseUrl: "http://localhost:8000", + apiKey: "test-key", + }); + + const result = await client.add({ + data: "Test data", + datasetName: "test-dataset", + }); + + expect(result).toEqual({ + datasetId: "test-dataset-id", + datasetName: "test-dataset", + message: "Data added successfully", + }); + expect(request).toHaveBeenCalledWith( + "http://localhost:8000/add", + expect.objectContaining({ + method: "POST", + headers: expect.objectContaining({ + "Content-Type": "application/json", + "X-Api-Key": "test-key", + }), + }), + ); + }); + + it("should handle errors", async () => { + const mockResponse = { + statusCode: 500, + body: { + text: vi.fn().mockResolvedValue("Internal server error"), + }, + }; + vi.mocked(request).mockResolvedValue(mockResponse as any); + + const client = new CogneeClient(); + + await expect( + client.add({ + data: "Test 
data", + datasetName: "test-dataset", + }), + ).rejects.toThrow("Cognee add failed with status 500"); + }); + }); + + describe("cognify", () => { + it("should run cognify successfully", async () => { + const mockResponse = { + statusCode: 200, + body: { + json: vi.fn().mockResolvedValue({ + status: "success", + message: "Cognify completed", + }), + text: vi.fn(), + }, + }; + vi.mocked(request).mockResolvedValue(mockResponse as any); + + const client = new CogneeClient({ + baseUrl: "http://localhost:8000", + }); + + const result = await client.cognify({ + datasetIds: ["dataset-1"], + }); + + expect(result).toEqual({ + status: "success", + message: "Cognify completed", + }); + }); + }); + + describe("search", () => { + it("should search successfully", async () => { + const mockResponse = { + statusCode: 200, + body: { + json: vi.fn().mockResolvedValue({ + results: [ + { + id: "result-1", + text: "Test result", + score: 0.9, + metadata: { path: "test.md" }, + }, + ], + query: "test query", + search_type: "insights", + }), + text: vi.fn(), + }, + }; + vi.mocked(request).mockResolvedValue(mockResponse as any); + + const client = new CogneeClient(); + + const result = await client.search({ + queryText: "test query", + searchType: "insights", + }); + + expect(result.results).toHaveLength(1); + expect(result.results[0]).toEqual({ + id: "result-1", + text: "Test result", + score: 0.9, + metadata: { path: "test.md" }, + }); + expect(result.query).toBe("test query"); + }); + + it("should use default search type", async () => { + const mockResponse = { + statusCode: 200, + body: { + json: vi.fn().mockResolvedValue({ + results: [], + query: "test", + search_type: "insights", + }), + text: vi.fn(), + }, + }; + vi.mocked(request).mockResolvedValue(mockResponse as any); + + const client = new CogneeClient(); + await client.search({ queryText: "test" }); + + expect(request).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + body: 
expect.stringContaining('"search_type":"insights"'), + }), + ); + }); + }); + + describe("status", () => { + it("should get status successfully", async () => { + const mockResponse = { + statusCode: 200, + body: { + json: vi.fn().mockResolvedValue({ + status: "healthy", + version: "1.0.0", + datasets: [ + { + id: "dataset-1", + name: "test-dataset", + document_count: 10, + }, + ], + }), + text: vi.fn(), + }, + }; + vi.mocked(request).mockResolvedValue(mockResponse as any); + + const client = new CogneeClient(); + + const result = await client.status(); + + expect(result).toEqual({ + status: "healthy", + version: "1.0.0", + datasets: [ + { + id: "dataset-1", + name: "test-dataset", + documentCount: 10, + }, + ], + }); + }); + }); + + describe("healthCheck", () => { + it("should return true when status is successful", async () => { + const mockResponse = { + statusCode: 200, + body: { + json: vi.fn().mockResolvedValue({ status: "healthy" }), + text: vi.fn(), + }, + }; + vi.mocked(request).mockResolvedValue(mockResponse as any); + + const client = new CogneeClient(); + const result = await client.healthCheck(); + + expect(result).toBe(true); + }); + + it("should return false when status fails", async () => { + vi.mocked(request).mockRejectedValue(new Error("Connection failed")); + + const client = new CogneeClient(); + const result = await client.healthCheck(); + + expect(result).toBe(false); + }); + }); +}); diff --git a/src/memory/cognee-client.ts b/src/memory/cognee-client.ts new file mode 100644 index 000000000..c3ec3747a --- /dev/null +++ b/src/memory/cognee-client.ts @@ -0,0 +1,299 @@ +import { request } from "undici"; +import { createSubsystemLogger } from "../logging/subsystem.js"; + +const log = createSubsystemLogger("cognee"); + +const DEFAULT_BASE_URL = "http://localhost:8000"; +const DEFAULT_TIMEOUT_MS = 30_000; + +export type CogneeClientConfig = { + baseUrl?: string; + apiKey?: string; + timeoutMs?: number; +}; + +export type CogneeAddRequest = { + data: 
string; + datasetName?: string; + datasetId?: string; +}; + +export type CogneeAddResponse = { + datasetId: string; + datasetName: string; + message: string; +}; + +export type CogneeCognifyRequest = { + datasetIds?: string[]; +}; + +export type CogneeCognifyResponse = { + status: string; + message: string; +}; + +export type CogneeSearchRequest = { + queryText: string; + searchType?: "insights" | "chunks" | "summaries"; + datasetIds?: string[]; +}; + +export type CogneeSearchResult = { + id: string; + text: string; + score: number; + metadata?: Record<string, unknown>; +}; + +export type CogneeSearchResponse = { + results: CogneeSearchResult[]; + query: string; + searchType: string; +}; + +export type CogneeStatusResponse = { + status: string; + version?: string; + datasets?: Array<{ + id: string; + name: string; + documentCount?: number; + }>; +}; + +export class CogneeClient { + private readonly baseUrl: string; + private readonly apiKey?: string; + private readonly timeoutMs: number; + + constructor(config: CogneeClientConfig = {}) { + this.baseUrl = config.baseUrl?.replace(/\/$/, "") || DEFAULT_BASE_URL; + this.apiKey = config.apiKey; + this.timeoutMs = config.timeoutMs || DEFAULT_TIMEOUT_MS; + } + + async add(req: CogneeAddRequest): Promise<CogneeAddResponse> { + const url = `${this.baseUrl}/add`; + const headers: Record<string, string> = { + "Content-Type": "application/json", + }; + if (this.apiKey) { + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Adding data to Cognee", { + url, + datasetName: req.datasetName, + dataLength: req.data.length, + }); + + try { + const response = await request(url, { + method: "POST", + headers, + body: JSON.stringify({ + data: req.data, + dataset_name: req.datasetName, + dataset_id: req.datasetId, + }), + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error( + `Cognee add failed with status 
${response.statusCode}: ${errorText}`, + ); + } + + const data = (await response.body.json()) as { + dataset_id: string; + dataset_name: string; + message: string; + }; + + return { + datasetId: data.dataset_id, + datasetName: data.dataset_name, + message: data.message, + }; + } catch (error) { + log.error("Failed to add data to Cognee", { error }); + throw new Error( + `Cognee add request failed: ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + + async cognify(req: CogneeCognifyRequest = {}): Promise<CogneeCognifyResponse> { + const url = `${this.baseUrl}/cognify`; + const headers: Record<string, string> = { + "Content-Type": "application/json", + }; + if (this.apiKey) { + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Running cognify", { url, datasetIds: req.datasetIds }); + + try { + const response = await request(url, { + method: "POST", + headers, + body: JSON.stringify({ + dataset_ids: req.datasetIds, + }), + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error( + `Cognee cognify failed with status ${response.statusCode}: ${errorText}`, + ); + } + + const data = (await response.body.json()) as { + status: string; + message: string; + }; + + return { + status: data.status, + message: data.message, + }; + } catch (error) { + log.error("Failed to cognify", { error }); + throw new Error( + `Cognee cognify request failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + } + + async search(req: CogneeSearchRequest): Promise<CogneeSearchResponse> { + const url = `${this.baseUrl}/search`; + const headers: Record<string, string> = { + "Content-Type": "application/json", + }; + if (this.apiKey) { + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Searching Cognee", { + url, + query: req.queryText, + searchType: req.searchType, + }); + + try { + const response = await request(url, { + method: "POST", + headers, + body: JSON.stringify({ + query_text: req.queryText, + search_type: req.searchType || "insights", + dataset_ids: req.datasetIds, + }), + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error( + `Cognee search failed with status ${response.statusCode}: ${errorText}`, + ); + } + + const data = (await response.body.json()) as { + results: Array<{ + id: string; + text: string; + score: number; + metadata?: Record<string, unknown>; + }>; + query: string; + search_type: string; + }; + + return { + results: data.results.map((r) => ({ + id: r.id, + text: r.text, + score: r.score, + metadata: r.metadata, + })), + query: data.query, + searchType: data.search_type, + }; + } catch (error) { + log.error("Failed to search Cognee", { error }); + throw new Error( + `Cognee search request failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + } + + async status(): Promise<CogneeStatusResponse> { + const url = `${this.baseUrl}/status`; + const headers: Record<string, string> = {}; + if (this.apiKey) { + headers["X-Api-Key"] = this.apiKey; + } + + log.debug("Checking Cognee status", { url }); + + try { + const response = await request(url, { + method: "GET", + headers, + bodyTimeout: this.timeoutMs, + headersTimeout: this.timeoutMs, + }); + + if (response.statusCode !== 200) { + const errorText = await response.body.text(); + throw new Error( + `Cognee status failed with status ${response.statusCode}: ${errorText}`, + ); + } + + const data = (await response.body.json()) as { + status: string; + version?: string; + datasets?: Array<{ + id: string; + name: string; + document_count?: number; + }>; + }; + + return { + status: data.status, + version: data.version, + datasets: data.datasets?.map((d) => ({ + id: d.id, + name: d.name, + documentCount: d.document_count, + })), + }; + } catch (error) { + log.error("Failed to get Cognee status", { error }); + throw new Error( + `Cognee status request failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + } + + async healthCheck(): Promise<boolean> { + try { + await this.status(); + return true; + } catch { + return false; + } + } +} diff --git a/src/memory/cognee-provider.test.ts b/src/memory/cognee-provider.test.ts new file mode 100644 index 000000000..2b6bd64b6 --- /dev/null +++ b/src/memory/cognee-provider.test.ts @@ -0,0 +1,153 @@ +import { describe, expect, it, vi, beforeEach } from "vitest"; +import { CogneeMemoryProvider } from "./cognee-provider.js"; +import type { ClawdbotConfig } from "../config/config.js"; + +vi.mock("./cognee-client.js", () => ({ + CogneeClient: vi.fn().mockImplementation(() => ({ + healthCheck: vi.fn().mockResolvedValue(true), + add: vi.fn().mockResolvedValue({ + datasetId: "test-dataset-id", + datasetName: "test-dataset", + message: "Success", + }), + cognify: vi.fn().mockResolvedValue({ + status: "success", + message: "Cognify completed", + }), + search: vi.fn().mockResolvedValue({ + results: [ + { + id: "result-1", + text: "Test result text", + score: 0.85, + metadata: { + path: "test.md", + source: "memory", + }, + }, + ], + query: "test query", + searchType: "insights", + }), + status: vi.fn().mockResolvedValue({ + status: "healthy", + version: "1.0.0", + datasets: [ + { + id: "test-dataset-id", + name: "clawdbot", + documentCount: 5, + }, + ], + }), + })), +})); + +vi.mock("./internal.js", () => ({ + listMemoryFiles: vi.fn().mockResolvedValue([]), + buildFileEntry: vi.fn(), + hashText: vi.fn().mockReturnValue("test-hash"), +})); + +vi.mock("node:fs/promises", () => ({ + default: { + readdir: vi.fn().mockResolvedValue([]), + readFile: vi.fn().mockResolvedValue("Test file content"), + stat: vi.fn().mockResolvedValue({ mtimeMs: 1234567890, size: 100 }), + }, +})); + +describe("CogneeMemoryProvider", () => { + const mockConfig: ClawdbotConfig = { + agents: { + defaults: { + workspace: "/tmp/test-workspace", + }, + }, + }; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + 
describe("constructor", () => { + it("should initialize with default configuration", () => { + const provider = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"]); + + expect(provider).toBeDefined(); + }); + + it("should initialize with custom configuration", () => { + const provider = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"], { + baseUrl: "http://custom:8000", + apiKey: "custom-key", + datasetName: "custom-dataset", + searchType: "chunks", + maxResults: 10, + }); + + expect(provider).toBeDefined(); + }); + }); + + describe("healthCheck", () => { + it("should perform health check", async () => { + const provider = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"]); + + const healthy = await provider.healthCheck(); + + expect(healthy).toBe(true); + }); + }); + + describe("search", () => { + it("should search and transform results", async () => { + const provider = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"]); + + const results = await provider.search("test query"); + + expect(results).toHaveLength(1); + expect(results[0]).toMatchObject({ + path: "test.md", + source: "memory", + score: 0.85, + snippet: "Test result text", + }); + }); + + it("should respect maxResults setting", async () => { + const provider = new CogneeMemoryProvider( + mockConfig, + "test-agent", + ["memory"], + { maxResults: 5 }, + ); + + const results = await provider.search("test query"); + + expect(results.length).toBeLessThanOrEqual(5); + }); + }); + + describe("cognify", () => { + it("should run cognify", async () => { + const provider = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"]); + + await expect(provider.cognify()).resolves.not.toThrow(); + }); + }); + + describe("getStatus", () => { + it("should return status information", async () => { + const provider = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"]); + + const status = await provider.getStatus(); + + expect(status).toMatchObject({ + 
connected: true,
+        datasetName: "clawdbot",
+        syncedFileCount: 0,
+      });
+    });
+  });
+});
diff --git a/src/memory/cognee-provider.ts b/src/memory/cognee-provider.ts
new file mode 100644
index 000000000..0f6050b66
--- /dev/null
+++ b/src/memory/cognee-provider.ts
@@ -0,0 +1,331 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import type { ClawdbotConfig } from "../config/config.js";
+import { resolveAgentWorkspaceDir } from "../agents/agent-scope.js";
+import { resolveSessionTranscriptsDirForAgent } from "../config/sessions/paths.js";
+import { createSubsystemLogger } from "../logging/subsystem.js";
+import type { MemorySearchResult } from "./index.js";
+import {
+  CogneeClient,
+  type CogneeClientConfig,
+  type CogneeSearchResult,
+} from "./cognee-client.js";
+import {
+  buildFileEntry,
+  hashText,
+  listMemoryFiles,
+  type MemoryFileEntry,
+} from "./internal.js";
+
+const log = createSubsystemLogger("cognee-provider");
+
+// Defaults used when the matching CogneeProviderConfig option is not supplied.
+const DEFAULT_DATASET_NAME = "clawdbot";
+const DEFAULT_SEARCH_TYPE = "insights";
+const DEFAULT_MAX_RESULTS = 6;
+const DEFAULT_TIMEOUT_SECONDS = 30;
+const DEFAULT_AUTO_COGNIFY = true;
+const DEFAULT_COGNIFY_BATCH_SIZE = 100;
+// Search-result text longer than this is cut and suffixed with "...".
+const SNIPPET_MAX_CHARS = 700;
+
+/** Optional settings for CogneeMemoryProvider; every field has a default. */
+export type CogneeProviderConfig = {
+  /** Cognee server URL, passed through to CogneeClient. */
+  baseUrl?: string;
+  /** API key, passed through to CogneeClient. */
+  apiKey?: string;
+  /** Dataset that synced files are added to (default "clawdbot"). */
+  datasetName?: string;
+  /** Search mode sent with every query (default "insights"). */
+  searchType?: "insights" | "chunks" | "summaries";
+  /** Upper bound on results returned by search() (default 6). */
+  maxResults?: number;
+  /** Client timeout in seconds; converted to milliseconds (default 30). */
+  timeoutSeconds?: number;
+  /** When true, sync() runs cognify() after adding at least one file (default true). */
+  autoCognify?: boolean;
+  /** Number of files per slice while iterating files during sync (default 100). */
+  cognifyBatchSize?: number;
+};
+
+/** Kinds of content the provider can upload: workspace memory files or session transcripts. */
+export type CogneeMemorySource = "memory" | "sessions";
+
+/**
+ * Memory provider backed by a Cognee server: uploads memory files and session
+ * transcripts to a dataset and answers searches through CogneeClient.
+ */
+export class CogneeMemoryProvider {
+  private readonly client: CogneeClient;
+  private readonly cfg: ClawdbotConfig;
+  private readonly agentId: string;
+  private readonly workspaceDir: string;
+  private readonly datasetName: string;
+  private readonly searchType: "insights" | "chunks" | "summaries";
+  private readonly maxResults: number;
+  private readonly autoCognify: boolean;
+  private readonly cognifyBatchSize: number;
+  private readonly
sources: Set<CogneeMemorySource>;
+  // Cognee dataset id once known; while undefined, search() and cognify()
+  // are sent without a dataset filter.
+  private datasetId?: string;
+  private syncedFiles = new Map<string, string>(); // path -> hash
+
+  /**
+   * @param cfg Application config; used to resolve the agent workspace and
+   *   the session transcript directory.
+   * @param agentId Agent whose memory is indexed.
+   * @param sources Which inputs sync() uploads ("memory", "sessions").
+   * @param config Optional Cognee settings. Unset options, and numeric
+   *   options that are not a positive number, fall back to the DEFAULT_*
+   *   constants.
+   */
+  constructor(
+    cfg: ClawdbotConfig,
+    agentId: string,
+    sources: Array<CogneeMemorySource>,
+    config: CogneeProviderConfig = {},
+  ) {
+    // A bare `||` fallback lets negative numbers through. That matters here:
+    // syncFiles() advances its loop index by the batch size, so a negative
+    // value never terminates, and a negative maxResults makes slice() drop
+    // trailing results instead of limiting them. NaN fails `> 0` as well.
+    const positiveOr = (value: number | undefined, fallback: number): number =>
+      typeof value === "number" && value > 0 ? value : fallback;
+
+    const timeoutMs = positiveOr(config.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS) * 1000;
+    const clientConfig: CogneeClientConfig = {
+      baseUrl: config.baseUrl,
+      apiKey: config.apiKey,
+      timeoutMs,
+    };
+
+    this.client = new CogneeClient(clientConfig);
+    this.cfg = cfg;
+    this.agentId = agentId;
+    this.workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
+    this.datasetName = config.datasetName || DEFAULT_DATASET_NAME;
+    this.searchType = config.searchType || DEFAULT_SEARCH_TYPE;
+    this.maxResults = positiveOr(config.maxResults, DEFAULT_MAX_RESULTS);
+    this.autoCognify = config.autoCognify ?? DEFAULT_AUTO_COGNIFY;
+    // Whole number of at least 1 so the batching loop always makes progress.
+    this.cognifyBatchSize = Math.max(
+      1,
+      Math.floor(positiveOr(config.cognifyBatchSize, DEFAULT_COGNIFY_BATCH_SIZE)),
+    );
+    this.sources = new Set(sources);
+
+    log.info("Cognee memory provider initialized", {
+      agentId,
+      datasetName: this.datasetName,
+      searchType: this.searchType,
+      sources: Array.from(this.sources),
+    });
+  }
+
+  /** Delegates to the client's health check. */
+  async healthCheck(): Promise<boolean> {
+    return this.client.healthCheck();
+  }
+
+  /**
+   * Upload new or changed files from every enabled source, then run
+   * cognify() when autoCognify is on and at least one file was added.
+   */
+  async sync(): Promise<void> {
+    log.info("Starting Cognee memory sync", { agentId: this.agentId });
+
+    let addedCount = 0;
+
+    // Sync memory files
+    if (this.sources.has("memory")) {
+      const memoryFiles = await this.collectMemoryFiles();
+      addedCount += await this.syncFiles(memoryFiles, "memory");
+    }
+
+    // Sync session transcripts
+    if (this.sources.has("sessions")) {
+      const sessionFiles = await this.collectSessionFiles();
+      addedCount += await this.syncFiles(sessionFiles, "sessions");
+    }
+
+    // Run cognify if auto-enabled and files were added
+    if (this.autoCognify && addedCount > 0) {
+      log.info("Running cognify after sync", { addedCount });
+      await this.cognify();
+    }
+
+    log.info("Cognee memory sync completed", {
+
agentId: this.agentId,
+      addedCount,
+    });
+  }
+
+  /**
+   * Query Cognee and return at most maxResults hits in MemorySearchResult form.
+   *
+   * The query is restricted to this provider's dataset only when its id is
+   * already known; otherwise no dataset filter is sent.
+   *
+   * @throws Rethrows any client error after logging it.
+   */
+  async search(query: string): Promise<MemorySearchResult[]> {
+    log.debug("Searching Cognee memory", { query, searchType: this.searchType });
+
+    try {
+      const response = await this.client.search({
+        queryText: query,
+        searchType: this.searchType,
+        datasetIds: this.datasetId ? [this.datasetId] : undefined,
+      });
+
+      const results: MemorySearchResult[] = response.results
+        .slice(0, this.maxResults)
+        .map((r) => this.transformResult(r));
+
+      log.debug("Cognee search completed", { query, resultCount: results.length });
+      return results;
+    } catch (error) {
+      log.error("Cognee search failed", { error, query });
+      throw error;
+    }
+  }
+
+  /**
+   * Ask Cognee to process added data, limited to this provider's dataset
+   * when its id is known.
+   *
+   * @throws Rethrows any client error after logging it.
+   */
+  async cognify(): Promise<void> {
+    try {
+      const response = await this.client.cognify({
+        datasetIds: this.datasetId ? [this.datasetId] : undefined,
+      });
+      log.info("Cognify completed", { status: response.status });
+    } catch (error) {
+      log.error("Cognify failed", { error });
+      throw error;
+    }
+  }
+
+  /**
+   * Report connectivity and dataset information. Never throws: a failed
+   * status request is logged and reported as `connected: false`.
+   *
+   * As a side effect, the id of the dataset whose name matches datasetName
+   * is remembered, so later search()/cognify() calls are scoped to it even
+   * if no file has been added by this instance.
+   */
+  async getStatus(): Promise<{
+    connected: boolean;
+    datasetId?: string;
+    datasetName: string;
+    syncedFileCount: number;
+    version?: string;
+  }> {
+    try {
+      const status = await this.client.status();
+      const dataset = status.datasets?.find((d) => d.name === this.datasetName);
+
+      // Previously the looked-up id was returned but thrown away, leaving
+      // search()/cognify() unscoped until a file had been added.
+      if (!this.datasetId && dataset?.id) {
+        this.datasetId = dataset.id;
+      }
+
+      return {
+        connected: true,
+        datasetId: this.datasetId || dataset?.id,
+        datasetName: this.datasetName,
+        syncedFileCount: this.syncedFiles.size,
+        version: status.version,
+      };
+    } catch (error) {
+      log.error("Failed to get Cognee status", { error });
+      return {
+        connected: false,
+        datasetName: this.datasetName,
+        syncedFileCount: this.syncedFiles.size,
+      };
+    }
+  }
+
+  /**
+   * Build file entries for every memory file in the workspace. Files that
+   * cannot be processed are logged and skipped.
+   */
+  private async collectMemoryFiles(): Promise<MemoryFileEntry[]> {
+    const files: MemoryFileEntry[] = [];
+    const memoryPaths = await listMemoryFiles(this.workspaceDir);
+
+    for (const absPath of memoryPaths) {
+      try {
+        const entry = await buildFileEntry(absPath, this.workspaceDir);
+        files.push(entry);
+      } catch (error) {
+
log.warn("Failed to process memory file", { absPath, error });
+      }
+    }
+
+    return files;
+  }
+
+  /**
+   * Build file entries for every `.jsonl` transcript in the agent's session
+   * directory. Each file is read once here to compute its content hash.
+   * Unreadable files are logged and skipped; a missing directory yields an
+   * empty list.
+   */
+  private async collectSessionFiles(): Promise<MemoryFileEntry[]> {
+    const files: MemoryFileEntry[] = [];
+    const transcriptsDir = resolveSessionTranscriptsDirForAgent(
+      this.cfg,
+      this.agentId,
+    );
+
+    try {
+      const entries = await fs.readdir(transcriptsDir, { withFileTypes: true });
+      for (const entry of entries) {
+        if (!entry.isFile() || !entry.name.endsWith(".jsonl")) continue;
+
+        const absPath = path.join(transcriptsDir, entry.name);
+        try {
+          const stat = await fs.stat(absPath);
+          const content = await fs.readFile(absPath, "utf-8");
+          const hash = hashText(content);
+
+          files.push({
+            path: `sessions/${entry.name}`,
+            absPath,
+            mtimeMs: stat.mtimeMs,
+            size: stat.size,
+            hash,
+          });
+        } catch (error) {
+          log.warn("Failed to process session file", { absPath, error });
+        }
+      }
+    } catch (error) {
+      // Only a missing directory means "no transcripts yet". Other failures
+      // (permissions, I/O) were previously reported under the same message
+      // and are now surfaced as warnings with the underlying error.
+      const code =
+        error instanceof Error && "code" in error
+          ? (error as { code?: unknown }).code
+          : undefined;
+      if (code === "ENOENT") {
+        log.debug("No session transcripts directory", { transcriptsDir });
+      } else {
+        log.warn("Failed to read session transcripts directory", { transcriptsDir, error });
+      }
+    }
+
+    return files;
+  }
+
+  /**
+   * Upload every file whose hash differs from the one recorded for its path.
+   * Each document is sent as a `# <path>` heading, the file content, and a
+   * trailing `Metadata: <json>` line. A failed upload is logged and does not
+   * stop the remaining files.
+   *
+   * @returns Number of files successfully added.
+   */
+  private async syncFiles(
+    files: MemoryFileEntry[],
+    source: CogneeMemorySource,
+  ): Promise<number> {
+    let addedCount = 0;
+    const batchSize = this.cognifyBatchSize;
+
+    for (let i = 0; i < files.length; i += batchSize) {
+      const batch = files.slice(i, i + batchSize);
+
+      for (const file of batch) {
+        const existingHash = this.syncedFiles.get(file.path);
+        if (existingHash === file.hash) {
+          log.debug("Skipping unchanged file", { path: file.path });
+          continue;
+        }
+
+        try {
+          const content = await fs.readFile(file.absPath, "utf-8");
+          const metadata = {
+            path: file.path,
+            source,
+            agentId: this.agentId,
+            size: file.size,
+            mtimeMs: file.mtimeMs,
+          };
+
+          const dataWithMetadata = `# ${file.path}\n\n${content}\n\n---\nMetadata: ${JSON.stringify(metadata)}`;
+
+          const response = await this.client.add({
+            data: dataWithMetadata,
+            datasetName: this.datasetName,
+          });
+
+          // First successful add tells us which dataset id to scope to.
+          if (!this.datasetId) {
+            this.datasetId = response.datasetId;
+          }
+
+
this.syncedFiles.set(file.path, file.hash);
+          addedCount++;
+
+          log.debug("Added file to Cognee", {
+            path: file.path,
+            datasetId: response.datasetId,
+          });
+        } catch (error) {
+          log.error("Failed to add file to Cognee", { path: file.path, error });
+        }
+      }
+    }
+
+    return addedCount;
+  }
+
+  /**
+   * Convert a Cognee search hit into the shared MemorySearchResult shape.
+   *
+   * `path` and `source` come from the hit's structured metadata when present.
+   * Otherwise they are recovered, best effort, from the `Metadata: {...}`
+   * line that syncFiles() appends to each uploaded document; if that is
+   * missing too, they default to "unknown" and "memory". Line numbers are
+   * always 0 and the snippet is cut to SNIPPET_MAX_CHARS plus "...".
+   */
+  private transformResult(result: CogneeSearchResult): MemorySearchResult {
+    // Guard against a hit without text so `.length` below cannot throw.
+    const text = typeof result.text === "string" ? result.text : "";
+    const structured = result.metadata || {};
+    const embedded = CogneeMemoryProvider.parseEmbeddedMetadata(text);
+
+    // Validate instead of casting: only real strings / known sources pass.
+    const rawPath = structured.path || embedded.path;
+    const rawSource = structured.source || embedded.source;
+    const filePath = typeof rawPath === "string" && rawPath !== "" ? rawPath : "unknown";
+    const source: CogneeMemorySource = rawSource === "sessions" ? "sessions" : "memory";
+
+    // Truncate snippet to max chars
+    const snippet =
+      text.length > SNIPPET_MAX_CHARS ? text.slice(0, SNIPPET_MAX_CHARS) + "..." : text;
+
+    return {
+      path: filePath,
+      startLine: 0, // Cognee doesn't provide line numbers
+      endLine: 0,
+      score: result.score,
+      snippet,
+      source,
+    };
+  }
+
+  /**
+   * Parse a trailing `Metadata: {json}` line from a document or chunk.
+   * Returns an empty object when the line is absent or is not a JSON object.
+   */
+  private static parseEmbeddedMetadata(text: string): Record<string, unknown> {
+    const json = /(?:^|\n)Metadata: (\{.*\})\s*$/.exec(text)?.[1];
+    if (!json) return {};
+    try {
+      const parsed: unknown = JSON.parse(json);
+      return typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)
+        ? (parsed as Record<string, unknown>)
+        : {};
+    } catch {
+      return {};
+    }
+  }
+}
+
+/**
+ * Create a CogneeMemoryProvider and verify that the server is reachable.
+ *
+ * @throws Error when the provider's health check reports failure.
+ */
+export async function createCogneeProvider(
+  cfg: ClawdbotConfig,
+  agentId: string,
+  sources: Array<CogneeMemorySource>,
+  config: CogneeProviderConfig = {},
+): Promise<CogneeMemoryProvider> {
+  const provider = new CogneeMemoryProvider(cfg, agentId, sources, config);
+
+  // Verify connection
+  const healthy = await provider.healthCheck();
+  if (!healthy) {
+    throw new Error(
+      `Failed to connect to Cognee at ${config.baseUrl || "http://localhost:8000"}. ` +
+        `Ensure Cognee is running (see docs/memory-cognee.md for setup).`,
+    );
+  }
+
+  return provider;
+}
diff --git a/src/memory/search-manager.ts b/src/memory/search-manager.ts
index 9bcd529f3..8503c01da 100644
--- a/src/memory/search-manager.ts
+++ b/src/memory/search-manager.ts
@@ -1,8 +1,10 @@
 import type { MoltbotConfig } from "../config/config.js";
+import { resolveMemorySearchConfig } from "../agents/memory-search.js";
 import type { MemoryIndexManager } from "./manager.js";
+import type { CogneeMemoryProvider } from "./cognee-provider.js";
 
 export type MemorySearchManagerResult = {
-  manager: MemoryIndexManager | null;
+  manager: MemoryIndexManager | CogneeMemoryProvider | null;
   error?: string;
 };
 
@@ -11,6 +13,24 @@ export async function getMemorySearchManager(params: {
   agentId: string;
 }): Promise<MemorySearchManagerResult> {
   try {
+    const config = resolveMemorySearchConfig(params.cfg, params.agentId);
+    if (!config) {
+      return { manager: null, error: "Memory search is disabled" };
+    }
+
+    // Route to Cognee provider if configured
+    if (config.provider === "cognee") {
+      const { createCogneeProvider } = await import("./cognee-provider.js");
+      const manager = await createCogneeProvider(
+        params.cfg,
+        params.agentId,
+        config.sources as Array<"memory" | "sessions">,
+        config.cognee || {},
+      );
+      return { manager };
+    }
+
+    // Default to SQLite-based memory manager
     const { MemoryIndexManager } = await import("./manager.js");
     const manager = await MemoryIndexManager.get(params);
     return { manager };