added cognee draft

This commit is contained in:
vasilije 2026-01-26 22:09:00 +01:00 committed by Hande
parent 9688454a30
commit c1aabbb7ee
10 changed files with 1555 additions and 5 deletions

340
docs/memory-cognee.md Normal file
View File

@ -0,0 +1,340 @@
---
summary: "Cognee knowledge graph memory: setup, Docker config, and usage"
read_when:
- Setting up Cognee memory provider
- Configuring knowledge graph memory
- Running Cognee with Docker
---
# Cognee Memory Provider
Clawdbot supports **Cognee** as an optional memory provider. Unlike the default SQLite-based vector memory, Cognee builds a knowledge graph with entity extraction and semantic relationships, providing richer contextual memory for your AI agent.
## What is Cognee?
Cognee is an AI memory framework that:
- Extracts entities (people, places, concepts) from documents
- Builds a knowledge graph of relationships
- Enables semantic search with LLM-powered reasoning
- Supports multiple search modes (insights, chunks, summaries)
Learn more at [docs.cognee.ai](https://docs.cognee.ai/).
## Setup Options
### Option 1: Local Docker (Recommended)
Run Cognee locally using Docker Compose:
**Step 1: Create docker-compose.yml**
```yaml
version: '3.8'
services:
cognee:
image: topoteretes/cognee:latest
container_name: cognee
ports:
- "8000:8000"
environment:
# Optional: Set API key for authentication
- COGNEE_API_KEY=your-local-api-key
# Database configuration
- DATABASE_URL=postgresql://cognee:cognee@postgres:5432/cognee
volumes:
- cognee_data:/app/data
depends_on:
- postgres
restart: unless-stopped
postgres:
image: postgres:15-alpine
container_name: cognee-postgres
environment:
- POSTGRES_USER=cognee
- POSTGRES_PASSWORD=cognee
- POSTGRES_DB=cognee
volumes:
- postgres_data:/var/lib/postgresql/data
restart: unless-stopped
volumes:
cognee_data:
postgres_data:
```
**Step 2: Start Cognee**
```bash
docker-compose up -d
```
**Step 3: Verify**
```bash
curl http://localhost:8000/status
# Should return: {"status":"healthy"}
```
### Option 2: Cognee Cloud
Use the hosted Cognee service:
1. Sign up at [platform.cognee.ai](https://platform.cognee.ai/)
2. Get your API key from the dashboard
3. Use base URL: `https://cognee--cognee-saas-backend-serve.modal.run`
## Configuration
Add Cognee memory configuration to your `~/.clawdbot/config.yaml`:
### Basic Configuration (Docker Local)
```yaml
agents:
defaults:
memorySearch:
enabled: true
provider: cognee
sources: [memory] # or [memory, sessions]
cognee:
baseUrl: http://localhost:8000
datasetName: clawdbot
searchType: insights # or "chunks", "summaries"
maxResults: 6
autoCognify: true
```
### Cloud Configuration
```yaml
agents:
defaults:
memorySearch:
enabled: true
provider: cognee
sources: [memory, sessions]
cognee:
baseUrl: https://cognee--cognee-saas-backend-serve.modal.run
apiKey: your-api-key-here # Required for cloud
datasetName: clawdbot
searchType: insights
maxResults: 8
autoCognify: true
timeoutSeconds: 60
```
## Configuration Options
| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `baseUrl` | string | `http://localhost:8000` | Cognee API endpoint |
| `apiKey` | string | - | API key (required for cloud, optional for local) |
| `datasetName` | string | `"clawdbot"` | Dataset name for organizing memories |
| `searchType` | string | `"insights"` | Search mode: `insights`, `chunks`, or `summaries` |
| `maxResults` | number | `6` | Maximum search results returned |
| `autoCognify` | boolean | `true` | Auto-process documents after adding |
| `cognifyBatchSize` | number | `100` | Batch size for processing |
| `timeoutSeconds` | number | `30` | Request timeout in seconds |
## Search Types
Cognee offers three search modes:
### Insights (Recommended)
Best for: **High-level understanding and reasoning**
- Returns AI-generated insights from the knowledge graph
- Combines multiple related facts
- Good for: "What projects am I working on?" or "Summarize my notes about X"
### Chunks
Best for: **Specific text matching**
- Returns raw document chunks
- Similar to traditional vector search
- Good for: Finding exact quotes or specific information
### Summaries
Best for: **Document overviews**
- Returns condensed summaries
- Good for: Quick scanning of content
## Usage
### Memory Files
Cognee automatically syncs your memory files:
- `MEMORY.md` or `memory.md` in workspace root
- All `*.md` files in `memory/` directory
### Session Transcripts (Optional)
Enable session memory to index conversation history:
```yaml
agents:
defaults:
memorySearch:
provider: cognee
sources: [memory, sessions] # Include sessions
experimental:
sessionMemory: true
```
### Manual Sync
Force a memory sync (planned; the command below is not yet implemented):
```bash
# Not yet implemented - coming soon
clawdbot memory sync --provider cognee
```
### Check Status
```bash
# Not yet implemented - coming soon
clawdbot memory status --provider cognee
```
## How It Works
1. **Add**: Memory files are sent to Cognee with metadata
2. **Cognify**: Cognee processes documents:
- Extracts entities (people, places, concepts)
- Identifies relationships
- Builds knowledge graph
3. **Search**: Agent queries use semantic search:
- Searches knowledge graph
- Returns relevant insights/chunks/summaries
- Includes metadata and scores
## Comparison: Cognee vs SQLite Memory
| Feature | Cognee | SQLite (Default) |
|---------|--------|------------------|
| **Setup** | Requires Docker/cloud | Built-in, no setup |
| **Offline** | No (needs service) | Yes (fully local) |
| **Search** | Knowledge graph + LLM | Vector + BM25 hybrid |
| **Entities** | Extracted automatically | Not available |
| **Relationships** | Yes (graph-based) | No |
| **Speed** | Slower (API calls) | Faster (local DB) |
| **Storage** | External Cognee service | Local SQLite file |
| **Best for** | Rich context, reasoning | Fast lookup, privacy |
## Troubleshooting
### Connection Failed
**Error**: `Failed to connect to Cognee at http://localhost:8000`
**Solutions**:
1. Verify Docker is running: `docker ps | grep cognee`
2. Check Cognee logs: `docker logs cognee`
3. Test manually: `curl http://localhost:8000/status`
4. Ensure port 8000 is not blocked
### Slow Performance
**Solutions**:
1. Reduce `maxResults` (try 3-5 instead of 10+)
2. Use `searchType: "chunks"` for faster results
3. Set `autoCognify: false` and cognify manually
4. Check Docker resource limits
### Out of Memory
**Solutions**:
1. Increase Docker memory limit (Docker Desktop settings)
2. Reduce `cognifyBatchSize` (try 50 instead of 100)
3. Process fewer files at once
4. Clear old datasets via Cognee API
## Advanced Configuration
### Per-Agent Override
```yaml
agents:
defaults:
memorySearch:
provider: openai # Default for all agents
agents:
research-bot:
memorySearch:
provider: cognee # Override for this agent
cognee:
searchType: insights
maxResults: 10
```
### Hybrid Setup (Not Yet Supported)
Future versions may support using both Cognee and SQLite:
- Cognee for semantic understanding
- SQLite for fast local lookup
## Docker Production Tips
### Health Checks
Add health checks to docker-compose.yml:
```yaml
services:
cognee:
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
```
### Resource Limits
```yaml
services:
cognee:
deploy:
resources:
limits:
cpus: '2'
memory: 4G
reservations:
cpus: '1'
memory: 2G
```
### Persistent Storage
Mount volumes for persistence:
```yaml
volumes:
- ./cognee_data:/app/data
- ./cognee_logs:/app/logs
```
## Roadmap
Planned features:
- [ ] `clawdbot memory status --provider cognee` command
- [ ] `clawdbot memory sync --provider cognee` command
- [ ] Hybrid mode (Cognee + SQLite)
- [ ] Graph visualization export
- [ ] Manual entity management
## Resources
- [Cognee Documentation](https://docs.cognee.ai/)
- [Cognee GitHub](https://github.com/topoteretes/cognee)
- [Clawdbot Memory Guide](/memory)
- [Docker Setup Guide](/install/docker)
## Feedback
Cognee integration is new. Report issues at:
- Clawdbot: [github.com/clawdbot/clawdbot/issues](https://github.com/clawdbot/clawdbot/issues)
- Cognee: [github.com/topoteretes/cognee/issues](https://github.com/topoteretes/cognee/issues)

View File

@ -0,0 +1,47 @@
# Example Clawdbot configuration with Cognee memory provider
# Copy to ~/.clawdbot/config.yaml and customize
agents:
defaults:
# Use Cognee for knowledge graph memory
memorySearch:
enabled: true
provider: cognee
sources: [memory, sessions] # Index both memory files and conversation history
# Cognee-specific configuration
cognee:
# Local Docker setup (default)
baseUrl: http://localhost:8000
# For Cognee Cloud, use:
# baseUrl: https://cognee--cognee-saas-backend-serve.modal.run
# apiKey: your-api-key-here
# Dataset name for organizing memories
datasetName: clawdbot
# Search mode: "insights" (recommended), "chunks", or "summaries"
searchType: insights
# Maximum search results to return
maxResults: 6
# Automatically process documents after adding (recommended)
autoCognify: true
# Batch size for processing multiple documents
cognifyBatchSize: 100
# Request timeout in seconds
timeoutSeconds: 30
# Enable experimental session memory indexing
experimental:
sessionMemory: true
# To use default SQLite memory instead, set:
# agents:
# defaults:
# memorySearch:
# provider: auto # or "openai", "gemini", "local"

View File

@ -0,0 +1,77 @@
# Docker Compose configuration for running Cognee locally with Clawdbot
#
# Usage:
# 1. Copy this file to your preferred location
# 2. Run: docker-compose -f cognee-docker-compose.yaml up -d
# 3. Verify: curl http://localhost:8000/status
# 4. Configure Clawdbot with baseUrl: http://localhost:8000
#
# For production, see docs/memory-cognee.md for additional configuration
version: '3.8'
services:
# Cognee API server
cognee:
image: topoteretes/cognee:latest
container_name: cognee
ports:
- "8000:8000"
environment:
# Optional: Set API key for authentication
# Remove or comment out for local development without auth
- COGNEE_API_KEY=${COGNEE_API_KEY:-}
# Database connection
- DATABASE_URL=postgresql://cognee:cognee@postgres:5432/cognee
# Optional: Configure LLM provider for entity extraction
# Uncomment and set if you want to use specific providers
# - OPENAI_API_KEY=${OPENAI_API_KEY:-}
# - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
volumes:
- cognee_data:/app/data
- cognee_logs:/app/logs
depends_on:
postgres:
condition: service_healthy
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
networks:
- cognee-network
# PostgreSQL database for Cognee
postgres:
image: postgres:15-alpine
container_name: cognee-postgres
environment:
- POSTGRES_USER=cognee
- POSTGRES_PASSWORD=cognee
- POSTGRES_DB=cognee
volumes:
- postgres_data:/var/lib/postgresql/data
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "pg_isready -U cognee"]
interval: 10s
timeout: 5s
retries: 5
networks:
- cognee-network
volumes:
cognee_data:
driver: local
cognee_logs:
driver: local
postgres_data:
driver: local
networks:
cognee-network:
driver: bridge

View File

@ -9,7 +9,7 @@ import { resolveAgentConfig } from "./agent-scope.js";
export type ResolvedMemorySearchConfig = {
enabled: boolean;
sources: Array<"memory" | "sessions">;
provider: "openai" | "local" | "gemini" | "auto";
provider: "openai" | "local" | "gemini" | "auto" | "cognee";
remote?: {
baseUrl?: string;
apiKey?: string;
@ -25,12 +25,22 @@ export type ResolvedMemorySearchConfig = {
experimental: {
sessionMemory: boolean;
};
fallback: "openai" | "gemini" | "local" | "none";
fallback: "openai" | "gemini" | "local" | "cognee" | "none";
model: string;
local: {
modelPath?: string;
modelCacheDir?: string;
};
cognee?: {
baseUrl?: string;
apiKey?: string;
datasetName?: string;
searchType?: "insights" | "chunks" | "summaries";
maxResults?: number;
timeoutSeconds?: number;
autoCognify?: boolean;
cognifyBatchSize?: number;
};
store: {
driver: "sqlite";
path: string;
@ -222,6 +232,20 @@ function mergeConfig(
enabled: overrides?.cache?.enabled ?? defaults?.cache?.enabled ?? DEFAULT_CACHE_ENABLED,
maxEntries: overrides?.cache?.maxEntries ?? defaults?.cache?.maxEntries,
};
const cognee =
provider === "cognee"
? {
baseUrl: overrides?.cognee?.baseUrl ?? defaults?.cognee?.baseUrl,
apiKey: overrides?.cognee?.apiKey ?? defaults?.cognee?.apiKey,
datasetName: overrides?.cognee?.datasetName ?? defaults?.cognee?.datasetName,
searchType: overrides?.cognee?.searchType ?? defaults?.cognee?.searchType,
maxResults: overrides?.cognee?.maxResults ?? defaults?.cognee?.maxResults,
timeoutSeconds: overrides?.cognee?.timeoutSeconds ?? defaults?.cognee?.timeoutSeconds,
autoCognify: overrides?.cognee?.autoCognify ?? defaults?.cognee?.autoCognify,
cognifyBatchSize:
overrides?.cognee?.cognifyBatchSize ?? defaults?.cognee?.cognifyBatchSize,
}
: undefined;
const overlap = clampNumber(chunking.overlap, 0, Math.max(0, chunking.tokens - 1));
const minScore = clampNumber(query.minScore, 0, 1);
@ -244,6 +268,7 @@ function mergeConfig(
fallback,
model,
local,
cognee,
store,
chunking: { tokens: Math.max(1, chunking.tokens), overlap },
sync: {

View File

@ -232,7 +232,7 @@ export type MemorySearchConfig = {
sessionMemory?: boolean;
};
/** Embedding provider mode. */
provider?: "openai" | "gemini" | "local";
provider?: "openai" | "gemini" | "local" | "cognee";
remote?: {
baseUrl?: string;
apiKey?: string;
@ -251,7 +251,7 @@ export type MemorySearchConfig = {
};
};
/** Fallback behavior when embeddings fail. */
fallback?: "openai" | "gemini" | "local" | "none";
fallback?: "openai" | "gemini" | "local" | "cognee" | "none";
/** Embedding model id (remote) or alias (local). */
model?: string;
/** Local embedding settings (node-llama-cpp). */
@ -261,6 +261,25 @@ export type MemorySearchConfig = {
/** Optional cache directory for local models. */
modelCacheDir?: string;
};
/** Cognee knowledge graph memory settings. */
cognee?: {
/** Cognee API endpoint (default: http://localhost:8000). */
baseUrl?: string;
/** Cognee API key (required for cloud, optional for local). */
apiKey?: string;
/** Dataset name for organizing memories (default: "clawdbot"). */
datasetName?: string;
/** Search type: "insights", "chunks", or "summaries" (default: "insights"). */
searchType?: "insights" | "chunks" | "summaries";
/** Max results per search query (default: 6). */
maxResults?: number;
/** Timeout for API requests in seconds (default: 30). */
timeoutSeconds?: number;
/** Enable automatic cognify after adding documents (default: true). */
autoCognify?: boolean;
/** Cognify batch size for processing (default: 100). */
cognifyBatchSize?: number;
};
/** Index storage configuration. */
store?: {
driver?: "sqlite";

View File

@ -0,0 +1,239 @@
import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
import { CogneeClient } from "./cognee-client.js";
import { request } from "undici";
// Swap undici's `request` for a mock so the client never performs real HTTP.
vi.mock("undici", () => {
  return { request: vi.fn() };
});

describe("CogneeClient", () => {
  /** Makes the mocked `request` resolve with a response whose body parses to `payload`. */
  const respondWithJson = (payload: unknown, statusCode = 200): void => {
    const fakeResponse = {
      statusCode,
      body: {
        json: vi.fn().mockResolvedValue(payload),
        text: vi.fn(),
      },
    };
    vi.mocked(request).mockResolvedValue(fakeResponse as any);
  };

  /** Makes the mocked `request` resolve with a response that only exposes a text body. */
  const respondWithText = (statusCode: number, text: string): void => {
    const fakeResponse = {
      statusCode,
      body: {
        text: vi.fn().mockResolvedValue(text),
      },
    };
    vi.mocked(request).mockResolvedValue(fakeResponse as any);
  };

  beforeEach(() => {
    vi.clearAllMocks();
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  describe("add", () => {
    it("should add data successfully", async () => {
      respondWithJson({
        dataset_id: "test-dataset-id",
        dataset_name: "test-dataset",
        message: "Data added successfully",
      });
      const cognee = new CogneeClient({
        baseUrl: "http://localhost:8000",
        apiKey: "test-key",
      });

      const added = await cognee.add({ data: "Test data", datasetName: "test-dataset" });

      // The snake_case wire format must come back as camelCase.
      expect(added).toEqual({
        datasetId: "test-dataset-id",
        datasetName: "test-dataset",
        message: "Data added successfully",
      });
      const expectedHeaders = expect.objectContaining({
        "Content-Type": "application/json",
        "X-Api-Key": "test-key",
      });
      expect(request).toHaveBeenCalledWith(
        "http://localhost:8000/add",
        expect.objectContaining({ method: "POST", headers: expectedHeaders }),
      );
    });

    it("should handle errors", async () => {
      respondWithText(500, "Internal server error");
      const cognee = new CogneeClient();

      const pending = cognee.add({ data: "Test data", datasetName: "test-dataset" });

      await expect(pending).rejects.toThrow("Cognee add failed with status 500");
    });
  });

  describe("cognify", () => {
    it("should run cognify successfully", async () => {
      respondWithJson({ status: "success", message: "Cognify completed" });
      const cognee = new CogneeClient({ baseUrl: "http://localhost:8000" });

      const outcome = await cognee.cognify({ datasetIds: ["dataset-1"] });

      expect(outcome).toEqual({ status: "success", message: "Cognify completed" });
    });
  });

  describe("search", () => {
    it("should search successfully", async () => {
      respondWithJson({
        results: [
          {
            id: "result-1",
            text: "Test result",
            score: 0.9,
            metadata: { path: "test.md" },
          },
        ],
        query: "test query",
        search_type: "insights",
      });
      const cognee = new CogneeClient();

      const found = await cognee.search({ queryText: "test query", searchType: "insights" });

      expect(found.results).toHaveLength(1);
      expect(found.results[0]).toEqual({
        id: "result-1",
        text: "Test result",
        score: 0.9,
        metadata: { path: "test.md" },
      });
      expect(found.query).toBe("test query");
    });

    it("should use default search type", async () => {
      respondWithJson({ results: [], query: "test", search_type: "insights" });
      const cognee = new CogneeClient();

      await cognee.search({ queryText: "test" });

      // No searchType was passed, so the serialized body must carry the default.
      const expectedOptions = expect.objectContaining({
        body: expect.stringContaining('"search_type":"insights"'),
      });
      expect(request).toHaveBeenCalledWith(expect.any(String), expectedOptions);
    });
  });

  describe("status", () => {
    it("should get status successfully", async () => {
      respondWithJson({
        status: "healthy",
        version: "1.0.0",
        datasets: [{ id: "dataset-1", name: "test-dataset", document_count: 10 }],
      });
      const cognee = new CogneeClient();

      const report = await cognee.status();

      expect(report).toEqual({
        status: "healthy",
        version: "1.0.0",
        datasets: [{ id: "dataset-1", name: "test-dataset", documentCount: 10 }],
      });
    });
  });

  describe("healthCheck", () => {
    it("should return true when status is successful", async () => {
      respondWithJson({ status: "healthy" });
      const cognee = new CogneeClient();

      const healthy = await cognee.healthCheck();

      expect(healthy).toBe(true);
    });

    it("should return false when status fails", async () => {
      vi.mocked(request).mockRejectedValue(new Error("Connection failed"));
      const cognee = new CogneeClient();

      const healthy = await cognee.healthCheck();

      expect(healthy).toBe(false);
    });
  });
});

299
src/memory/cognee-client.ts Normal file
View File

@ -0,0 +1,299 @@
import { request } from "undici";
import { createSubsystemLogger } from "../logging/subsystem.js";
// Logger for this module, registered under the "cognee" subsystem name.
const log = createSubsystemLogger("cognee");
// Base URL used when the client config does not supply one.
const DEFAULT_BASE_URL = "http://localhost:8000";
// Request timeout (milliseconds, i.e. 30 seconds) used when the client config does not supply one.
const DEFAULT_TIMEOUT_MS = 30_000;
/** Connection settings accepted by the `CogneeClient` constructor; every field is optional. */
export type CogneeClientConfig = {
/** Root URL of the Cognee service. */
baseUrl?: string;
/** API key; when set it is sent with each request. */
apiKey?: string;
/** Timeout for each request, in milliseconds. */
timeoutMs?: number;
};
/** Input for `CogneeClient.add`: the text to store and the dataset it belongs to. */
export type CogneeAddRequest = {
/** Text content to add. */
data: string;
/** Target dataset name. */
datasetName?: string;
/** Target dataset id. */
datasetId?: string;
};
/** Result of `CogneeClient.add`, with the service's snake_case fields mapped to camelCase. */
export type CogneeAddResponse = {
datasetId: string;
datasetName: string;
message: string;
};
/** Input for `CogneeClient.cognify`. */
export type CogneeCognifyRequest = {
/** Datasets to process; may be omitted. */
datasetIds?: string[];
};
/** Result of `CogneeClient.cognify`. */
export type CogneeCognifyResponse = {
status: string;
message: string;
};
/** Input for `CogneeClient.search`. */
export type CogneeSearchRequest = {
/** The query string to search for. */
queryText: string;
/** Search mode; the client sends "insights" when this is not set. */
searchType?: "insights" | "chunks" | "summaries";
/** Datasets to search; may be omitted. */
datasetIds?: string[];
};
/** One hit returned by a Cognee search. */
export type CogneeSearchResult = {
  /** Identifier of the hit as reported by the service. */
  id: string;
  /** Text content of the hit. */
  text: string;
  /** Relevance score as reported by the service. */
  score: number;
  /** Free-form metadata attached to the hit, if any. */
  metadata?: Record<string, unknown>;
};

/** Result of `CogneeClient.search`, with the service's `search_type` mapped to `searchType`. */
export type CogneeSearchResponse = {
  results: CogneeSearchResult[];
  /** The query as echoed back by the service. */
  query: string;
  /** The search mode as echoed back by the service. */
  searchType: string;
};

/** Result of `CogneeClient.status`, with `document_count` mapped to `documentCount`. */
export type CogneeStatusResponse = {
  status: string;
  version?: string;
  /** Datasets listed by the service, when it reports any. */
  datasets?: Array<{
    id: string;
    name: string;
    documentCount?: number;
  }>;
};
/**
 * Small HTTP client for a Cognee service: add data, run cognify, search, and
 * read status.
 *
 * Requests with a payload are sent as JSON. When an API key is configured it
 * is attached as the `X-Api-Key` header. The configured timeout is applied to
 * both the headers and the body phase of each undici request.
 *
 * Failure contract: every method rejects with an `Error`.
 * - A non-2xx response rejects with
 *   `Cognee <op> failed with status <code>: <response text>`.
 * - Anything else (connection failure, timeout, unparsable body) rejects with
 *   `Cognee <op> request failed: <reason>`.
 *
 * NOTE(review): the endpoint paths (`/add`, `/cognify`, `/search`, `/status`),
 * the `X-Api-Key` header and the snake_case payload fields are what this draft
 * assumes — confirm them against the Cognee API reference for the targeted
 * server version.
 */
export class CogneeClient {
  private readonly baseUrl: string;
  private readonly apiKey?: string;
  private readonly timeoutMs: number;

  /**
   * @param config Optional connection settings. A missing or empty `baseUrl`
   *   falls back to the default; a missing, non-finite or non-positive
   *   `timeoutMs` falls back to the default timeout.
   */
  constructor(config: CogneeClientConfig = {}) {
    // Strip every trailing slash so `${baseUrl}/path` never contains "//".
    this.baseUrl = config.baseUrl?.replace(/\/+$/, "") || DEFAULT_BASE_URL;
    this.apiKey = config.apiKey;
    const requested = config.timeoutMs;
    this.timeoutMs =
      typeof requested === "number" && Number.isFinite(requested) && requested > 0
        ? requested
        : DEFAULT_TIMEOUT_MS;
  }

  /**
   * Sends text to Cognee for storage in a dataset.
   *
   * @param req Text plus the optional dataset name/id to store it under.
   * @returns The dataset id, dataset name and message reported by the service.
   * @throws Error when the request fails or the service answers with a non-2xx status.
   */
  async add(req: CogneeAddRequest): Promise<CogneeAddResponse> {
    log.debug("Adding data to Cognee", {
      url: `${this.baseUrl}/add`,
      datasetName: req.datasetName,
      dataLength: req.data.length,
    });
    const data = await this.send<{
      dataset_id: string;
      dataset_name: string;
      message: string;
    }>({
      operation: "add",
      failureLog: "Failed to add data to Cognee",
      method: "POST",
      path: "/add",
      payload: {
        data: req.data,
        dataset_name: req.datasetName,
        dataset_id: req.datasetId,
      },
    });
    return {
      datasetId: data.dataset_id,
      datasetName: data.dataset_name,
      message: data.message,
    };
  }

  /**
   * Asks Cognee to process ("cognify") previously added data.
   *
   * @param req Optional list of dataset ids to process.
   * @returns The status and message reported by the service.
   * @throws Error when the request fails or the service answers with a non-2xx status.
   */
  async cognify(req: CogneeCognifyRequest = {}): Promise<CogneeCognifyResponse> {
    log.debug("Running cognify", { url: `${this.baseUrl}/cognify`, datasetIds: req.datasetIds });
    const data = await this.send<{ status: string; message: string }>({
      operation: "cognify",
      failureLog: "Failed to cognify",
      method: "POST",
      path: "/cognify",
      payload: { dataset_ids: req.datasetIds },
    });
    return { status: data.status, message: data.message };
  }

  /**
   * Runs a search query against Cognee.
   *
   * @param req Query text, optional search mode (defaults to "insights") and
   *   optional dataset ids.
   * @returns The hits plus the query and search type echoed by the service. A
   *   response without a `results` field yields an empty hit list.
   * @throws Error when the request fails or the service answers with a non-2xx status.
   */
  async search(req: CogneeSearchRequest): Promise<CogneeSearchResponse> {
    log.debug("Searching Cognee", {
      url: `${this.baseUrl}/search`,
      query: req.queryText,
      searchType: req.searchType,
    });
    const data = await this.send<{
      results?: Array<{
        id: string;
        text: string;
        score: number;
        metadata?: Record<string, unknown>;
      }>;
      query: string;
      search_type: string;
    }>({
      operation: "search",
      failureLog: "Failed to search Cognee",
      method: "POST",
      path: "/search",
      payload: {
        query_text: req.queryText,
        search_type: req.searchType || "insights",
        dataset_ids: req.datasetIds,
      },
    });
    return {
      // Tolerate a body without `results` instead of crashing on `.map`.
      results: (data.results ?? []).map((r) => ({
        id: r.id,
        text: r.text,
        score: r.score,
        metadata: r.metadata,
      })),
      query: data.query,
      searchType: data.search_type,
    };
  }

  /**
   * Fetches the service status.
   *
   * @returns Status, optional version and optional dataset list, with
   *   `document_count` mapped to `documentCount`.
   * @throws Error when the request fails or the service answers with a non-2xx status.
   */
  async status(): Promise<CogneeStatusResponse> {
    log.debug("Checking Cognee status", { url: `${this.baseUrl}/status` });
    const data = await this.send<{
      status: string;
      version?: string;
      datasets?: Array<{
        id: string;
        name: string;
        document_count?: number;
      }>;
    }>({
      operation: "status",
      failureLog: "Failed to get Cognee status",
      method: "GET",
      path: "/status",
    });
    return {
      status: data.status,
      version: data.version,
      datasets: data.datasets?.map((d) => ({
        id: d.id,
        name: d.name,
        documentCount: d.document_count,
      })),
    };
  }

  /**
   * Best-effort reachability probe.
   *
   * @returns `true` when `status()` succeeds, `false` on any failure (the
   *   failure itself is logged by `status()`).
   */
  async healthCheck(): Promise<boolean> {
    try {
      await this.status();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Shared request path for all endpoints: builds headers, performs the call,
   * validates the status code and parses the JSON body.
   *
   * Any 2xx status counts as success. A non-2xx status is reported with its
   * own message and is not wrapped a second time as a "request failed" error.
   */
  private async send<T>(call: {
    operation: string;
    failureLog: string;
    method: "GET" | "POST";
    path: string;
    payload?: unknown;
  }): Promise<T> {
    const url = `${this.baseUrl}${call.path}`;
    const headers: Record<string, string> = {};
    const hasPayload = call.payload !== undefined;
    if (hasPayload) {
      headers["Content-Type"] = "application/json";
    }
    if (this.apiKey) {
      headers["X-Api-Key"] = this.apiKey;
    }

    let statusFailure: Error;
    try {
      const response = await request(url, {
        method: call.method,
        headers,
        // Only attach a body when there is a payload (GET /status has none).
        ...(hasPayload ? { body: JSON.stringify(call.payload) } : {}),
        bodyTimeout: this.timeoutMs,
        headersTimeout: this.timeoutMs,
      });
      if (response.statusCode >= 200 && response.statusCode < 300) {
        return (await response.body.json()) as T;
      }
      // Reading the text also drains the body of the failed response.
      const errorText = await response.body.text();
      statusFailure = new Error(
        `Cognee ${call.operation} failed with status ${response.statusCode}: ${errorText}`,
      );
    } catch (error) {
      log.error(call.failureLog, { error });
      throw new Error(
        `Cognee ${call.operation} request failed: ${error instanceof Error ? error.message : String(error)}`,
      );
    }
    log.error(call.failureLog, { error: statusFailure });
    throw statusFailure;
  }
}

View File

@ -0,0 +1,153 @@
import { describe, expect, it, vi, beforeEach } from "vitest";
import { CogneeMemoryProvider } from "./cognee-provider.js";
import type { ClawdbotConfig } from "../config/config.js";
// Stand-in for the HTTP client: every constructed client answers with canned data.
vi.mock("./cognee-client.js", () => {
  const buildFakeClient = () => {
    return {
      healthCheck: vi.fn().mockResolvedValue(true),
      add: vi.fn().mockResolvedValue({
        datasetId: "test-dataset-id",
        datasetName: "test-dataset",
        message: "Success",
      }),
      cognify: vi.fn().mockResolvedValue({
        status: "success",
        message: "Cognify completed",
      }),
      search: vi.fn().mockResolvedValue({
        results: [
          {
            id: "result-1",
            text: "Test result text",
            score: 0.85,
            metadata: {
              path: "test.md",
              source: "memory",
            },
          },
        ],
        query: "test query",
        searchType: "insights",
      }),
      status: vi.fn().mockResolvedValue({
        status: "healthy",
        version: "1.0.0",
        datasets: [
          {
            id: "test-dataset-id",
            name: "clawdbot",
            documentCount: 5,
          },
        ],
      }),
    };
  };
  return { CogneeClient: vi.fn().mockImplementation(buildFakeClient) };
});

// File discovery helpers are stubbed so no workspace is scanned.
vi.mock("./internal.js", () => {
  return {
    listMemoryFiles: vi.fn().mockResolvedValue([]),
    buildFileEntry: vi.fn(),
    hashText: vi.fn().mockReturnValue("test-hash"),
  };
});

// Filesystem access is stubbed so the tests never touch disk.
vi.mock("node:fs/promises", () => {
  return {
    default: {
      readdir: vi.fn().mockResolvedValue([]),
      readFile: vi.fn().mockResolvedValue("Test file content"),
      stat: vi.fn().mockResolvedValue({ mtimeMs: 1234567890, size: 100 }),
    },
  };
});

describe("CogneeMemoryProvider", () => {
  const mockConfig: ClawdbotConfig = {
    agents: {
      defaults: {
        workspace: "/tmp/test-workspace",
      },
    },
  };

  /** Builds a provider for the "memory" source using only default Cognee options. */
  const defaultProvider = () => new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"]);

  beforeEach(() => {
    vi.clearAllMocks();
  });

  describe("constructor", () => {
    it("should initialize with default configuration", () => {
      expect(defaultProvider()).toBeDefined();
    });

    it("should initialize with custom configuration", () => {
      const customOptions = {
        baseUrl: "http://custom:8000",
        apiKey: "custom-key",
        datasetName: "custom-dataset",
        searchType: "chunks",
        maxResults: 10,
      } as const;
      const memory = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"], customOptions);
      expect(memory).toBeDefined();
    });
  });

  describe("healthCheck", () => {
    it("should perform health check", async () => {
      const isHealthy = await defaultProvider().healthCheck();
      expect(isHealthy).toBe(true);
    });
  });

  describe("search", () => {
    it("should search and transform results", async () => {
      const hits = await defaultProvider().search("test query");
      expect(hits).toHaveLength(1);
      const [firstHit] = hits;
      expect(firstHit).toMatchObject({
        path: "test.md",
        source: "memory",
        score: 0.85,
        snippet: "Test result text",
      });
    });

    it("should respect maxResults setting", async () => {
      const memory = new CogneeMemoryProvider(mockConfig, "test-agent", ["memory"], {
        maxResults: 5,
      });
      const hits = await memory.search("test query");
      // NOTE(review): the mocked client returns a single hit, so this bound is
      // never exercised; consider mocking more than 5 hits.
      expect(hits.length).toBeLessThanOrEqual(5);
    });
  });

  describe("cognify", () => {
    it("should run cognify", async () => {
      const pending = defaultProvider().cognify();
      await expect(pending).resolves.not.toThrow();
    });
  });

  describe("getStatus", () => {
    it("should return status information", async () => {
      const report = await defaultProvider().getStatus();
      expect(report).toMatchObject({
        connected: true,
        datasetName: "clawdbot",
        syncedFileCount: 0,
      });
    });
  });
});

View File

@ -0,0 +1,331 @@
import fs from "node:fs/promises";
import path from "node:path";
import type { ClawdbotConfig } from "../config/config.js";
import { resolveAgentWorkspaceDir } from "../agents/agent-scope.js";
import { resolveSessionTranscriptsDirForAgent } from "../config/sessions/paths.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import type { MemorySearchResult } from "./index.js";
import {
CogneeClient,
type CogneeClientConfig,
type CogneeSearchResult,
} from "./cognee-client.js";
import {
buildFileEntry,
hashText,
listMemoryFiles,
type MemoryFileEntry,
} from "./internal.js";
// Logger for this module, registered under the "cognee-provider" subsystem name.
const log = createSubsystemLogger("cognee-provider");
// Fallbacks applied by the CogneeMemoryProvider constructor when the
// corresponding provider option is not set.
const DEFAULT_DATASET_NAME = "clawdbot";
const DEFAULT_SEARCH_TYPE = "insights";
const DEFAULT_MAX_RESULTS = 6;
// Expressed in seconds; the constructor multiplies by 1000 before handing it to the client.
const DEFAULT_TIMEOUT_SECONDS = 30;
const DEFAULT_AUTO_COGNIFY = true;
const DEFAULT_COGNIFY_BATCH_SIZE = 100;
// NOTE(review): presumably the maximum snippet length in characters; its use is not visible in this part of the file.
const SNIPPET_MAX_CHARS = 700;
/** Cognee-specific options accepted by the CogneeMemoryProvider constructor; every field is optional. */
export type CogneeProviderConfig = {
/** Root URL of the Cognee service, passed through to the client. */
baseUrl?: string;
/** API key, passed through to the client. */
apiKey?: string;
/** Dataset name used by the provider. */
datasetName?: string;
/** Search mode used for queries. */
searchType?: "insights" | "chunks" | "summaries";
/** Upper bound on the number of search results returned. */
maxResults?: number;
/** Request timeout in seconds (converted to milliseconds for the client). */
timeoutSeconds?: number;
/** Whether a sync that added files is followed by a cognify run. */
autoCognify?: boolean;
/** Batch size setting for cognify processing. */
cognifyBatchSize?: number;
};
/** Kinds of content the provider can sync: memory files and session transcripts. */
export type CogneeMemorySource = "memory" | "sessions";
export class CogneeMemoryProvider {
private readonly client: CogneeClient;
private readonly cfg: ClawdbotConfig;
private readonly agentId: string;
private readonly workspaceDir: string;
private readonly datasetName: string;
private readonly searchType: "insights" | "chunks" | "summaries";
private readonly maxResults: number;
private readonly autoCognify: boolean;
private readonly cognifyBatchSize: number;
private readonly sources: Set&lt;CogneeMemorySource&gt;;
private datasetId?: string;
private syncedFiles = new Map&lt;string, string&gt;(); // path -&gt; hash
/**
 * Creates a provider bound to one agent.
 *
 * Builds the underlying Cognee client, resolves the agent's workspace
 * directory, and applies defaults for every option that is not set.
 *
 * Numeric options (`timeoutSeconds`, `maxResults`, `cognifyBatchSize`) fall
 * back to their defaults when they are missing, non-finite, zero or negative.
 * `maxResults` and `cognifyBatchSize` are additionally floored to whole
 * numbers of at least 1.
 *
 * @param cfg Full Clawdbot configuration.
 * @param agentId Id of the agent whose memory is managed.
 * @param sources Which content kinds to sync ("memory", "sessions").
 * @param config Cognee-specific options; all optional.
 */
constructor(
  cfg: ClawdbotConfig,
  agentId: string,
  sources: Array<CogneeMemorySource>,
  config: CogneeProviderConfig = {},
) {
  // Accept only finite, strictly positive numbers. A negative maxResults would
  // make `slice(0, -n)` drop hits from the end, and a negative timeout would
  // reach the HTTP client as a negative duration.
  const positiveOr = (value: number | undefined, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
  const wholeAtLeastOne = (value: number): number => Math.max(1, Math.floor(value));

  const timeoutMs = positiveOr(config.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS) * 1000;
  const clientConfig: CogneeClientConfig = {
    baseUrl: config.baseUrl,
    apiKey: config.apiKey,
    timeoutMs,
  };
  this.client = new CogneeClient(clientConfig);
  this.cfg = cfg;
  this.agentId = agentId;
  this.workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
  this.datasetName = config.datasetName || DEFAULT_DATASET_NAME;
  this.searchType = config.searchType || DEFAULT_SEARCH_TYPE;
  this.maxResults = wholeAtLeastOne(positiveOr(config.maxResults, DEFAULT_MAX_RESULTS));
  this.autoCognify = config.autoCognify ?? DEFAULT_AUTO_COGNIFY;
  this.cognifyBatchSize = wholeAtLeastOne(
    positiveOr(config.cognifyBatchSize, DEFAULT_COGNIFY_BATCH_SIZE),
  );
  this.sources = new Set(sources);
  log.info("Cognee memory provider initialized", {
    agentId,
    datasetName: this.datasetName,
    searchType: this.searchType,
    sources: Array.from(this.sources),
  });
}
/** Reports whether the Cognee service is reachable, as determined by the client's health check. */
async healthCheck(): Promise<boolean> {
  const reachable = await this.client.healthCheck();
  return reachable;
}
/**
 * Pushes the enabled sources to Cognee: memory files first, then session
 * transcripts. When auto-cognify is on and at least one file was added, a
 * cognify run follows.
 */
async sync(): Promise<void> {
  log.info("Starting Cognee memory sync", { agentId: this.agentId });

  // Memory files
  const memoryAdded = this.sources.has("memory")
    ? await this.syncFiles(await this.collectMemoryFiles(), "memory")
    : 0;

  // Session transcripts
  const sessionsAdded = this.sources.has("sessions")
    ? await this.syncFiles(await this.collectSessionFiles(), "sessions")
    : 0;

  const addedCount = memoryAdded + sessionsAdded;

  // Cognify only when something new was actually uploaded.
  const shouldCognify = this.autoCognify && addedCount > 0;
  if (shouldCognify) {
    log.info("Running cognify after sync", { addedCount });
    await this.cognify();
  }

  log.info("Cognee memory sync completed", { agentId: this.agentId, addedCount });
}
async search(query: string): Promise&lt;MemorySearchResult[]&gt; {
log.debug("Searching Cognee memory", { query, searchType: this.searchType });
try {
const response = await this.client.search({
queryText: query,
searchType: this.searchType,
datasetIds: this.datasetId ? [this.datasetId] : undefined,
});
const results: MemorySearchResult[] = response.results
.slice(0, this.maxResults)
.map((r) =&gt; this.transformResult(r));
log.debug("Cognee search completed", { query, resultCount: results.length });
return results;
} catch (error) {
log.error("Cognee search failed", { error, query });
throw error;
}
}
async cognify(): Promise&lt;void&gt; {
try {
const response = await this.client.cognify({
datasetIds: this.datasetId ? [this.datasetId] : undefined,
});
log.info("Cognify completed", { status: response.status });
} catch (error) {
log.error("Cognify failed", { error });
throw error;
}
}
async getStatus(): Promise&lt;{
connected: boolean;
datasetId?: string;
datasetName: string;
syncedFileCount: number;
version?: string;
}&gt; {
try {
const status = await this.client.status();
const dataset = status.datasets?.find((d) =&gt; d.name === this.datasetName);
return {
connected: true,
datasetId: this.datasetId || dataset?.id,
datasetName: this.datasetName,
syncedFileCount: this.syncedFiles.size,
version: status.version,
};
} catch (error) {
log.error("Failed to get Cognee status", { error });
return {
connected: false,
datasetName: this.datasetName,
syncedFileCount: this.syncedFiles.size,
};
}
}
private async collectMemoryFiles(): Promise&lt;MemoryFileEntry[]&gt; {
const files: MemoryFileEntry[] = [];
const memoryPaths = await listMemoryFiles(this.workspaceDir);
for (const absPath of memoryPaths) {
try {
const entry = await buildFileEntry(absPath, this.workspaceDir);
files.push(entry);
} catch (error) {
log.warn("Failed to process memory file", { absPath, error });
}
}
return files;
}
private async collectSessionFiles(): Promise&lt;MemoryFileEntry[]&gt; {
const files: MemoryFileEntry[] = [];
const transcriptsDir = resolveSessionTranscriptsDirForAgent(
this.cfg,
this.agentId,
);
try {
const entries = await fs.readdir(transcriptsDir, { withFileTypes: true });
for (const entry of entries) {
if (!entry.isFile() || !entry.name.endsWith(".jsonl")) continue;
const absPath = path.join(transcriptsDir, entry.name);
try {
const stat = await fs.stat(absPath);
const content = await fs.readFile(absPath, "utf-8");
const hash = hashText(content);
files.push({
path: `sessions/${entry.name}`,
absPath,
mtimeMs: stat.mtimeMs,
size: stat.size,
hash,
});
} catch (error) {
log.warn("Failed to process session file", { absPath, error });
}
}
} catch (error) {
log.debug("No session transcripts directory", { transcriptsDir });
}
return files;
}
private async syncFiles(
files: MemoryFileEntry[],
source: CogneeMemorySource,
): Promise&lt;number&gt; {
let addedCount = 0;
const batchSize = this.cognifyBatchSize;
for (let i = 0; i &lt; files.length; i += batchSize) {
const batch = files.slice(i, i + batchSize);
for (const file of batch) {
const existingHash = this.syncedFiles.get(file.path);
if (existingHash === file.hash) {
log.debug("Skipping unchanged file", { path: file.path });
continue;
}
try {
const content = await fs.readFile(file.absPath, "utf-8");
const metadata = {
path: file.path,
source,
agentId: this.agentId,
size: file.size,
mtimeMs: file.mtimeMs,
};
const dataWithMetadata = `# ${file.path}\n\n${content}\n\n---\nMetadata: ${JSON.stringify(metadata)}`;
const response = await this.client.add({
data: dataWithMetadata,
datasetName: this.datasetName,
});
if (!this.datasetId) {
this.datasetId = response.datasetId;
}
this.syncedFiles.set(file.path, file.hash);
addedCount++;
log.debug("Added file to Cognee", {
path: file.path,
datasetId: response.datasetId,
});
} catch (error) {
log.error("Failed to add file to Cognee", { path: file.path, error });
}
}
}
return addedCount;
}
private transformResult(result: CogneeSearchResult): MemorySearchResult {
// Extract path from metadata or text
const metadata = result.metadata || {};
const path = (metadata.path as string) || "unknown";
const source = (metadata.source as "memory" | "sessions") || "memory";
// Truncate snippet to max chars
let snippet = result.text;
if (snippet.length &gt; SNIPPET_MAX_CHARS) {
snippet = snippet.slice(0, SNIPPET_MAX_CHARS) + "...";
}
return {
path,
startLine: 0, // Cognee doesn't provide line numbers
endLine: 0,
score: result.score,
snippet,
source,
};
}
}
/**
 * Creates a Cognee memory provider and verifies that the server answers its
 * health check before handing the provider back.
 *
 * @param cfg Application configuration passed through to the provider.
 * @param agentId Agent whose memory the provider manages.
 * @param sources Which kinds of content the provider syncs.
 * @param config Cognee-specific settings; defaults to an empty object.
 * @returns The provider, once the health check has succeeded.
 * @throws Error when the health check reports the server as unhealthy.
 */
export async function createCogneeProvider(
  cfg: ClawdbotConfig,
  agentId: string,
  sources: Array<CogneeMemorySource>,
  config: CogneeProviderConfig = {},
): Promise<CogneeMemoryProvider> {
  const provider = new CogneeMemoryProvider(cfg, agentId, sources, config);

  // Verify connection up front so a misconfigured or stopped server is
  // reported at creation time rather than on the first sync or search.
  const healthy = await provider.healthCheck();
  if (!healthy) {
    throw new Error(
      `Failed to connect to Cognee at ${config.baseUrl || "http://localhost:8000"}. ` +
        `Ensure Cognee is running (see docs/memory-cognee.md for setup).`,
    );
  }
  return provider;
}

View File

@ -1,8 +1,10 @@
import type { MoltbotConfig } from "../config/config.js";
import { resolveMemorySearchConfig } from "../agents/memory-search.js";
import type { MemoryIndexManager } from "./manager.js";
import type { CogneeMemoryProvider } from "./cognee-provider.js";
export type MemorySearchManagerResult = {
manager: MemoryIndexManager | null;
manager: MemoryIndexManager | CogneeMemoryProvider | null;
error?: string;
};
@ -11,6 +13,24 @@ export async function getMemorySearchManager(params: {
agentId: string;
}): Promise<MemorySearchManagerResult> {
try {
const config = resolveMemorySearchConfig(params.cfg, params.agentId);
if (!config) {
return { manager: null, error: "Memory search is disabled" };
}
// Route to Cognee provider if configured
if (config.provider === "cognee") {
const { createCogneeProvider } = await import("./cognee-provider.js");
const manager = await createCogneeProvider(
params.cfg,
params.agentId,
config.sources as Array<"memory" | "sessions">,
config.cognee || {},
);
return { manager };
}
// Default to SQLite-based memory manager
const { MemoryIndexManager } = await import("./manager.js");
const manager = await MemoryIndexManager.get(params);
return { manager };