diff --git a/docs/concepts/memory-migration.md b/docs/concepts/memory-migration.md new file mode 100644 index 000000000..538183b3b --- /dev/null +++ b/docs/concepts/memory-migration.md @@ -0,0 +1,172 @@ +--- +summary: "Migration guide for hierarchical memory structure (flat → YYYY/MM/)" +read_when: + - Upgrading from older Moltbot versions + - Understanding memory file format changes +--- + +# Memory Migration Guide + +## Overview + +Moltbot now uses a **hierarchical memory structure** for better scalability: + +**Before (flat):** +``` +memory/2025-01-27.md +memory/2025-01-26-conversation.md +...365+ files in one directory per year +``` + +**After (hierarchical):** +``` +memory/2025/01/2025-01-27.md +memory/2025/01/2025-01-26-conversation.md +...max 31 files per directory +``` + +## Why This Change? + +The flat structure worked well for small memory sets, but caused issues over time: + +- **Filesystem performance**: 1000+ files in one directory slows down filesystem operations +- **Navigation difficulty**: Hard to find files manually +- **No natural segmentation**: Cannot easily archive by month/year + +The hierarchical structure solves all these issues. + +--- + +## Backward Compatibility + +**Good news:** Moltbot automatically migrates old-format files. + +### How It Works + +When Moltbot encounters an old-format file (`memory/YYYY-MM-DD.md`): + +1. **Detects** the old format +2. **Creates** the new directory structure (`memory/YYYY/MM/`) +3. **Copies** the file to the new location +4. **Logs** the migration +5. **Uses** the new file going forward + +The old file is kept as a backup (not deleted automatically). + +### What You Need to Do + +**Nothing.** Migration is automatic and transparent. + +When you: +- Search memory → old files are migrated automatically +- Read memory → migrated files are used +- Write memory → new format is used + +--- + +## Migration Examples + +### Example 1: Automatic Migration + +You have a file at `memory/2025-01-27.md`. 
+ +When Moltbot reads memory: +``` +[memory] Migrated old-format memory file: 2025-01-27.md -> memory/2025/01/2025-01-27.md +``` + +File is now at: `memory/2025/01/2025-01-27.md` + +### Example 2: Slug Files + +You have `memory/2025-01-27-discussion.md`. + +Migration creates: +``` +memory/2025/01/2025-01-27-discussion.md +``` + +### Example 3: Existing New-Format Files + +If `memory/2025/01/2025-01-27.md` already exists: +- Migration skips the old file +- New file is kept (no overwrite) +- Warning logged + +--- + +## Rolling Back + +If you need to revert to the old format: + +1. **Stop** Moltbot +2. **Delete** the `memory/YYYY/` directories +3. **Restore** from backup (if you kept one) +4. **Restart** Moltbot + +**Note:** Old-format files are not deleted during migration, so both formats stay on disk until you remove the old files yourself. While old-format files remain in `memory/`, Moltbot re-creates their `memory/YYYY/MM/` copies on the next memory read, and notes written after migration exist only in the new-format files. + +--- + +## Troubleshooting + +### Migration Fails + +If migration fails for a file: +- Check file permissions +- Ensure workspace directory is writable +- Check logs for specific error messages + +### Files Not Found After Migration + +If memory files seem missing: +- Check new location: `memory/YYYY/MM/` +- Old files still exist in `memory/` (backup) +- Memory search works across both formats + +### Performance Issues After Migration + +If you still see performance issues: +- Run `moltbot memory status` to check index +- Rebuild index: `moltbot memory index --force` +- Check disk space + +--- + +## Timeline + +| Version | Status | +|---------|--------| +| **Current** | Both formats supported, automatic migration | +| **Future +1** | Warning: old format deprecated | +| **Future +2** | Error: old format not supported | +| **Future +3** | Old format removed | + +**Recommendation:** No immediate action needed. Migration is automatic. + +--- + +## FAQ + +**Q: Will my old memory files be deleted?** +A: No. Old files are kept as backup. + +**Q: Do I need to update my scripts?** +A: No. Moltbot handles both formats transparently. 
+ +**Q: What if I have both formats?** +A: New format takes precedence. Old format is ignored if new exists. + +**Q: Can I keep using the old format?** +A: Yes, for now. Old format will be deprecated in future versions. + +**Q: How do I know if migration happened?** +A: Check logs for `[memory] Migrated old-format memory file` messages. + +--- + +## See Also + +- [Memory](/concepts/memory) - Memory system overview +- [`moltbot memory`](/cli/memory) - CLI commands for memory management +- [Agent Workspace](/concepts/agent-workspace) - Workspace structure diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index f2bca461a..d459766e2 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -16,8 +16,8 @@ Memory search tools are provided by the active memory plugin (default: The default workspace layout uses two memory layers: -- `memory/YYYY-MM-DD.md` - - Daily log (append-only). +- `memory/YYYY/MM/YYYY-MM-DD.md` + - Daily log (append-only), organized by year/month hierarchy. - Read today + yesterday at session start. - `MEMORY.md` (optional) - Curated long-term memory. @@ -29,7 +29,7 @@ These files live under the workspace (`agents.defaults.workspace`, default ## When to write memory - Decisions, preferences, and durable facts go to `MEMORY.md`. -- Day-to-day notes and running context go to `memory/YYYY-MM-DD.md`. +- Day-to-day notes and running context go to `memory/YYYY/MM/YYYY-MM-DD.md`. - If someone says "remember this," write it down (do not keep it in RAM). - This area is still evolving. It helps to remind the model to store memories; it will know what to do. - If you want something to stick, **ask the bot to write it** into memory. @@ -53,7 +53,7 @@ This is controlled by `agents.defaults.compaction.memoryFlush`: enabled: true, softThresholdTokens: 4000, systemPrompt: "Session nearing compaction. Store durable memories now.", - prompt: "Write any lasting notes to memory/YYYY-MM-DD.md; reply with NO_REPLY if nothing to store." 
+ prompt: "Write any lasting notes to memory/YYYY/MM/YYYY-MM-DD.md; reply with NO_REPLY if nothing to store." } } } @@ -408,3 +408,50 @@ agents: { Notes: - `remote.*` takes precedence over `models.providers.openai.*`. - `remote.headers` merge with OpenAI headers; remote wins on key conflicts. Omit `remote.headers` to use the OpenAI defaults. + +--- + +## Backward Compatibility & Migration + +Moltbot supports **automatic migration** from the old memory format to the new hierarchical structure. + +### Old Format vs New Format + +**Old (flat):** +``` +memory/2025-01-27.md +memory/2025-01-26-conversation.md +``` + +**New (hierarchical):** +``` +memory/2025/01/2025-01-27.md +memory/2025/01/2025-01-26-conversation.md +``` + +### Automatic Migration + +When Moltbot encounters old-format files: +1. Detects the old format +2. Creates `memory/YYYY/MM/` directories +3. Copies files to new location +4. Uses new format going forward + +**Old files are not deleted** — they're kept as backup. + +### What You Need to Do + +**Nothing.** Migration is transparent and automatic. + +Memory search, read, and write operations work seamlessly across both formats. + +### Deprecation Timeline + +| Version | Old Format Support | +|---------|-------------------| +| **Current** | ✅ Fully supported, automatic migration | +| **Future +1** | ⚠️ Deprecated warning | +| **Future +2** | ❌ Not supported (error) | +| **Future +3** | 🗑️ Removed from codebase | + +**See:** [Memory Migration Guide](/concepts/memory-migration) for detailed migration instructions. 
diff --git a/docs/help/faq.md b/docs/help/faq.md index 7372a4997..d779b509f 100644 --- a/docs/help/faq.md +++ b/docs/help/faq.md @@ -1122,7 +1122,7 @@ Set `agents.defaults.sandbox.docker.binds` to `["host:path:mode"]` (e.g., `"/hom ### How does memory work Moltbot memory is just Markdown files in the agent workspace: -- Daily notes in `memory/YYYY-MM-DD.md` +- Daily notes in `memory/YYYY/MM/YYYY-MM-DD.md` - Curated long-term notes in `MEMORY.md` (main/private sessions only) Moltbot also runs a **silent pre-compaction memory flush** to remind the model @@ -1132,7 +1132,7 @@ is writable (read-only sandboxes skip it). See [Memory](/concepts/memory). ### Memory keeps forgetting things How do I make it stick Ask the bot to **write the fact to memory**. Long-term notes belong in `MEMORY.md`, -short-term context goes into `memory/YYYY-MM-DD.md`. +short-term context goes into `memory/YYYY/MM/YYYY-MM-DD.md`. This is still an area we are improving. It helps to remind the model to store memories; it will know what to do. If it keeps forgetting, verify the Gateway is using the same @@ -1147,6 +1147,36 @@ does **not** grant embeddings access, so **signing in with Codex (OAuth or the Codex CLI login)** does not help for semantic memory search. OpenAI embeddings still need a real API key (`OPENAI_API_KEY` or `models.providers.openai.apiKey`). +### How do I migrate my old memory files to the new format + +Moltbot **automatically migrates** old-format memory files. + +If you have files in the old format (`memory/YYYY-MM-DD.md`), Moltbot will: +1. Detect them automatically +2. Create new directories (`memory/YYYY/MM/`) +3. Copy files to the new location +4. Use the new format going forward + +**Old files are not deleted** — they're kept as backup. + +**Docs:** [Memory Migration Guide](/concepts/memory-migration) + +### Will my old memory files still work after upgrading + +**Yes.** Moltbot supports both old and new memory formats with automatic migration. 
+ +- Memory search works across both formats +- Old files are automatically migrated when accessed +- No manual action required + +**Timeline:** +- Current version: Both formats supported +- Future +1: Old format deprecated (warning) +- Future +2: Old format not supported (error) +- Future +3: Old format removed + +**See:** [Memory Migration Guide](/concepts/memory-migration) for details. + If you don’t set a provider explicitly, Moltbot auto-selects a provider when it can resolve an API key (auth profiles, `models.providers.*.apiKey`, or env vars). It prefers OpenAI if an OpenAI key resolves, otherwise Gemini if a Gemini key @@ -1209,7 +1239,7 @@ Your **workspace** (AGENTS.md, memory files, skills, etc.) is separate and confi These files live in the **agent workspace**, not `~/.clawdbot`. - **Workspace (per agent)**: `AGENTS.md`, `SOUL.md`, `IDENTITY.md`, `USER.md`, - `MEMORY.md` (or `memory.md`), `memory/YYYY-MM-DD.md`, optional `HEARTBEAT.md`. + `MEMORY.md` (or `memory.md`), `memory/YYYY/MM/YYYY-MM-DD.md`, optional `HEARTBEAT.md`. - **State dir (`~/.clawdbot`)**: config, credentials, auth profiles, sessions, logs, and shared skills (`~/.clawdbot/skills`). 
diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts index cccd1fa49..feb612cea 100644 --- a/src/memory/index.test.ts +++ b/src/memory/index.test.ts @@ -84,7 +84,7 @@ describe("memory index", () => { await result.manager.sync({ force: true }); const results = await result.manager.search("alpha"); expect(results.length).toBeGreaterThan(0); - expect(results[0]?.path).toContain("memory/2026-01-12.md"); + expect(results[0]?.path).toContain("memory/2026/01/2026-01-12.md"); const status = result.manager.status(); expect(status.sourceCounts).toEqual( expect.arrayContaining([ @@ -254,7 +254,7 @@ describe("memory index", () => { await manager.sync({ force: true }); const results = await manager.search("zebra"); expect(results.length).toBeGreaterThan(0); - expect(results[0]?.path).toContain("memory/2026-01-12.md"); + expect(results[0]?.path).toContain("memory/2026/01/2026-01-12.md"); }); it("hybrid weights can favor vector-only matches over keyword-only matches", async () => { diff --git a/src/memory/internal.test.ts b/src/memory/internal.test.ts index 7530d8e44..52b14731d 100644 --- a/src/memory/internal.test.ts +++ b/src/memory/internal.test.ts @@ -4,7 +4,14 @@ import path from "node:path"; import { afterEach, beforeEach, describe, expect, it } from "vitest"; -import { chunkMarkdown, listMemoryFiles, normalizeExtraMemoryPaths } from "./internal.js"; +import { + chunkMarkdown, + isOldMemoryFormat, + listMemoryFiles, + migrateAllMemoryFiles, + migrateMemoryFile, + normalizeExtraMemoryPaths, +} from "./internal.js"; describe("normalizeExtraMemoryPaths", () => { it("trims, resolves, and dedupes paths", () => { @@ -125,3 +132,300 @@ describe("chunkMarkdown", () => { } }); }); + +describe("isOldMemoryFormat", () => { + it("matches old-format date files", () => { + expect(isOldMemoryFormat("memory/2025-01-27.md")).toBe(true); + expect(isOldMemoryFormat("memory/2024-12-31.md")).toBe(true); + expect(isOldMemoryFormat("memory/2023-06-15.md")).toBe(true); + }); + + 
it("matches old-format date files with slugs", () => { + expect(isOldMemoryFormat("memory/2025-01-27-discussion.md")).toBe(true); + expect(isOldMemoryFormat("memory/2025-01-27-bug-fix.md")).toBe(true); + expect(isOldMemoryFormat("memory/2024-12-31-year-end.md")).toBe(true); + }); + + it("does not match new hierarchical format", () => { + expect(isOldMemoryFormat("memory/2025/01/2025-01-27.md")).toBe(false); + expect(isOldMemoryFormat("memory/2025/01/2025-01-27-discussion.md")).toBe(false); + expect(isOldMemoryFormat("memory/2024/12/2024-12-31.md")).toBe(false); + }); + + it("does not match MEMORY.md or memory.md", () => { + expect(isOldMemoryFormat("MEMORY.md")).toBe(false); + expect(isOldMemoryFormat("memory.md")).toBe(false); + }); + + it("does not match invalid date patterns", () => { + expect(isOldMemoryFormat("memory/2025-1-27.md")).toBe(false); // single digit month + expect(isOldMemoryFormat("memory/25-01-27.md")).toBe(false); // 2-digit year + expect(isOldMemoryFormat("memory/2025-01-27")).toBe(false); // no .md extension + expect(isOldMemoryFormat("memory/2025-01-27.txt")).toBe(false); // wrong extension + expect(isOldMemoryFormat("memory/notes/2025-01-27.md")).toBe(false); // subdirectory + }); + + it("does not match files outside memory directory", () => { + expect(isOldMemoryFormat("2025-01-27.md")).toBe(false); + expect(isOldMemoryFormat("notes/2025-01-27.md")).toBe(false); + }); +}); + +describe("migrateMemoryFile", () => { + let tempDir: string; + const logger = { + info: () => {}, + warn: () => {}, + error: () => {}, + }; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "memory-migration-test-")); + await fs.mkdir(path.join(tempDir, "memory"), { recursive: true }); + }); + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + it("migrates old-format file to new location", async () => { + const oldPath = path.join(tempDir, "memory", "2025-01-27.md"); + await fs.writeFile(oldPath, 
"# Test content\n", "utf-8"); + + const result = await migrateMemoryFile(oldPath, tempDir, logger); + + expect(result.status).toBe("migrated"); + expect(result.path).toBe(path.join(tempDir, "memory", "2025", "01", "2025-01-27.md")); + + // Verify new file exists + const newContent = await fs.readFile(result.path, "utf-8"); + expect(newContent).toBe("# Test content\n"); + + // Verify old file still exists (backup) + const oldExists = await fs + .access(oldPath) + .then(() => true) + .catch(() => false); + expect(oldExists).toBe(true); + }); + + it("migrates file with slug", async () => { + const oldPath = path.join(tempDir, "memory", "2025-01-27-discussion.md"); + await fs.writeFile(oldPath, "# Discussion\n", "utf-8"); + + const result = await migrateMemoryFile(oldPath, tempDir, logger); + + expect(result.status).toBe("migrated"); + expect(result.path).toBe( + path.join(tempDir, "memory", "2025", "01", "2025-01-27-discussion.md"), + ); + }); + + it("skips migration if new file already exists", async () => { + const oldPath = path.join(tempDir, "memory", "2025-01-27.md"); + const newPath = path.join(tempDir, "memory", "2025", "01", "2025-01-27.md"); + + await fs.writeFile(oldPath, "# Old content\n", "utf-8"); + await fs.mkdir(path.join(tempDir, "memory", "2025", "01"), { recursive: true }); + await fs.writeFile(newPath, "# New content\n", "utf-8"); + + const result = await migrateMemoryFile(oldPath, tempDir, logger); + + expect(result.status).toBe("skipped"); + expect(result.path).toBe(newPath); + + // Verify new file was not overwritten + const content = await fs.readFile(newPath, "utf-8"); + expect(content).toBe("# New content\n"); + }); + + it("fails for invalid filename format", async () => { + const oldPath = path.join(tempDir, "memory", "invalid-file.md"); + await fs.writeFile(oldPath, "# Content\n", "utf-8"); + + const result = await migrateMemoryFile(oldPath, tempDir, logger); + + expect(result.status).toBe("failed"); + }); + + it("fails for invalid date", 
async () => { + const oldPath = path.join(tempDir, "memory", "2025-13-40.md"); + await fs.writeFile(oldPath, "# Content\n", "utf-8"); + + const result = await migrateMemoryFile(oldPath, tempDir, logger); + + expect(result.status).toBe("failed"); + }); + + it("handles multiple migrations to same target gracefully (race condition)", async () => { + const oldPath1 = path.join(tempDir, "memory", "2025-01-27.md"); + const oldPath2 = path.join(tempDir, "memory-copy", "2025-01-27.md"); + + await fs.writeFile(oldPath1, "# First\n", "utf-8"); + await fs.mkdir(path.join(tempDir, "memory-copy"), { recursive: true }); + await fs.writeFile(oldPath2, "# Second\n", "utf-8"); + + // Simulate concurrent migrations + const [result1, result2] = await Promise.all([ + migrateMemoryFile(oldPath1, tempDir, logger), + migrateMemoryFile(oldPath2, tempDir, logger), + ]); + + // One should succeed, one should skip or fail + const statuses = [result1.status, result2.status].sort(); + expect(statuses).toContain("migrated"); + expect(statuses.some((s) => s === "skipped" || s === "failed")).toBe(true); + }); +}); + +describe("migrateAllMemoryFiles", () => { + let tempDir: string; + const logger = { + info: () => {}, + warn: () => {}, + error: () => {}, + }; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "memory-migration-all-test-")); + await fs.mkdir(path.join(tempDir, "memory"), { recursive: true }); + }); + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + it("migrates all old-format files", async () => { + // Create old-format files + await fs.writeFile(path.join(tempDir, "memory", "2025-01-27.md"), "# Day 1\n", "utf-8"); + await fs.writeFile(path.join(tempDir, "memory", "2025-01-28.md"), "# Day 2\n", "utf-8"); + await fs.writeFile( + path.join(tempDir, "memory", "2025-01-29-discussion.md"), + "# Discussion\n", + "utf-8", + ); + + const result = await migrateAllMemoryFiles(tempDir, { logger }); + + 
expect(result.migrated).toBe(3); + expect(result.skipped).toBe(0); + expect(result.failed).toBe(0); + expect(result.migratedFiles).toHaveLength(3); + expect(result.totalBytes).toBeGreaterThan(0); + expect(result.durationMs).toBeGreaterThanOrEqual(0); + + // Verify new files exist + const file1Exists = await fs + .access(path.join(tempDir, "memory", "2025", "01", "2025-01-27.md")) + .then(() => true) + .catch(() => false); + const file2Exists = await fs + .access(path.join(tempDir, "memory", "2025", "01", "2025-01-28.md")) + .then(() => true) + .catch(() => false); + const file3Exists = await fs + .access(path.join(tempDir, "memory", "2025", "01", "2025-01-29-discussion.md")) + .then(() => true) + .catch(() => false); + expect(file1Exists).toBe(true); + expect(file2Exists).toBe(true); + expect(file3Exists).toBe(true); + }); + + it("only migrates files directly in memory/ not in subdirectories", async () => { + // Create files: one in memory/ root (old format), one in subdirectory (user custom, should not migrate) + await fs.mkdir(path.join(tempDir, "memory", "archive"), { recursive: true }); + await fs.writeFile(path.join(tempDir, "memory", "2025-01-27.md"), "# Root\n", "utf-8"); + await fs.writeFile( + path.join(tempDir, "memory", "archive", "2024-12-31.md"), + "# Archived\n", + "utf-8", + ); + + const result = await migrateAllMemoryFiles(tempDir, { logger }); + + // Only the root file should be migrated + expect(result.migrated).toBe(1); + expect(result.migratedFiles).toContain("memory/2025/01/2025-01-27.md"); + expect(result.migratedFiles).not.toContain("memory/2024/12/2024-12-31.md"); + + // Archived file should still exist in original location + const archivedExists = await fs + .access(path.join(tempDir, "memory", "archive", "2024-12-31.md")) + .then(() => true) + .catch(() => false); + expect(archivedExists).toBe(true); + }); + + it("skips year directories (new format)", async () => { + // Create new-format files (should be skipped) + await 
fs.mkdir(path.join(tempDir, "memory", "2025", "01"), { recursive: true }); + await fs.writeFile( + path.join(tempDir, "memory", "2025", "01", "2025-01-27.md"), + "# New format\n", + "utf-8", + ); + + // Create old-format file + await fs.writeFile(path.join(tempDir, "memory", "2025-01-28.md"), "# Old format\n", "utf-8"); + + const result = await migrateAllMemoryFiles(tempDir, { logger }); + + expect(result.migrated).toBe(1); + expect(result.migratedFiles).toEqual(["memory/2025/01/2025-01-28.md"]); + }); + + it("handles dry-run mode without actual migration", async () => { + await fs.writeFile(path.join(tempDir, "memory", "2025-01-27.md"), "# Test\n", "utf-8"); + + const result = await migrateAllMemoryFiles(tempDir, { dryRun: true, logger }); + + expect(result.migrated).toBe(1); + expect(result.skipped).toBe(0); + + // Verify file was NOT migrated + const newExists = await fs + .access(path.join(tempDir, "memory", "2025", "01", "2025-01-27.md")) + .then(() => true) + .catch(() => false); + expect(newExists).toBe(false); + }); + + it("returns empty result if no memory directory", async () => { + const emptyDir = await fs.mkdtemp(path.join(os.tmpdir(), "empty-")); + + const result = await migrateAllMemoryFiles(emptyDir, { logger }); + + expect(result.migrated).toBe(0); + expect(result.skipped).toBe(0); + expect(result.failed).toBe(0); + expect(result.migratedFiles).toEqual([]); + + await fs.rm(emptyDir, { recursive: true, force: true }); + }); + + it("tracks failed migrations", async () => { + // Create invalid date file + await fs.writeFile(path.join(tempDir, "memory", "2025-99-99.md"), "# Invalid\n", "utf-8"); + + const result = await migrateAllMemoryFiles(tempDir, { logger }); + + expect(result.failed).toBe(1); + expect(result.failedFiles).toHaveLength(1); + expect(result.failedFiles[0]?.path).toBe("memory/2025-99-99.md"); + }); + + it("calculates total bytes migrated", async () => { + const content1 = "# Content 1\n".repeat(100); + const content2 = "# Content 
/** Logger contract accepted by the memory-migration helpers in this module. */
type MemoryMigrationLogger = {
  info: (msg: string) => void;
  warn: (msg: string) => void;
  error: (msg: string, err?: unknown) => void;
};

/** Resolves to `true` when `fs.access` succeeds for `filePath`, `false` otherwise. */
async function exists(filePath: string): Promise<boolean> {
  try {
    await fs.access(filePath);
    return true;
  } catch {
    return false;
  }
}

/** True when `err` is an object carrying the given errno-style `code`. */
function hasErrorCode(err: unknown, code: string): boolean {
  return typeof err === "object" && err !== null && (err as { code?: unknown }).code === code;
}

/**
 * Recursively collect `.md` files under `dir` into `files` (absolute paths).
 *
 * Symbolic links are skipped. A file whose workspace-relative path matches
 * {@link isOldMemoryFormat} is migrated on the fly and its hierarchical copy
 * is listed instead of the flat file.
 *
 * @param dir Directory to walk.
 * @param files Accumulator that receives the discovered paths.
 * @param workspaceDir Workspace root used to compute relative paths.
 * @param logger Optional logger forwarded to {@link migrateMemoryFile}.
 */
async function walkDir(
  dir: string,
  files: string[],
  workspaceDir: string,
  logger?: MemoryMigrationLogger,
) {
  const entries = await fs.readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
    const full = path.join(dir, entry.name);
    if (entry.isSymbolicLink()) continue;
    if (entry.isDirectory()) {
      await walkDir(full, files, workspaceDir, logger);
      continue;
    }
    if (!entry.isFile()) continue;
    if (!entry.name.endsWith(".md")) continue;

    // Old-format files sit directly in memory/; everything else is listed as-is.
    const relPath = path.relative(workspaceDir, full).replace(/\\/g, "/");
    if (isOldMemoryFormat(relPath)) {
      const result = await migrateMemoryFile(full, workspaceDir, logger);
      // When migration fails (read-only workspace, impossible date, ...) keep
      // listing the original file so its content does not vanish from the index.
      files.push(result.status === "failed" ? full : result.path);
      continue;
    }

    files.push(full);
  }
}

/**
 * Check whether a workspace-relative path uses the old flat memory layout.
 *
 * Matches `memory/YYYY-MM-DD.md` and `memory/YYYY-MM-DD-slug.md`, i.e. files
 * directly inside `memory/`. Does not match the hierarchical layout
 * (`memory/2025/01/2025-01-27.md`) or files in other subdirectories
 * (`memory/notes/2025-01-27.md`). Only the digit shape is checked here; the
 * calendar validity of the date is verified by {@link migrateMemoryFile}.
 *
 * @param relPath Path relative to the workspace, using `/` separators.
 */
export function isOldMemoryFormat(relPath: string): boolean {
  const oldFormatRegex = /^memory\/\d{4}-\d{2}-\d{2}(?:-[^./]+)?\.md$/;
  return oldFormatRegex.test(relPath);
}

/**
 * Result of migrating a memory file.
 * `path` is the absolute path of the hierarchical file (new or pre-existing).
 */
export type MigrationResult =
  | { status: "migrated"; path: string }
  | { status: "skipped"; path: string }
  | { status: "failed" };

/**
 * Copy an old-format memory file into `memory/YYYY/MM/` under the workspace.
 *
 * The source file is left in place. The copy uses `COPYFILE_EXCL`, so an
 * existing target is never overwritten and no empty placeholder is left
 * behind when the copy itself fails.
 *
 * @param oldPath Absolute path to the old-format file.
 * @param workspaceDir Workspace directory.
 * @param logger Optional logger for migration messages (defaults to console).
 * @returns `migrated` with the new path, `skipped` with the existing target's
 *   path, or `failed` (bad filename, impossible date, or I/O error). Never throws.
 */
export async function migrateMemoryFile(
  oldPath: string,
  workspaceDir: string,
  logger?: MemoryMigrationLogger,
): Promise<MigrationResult> {
  const log = logger || { info: console.log, warn: console.warn, error: console.error };

  try {
    const filename = path.basename(oldPath);
    const match = filename.match(/^(\d{4})-(\d{2})-(\d{2})(?:-.*)?\.md$/);

    if (!match) {
      log.error(`[memory] Invalid filename format for migration: ${filename}`);
      return { status: "failed" };
    }

    const [, year, month, day] = match;

    // Round-trip through Date so impossible dates (2025-99-99, 2025-02-30) are
    // rejected even when the engine would otherwise roll them over.
    const dateStr = `${year}-${month}-${day}`;
    const parsedDate = new Date(dateStr);
    if (isNaN(parsedDate.getTime()) || parsedDate.toISOString().split("T")[0] !== dateStr) {
      log.error(`[memory] Invalid date in filename: ${filename} (parsed: ${dateStr})`);
      return { status: "failed" };
    }

    const newDir = path.join(workspaceDir, "memory", year, month);
    await fs.mkdir(newDir, { recursive: true });
    const newPath = path.join(newDir, filename);

    try {
      // Exclusive copy: fails with EEXIST instead of overwriting, which covers
      // both a pre-existing new-format file and a concurrent migration.
      await fs.copyFile(oldPath, newPath, fs.constants.COPYFILE_EXCL);
    } catch (err: unknown) {
      if (hasErrorCode(err, "EEXIST")) {
        log.warn(`[memory] New format file exists, skipping migration: ${filename}`);
        return { status: "skipped", path: newPath };
      }
      throw err;
    }

    // The old file is intentionally kept as a backup.
    log.info(
      `[memory] Migrated old-format memory file: ${filename} -> memory/${year}/${month}/${filename}`,
    );

    return { status: "migrated", path: newPath };
  } catch (err) {
    log.error(`[memory] Failed to migrate memory file: ${path.basename(oldPath)}`, err);
    return { status: "failed" };
  }
}
export type MigrateAllOptions = {
  /** If true, only simulate migration without actual file operations */
  dryRun?: boolean;
  /** Optional logger for migration messages */
  logger?: {
    info: (msg: string) => void;
    warn: (msg: string) => void;
    error: (msg: string, err?: unknown) => void;
  };
};

export type MigrateAllResult = {
  migrated: number;
  skipped: number;
  failed: number;
  /** Total bytes migrated */
  totalBytes: number;
  /** Migration duration in milliseconds */
  durationMs: number;
  /** List of migrated file paths (relative to workspace) */
  migratedFiles: string[];
  /** List of failed file paths with error messages */
  failedFiles: Array<{ path: string; error: string }>;
};

/** Filename shape of a dated memory file: `YYYY-MM-DD.md` or `YYYY-MM-DD-slug.md`. */
const DATED_MEMORY_FILENAME_RE = /^(\d{4})-(\d{2})-(\d{2})(?:-.*)?\.md$/;

/** True when `year-month-day` round-trips through `Date` unchanged (a real calendar day). */
function isRealCalendarDate(year: string, month: string, day: string): boolean {
  const dateStr = `${year}-${month}-${day}`;
  const parsed = new Date(dateStr);
  // Check for NaN first: toISOString() throws on an invalid Date.
  return !Number.isNaN(parsed.getTime()) && parsed.toISOString().split("T")[0] === dateStr;
}

/** Build the all-zero result returned when there is nothing to migrate. */
function emptyMigrateAllResult(startTime: number): MigrateAllResult {
  return {
    migrated: 0,
    skipped: 0,
    failed: 0,
    totalBytes: 0,
    durationMs: Date.now() - startTime,
    migratedFiles: [],
    failedFiles: [],
  };
}

/**
 * Collect absolute paths of old-format memory files reachable from `dir`.
 *
 * Subdirectories are walked, except four-digit (year) directories directly
 * under `<workspaceDir>/memory`, which hold the new layout. A `.md` file is
 * kept when `isOldMemoryFormat` accepts its workspace-relative path.
 * Directories that cannot be read are skipped silently (best effort).
 *
 * @param dir Directory to scan.
 * @param workspaceDir Workspace root used to compute relative paths.
 * @param results Accumulator; also the return value.
 */
async function findOldFormatFiles(
  dir: string,
  workspaceDir: string,
  results: string[] = [],
): Promise<string[]> {
  // Loop-invariant: year directories are only skipped at the memory/ root.
  const memoryDir = path.join(workspaceDir, "memory");
  const isMemoryRoot = path.normalize(dir) === path.normalize(memoryDir);

  try {
    const entries = await fs.readdir(dir, { withFileTypes: true });

    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        if (isMemoryRoot && /^\d{4}$/.test(entry.name)) {
          continue;
        }
        await findOldFormatFiles(fullPath, workspaceDir, results);
        continue;
      }

      if (entry.isFile() && entry.name.endsWith(".md")) {
        const relPath = path.relative(workspaceDir, fullPath).replace(/\\/g, "/");
        if (isOldMemoryFormat(relPath)) {
          results.push(fullPath);
        }
      }
    }
  } catch {
    // Ignore permission errors and continue
  }
  return results;
}

/**
 * Migrate all old-format memory files in a workspace.
 *
 * In dry-run mode nothing is written: each file is classified as "would
 * migrate", "would skip" (target exists) or "would fail" (filename or date
 * invalid), mirroring what the real run reports. `migratedFiles` stays empty
 * in dry-run mode because no file is actually migrated.
 *
 * @param workspaceDir Workspace directory
 * @param options Migration options (dry-run, logger)
 * @returns Counters, byte total, duration and per-file lists for the run
 */
export async function migrateAllMemoryFiles(
  workspaceDir: string,
  options: MigrateAllOptions = {},
): Promise<MigrateAllResult> {
  const { dryRun = false, logger } = options;
  const log = logger || { info: console.log, warn: console.warn, error: console.error };

  const startTime = Date.now();
  const memoryDir = path.join(workspaceDir, "memory");

  if (!(await exists(memoryDir))) {
    return emptyMigrateAllResult(startTime);
  }

  const oldFiles = await findOldFormatFiles(memoryDir, workspaceDir);

  if (oldFiles.length === 0) {
    log.info(`[memory] No old-format files found in ${memoryDir}`);
    return emptyMigrateAllResult(startTime);
  }

  log.info(`[memory] Found ${oldFiles.length} old-format file(s) to migrate`);
  if (dryRun) {
    log.info(`[memory] DRY RUN: No files will be actually migrated`);
  }

  let migrated = 0;
  let skipped = 0;
  let failed = 0;
  let totalBytes = 0;
  const migratedFiles: string[] = [];
  const failedFiles: Array<{ path: string; error: string }> = [];

  for (const fullPath of oldFiles) {
    const relPath = path.relative(workspaceDir, fullPath).replace(/\\/g, "/");

    if (dryRun) {
      const filename = path.basename(fullPath);
      const match = filename.match(DATED_MEMORY_FILENAME_RE);
      if (!match) {
        log.warn(`[memory] [DRY RUN] Would fail (invalid filename): ${relPath}`);
        failed++;
        failedFiles.push({ path: relPath, error: "Invalid filename format" });
        continue;
      }

      const [, year = "", month = "", day = ""] = match;
      // The real run rejects impossible dates, so the simulation must too.
      if (!isRealCalendarDate(year, month, day)) {
        log.warn(`[memory] [DRY RUN] Would fail (invalid date): ${relPath}`);
        failed++;
        failedFiles.push({ path: relPath, error: "Invalid date in filename" });
        continue;
      }

      const newPath = path.join(workspaceDir, "memory", year, month, filename);
      if (await exists(newPath)) {
        log.info(`[memory] [DRY RUN] Would skip (exists): ${relPath}`);
        skipped++;
      } else {
        log.info(
          `[memory] [DRY RUN] Would migrate: ${relPath} -> memory/${year}/${month}/${filename}`,
        );
        migrated++;
        try {
          const stat = await fs.stat(fullPath);
          totalBytes += stat.size;
        } catch {
          // Size is informational only; a stat failure does not change the outcome.
        }
      }
      continue;
    }

    // Actual migration
    try {
      const stat = await fs.stat(fullPath);
      const result = await migrateMemoryFile(fullPath, workspaceDir, log);

      if (result.status === "skipped") {
        skipped++;
      } else if (result.status === "migrated") {
        migrated++;
        totalBytes += stat.size;
        migratedFiles.push(path.relative(workspaceDir, result.path).replace(/\\/g, "/"));
      } else {
        failed++;
        failedFiles.push({ path: relPath, error: "Migration failed" });
      }
    } catch (err) {
      failed++;
      const errorMsg = err instanceof Error ? err.message : String(err);
      failedFiles.push({ path: relPath, error: errorMsg });
      log.error(`[memory] Failed to migrate ${relPath}:`, err);
    }
  }

  const durationMs = Date.now() - startTime;

  log.info(
    `[memory] Migration complete: ${migrated} migrated, ${skipped} skipped, ${failed} failed (${durationMs}ms, ${totalBytes} bytes)`,
  );

  return {
    migrated,
    skipped,
    failed,
    totalBytes,
    durationMs,
    migratedFiles,
    failedFiles,
  };
}
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-10.md"), content); + // Create file in new format to avoid migration during sync + await fs.mkdir(path.join(workspaceDir, "memory", "2026", "01"), { recursive: true }); + await fs.writeFile(path.join(workspaceDir, "memory", "2026", "01", "2026-01-10.md"), content); let uploadedRequests: Array<{ custom_id?: string }> = []; const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => { @@ -459,7 +465,7 @@ describe("memory indexing with OpenAI batches", () => { embedBatch.mockClear(); await fs.writeFile( - path.join(workspaceDir, "memory", "2026-01-10.md"), + path.join(workspaceDir, "memory", "2026", "01", "2026-01-10.md"), ["repeat", "failures", "again"].join("\n\n"), ); await manager.sync({ force: true }); @@ -470,7 +476,7 @@ describe("memory indexing with OpenAI batches", () => { const fetchCalls = fetchMock.mock.calls.length; embedBatch.mockClear(); await fs.writeFile( - path.join(workspaceDir, "memory", "2026-01-10.md"), + path.join(workspaceDir, "memory", "2026", "01", "2026-01-10.md"), ["repeat", "failures", "fallback"].join("\n\n"), ); await manager.sync({ force: true });