diff --git a/src/routing/llm-router/index.ts b/src/routing/llm-router/index.ts
new file mode 100644
--- /dev/null
+++ b/src/routing/llm-router/index.ts
@@ -0,0 +1,274 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import YAML from "yaml";
+import type { ModelRef } from "../../agents/model-selection.js";
+import { parseModelRef } from "../../agents/model-selection.js";
+
+/** Request categories the router can pick a model for. */
+export type Intent = "chat" | "strategy" | "code" | "summarize" | "tool" | "continuity";
+
+/** Result of routing one request: the selected model and why it was selected. */
+export type RouteDecision = {
+  intent: Intent;
+  provider: string;
+  model: string;
+  /** "default" (no config), "complexity" (policy escalation) or "intent". */
+  reason: string;
+  /** True only when no router config was supplied. */
+  isDefault: boolean;
+  fallbacks?: ModelRef[];
+};
+
+/** One routing.yaml intent entry; model refs are still unparsed strings. */
+type IntentRoutingSpec = {
+  primary?: string;
+  fallbacks?: string[];
+};
+
+type RouterRoutingConfig = {
+  intents?: Partial<Record<Intent, IntentRoutingSpec>>;
+};
+
+type RouterPolicyConfig = {
+  complexity?: {
+    contextTokensGe?: number;
+    target?: string;
+  };
+  guardrails?: {
+    highStakes?: boolean;
+  };
+};
+
+/** Everything loadRouterConfig found in a config directory. */
+export type RouterConfig = {
+  routing?: RouterRoutingConfig;
+  policy?: RouterPolicyConfig;
+  /** Unparsed contents of limits.yaml. */
+  limitsRaw?: string;
+  /** Unparsed contents of pricing.yaml. */
+  pricingRaw?: string;
+};
+
+/** A built-in route whose model refs are already resolved. */
+type IntentRoute = {
+  primary: ModelRef;
+  fallbacks?: ModelRef[];
+};
+
+const INTENTS: Intent[] = ["chat", "strategy", "code", "summarize", "tool", "continuity"];
+
+/** Built-in routes for intents that routing.yaml does not override; "tool" has none. */
+const DEFAULT_INTENT_ROUTES: Partial<Record<Intent, IntentRoute>> = {
+  chat: {
+    primary: { provider: "anthropic", model: "haiku" },
+    fallbacks: [{ provider: "anthropic", model: "sonnet" }],
+  },
+  strategy: {
+    primary: { provider: "anthropic", model: "sonnet" },
+    fallbacks: [{ provider: "anthropic", model: "haiku" }],
+  },
+  code: {
+    primary: { provider: "openai-codex", model: "codex" },
+  },
+  summarize: {
+    primary: { provider: "anthropic", model: "haiku" },
+  },
+  continuity: {
+    primary: { provider: "local", model: "local_small" },
+  },
+};
+
+/** Escalation target used when policy.yaml sets a threshold but no usable `target`. */
+const DEFAULT_ESCALATION_TARGET: ModelRef = { provider: "anthropic", model: "opus" };
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+/** Returns the trimmed string, or undefined for non-strings and blank strings. */
+function toOptionalString(value: unknown): string | undefined {
+  if (typeof value !== "string") return undefined;
+  const trimmed = value.trim();
+  return trimmed ? trimmed : undefined;
+}
+
+/** Returns the trimmed, non-empty string entries of an array; undefined for non-arrays. */
+function toStringArray(value: unknown): string[] | undefined {
+  if (!Array.isArray(value)) return undefined;
+  return value
+    .map((entry) => (typeof entry === "string" ? entry.trim() : ""))
+    .filter((entry) => entry.length > 0);
+}
+
+/** Reads a UTF-8 file; resolves to null if it does not exist, rethrows other errors. */
+async function readOptionalFile(filePath: string): Promise<string | null> {
+  try {
+    return await fs.readFile(filePath, "utf-8");
+  } catch (error) {
+    if (isRecord(error) && error.code === "ENOENT") return null;
+    throw error;
+  }
+}
+
+/**
+ * Parses routing.yaml text into per-intent overrides.
+ * Intents outside INTENTS and entries with neither `primary` nor `fallbacks` are dropped.
+ */
+function parseRoutingConfig(raw: string): RouterRoutingConfig {
+  const parsed: unknown = YAML.parse(raw);
+  if (!isRecord(parsed)) return {};
+  const intentsRaw = isRecord(parsed.intents) ? parsed.intents : undefined;
+  if (!intentsRaw) return {};
+
+  const intents: Partial<Record<Intent, IntentRoutingSpec>> = {};
+  for (const intent of INTENTS) {
+    const entry = intentsRaw[intent];
+    if (!isRecord(entry)) continue;
+    const primary = toOptionalString(entry.primary);
+    const fallbacks = toStringArray(entry.fallbacks);
+    if (!primary && !fallbacks) continue;
+    intents[intent] = {
+      ...(primary ? { primary } : {}),
+      ...(fallbacks ? { fallbacks } : {}),
+    };
+  }
+  return { intents };
+}
+
+/**
+ * Parses policy.yaml text. Values of the wrong type are ignored rather than rejected.
+ */
+function parsePolicyConfig(raw: string): RouterPolicyConfig {
+  const parsed: unknown = YAML.parse(raw);
+  if (!isRecord(parsed)) return {};
+  const complexityRaw = isRecord(parsed.complexity) ? parsed.complexity : undefined;
+  const guardrailsRaw = isRecord(parsed.guardrails) ? parsed.guardrails : undefined;
+
+  // The threshold may be spelled three ways; the first non-null spelling wins.
+  const thresholdRaw =
+    complexityRaw?.context_tokens_ge ??
+    complexityRaw?.contextTokensGe ??
+    complexityRaw?.threshold;
+  const contextTokensGe =
+    typeof thresholdRaw === "number" && Number.isFinite(thresholdRaw) ? thresholdRaw : undefined;
+  const target = toOptionalString(complexityRaw?.target);
+
+  const highStakesRaw = guardrailsRaw?.high_stakes ?? guardrailsRaw?.highStakes;
+  const highStakes = typeof highStakesRaw === "boolean" ? highStakesRaw : undefined;
+
+  const policy: RouterPolicyConfig = {};
+  if (contextTokensGe !== undefined || target !== undefined) {
+    policy.complexity = {
+      ...(contextTokensGe !== undefined ? { contextTokensGe } : {}),
+      ...(target !== undefined ? { target } : {}),
+    };
+  }
+  if (highStakes !== undefined) policy.guardrails = { highStakes };
+  return policy;
+}
+
+function parseModelRefOrNull(raw: string | undefined, defaultProvider: string): ModelRef | null {
+  if (!raw) return null;
+  return parseModelRef(raw, defaultProvider);
+}
+
+/** Parses each ref, dropping falsy parseModelRef results; undefined stays undefined. */
+function resolveFallbackRefs(
+  fallbacks: string[] | undefined,
+  defaultProvider: string,
+): ModelRef[] | undefined {
+  if (!fallbacks) return undefined;
+  return fallbacks
+    .map((entry) => parseModelRef(entry, defaultProvider))
+    .filter((entry): entry is ModelRef => !!entry);
+}
+
+/**
+ * Loads routing.yaml, limits.yaml, pricing.yaml and policy.yaml from `dir`.
+ *
+ * @returns null when every file is missing or empty; otherwise a config holding
+ *   only the sections whose file had content.
+ */
+export async function loadRouterConfig(dir: string): Promise<RouterConfig | null> {
+  const [routingRaw, limitsRaw, pricingRaw, policyRaw] = await Promise.all([
+    readOptionalFile(path.join(dir, "routing.yaml")),
+    readOptionalFile(path.join(dir, "limits.yaml")),
+    readOptionalFile(path.join(dir, "pricing.yaml")),
+    readOptionalFile(path.join(dir, "policy.yaml")),
+  ]);
+
+  if (!routingRaw && !limitsRaw && !pricingRaw && !policyRaw) return null;
+
+  return {
+    ...(routingRaw ? { routing: parseRoutingConfig(routingRaw) } : {}),
+    ...(policyRaw ? { policy: parsePolicyConfig(policyRaw) } : {}),
+    ...(limitsRaw ? { limitsRaw } : {}),
+    ...(pricingRaw ? { pricingRaw } : {}),
+  };
+}
+
+/**
+ * Picks the model for one request.
+ *
+ * Without a config the caller's default model is returned. With one, the primary is
+ * the routing.yaml override, else the built-in route, else the default model; chat and
+ * strategy requests whose `contextTokens` reach the policy threshold go to the policy
+ * target instead (reason "complexity").
+ *
+ * NOTE(review): `agentDir`, `highStakes` and the parsed guardrails policy are accepted
+ * but not consulted here.
+ */
+export function resolveRouteDecision(params: {
+  cfg: RouterConfig | null;
+  agentDir?: string;
+  intent: Intent;
+  defaultModelRef: ModelRef;
+  contextTokens?: number;
+  highStakes?: boolean;
+}): RouteDecision {
+  const { cfg, intent, defaultModelRef, contextTokens } = params;
+  if (!cfg) {
+    return {
+      intent,
+      provider: defaultModelRef.provider,
+      model: defaultModelRef.model,
+      reason: "default",
+      isDefault: true,
+    };
+  }
+
+  const defaultProvider = defaultModelRef.provider;
+  const baseRoute = DEFAULT_INTENT_ROUTES[intent];
+  const intentRouting = cfg.routing?.intents?.[intent];
+
+  const primary =
+    parseModelRefOrNull(intentRouting?.primary, defaultProvider) ??
+    baseRoute?.primary ??
+    defaultModelRef;
+  // A `fallbacks` list in routing.yaml, even an empty one, replaces the built-in list.
+  const fallbacks =
+    resolveFallbackRefs(intentRouting?.fallbacks, defaultProvider) ?? baseRoute?.fallbacks;
+
+  const threshold = cfg.policy?.complexity?.contextTokensGe;
+  const shouldEscalate =
+    (intent === "chat" || intent === "strategy") &&
+    threshold !== undefined &&
+    contextTokens !== undefined &&
+    contextTokens >= threshold;
+
+  let selected = primary;
+  if (shouldEscalate) {
+    selected =
+      parseModelRefOrNull(cfg.policy?.complexity?.target, defaultProvider) ??
+      DEFAULT_ESCALATION_TARGET;
+  }
+
+  return {
+    intent,
+    provider: selected.provider,
+    model: selected.model,
+    reason: shouldEscalate ? "complexity" : "intent",
+    isDefault: false,
+    ...(fallbacks && fallbacks.length > 0 ? { fallbacks } : {}),
+  };
+}
diff --git a/src/routing/llm-router/llm-router.test.ts b/src/routing/llm-router/llm-router.test.ts
new file mode 100644
index 000000000..ee6758387
--- /dev/null
+++ b/src/routing/llm-router/llm-router.test.ts
@@ -0,0 +1,141 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { describe, expect, it } from "vitest";
+import type { ModelRef } from "../../agents/model-selection.js";
+import { loadRouterConfig, resolveRouteDecision } from "./index.js";
+
+async function withTempDir<T>(runner: (dir: string) => Promise<T>): Promise<T> {
+  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-router-"));
+  try {
+    return await runner(dir);
+  } finally {
+    await fs.rm(dir, { recursive: true, force: true });
+  }
+}
+
+const defaultModelRef: ModelRef = { provider: "anthropic", model: "default" };
+
+describe("llm-router", () => {
+  it("returns default when config dir is missing or empty", async () => {
+    const missingDir = path.join(os.tmpdir(), `moltbot-router-missing-${Date.now()}`);
+    const missingCfg = await loadRouterConfig(missingDir);
+    expect(missingCfg).toBeNull();
+
+    const decision = resolveRouteDecision({
+      cfg: missingCfg,
+      intent: "chat",
+      defaultModelRef,
+    });
+    expect(decision).toEqual({
+      intent: "chat",
+      provider: "anthropic",
+      model: "default",
+      reason: "default",
+      isDefault: true,
+    });
+
+    await withTempDir(async (dir) => {
+      const emptyCfg = await loadRouterConfig(dir);
+      expect(emptyCfg).toBeNull();
+    });
+  });
+
+  it("resolves routing.yaml intents", async () => {
+    await withTempDir(async (dir) => {
+      await fs.writeFile(
+        path.join(dir, "routing.yaml"),
+        [
+          "intents:",
+          "  chat:",
+          "    primary: anthropic/haiku",
+          "    fallbacks:",
+          "      - anthropic/sonnet",
+          "  strategy:",
+          "    primary: anthropic/sonnet",
+          "    fallbacks:",
+          "      - anthropic/haiku",
+          "  code:",
+          "    primary: openai-codex/codex",
+          "  summarize:",
+          "    primary: anthropic/haiku",
+          "  continuity:",
+          "    primary: local/local_small",
+          "",
+        ].join("\n"),
+        "utf-8",
+      );
+
+      const cfg = await loadRouterConfig(dir);
+      expect(cfg).not.toBeNull();
+
+      expect(resolveRouteDecision({ cfg, intent: "chat", defaultModelRef }).fallbacks).toEqual([
+        { provider: "anthropic", model: "sonnet" },
+      ]);
+      expect(resolveRouteDecision({ cfg, intent: "chat", defaultModelRef }).provider).toBe(
+        "anthropic",
+      );
+      expect(resolveRouteDecision({ cfg, intent: "chat", defaultModelRef }).model).toBe("haiku");
+
+      const strategy = resolveRouteDecision({ cfg, intent: "strategy", defaultModelRef });
+      expect(strategy.provider).toBe("anthropic");
+      expect(strategy.model).toBe("sonnet");
+
+      const code = resolveRouteDecision({ cfg, intent: "code", defaultModelRef });
+      expect(code.provider).toBe("openai-codex");
+      expect(code.model).toBe("codex");
+
+      const summarize = resolveRouteDecision({ cfg, intent: "summarize", defaultModelRef });
+      expect(summarize.provider).toBe("anthropic");
+      expect(summarize.model).toBe("haiku");
+
+      const continuity = resolveRouteDecision({ cfg, intent: "continuity", defaultModelRef });
+      expect(continuity.provider).toBe("local");
+      expect(continuity.model).toBe("local_small");
+    });
+  });
+
+  it("escalates chat/strategy to opus based on complexity policy", async () => {
+    await withTempDir(async (dir) => {
+      await fs.writeFile(
+        path.join(dir, "routing.yaml"),
+        [
+          "intents:",
+          "  chat:",
+          "    primary: anthropic/haiku",
+          "  strategy:",
+          "    primary: anthropic/sonnet",
+          "",
+        ].join("\n"),
+        "utf-8",
+      );
+      await fs.writeFile(
+        path.join(dir, "policy.yaml"),
+        ["complexity:", "  context_tokens_ge: 2000", "  target: anthropic/opus", ""].join("\n"),
+        "utf-8",
+      );
+
+      const cfg = await loadRouterConfig(dir);
+      const chat = resolveRouteDecision({
+        cfg,
+        intent: "chat",
+        defaultModelRef,
+        contextTokens: 2000,
+      });
+      const strategy = resolveRouteDecision({
+        cfg,
+        intent: "strategy",
+        defaultModelRef,
+        contextTokens: 2500,
+      });
+
+      expect(chat.provider).toBe("anthropic");
+      expect(chat.model).toBe("opus");
+      expect(chat.reason).toBe("complexity");
+
+      expect(strategy.provider).toBe("anthropic");
+      expect(strategy.model).toBe("opus");
+      expect(strategy.reason).toBe("complexity");
+    });
+  });
+});