fix: enforce reasoning tags across providers (#801) (thanks @mcinteerj)

2026-01-12 22:58:44 +00:00 · 2026-01-12 22:58:44 +00:00 · 382681178b
commit 382681178b
parent cd169aceb5
5 changed files with 101 additions and 10 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -34,6 +34,7 @@
 ### Fixes
 - Auto-reply: inline `/status` now honors allowlists (authorized stripped + replied inline; unauthorized leaves text for the agent) to match command gating tests.
 - Auto-reply: enforce `<final>` tag for all reasoning-tag providers (Gemini Antigravity, MiniMax, etc.), not just Ollama. (#801 — thanks @mcinteerj)
 - Models: normalize `${ENV_VAR}` apiKey config values and auto-fill missing provider `apiKey` from env/auth when custom provider models are configured (fixes MiniMax “Unknown model” on fresh installs).
 - Models/Tools: include `MiniMax-VL-01` in implicit MiniMax provider so image pairing uses a real vision model.
 - Telegram: show typing indicator in General forum topics. (#779) — thanks @azade-c.
--- a/src/auto-reply/reply.reasoning-tags.test.ts
+++ b/src/auto-reply/reply.reasoning-tags.test.ts
@ -0,0 +1,83 @@
 import path from "node:path";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import { withTempHome as withTempHomeBase } from "../../test/helpers/temp-home.js";
 import { loadModelCatalog } from "../agents/model-catalog.js";
 import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
 import { getReplyFromConfig } from "./reply.js";
 // Replace the embedded Pi agent module with test doubles. `runEmbeddedPiAgent`
 // is a bare mock with no default result; the test body sets its resolved value.
 // The remaining mocked helpers always return `false`.
 vi.mock("../agents/pi-embedded.js", () => ({
  abortEmbeddedPiRun: vi.fn().mockReturnValue(false),
  runEmbeddedPiAgent: vi.fn(),
  queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
  // Plain function (not a mock): trims the key and falls back to "main" when
  // the trimmed key is empty.
  resolveEmbeddedSessionLane: (key: string) =>
    `session:${key.trim() || "main"}`,
  isEmbeddedPiRunActive: vi.fn().mockReturnValue(false),
  isEmbeddedPiRunStreaming: vi.fn().mockReturnValue(false),
 }));
 // Replace the model catalog loader with a bare mock; the suite's `beforeEach`
 // configures the catalog entries it resolves to.
 vi.mock("../agents/model-catalog.js", () => ({
  loadModelCatalog: vi.fn(),
 }));
 /**
  * Delegates to the shared `withTempHomeBase` helper with a prefix specific to
  * this test file, forwarding the `home` value it provides to `fn`.
  *
  * @param fn - Callback invoked with the `home` string supplied by the helper.
  * @returns Whatever `withTempHomeBase` resolves to for the wrapped callback.
  */
 async function withTempHome<T>(fn: (home: string) => Promise<T>): Promise<T> {
  const options = { prefix: "clawdbot-reasoning-tags-" };
  // Keep the wrapper async so a synchronous throw from `fn` still surfaces as a rejection.
  const runInHome = async (home: string): Promise<T> => fn(home);
  return withTempHomeBase(runInHome, options);
 }
 // Checks that a reply using the "google-antigravity/gemini-3" model invokes the
 // (mocked) embedded agent runner with `enforceFinalTag: true`.
 describe("reasoning tag enforcement", () => {
  const reasoningModel = "google-antigravity/gemini-3";
  // Typed handles onto the module mocks declared at the top of the file.
  const runAgentMock = vi.mocked(runEmbeddedPiAgent);
  const catalogMock = vi.mocked(loadModelCatalog);

  beforeEach(() => {
    // Drop any previous agent-run behaviour and expose a single catalog entry.
    runAgentMock.mockReset();
    catalogMock.mockResolvedValue([
      { id: "gemini-3", name: "Gemini 3", provider: "google-antigravity" },
    ]);
  });

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("sets enforceFinalTag for providers that require reasoning tags", async () => {
    await withTempHome(async (home) => {
      // Keep workspace and session store inside the temporary home.
      const workspace = path.join(home, "clawd");
      const store = path.join(home, "sessions.json");

      // Canned agent result so the reply flow can complete.
      runAgentMock.mockResolvedValue({
        payloads: [{ text: "ok" }],
        meta: {
          durationMs: 1,
          agentMeta: {
            sessionId: "s",
            provider: "google-antigravity",
            model: "gemini-3",
          },
        },
      });

      await getReplyFromConfig(
        { Body: "hello", From: "+1999", To: "+2000" },
        {},
        {
          agents: {
            defaults: {
              model: reasoningModel,
              models: { [reasoningModel]: {} },
              workspace,
            },
          },
          whatsapp: { allowFrom: ["*"] },
          session: { store },
        },
      );

      // Exactly one agent run, and its first argument carries the expected flags.
      expect(runAgentMock).toHaveBeenCalledTimes(1);
      const runParams = runAgentMock.mock.calls[0]?.[0];
      expect(runParams?.enforceFinalTag).toBe(true);
      expect(runParams?.provider).toBe("google-antigravity");
    });
  });
 });
--- a/src/auto-reply/reply.ts
+++ b/src/auto-reply/reply.ts
@ -849,7 +849,8 @@ export async function getReplyFromConfig(
      formatModelSwitchEvent,
      agentCfg,
      modelState: {
-        resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel,
+        resolveDefaultThinkingLevel: async () =>
          (await modelState.resolveDefaultThinkingLevel()) ?? "off",
        allowedModelKeys: modelState.allowedModelKeys,
        allowedModelCatalog: modelState.allowedModelCatalog,
        resetModelOverride: modelState.resetModelOverride,
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@ -565,7 +565,8 @@ export async function runReplyAgent(params: {
            }
            text = stripped.text;
          }
-          if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) return { skip: true };
+          if (isSilentReplyText(text, SILENT_REPLY_TOKEN))
            return { skip: true };
          return { text, skip: false };
        };
        const handlePartialForTyping = async (
@ -713,8 +714,6 @@ export async function runReplyAgent(params: {
                blockStreamingEnabled && opts?.onBlockReply
                  ? async (payload) => {
                      const { text, skip } = normalizeStreamingText(payload);
                      const hasMedia = (payload.mediaUrls?.length ?? 0) > 0;
                      if (skip && !hasMedia) return;
                      const taggedPayload = applyReplyTagsToPayload(
                        {
                          text,
@ -723,6 +722,10 @@ export async function runReplyAgent(params: {
                        },
                        sessionCtx.MessageSid,
                      );
                      const hasMedia =
                        Boolean(taggedPayload.mediaUrl) ||
                        (taggedPayload.mediaUrls?.length ?? 0) > 0;
                      if (skip && !hasMedia) return;
                      // Let through payloads with audioAsVoice flag even if empty (need to track it)
                      if (
                        !isRenderablePayload(taggedPayload) &&
@ -737,9 +740,6 @@ export async function runReplyAgent(params: {
                        },
                      );
                      const cleaned = parsed.text || undefined;
                      const hasMedia =
                        Boolean(taggedPayload.mediaUrl) ||
                        (taggedPayload.mediaUrls?.length ?? 0) > 0;
                      // Skip empty payloads unless they have audioAsVoice flag (need to track it)
                      if (
                        !cleaned &&
--- a/src/auto-reply/reply/directive-handling.ts
+++ b/src/auto-reply/reply/directive-handling.ts
@ -635,7 +635,9 @@ export async function applyInlineDirectivesFastLane(params: {
    resolveDefaultThinkingLevel: () => Promise<ThinkLevel>;
    allowedModelKeys: Set<string>;
    allowedModelCatalog: Awaited<
-      ReturnType<typeof import("../../agents/model-catalog.js").loadModelCatalog>
+      ReturnType<
        typeof import("../../agents/model-catalog.js").loadModelCatalog
      >
    >;
    resetModelOverride: boolean;
  };
@ -1357,7 +1359,9 @@ export async function handleDirectiveOnly(params: {
    }
  }
  if (directives.hasQueueDirective && directives.queueMode) {
-    parts.push(formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`));
+    parts.push(
      formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`),
    );
  } else if (directives.hasQueueDirective && directives.queueReset) {
    parts.push(formatDirectiveAck("Queue mode reset to default."));
  }
@ -1373,7 +1377,9 @@ export async function handleDirectiveOnly(params: {
    parts.push(formatDirectiveAck(`Queue cap set to ${directives.cap}.`));
  }
  if (directives.hasQueueDirective && directives.dropPolicy) {
-    parts.push(formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`));
+    parts.push(
      formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`),
    );
  }
  const ack = parts.join(" ").trim();
  if (!ack && directives.hasStatusDirective) return undefined;