fix: enforce reasoning tags across providers (#801) (thanks @mcinteerj)
This commit is contained in:
parent
cd169aceb5
commit
382681178b
@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
- Auto-reply: inline `/status` now honors allowlists (authorized stripped + replied inline; unauthorized leaves text for the agent) to match command gating tests.
|
- Auto-reply: inline `/status` now honors allowlists (authorized stripped + replied inline; unauthorized leaves text for the agent) to match command gating tests.
|
||||||
|
- Auto-reply: enforce `<final>` tag for all reasoning-tag providers (Gemini Antigravity, MiniMax, etc.), not just Ollama. (#801 — thanks @mcinteerj)
|
||||||
- Models: normalize `${ENV_VAR}` apiKey config values and auto-fill missing provider `apiKey` from env/auth when custom provider models are configured (fixes MiniMax “Unknown model” on fresh installs).
|
- Models: normalize `${ENV_VAR}` apiKey config values and auto-fill missing provider `apiKey` from env/auth when custom provider models are configured (fixes MiniMax “Unknown model” on fresh installs).
|
||||||
- Models/Tools: include `MiniMax-VL-01` in implicit MiniMax provider so image pairing uses a real vision model.
|
- Models/Tools: include `MiniMax-VL-01` in implicit MiniMax provider so image pairing uses a real vision model.
|
||||||
- Telegram: show typing indicator in General forum topics. (#779) — thanks @azade-c.
|
- Telegram: show typing indicator in General forum topics. (#779) — thanks @azade-c.
|
||||||
|
|||||||
83
src/auto-reply/reply.reasoning-tags.test.ts
Normal file
83
src/auto-reply/reply.reasoning-tags.test.ts
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
|
import { withTempHome as withTempHomeBase } from "../../test/helpers/temp-home.js";
|
||||||
|
import { loadModelCatalog } from "../agents/model-catalog.js";
|
||||||
|
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
|
||||||
|
import { getReplyFromConfig } from "./reply.js";
|
||||||
|
|
||||||
|
vi.mock("../agents/pi-embedded.js", () => ({
|
||||||
|
abortEmbeddedPiRun: vi.fn().mockReturnValue(false),
|
||||||
|
runEmbeddedPiAgent: vi.fn(),
|
||||||
|
queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
|
||||||
|
resolveEmbeddedSessionLane: (key: string) =>
|
||||||
|
`session:${key.trim() || "main"}`,
|
||||||
|
isEmbeddedPiRunActive: vi.fn().mockReturnValue(false),
|
||||||
|
isEmbeddedPiRunStreaming: vi.fn().mockReturnValue(false),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock("../agents/model-catalog.js", () => ({
|
||||||
|
loadModelCatalog: vi.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
async function withTempHome<T>(fn: (home: string) => Promise<T>): Promise<T> {
|
||||||
|
return withTempHomeBase(
|
||||||
|
async (home) => {
|
||||||
|
return await fn(home);
|
||||||
|
},
|
||||||
|
{ prefix: "clawdbot-reasoning-tags-" },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("reasoning tag enforcement", () => {
|
||||||
|
const reasoningModel = "google-antigravity/gemini-3";
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.mocked(runEmbeddedPiAgent).mockReset();
|
||||||
|
vi.mocked(loadModelCatalog).mockResolvedValue([
|
||||||
|
{ id: "gemini-3", name: "Gemini 3", provider: "google-antigravity" },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
vi.clearAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("sets enforceFinalTag for providers that require reasoning tags", async () => {
|
||||||
|
await withTempHome(async (home) => {
|
||||||
|
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
|
||||||
|
payloads: [{ text: "ok" }],
|
||||||
|
meta: {
|
||||||
|
durationMs: 1,
|
||||||
|
agentMeta: {
|
||||||
|
sessionId: "s",
|
||||||
|
provider: "google-antigravity",
|
||||||
|
model: "gemini-3",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await getReplyFromConfig(
|
||||||
|
{ Body: "hello", From: "+1999", To: "+2000" },
|
||||||
|
{},
|
||||||
|
{
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: reasoningModel,
|
||||||
|
models: { [reasoningModel]: {} },
|
||||||
|
workspace: path.join(home, "clawd"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
whatsapp: { allowFrom: ["*"] },
|
||||||
|
session: { store: path.join(home, "sessions.json") },
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(runEmbeddedPiAgent).toHaveBeenCalledTimes(1);
|
||||||
|
const args = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
|
||||||
|
expect(args?.enforceFinalTag).toBe(true);
|
||||||
|
expect(args?.provider).toBe("google-antigravity");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
@ -849,7 +849,8 @@ export async function getReplyFromConfig(
|
|||||||
formatModelSwitchEvent,
|
formatModelSwitchEvent,
|
||||||
agentCfg,
|
agentCfg,
|
||||||
modelState: {
|
modelState: {
|
||||||
resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel,
|
resolveDefaultThinkingLevel: async () =>
|
||||||
|
(await modelState.resolveDefaultThinkingLevel()) ?? "off",
|
||||||
allowedModelKeys: modelState.allowedModelKeys,
|
allowedModelKeys: modelState.allowedModelKeys,
|
||||||
allowedModelCatalog: modelState.allowedModelCatalog,
|
allowedModelCatalog: modelState.allowedModelCatalog,
|
||||||
resetModelOverride: modelState.resetModelOverride,
|
resetModelOverride: modelState.resetModelOverride,
|
||||||
|
|||||||
@ -565,7 +565,8 @@ export async function runReplyAgent(params: {
|
|||||||
}
|
}
|
||||||
text = stripped.text;
|
text = stripped.text;
|
||||||
}
|
}
|
||||||
if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) return { skip: true };
|
if (isSilentReplyText(text, SILENT_REPLY_TOKEN))
|
||||||
|
return { skip: true };
|
||||||
return { text, skip: false };
|
return { text, skip: false };
|
||||||
};
|
};
|
||||||
const handlePartialForTyping = async (
|
const handlePartialForTyping = async (
|
||||||
@ -713,8 +714,6 @@ export async function runReplyAgent(params: {
|
|||||||
blockStreamingEnabled && opts?.onBlockReply
|
blockStreamingEnabled && opts?.onBlockReply
|
||||||
? async (payload) => {
|
? async (payload) => {
|
||||||
const { text, skip } = normalizeStreamingText(payload);
|
const { text, skip } = normalizeStreamingText(payload);
|
||||||
const hasMedia = (payload.mediaUrls?.length ?? 0) > 0;
|
|
||||||
if (skip && !hasMedia) return;
|
|
||||||
const taggedPayload = applyReplyTagsToPayload(
|
const taggedPayload = applyReplyTagsToPayload(
|
||||||
{
|
{
|
||||||
text,
|
text,
|
||||||
@ -723,6 +722,10 @@ export async function runReplyAgent(params: {
|
|||||||
},
|
},
|
||||||
sessionCtx.MessageSid,
|
sessionCtx.MessageSid,
|
||||||
);
|
);
|
||||||
|
const hasMedia =
|
||||||
|
Boolean(taggedPayload.mediaUrl) ||
|
||||||
|
(taggedPayload.mediaUrls?.length ?? 0) > 0;
|
||||||
|
if (skip && !hasMedia) return;
|
||||||
// Let through payloads with audioAsVoice flag even if empty (need to track it)
|
// Let through payloads with audioAsVoice flag even if empty (need to track it)
|
||||||
if (
|
if (
|
||||||
!isRenderablePayload(taggedPayload) &&
|
!isRenderablePayload(taggedPayload) &&
|
||||||
@ -737,9 +740,6 @@ export async function runReplyAgent(params: {
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
const cleaned = parsed.text || undefined;
|
const cleaned = parsed.text || undefined;
|
||||||
const hasMedia =
|
|
||||||
Boolean(taggedPayload.mediaUrl) ||
|
|
||||||
(taggedPayload.mediaUrls?.length ?? 0) > 0;
|
|
||||||
// Skip empty payloads unless they have audioAsVoice flag (need to track it)
|
// Skip empty payloads unless they have audioAsVoice flag (need to track it)
|
||||||
if (
|
if (
|
||||||
!cleaned &&
|
!cleaned &&
|
||||||
|
|||||||
@ -635,7 +635,9 @@ export async function applyInlineDirectivesFastLane(params: {
|
|||||||
resolveDefaultThinkingLevel: () => Promise<ThinkLevel>;
|
resolveDefaultThinkingLevel: () => Promise<ThinkLevel>;
|
||||||
allowedModelKeys: Set<string>;
|
allowedModelKeys: Set<string>;
|
||||||
allowedModelCatalog: Awaited<
|
allowedModelCatalog: Awaited<
|
||||||
ReturnType<typeof import("../../agents/model-catalog.js").loadModelCatalog>
|
ReturnType<
|
||||||
|
typeof import("../../agents/model-catalog.js").loadModelCatalog
|
||||||
|
>
|
||||||
>;
|
>;
|
||||||
resetModelOverride: boolean;
|
resetModelOverride: boolean;
|
||||||
};
|
};
|
||||||
@ -1357,7 +1359,9 @@ export async function handleDirectiveOnly(params: {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (directives.hasQueueDirective && directives.queueMode) {
|
if (directives.hasQueueDirective && directives.queueMode) {
|
||||||
parts.push(formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`));
|
parts.push(
|
||||||
|
formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`),
|
||||||
|
);
|
||||||
} else if (directives.hasQueueDirective && directives.queueReset) {
|
} else if (directives.hasQueueDirective && directives.queueReset) {
|
||||||
parts.push(formatDirectiveAck("Queue mode reset to default."));
|
parts.push(formatDirectiveAck("Queue mode reset to default."));
|
||||||
}
|
}
|
||||||
@ -1373,7 +1377,9 @@ export async function handleDirectiveOnly(params: {
|
|||||||
parts.push(formatDirectiveAck(`Queue cap set to ${directives.cap}.`));
|
parts.push(formatDirectiveAck(`Queue cap set to ${directives.cap}.`));
|
||||||
}
|
}
|
||||||
if (directives.hasQueueDirective && directives.dropPolicy) {
|
if (directives.hasQueueDirective && directives.dropPolicy) {
|
||||||
parts.push(formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`));
|
parts.push(
|
||||||
|
formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
const ack = parts.join(" ").trim();
|
const ack = parts.join(" ").trim();
|
||||||
if (!ack && directives.hasStatusDirective) return undefined;
|
if (!ack && directives.hasStatusDirective) return undefined;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user