fix: add audio capability to OpenAI provider
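The OpenAI provider implemented the `transcribeAudio` method but did not declare "audio" in its `capabilities` array. Because model entries are resolved against each provider's declared capabilities, OpenAI entries were dropped for audio requests, which caused audio transcription to fail when using OpenAI for Telegram voice messages. Fixes #3539.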
This commit is contained in:
parent
109ac1c549
commit
be03f1c6e8
@ -4,7 +4,7 @@ import { transcribeOpenAiCompatibleAudio } from "./audio.js";
|
|||||||
|
|
||||||
export const openaiProvider: MediaUnderstandingProvider = {
|
export const openaiProvider: MediaUnderstandingProvider = {
|
||||||
id: "openai",
|
id: "openai",
|
||||||
capabilities: ["image"],
|
capabilities: ["image", "audio"],
|
||||||
describeImage: describeImageWithModel,
|
describeImage: describeImageWithModel,
|
||||||
transcribeAudio: transcribeOpenAiCompatibleAudio,
|
transcribeAudio: transcribeOpenAiCompatibleAudio,
|
||||||
};
|
};
|
||||||
|
|||||||
65
src/media-understanding/providers/providers.test.ts
Normal file
65
src/media-understanding/providers/providers.test.ts
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { buildMediaUnderstandingRegistry } from "./index.js";
|
||||||
|
|
||||||
|
describe("media understanding providers", () => {
|
||||||
|
const registry = buildMediaUnderstandingRegistry();
|
||||||
|
|
||||||
|
it("providers declare capabilities matching their implemented methods", () => {
|
||||||
|
for (const [id, provider] of registry) {
|
||||||
|
const declared = provider.capabilities ?? [];
|
||||||
|
|
||||||
|
if (provider.transcribeAudio) {
|
||||||
|
expect(
|
||||||
|
declared.includes("audio"),
|
||||||
|
`Provider "${id}" has transcribeAudio but doesn't declare "audio" capability`,
|
||||||
|
).toBe(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (provider.describeImage) {
|
||||||
|
expect(
|
||||||
|
declared.includes("image"),
|
||||||
|
`Provider "${id}" has describeImage but doesn't declare "image" capability`,
|
||||||
|
).toBe(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (provider.describeVideo) {
|
||||||
|
expect(
|
||||||
|
declared.includes("video"),
|
||||||
|
`Provider "${id}" has describeVideo but doesn't declare "video" capability`,
|
||||||
|
).toBe(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("openai provider declares both image and audio capabilities", () => {
|
||||||
|
const openai = registry.get("openai");
|
||||||
|
expect(openai).toBeDefined();
|
||||||
|
expect(openai?.capabilities).toContain("image");
|
||||||
|
expect(openai?.capabilities).toContain("audio");
|
||||||
|
expect(openai?.describeImage).toBeDefined();
|
||||||
|
expect(openai?.transcribeAudio).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("groq provider declares audio capability", () => {
|
||||||
|
const groq = registry.get("groq");
|
||||||
|
expect(groq).toBeDefined();
|
||||||
|
expect(groq?.capabilities).toContain("audio");
|
||||||
|
expect(groq?.transcribeAudio).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("google provider declares image, audio, and video capabilities", () => {
|
||||||
|
const google = registry.get("google");
|
||||||
|
expect(google).toBeDefined();
|
||||||
|
expect(google?.capabilities).toContain("image");
|
||||||
|
expect(google?.capabilities).toContain("audio");
|
||||||
|
expect(google?.capabilities).toContain("video");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("deepgram provider declares audio capability", () => {
|
||||||
|
const deepgram = registry.get("deepgram");
|
||||||
|
expect(deepgram).toBeDefined();
|
||||||
|
expect(deepgram?.capabilities).toContain("audio");
|
||||||
|
expect(deepgram?.transcribeAudio).toBeDefined();
|
||||||
|
});
|
||||||
|
});
|
||||||
@ -4,7 +4,7 @@ import type { MoltbotConfig } from "../config/config.js";
|
|||||||
import { resolveEntriesWithActiveFallback, resolveModelEntries } from "./resolve.js";
|
import { resolveEntriesWithActiveFallback, resolveModelEntries } from "./resolve.js";
|
||||||
|
|
||||||
const providerRegistry = new Map([
|
const providerRegistry = new Map([
|
||||||
["openai", { capabilities: ["image"] }],
|
["openai", { capabilities: ["image", "audio"] }],
|
||||||
["groq", { capabilities: ["audio"] }],
|
["groq", { capabilities: ["audio"] }],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@ -30,7 +30,7 @@ describe("resolveModelEntries", () => {
|
|||||||
capability: "audio",
|
capability: "audio",
|
||||||
providerRegistry,
|
providerRegistry,
|
||||||
});
|
});
|
||||||
expect(audioEntries).toHaveLength(0);
|
expect(audioEntries).toHaveLength(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("keeps per-capability entries even without explicit caps", () => {
|
it("keeps per-capability entries even without explicit caps", () => {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user