fix: add audio capability to OpenAI provider
The OpenAI provider had the `transcribeAudio` method implemented but didn't declare "audio" in its capabilities array. This caused audio transcription to fail when using OpenAI for Telegram voice messages. Fixes #3539
This commit is contained in:
parent
109ac1c549
commit
be03f1c6e8
@@ -4,7 +4,7 @@ import { transcribeOpenAiCompatibleAudio } from "./audio.js";
|
||||
|
||||
// Provider entry registered under the id "openai". It wires image description
// to describeImageWithModel and audio transcription to
// transcribeOpenAiCompatibleAudio. This span is a diff hunk whose +/- markers
// were lost in rendering, so both versions of the `capabilities` line appear.
export const openaiProvider: MediaUnderstandingProvider = {
|
||||
id: "openai",
|
||||
// Pre-change line (removed by this commit): only "image" was declared, even
// though transcribeAudio is wired up below.
capabilities: ["image"],
|
||||
// Post-change line (added by this commit): "audio" is now declared as well.
capabilities: ["image", "audio"],
|
||||
describeImage: describeImageWithModel,
|
||||
transcribeAudio: transcribeOpenAiCompatibleAudio,
|
||||
};
|
||||
|
||||
65
src/media-understanding/providers/providers.test.ts
Normal file
65
src/media-understanding/providers/providers.test.ts
Normal file
@@ -0,0 +1,65 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { buildMediaUnderstandingRegistry } from "./index.js";
|
||||
|
||||
/**
 * Consistency checks for the media-understanding provider registry.
 *
 * The first test walks every registered provider and requires that each
 * implemented handler (transcribeAudio / describeImage / describeVideo) is
 * matched by the corresponding entry in `capabilities`. The remaining tests
 * spot-check individual providers by id.
 */
describe("media understanding providers", () => {
  const registry = buildMediaUnderstandingRegistry();

  /**
   * Requires `capability` to be declared whenever the matching handler exists.
   * Providers that do not implement the handler are not checked.
   */
  const expectDeclared = (
    id: string,
    declared: readonly string[],
    handlerPresent: boolean,
    handlerName: string,
    capability: string,
  ): void => {
    if (!handlerPresent) {
      return;
    }
    expect(
      declared.includes(capability),
      `Provider "${id}" has ${handlerName} but doesn't declare "${capability}" capability`,
    ).toBe(true);
  };

  it("providers declare capabilities matching their implemented methods", () => {
    for (const [providerId, entry] of registry) {
      // A provider without a capabilities list is treated as declaring nothing.
      const declared = entry.capabilities ?? [];

      // Checked in a fixed order (audio, image, video); the first failing
      // expectation is the one that gets reported.
      expectDeclared(providerId, declared, Boolean(entry.transcribeAudio), "transcribeAudio", "audio");
      expectDeclared(providerId, declared, Boolean(entry.describeImage), "describeImage", "image");
      expectDeclared(providerId, declared, Boolean(entry.describeVideo), "describeVideo", "video");
    }
  });

  it("openai provider declares both image and audio capabilities", () => {
    const provider = registry.get("openai");

    expect(provider).toBeDefined();
    for (const capability of ["image", "audio"]) {
      expect(provider?.capabilities).toContain(capability);
    }
    expect(provider?.describeImage).toBeDefined();
    expect(provider?.transcribeAudio).toBeDefined();
  });

  it("groq provider declares audio capability", () => {
    const provider = registry.get("groq");

    expect(provider).toBeDefined();
    expect(provider?.capabilities).toContain("audio");
    expect(provider?.transcribeAudio).toBeDefined();
  });

  it("google provider declares image, audio, and video capabilities", () => {
    const provider = registry.get("google");

    expect(provider).toBeDefined();
    for (const capability of ["image", "audio", "video"]) {
      expect(provider?.capabilities).toContain(capability);
    }
  });

  it("deepgram provider declares audio capability", () => {
    const provider = registry.get("deepgram");

    expect(provider).toBeDefined();
    expect(provider?.capabilities).toContain("audio");
    expect(provider?.transcribeAudio).toBeDefined();
  });
});
|
||||
@@ -4,7 +4,7 @@ import type { MoltbotConfig } from "../config/config.js";
|
||||
import { resolveEntriesWithActiveFallback, resolveModelEntries } from "./resolve.js";
|
||||
|
||||
// Test fixture: a Map from provider id to an object holding that provider's
// declared capabilities. It is passed as `providerRegistry` to the call under
// test further down. This span is a diff hunk whose +/- markers were lost in
// rendering, so both versions of the "openai" entry appear.
const providerRegistry = new Map([
|
||||
// Pre-change line (removed by this commit): openai declared only "image".
["openai", { capabilities: ["image"] }],
|
||||
// Post-change line (added by this commit): openai also declares "audio".
["openai", { capabilities: ["image", "audio"] }],
|
||||
["groq", { capabilities: ["audio"] }],
|
||||
]);
|
||||
|
||||
@@ -30,7 +30,7 @@ describe("resolveModelEntries", () => {
|
||||
capability: "audio",
|
||||
providerRegistry,
|
||||
});
|
||||
expect(audioEntries).toHaveLength(0);
|
||||
expect(audioEntries).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("keeps per-capability entries even without explicit caps", () => {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user