This commit is contained in:
Suksham 2026-01-30 14:09:01 +03:00 committed by GitHub
commit e8d5ec6476
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 68 additions and 3 deletions

View File

@ -4,7 +4,7 @@ import { transcribeOpenAiCompatibleAudio } from "./audio.js";
 export const openaiProvider: MediaUnderstandingProvider = {
   id: "openai",
-  capabilities: ["image"],
+  capabilities: ["image", "audio"],
   describeImage: describeImageWithModel,
   transcribeAudio: transcribeOpenAiCompatibleAudio,
 };

View File

@ -0,0 +1,65 @@
import { describe, expect, it } from "vitest";
import { buildMediaUnderstandingRegistry } from "./index.js";
/**
 * Checks that every provider in the media-understanding registry advertises
 * a capability for each handler method it implements, then spot-checks the
 * openai, groq, google and deepgram entries individually.
 */
describe("media understanding providers", () => {
  const registry = buildMediaUnderstandingRegistry();

  // Handler method -> capability tag it must be accompanied by.
  // The order is the order in which the checks run for each provider.
  const methodCapabilityPairs = [
    ["transcribeAudio", "audio"],
    ["describeImage", "image"],
    ["describeVideo", "video"],
  ] as const;

  it("providers declare capabilities matching their implemented methods", () => {
    for (const [id, provider] of registry) {
      // A provider without a capabilities list is treated as declaring none.
      const declared = provider.capabilities ?? [];

      for (const [method, capability] of methodCapabilityPairs) {
        // Only implemented handlers impose a requirement.
        if (!provider[method]) {
          continue;
        }
        expect(
          declared.includes(capability),
          `Provider "${id}" has ${method} but doesn't declare "${capability}" capability`,
        ).toBe(true);
      }
    }
  });

  it("openai provider declares both image and audio capabilities", () => {
    const entry = registry.get("openai");
    expect(entry).toBeDefined();

    const caps = entry?.capabilities;
    expect(caps).toContain("image");
    expect(caps).toContain("audio");

    expect(entry?.describeImage).toBeDefined();
    expect(entry?.transcribeAudio).toBeDefined();
  });

  it("groq provider declares audio capability", () => {
    const entry = registry.get("groq");
    expect(entry).toBeDefined();
    expect(entry?.capabilities).toContain("audio");
    expect(entry?.transcribeAudio).toBeDefined();
  });

  it("google provider declares image, audio, and video capabilities", () => {
    const entry = registry.get("google");
    expect(entry).toBeDefined();

    const caps = entry?.capabilities;
    expect(caps).toContain("image");
    expect(caps).toContain("audio");
    expect(caps).toContain("video");
  });

  it("deepgram provider declares audio capability", () => {
    const entry = registry.get("deepgram");
    expect(entry).toBeDefined();
    expect(entry?.capabilities).toContain("audio");
    expect(entry?.transcribeAudio).toBeDefined();
  });
});

View File

@ -4,7 +4,7 @@ import type { OpenClawConfig } from "../config/config.js";
 import { resolveEntriesWithActiveFallback, resolveModelEntries } from "./resolve.js";
 const providerRegistry = new Map([
-  ["openai", { capabilities: ["image"] }],
+  ["openai", { capabilities: ["image", "audio"] }],
   ["groq", { capabilities: ["audio"] }],
 ]);
@ -30,7 +30,7 @@ describe("resolveModelEntries", () => {
       capability: "audio",
       providerRegistry,
     });
-    expect(audioEntries).toHaveLength(0);
+    expect(audioEntries).toHaveLength(1);
   });
   it("keeps per-capability entries even without explicit caps", () => {