fix(voice-call): align OpenAI Realtime STT with GA API format
The OpenAI Realtime STT provider was using an outdated Beta API format that doesn't produce transcriptions when using the GA endpoint.

Changes:
- Event type: transcription_session.update → session.update
- Session type: Added type: "transcription"
- Structure: Flat session → nested session.audio.input
- Format: g711_ulaw → audio/pcmu (MIME type)

Fixes #3447

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
a109b7f1a9
commit
5a9fd4de5c
@@ -124,19 +124,26 @@ class OpenAIRealtimeSTTSession implements RealtimeSTTSession {
|
|||||||
this.connected = true;
|
this.connected = true;
|
||||||
this.reconnectAttempts = 0;
|
this.reconnectAttempts = 0;
|
||||||
|
|
||||||
// Configure the transcription session
|
// Configure the transcription session (GA API format)
|
||||||
this.sendEvent({
|
this.sendEvent({
|
||||||
type: "transcription_session.update",
|
type: "session.update",
|
||||||
session: {
|
session: {
|
||||||
input_audio_format: "g711_ulaw",
|
type: "transcription",
|
||||||
input_audio_transcription: {
|
audio: {
|
||||||
model: this.model,
|
input: {
|
||||||
},
|
format: {
|
||||||
turn_detection: {
|
type: "audio/pcmu",
|
||||||
type: "server_vad",
|
},
|
||||||
threshold: this.vadThreshold,
|
transcription: {
|
||||||
prefix_padding_ms: 300,
|
model: this.model,
|
||||||
silence_duration_ms: this.silenceDurationMs,
|
},
|
||||||
|
turn_detection: {
|
||||||
|
type: "server_vad",
|
||||||
|
threshold: this.vadThreshold,
|
||||||
|
prefix_padding_ms: 300,
|
||||||
|
silence_duration_ms: this.silenceDurationMs,
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user