fix(voice-call): align OpenAI Realtime STT with GA API format
The OpenAI Realtime STT provider was using an outdated Beta API format that doesn't produce transcriptions when using the GA endpoint.

Changes:
- Event type: transcription_session.update → session.update
- Session type: Added type: "transcription"
- Structure: Flat session → nested session.audio.input
- Format: g711_ulaw → audio/pcmu (MIME type)

Fixes #3447

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
a109b7f1a9
commit
5a9fd4de5c
@ -124,19 +124,26 @@ class OpenAIRealtimeSTTSession implements RealtimeSTTSession {
|
||||
this.connected = true;
|
||||
this.reconnectAttempts = 0;
|
||||
|
||||
// Configure the transcription session
|
||||
// Configure the transcription session (GA API format)
|
||||
this.sendEvent({
|
||||
type: "transcription_session.update",
|
||||
type: "session.update",
|
||||
session: {
|
||||
input_audio_format: "g711_ulaw",
|
||||
input_audio_transcription: {
|
||||
model: this.model,
|
||||
},
|
||||
turn_detection: {
|
||||
type: "server_vad",
|
||||
threshold: this.vadThreshold,
|
||||
prefix_padding_ms: 300,
|
||||
silence_duration_ms: this.silenceDurationMs,
|
||||
type: "transcription",
|
||||
audio: {
|
||||
input: {
|
||||
format: {
|
||||
type: "audio/pcmu",
|
||||
},
|
||||
transcription: {
|
||||
model: this.model,
|
||||
},
|
||||
turn_detection: {
|
||||
type: "server_vad",
|
||||
threshold: this.vadThreshold,
|
||||
prefix_padding_ms: 300,
|
||||
silence_duration_ms: this.silenceDurationMs,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
Loading…
Reference in New Issue
Block a user