feat(webchat): add inline audio playback for TTS-generated audio

Implements feature request #3504 - Inline audio playback in WebChat UI

Changes:
- Add InlineAudioParser to detect MEDIA: prefixed paths pointing to audio files
- Add InlineAudioPlayerView SwiftUI component with play/pause controls
- Integrate audio player rendering into ChatMarkdownRenderer
- Add comprehensive unit tests for audio path parsing

Supported audio formats: .mp3, .opus, .m4a, .ogg, .oga, .wav, .aac, .flac

The inline audio player displays:
- Play/pause button
- Audio file name
- Progress bar with duration
- An "Unable to load audio" message (with the play button disabled) when the file is missing or cannot be loaded

Closes #3504
This commit is contained in:
Aditya Bhuran 2026-01-28 13:18:59 -05:00
parent 109ac1c549
commit df1895ee1a
3 changed files with 424 additions and 8 deletions

View File

@ -20,15 +20,28 @@ struct ChatMarkdownRenderer: View {
let textColor: Color let textColor: Color
var body: some View { var body: some View {
let processed = ChatMarkdownPreprocessor.preprocess(markdown: self.text) // First extract any MEDIA: audio references
VStack(alignment: .leading, spacing: 10) { let audioResult = InlineAudioParser.parse(self.text)
StructuredText(markdown: processed.cleaned) // Then process images from the remaining text
.modifier(ChatMarkdownStyle( let processed = ChatMarkdownPreprocessor.preprocess(markdown: audioResult.cleaned)
variant: self.variant,
context: self.context,
font: self.font,
textColor: self.textColor))
VStack(alignment: .leading, spacing: 10) {
// Only render text if there's content after processing
if !processed.cleaned.isEmpty {
StructuredText(markdown: processed.cleaned)
.modifier(ChatMarkdownStyle(
variant: self.variant,
context: self.context,
font: self.font,
textColor: self.textColor))
}
// Render inline audio players
if !audioResult.audioFiles.isEmpty {
InlineAudioList(audioFiles: audioResult.audioFiles)
}
// Render inline images
if !processed.images.isEmpty { if !processed.images.isEmpty {
InlineImageList(images: processed.images) InlineImageList(images: processed.images)
} }

View File

@ -0,0 +1,280 @@
import AVFoundation
import SwiftUI
/// Detects and extracts `MEDIA:`-prefixed audio file paths from message text.
///
/// Supported audio extensions: .mp3, .opus, .m4a, .ogg, .oga, .wav, .aac, .flac
public enum InlineAudioParser {
    /// An inline audio reference found in message text.
    public struct InlineAudio: Identifiable, Equatable {
        /// Identifier generated when the value is created; it differs on every parse.
        public let id = UUID()
        /// The path exactly as it appeared after the `MEDIA:` prefix.
        public let path: String
        /// The last path component of `path`.
        public let displayName: String

        public init(path: String) {
            self.path = path
            self.displayName = (path as NSString).lastPathComponent
        }

        /// Two references are equal when they point at the same path.
        ///
        /// `id` is random per instance and `displayName` is derived from `path`,
        /// so comparing `path` alone is what makes equality meaningful.
        public static func == (lhs: InlineAudio, rhs: InlineAudio) -> Bool {
            lhs.path == rhs.path
        }
    }

    /// Result of parsing text for inline audio.
    public struct Result: Equatable {
        /// The input text with every matched `MEDIA:` reference removed.
        public let cleaned: String
        /// The extracted references, in order of appearance.
        public let audioFiles: [InlineAudio]
    }

    /// File extensions (without the dot) that are treated as audio.
    private static let audioExtensions = ["mp3", "opus", "m4a", "ogg", "oga", "wav", "aac", "flac"]

    /// Compiled matcher for `MEDIA:<path>.<ext>` references, built once and reused.
    ///
    /// The path is any run of characters other than whitespace, `<`, `>` and `"`,
    /// so paths containing spaces are not supported. The trailing negative
    /// lookahead keeps a longer extension such as `.wave` from being truncated
    /// to `.wav`.
    private static let regex: NSRegularExpression? = {
        let extPattern = InlineAudioParser.audioExtensions.joined(separator: "|")
        let pattern = #"MEDIA:([^\s<>\"]+\.(?:"# + extPattern + #"))(?![A-Za-z0-9])"#
        return try? NSRegularExpression(pattern: pattern, options: .caseInsensitive)
    }()

    /// Parses the given text and extracts any `MEDIA:` audio references.
    ///
    /// Matching is case-insensitive. When nothing matches, the text is returned
    /// untouched. Otherwise the matched references are cut out, the first
    /// occurrences of three consecutive newlines are reduced to two, and
    /// leading/trailing whitespace is trimmed. Whitespace that surrounded a
    /// removed reference inside a line is left as is.
    ///
    /// - Parameter text: The raw message text.
    /// - Returns: A `Result` containing the cleaned text and extracted audio files.
    public static func parse(_ text: String) -> Result {
        guard let regex = regex else {
            return Result(cleaned: text, audioFiles: [])
        }

        let matches = regex.matches(in: text, range: NSRange(text.startIndex..., in: text))
        guard !matches.isEmpty else {
            return Result(cleaned: text, audioFiles: [])
        }

        var audioFiles: [InlineAudio] = []
        audioFiles.reserveCapacity(matches.count)

        // Rebuild the text from the pieces between matches. NSRange values are
        // UTF-16 offsets, so they are converted to String.Index ranges instead of
        // being used as Character offsets (which breaks on emoji and similar text).
        var cleaned = ""
        var cursor = text.startIndex
        for match in matches {
            guard match.numberOfRanges >= 2,
                  let fullRange = Range(match.range, in: text),
                  let pathRange = Range(match.range(at: 1), in: text)
            else { continue }

            audioFiles.append(InlineAudio(path: String(text[pathRange])))
            cleaned.append(contentsOf: text[cursor..<fullRange.lowerBound])
            cursor = fullRange.upperBound
        }
        cleaned.append(contentsOf: text[cursor...])

        // Clean up the gaps left behind by removed references.
        let normalized = cleaned
            .replacingOccurrences(of: "\n\n\n", with: "\n\n")
            .trimmingCharacters(in: .whitespacesAndNewlines)

        return Result(cleaned: normalized, audioFiles: audioFiles)
    }
}
// MARK: - Audio Player View
/// A SwiftUI view that renders an inline audio player for a single audio file.
///
/// Shows a play/pause button and the file name. Once the file has been loaded a
/// progress bar and a time label are shown as well; if the file is missing or
/// cannot be opened the button is disabled and "Unable to load audio" is shown.
@MainActor
public struct InlineAudioPlayerView: View {
    let audioPath: String
    let displayName: String

    @State private var isPlaying = false
    @State private var player: AVAudioPlayer?
    @State private var progress: Double = 0
    @State private var duration: Double = 0
    @State private var loadError: Bool = false
    @State private var timer: Timer?

    /// Creates a player for the file at `audioPath`.
    /// - Parameters:
    ///   - audioPath: File-system path of the audio file.
    ///   - displayName: Label to show; defaults to the last path component of `audioPath`.
    public init(audioPath: String, displayName: String? = nil) {
        self.audioPath = audioPath
        self.displayName = displayName ?? (audioPath as NSString).lastPathComponent
    }

    public var body: some View {
        HStack(spacing: 12) {
            // Play/Pause Button
            Button(action: togglePlayback) {
                Image(systemName: isPlaying ? "pause.circle.fill" : "play.circle.fill")
                    .font(.system(size: 32))
                    .foregroundStyle(loadError ? .secondary : .accentColor)
            }
            .buttonStyle(.plain)
            .disabled(loadError)

            VStack(alignment: .leading, spacing: 4) {
                // File name
                Text(displayName)
                    .font(.footnote.weight(.medium))
                    .lineLimit(1)
                    .foregroundStyle(.primary)

                // Progress bar (only once a duration is known)
                if duration > 0 {
                    HStack(spacing: 6) {
                        GeometryReader { geo in
                            ZStack(alignment: .leading) {
                                Capsule()
                                    .fill(Color.secondary.opacity(0.2))
                                    .frame(height: 4)
                                Capsule()
                                    .fill(Color.accentColor)
                                    .frame(
                                        width: geo.size.width * max(0, min(progress / duration, 1.0)),
                                        height: 4)
                            }
                        }
                        .frame(height: 4)

                        // Elapsed time while playing, total duration otherwise.
                        Text(formatTime(isPlaying ? progress : duration))
                            .font(.caption2.monospacedDigit())
                            .foregroundStyle(.secondary)
                            .frame(minWidth: 36, alignment: .trailing)
                    }
                } else if loadError {
                    Text("Unable to load audio")
                        .font(.caption2)
                        .foregroundStyle(.secondary)
                }
            }

            Spacer(minLength: 0)
        }
        .padding(.horizontal, 12)
        .padding(.vertical, 10)
        .background(
            RoundedRectangle(cornerRadius: 12, style: .continuous)
                .fill(Color.secondary.opacity(0.1))
        )
        .overlay(
            RoundedRectangle(cornerRadius: 12, style: .continuous)
                .strokeBorder(Color.secondary.opacity(0.15), lineWidth: 1)
        )
        .onAppear {
            loadAudio()
        }
        .onDisappear {
            stopPlayback()
        }
    }

    /// Creates the `AVAudioPlayer` for `audioPath` and records its duration,
    /// or sets `loadError` when the file is missing or cannot be opened.
    private func loadAudio() {
        // Keep an already-loaded player so `progress` and the player's position
        // stay in agreement when the view appears again.
        guard player == nil else { return }

        guard FileManager.default.fileExists(atPath: audioPath) else {
            loadError = true
            return
        }

        do {
            let audioPlayer = try AVAudioPlayer(contentsOf: URL(fileURLWithPath: audioPath))
            audioPlayer.prepareToPlay()
            player = audioPlayer
            duration = audioPlayer.duration
            loadError = false
        } catch {
            loadError = true
        }
    }

    /// Button action: loads the file if no player exists yet, otherwise
    /// switches between playing and paused.
    private func togglePlayback() {
        guard let player = player else {
            loadAudio()
            return
        }

        if isPlaying {
            pausePlayback()
        } else {
            startPlayback(player)
        }
    }

    /// Starts playback and schedules the periodic progress refresh.
    private func startPlayback(_ player: AVAudioPlayer) {
        // `play()` reports failure through its return value; don't claim to be
        // playing when it did not start.
        guard player.play() else { return }
        isPlaying = true

        timer?.invalidate()
        let ticker = Timer(timeInterval: 0.1, repeats: true) { _ in
            // All state access happens on the main actor.
            Task { @MainActor in
                refreshProgress()
            }
        }
        // `.common` keeps the progress bar updating while the user is scrolling.
        RunLoop.main.add(ticker, forMode: .common)
        timer = ticker
    }

    /// Timer tick: mirrors the player's position into `progress` and notices
    /// when the player has stopped on its own.
    private func refreshProgress() {
        // A tick that was queued before a pause/stop must not touch anything.
        guard isPlaying else { return }
        guard let player = player else {
            stopPlayback()
            return
        }

        if player.isPlaying {
            progress = player.currentTime
            return
        }

        // The player is no longer playing although nobody paused it. Treat a
        // position at either end as "finished" — the position may already be
        // back at 0 by the time this runs — and rewind; otherwise keep the
        // position so playback can be resumed.
        let position = player.currentTime
        stopPlayback()
        if position <= 0 || position >= duration - 0.1 {
            player.currentTime = 0
            progress = 0
        } else {
            progress = position
        }
    }

    /// Pauses the player and stops the progress timer.
    private func pausePlayback() {
        player?.pause()
        isPlaying = false
        timer?.invalidate()
        timer = nil
    }

    /// Stops the player and stops the progress timer.
    private func stopPlayback() {
        player?.stop()
        isPlaying = false
        timer?.invalidate()
        timer = nil
    }

    /// Formats a number of seconds as `m:ss`.
    private func formatTime(_ time: Double) -> String {
        // Int(_:) traps on NaN/infinity; fall back to zero for such values.
        guard time.isFinite, time > 0 else { return "0:00" }
        let totalSeconds = Int(time)
        return String(format: "%d:%02d", totalSeconds / 60, totalSeconds % 60)
    }
}
// MARK: - Audio Player List View
/// A view that displays one inline audio player per audio reference.
@MainActor
public struct InlineAudioList: View {
    let audioFiles: [InlineAudioParser.InlineAudio]

    /// One entry of the list together with a key that is stable across re-parses.
    private struct Row: Identifiable {
        let id: String
        let audio: InlineAudioParser.InlineAudio
    }

    public init(audioFiles: [InlineAudioParser.InlineAudio]) {
        self.audioFiles = audioFiles
    }

    /// Rows keyed by position and path.
    ///
    /// `InlineAudio.id` is a fresh UUID every time the text is parsed, so using
    /// it as the `ForEach` identity would make each re-parse look like a brand
    /// new set of rows and throw away the players' state (including playback).
    /// Position plus path stays the same for as long as the reference does, and
    /// the position keeps repeated references to one file distinct.
    private var rows: [Row] {
        audioFiles.enumerated().map { index, audio in
            Row(id: "\(index):\(audio.path)", audio: audio)
        }
    }

    public var body: some View {
        ForEach(rows) { row in
            InlineAudioPlayerView(audioPath: row.audio.path, displayName: row.audio.displayName)
        }
    }
}
// MARK: - Preview
#if DEBUG
/// Debug-only preview showing two players stacked in a fixed-width column:
/// one pointing at `/tmp/test.mp3` and one pointing at a path under `/nonexistent`.
struct InlineAudioPlayerView_Previews: PreviewProvider {
    /// Width of the preview column.
    private static let columnWidth: CGFloat = 320

    /// Player for the sample file in `/tmp`.
    @MainActor private static var sampleInTmp: InlineAudioPlayerView {
        InlineAudioPlayerView(audioPath: "/tmp/test.mp3", displayName: "voice-message.mp3")
    }

    /// Player for the `/nonexistent` sample path.
    @MainActor private static var sampleAtNonexistentPath: InlineAudioPlayerView {
        InlineAudioPlayerView(audioPath: "/nonexistent/path.opus", displayName: "missing-file.opus")
    }

    static var previews: some View {
        VStack(spacing: 16) {
            sampleInTmp
            sampleAtNonexistentPath
        }
        .padding()
        .frame(width: columnWidth)
    }
}
#endif

View File

@ -0,0 +1,123 @@
import MoltbotChatUI
import Foundation
import Testing
/// Tests for `InlineAudioParser.parse(_:)`: detection of `MEDIA:` audio
/// references and the text that is left after they are removed.
@Suite struct InlineAudioParserTests {
    // MARK: - No matches

    @Test func parsesEmptyText() {
        let result = InlineAudioParser.parse("")
        #expect(result.cleaned.isEmpty)
        #expect(result.audioFiles.isEmpty)
    }

    @Test func parsesTextWithoutMediaPaths() {
        let text = "Hello, this is a regular message without any media."
        let result = InlineAudioParser.parse(text)
        #expect(result.cleaned == text)
        #expect(result.audioFiles.isEmpty)
    }

    // MARK: - Supported extensions

    @Test func detectsMP3MediaPath() {
        let text = "Here is your audio: MEDIA:/tmp/voice-12345.mp3"
        let result = InlineAudioParser.parse(text)
        #expect(result.cleaned == "Here is your audio:")
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/tmp/voice-12345.mp3")
        #expect(result.audioFiles[0].displayName == "voice-12345.mp3")
    }

    @Test func detectsOpusMediaPath() {
        let text = "Voice message: MEDIA:/var/data/message.opus"
        let result = InlineAudioParser.parse(text)
        #expect(result.cleaned == "Voice message:")
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/var/data/message.opus")
    }

    @Test func detectsM4AMediaPath() {
        let text = "MEDIA:/path/to/audio.m4a is ready"
        let result = InlineAudioParser.parse(text)
        #expect(result.cleaned == "is ready")
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/path/to/audio.m4a")
    }

    @Test func detectsOGGMediaPath() {
        let text = "Listen: MEDIA:/files/sound.ogg"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/files/sound.ogg")
    }

    @Test func detectsWAVMediaPath() {
        let text = "Audio clip: MEDIA:/recordings/clip.wav"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/recordings/clip.wav")
    }

    // MARK: - Matching rules

    @Test func detectsMultipleMediaPaths() {
        let text = """
        Here are two audio files:
        First: MEDIA:/tmp/voice1.mp3
        Second: MEDIA:/tmp/voice2.opus
        Enjoy!
        """
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 2)
        #expect(result.audioFiles[0].path == "/tmp/voice1.mp3")
        #expect(result.audioFiles[1].path == "/tmp/voice2.opus")
        #expect(result.cleaned.contains("Here are two audio files:"))
        #expect(result.cleaned.contains("Enjoy!"))
        #expect(!result.cleaned.contains("MEDIA:"))
    }

    @Test func handlesMediaPathWithDashesAndUnderscores() {
        let text = "MEDIA:/path/to/voice-message_2024-01-15.mp3"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/path/to/voice-message_2024-01-15.mp3")
    }

    @Test func isCaseInsensitive() {
        let text1 = "media:/tmp/test.mp3"
        let text2 = "Media:/tmp/test.mp3"
        let text3 = "MEDIA:/tmp/test.mp3"
        let result1 = InlineAudioParser.parse(text1)
        let result2 = InlineAudioParser.parse(text2)
        let result3 = InlineAudioParser.parse(text3)
        #expect(result1.audioFiles.count == 1)
        #expect(result2.audioFiles.count == 1)
        #expect(result3.audioFiles.count == 1)
    }

    @Test func ignoresNonAudioMediaPaths() {
        // The parser should only detect audio extensions
        let text = "Image: MEDIA:/tmp/photo.jpg"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.isEmpty)
        #expect(result.cleaned == text) // Text unchanged for non-audio
    }

    // MARK: - Surrounding text

    @Test func preservesTextAroundMediaPath() {
        let text = "Before MEDIA:/tmp/audio.mp3 After"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        // Removing the reference leaves the space on each side of it behind, so
        // accept both the two-space result and a collapsed single space.
        #expect(result.cleaned == "Before  After" || result.cleaned == "Before After")
    }

    @Test func handlesMediaPathAtStartOfLine() {
        let text = "MEDIA:/tmp/audio.mp3\nSome text after"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.cleaned.contains("Some text after"))
    }

    @Test func handlesMediaPathAtEndOfLine() {
        let text = "Check this out: MEDIA:/tmp/audio.mp3"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.cleaned == "Check this out:")
    }
}