feat(webchat): add inline audio playback for TTS-generated audio
Implements feature request #3504 - Inline audio playback in WebChat UI.

Changes:
- Add InlineAudioParser to detect MEDIA: prefixed paths pointing to audio files
- Add InlineAudioPlayerView SwiftUI component with play/pause controls
- Integrate audio player rendering into ChatMarkdownRenderer
- Add comprehensive unit tests for audio path parsing

Supported audio formats: .mp3, .opus, .m4a, .ogg, .oga, .wav, .aac, .flac

The inline audio player displays:
- Play/pause button
- Audio file name
- Progress bar with duration
- Graceful error handling for missing files

Closes #3504
This commit is contained in:
parent
109ac1c549
commit
df1895ee1a
@ -20,15 +20,28 @@ struct ChatMarkdownRenderer: View {
|
||||
let textColor: Color
|
||||
|
||||
var body: some View {
|
||||
let processed = ChatMarkdownPreprocessor.preprocess(markdown: self.text)
|
||||
VStack(alignment: .leading, spacing: 10) {
|
||||
StructuredText(markdown: processed.cleaned)
|
||||
.modifier(ChatMarkdownStyle(
|
||||
variant: self.variant,
|
||||
context: self.context,
|
||||
font: self.font,
|
||||
textColor: self.textColor))
|
||||
// First extract any MEDIA: audio references
|
||||
let audioResult = InlineAudioParser.parse(self.text)
|
||||
// Then process images from the remaining text
|
||||
let processed = ChatMarkdownPreprocessor.preprocess(markdown: audioResult.cleaned)
|
||||
|
||||
VStack(alignment: .leading, spacing: 10) {
|
||||
// Only render text if there's content after processing
|
||||
if !processed.cleaned.isEmpty {
|
||||
StructuredText(markdown: processed.cleaned)
|
||||
.modifier(ChatMarkdownStyle(
|
||||
variant: self.variant,
|
||||
context: self.context,
|
||||
font: self.font,
|
||||
textColor: self.textColor))
|
||||
}
|
||||
|
||||
// Render inline audio players
|
||||
if !audioResult.audioFiles.isEmpty {
|
||||
InlineAudioList(audioFiles: audioResult.audioFiles)
|
||||
}
|
||||
|
||||
// Render inline images
|
||||
if !processed.images.isEmpty {
|
||||
InlineImageList(images: processed.images)
|
||||
}
|
||||
|
||||
@ -0,0 +1,280 @@
|
||||
import AVFoundation
|
||||
import SwiftUI
|
||||
|
||||
/// Detects and extracts `MEDIA:`-prefixed audio file paths from message text.
///
/// Supported extensions (matched case-insensitively):
/// .mp3, .opus, .m4a, .ogg, .oga, .wav, .aac, .flac
public enum InlineAudioParser {
    /// A single inline audio reference found in text.
    public struct InlineAudio: Identifiable, Equatable {
        /// Freshly generated for every instance, so it is NOT stable across
        /// separate `parse` calls on the same text.
        public let id = UUID()
        /// The path exactly as it appeared after the `MEDIA:` prefix.
        public let path: String
        /// The last path component of `path`.
        public let displayName: String

        public init(path: String) {
            self.path = path
            self.displayName = (path as NSString).lastPathComponent
        }

        /// Compares content only. The synthesized implementation would also
        /// compare the random `id`, so two references to the same file (and
        /// therefore two `Result`s of the same text) could never be equal.
        public static func == (lhs: InlineAudio, rhs: InlineAudio) -> Bool {
            lhs.path == rhs.path && lhs.displayName == rhs.displayName
        }
    }

    /// Result of parsing text for inline audio.
    public struct Result: Equatable {
        /// The input with every matched `MEDIA:` reference removed, runs of
        /// three or more newlines collapsed to two, and surrounding
        /// whitespace trimmed. Equal to the input when nothing matched.
        public let cleaned: String
        /// Extracted references, in order of appearance.
        public let audioFiles: [InlineAudio]
    }

    private static let audioExtensions = ["mp3", "opus", "m4a", "ogg", "oga", "wav", "aac", "flac"]

    /// Regex source for `MEDIA:<path>.<ext>` references.
    ///
    /// The path is any run of characters other than whitespace, `<`, `>`
    /// and `"` (so paths containing spaces are not supported). The trailing
    /// negative lookahead stops an extension from matching as a prefix of a
    /// longer suffix (e.g. `.mp3x`), which would otherwise leave stray
    /// characters behind in the cleaned text.
    private static var pattern: String {
        let extPattern = audioExtensions.joined(separator: "|")
        return #"MEDIA:([^\s<>\"]+\.(?:"# + extPattern + #"))(?![A-Za-z0-9])"#
    }

    /// Compiled once and reused by every `parse` call.
    private static let regex: NSRegularExpression? =
        try? NSRegularExpression(pattern: pattern, options: .caseInsensitive)

    /// Parses the given text and extracts any `MEDIA:` audio references.
    /// - Parameter text: The raw message text.
    /// - Returns: A `Result` containing the cleaned text and the extracted
    ///   audio files. If nothing matches, `cleaned` is `text` unchanged.
    public static func parse(_ text: String) -> Result {
        guard let regex = regex else {
            return Result(cleaned: text, audioFiles: [])
        }

        let wholeText = NSRange(text.startIndex..<text.endIndex, in: text)
        let matches = regex.matches(in: text, options: [], range: wholeText)
        guard !matches.isEmpty else {
            return Result(cleaned: text, audioFiles: [])
        }

        var audioFiles: [InlineAudio] = []
        audioFiles.reserveCapacity(matches.count)

        // Rebuild the text from the gaps between matches. NSRange values are
        // UTF-16 offsets, so they must be converted with `Range(_:in:)`;
        // using them as Character offsets mis-slices (or traps) as soon as
        // the text contains emoji or other multi-unit characters.
        var cleaned = ""
        var cursor = text.startIndex
        for match in matches {
            guard match.numberOfRanges >= 2,
                  let tokenRange = Range(match.range, in: text),
                  let pathRange = Range(match.range(at: 1), in: text)
            else { continue }

            audioFiles.append(InlineAudio(path: String(text[pathRange])))
            cleaned.append(contentsOf: text[cursor..<tokenRange.lowerBound])
            cursor = tokenRange.upperBound
        }
        cleaned.append(contentsOf: text[cursor...])

        // Removing a token that sat on its own line can leave a long run of
        // newlines; collapse every such run, not just the first triple.
        let normalized = cleaned
            .replacingOccurrences(of: #"\n{3,}"#, with: "\n\n", options: .regularExpression)
            .trimmingCharacters(in: .whitespacesAndNewlines)

        return Result(cleaned: normalized, audioFiles: audioFiles)
    }
}
|
||||
|
||||
// MARK: - Audio Player View
|
||||
|
||||
/// A SwiftUI view that renders an inline audio player for a single audio file.
///
/// Shows a play/pause button and the file name. Once the file has been loaded
/// it also shows a progress bar with a time label; if loading fails it shows
/// an "Unable to load audio" message and disables the button.
@MainActor
public struct InlineAudioPlayerView: View {
    let audioPath: String
    let displayName: String

    @State private var isPlaying = false
    @State private var player: AVAudioPlayer?
    @State private var progress: Double = 0
    @State private var duration: Double = 0
    @State private var loadError: Bool = false
    @State private var timer: Timer?

    /// - Parameters:
    ///   - audioPath: File-system path of the audio file to play.
    ///   - displayName: Label to show; defaults to the last path component
    ///     of `audioPath`.
    public init(audioPath: String, displayName: String? = nil) {
        self.audioPath = audioPath
        self.displayName = displayName ?? (audioPath as NSString).lastPathComponent
    }

    public var body: some View {
        HStack(spacing: 12) {
            // Play/Pause Button
            Button(action: togglePlayback) {
                Image(systemName: isPlaying ? "pause.circle.fill" : "play.circle.fill")
                    .font(.system(size: 32))
                    // Both branches are spelled as `Color` so the ternary has
                    // a single, unambiguous ShapeStyle type.
                    .foregroundStyle(loadError ? Color.secondary : Color.accentColor)
            }
            .buttonStyle(.plain)
            .disabled(loadError)

            VStack(alignment: .leading, spacing: 4) {
                // File name
                Text(displayName)
                    .font(.footnote.weight(.medium))
                    .lineLimit(1)
                    .foregroundStyle(.primary)

                // Progress bar
                if duration > 0 {
                    HStack(spacing: 6) {
                        GeometryReader { geo in
                            ZStack(alignment: .leading) {
                                Capsule()
                                    .fill(Color.secondary.opacity(0.2))
                                    .frame(height: 4)

                                Capsule()
                                    .fill(Color.accentColor)
                                    .frame(width: geo.size.width * playedFraction, height: 4)
                            }
                        }
                        .frame(height: 4)

                        // Elapsed time while playing, total duration otherwise.
                        Text(formatTime(isPlaying ? progress : duration))
                            .font(.caption2.monospacedDigit())
                            .foregroundStyle(.secondary)
                            .frame(minWidth: 36, alignment: .trailing)
                    }
                } else if loadError {
                    Text("Unable to load audio")
                        .font(.caption2)
                        .foregroundStyle(.secondary)
                }
            }

            Spacer(minLength: 0)
        }
        .padding(.horizontal, 12)
        .padding(.vertical, 10)
        .background(
            RoundedRectangle(cornerRadius: 12, style: .continuous)
                .fill(Color.secondary.opacity(0.1))
        )
        .overlay(
            RoundedRectangle(cornerRadius: 12, style: .continuous)
                .strokeBorder(Color.secondary.opacity(0.15), lineWidth: 1)
        )
        .onAppear {
            loadAudio()
        }
        .onDisappear {
            stopPlayback()
        }
    }

    /// Fraction of the track already played, clamped to `0...1`.
    /// Only meaningful while `duration > 0` (the progress bar is hidden otherwise).
    private var playedFraction: Double {
        guard duration > 0 else { return 0 }
        return max(0, min(progress / duration, 1.0))
    }

    /// Creates the `AVAudioPlayer` for `audioPath` and records its duration.
    /// Sets `loadError` when the file is missing or cannot be opened.
    private func loadAudio() {
        // `onAppear` can fire more than once; keep an already-loaded player.
        guard player == nil else { return }

        guard FileManager.default.fileExists(atPath: audioPath) else {
            loadError = true
            return
        }

        do {
            let audioPlayer = try AVAudioPlayer(contentsOf: URL(fileURLWithPath: audioPath))
            audioPlayer.prepareToPlay()
            player = audioPlayer
            duration = audioPlayer.duration
            loadError = false
        } catch {
            loadError = true
        }
    }

    /// Button action: starts or pauses playback, loading the file first if
    /// that has not happened yet.
    private func togglePlayback() {
        if player == nil {
            loadAudio()
        }
        // Still nil means loading failed; `loadError` is already set.
        guard let player = player else { return }

        if isPlaying {
            pausePlayback()
        } else {
            startPlayback(player)
        }
    }

    /// Starts playback and schedules the periodic progress refresh.
    private func startPlayback(_ player: AVAudioPlayer) {
        // `play()` reports failure through its return value; do not flip the
        // UI into the "playing" state if nothing is actually playing.
        guard player.play() else {
            isPlaying = false
            return
        }
        isPlaying = true

        // Never leave a previous timer running alongside the new one.
        cancelTimer()

        // The timer block is not main-actor isolated, so hop to the main
        // actor before touching view state.
        let ticker = Timer(timeInterval: 0.1, repeats: true) { _ in
            Task { @MainActor in
                refreshProgress()
            }
        }
        // `.common` keeps the progress bar moving while the user scrolls.
        RunLoop.main.add(ticker, forMode: .common)
        timer = ticker
    }

    /// One timer tick: mirrors the player position into `progress` and
    /// detects that playback has ended.
    private func refreshProgress() {
        // A tick can be delivered after pause/stop already ran; ignore it.
        guard isPlaying else { return }
        guard let player = player else {
            stopPlayback()
            return
        }

        if player.isPlaying {
            progress = player.currentTime
            return
        }

        // The player stopped without going through `pausePlayback()`.
        // NOTE(review): AVAudioPlayer appears to rewind `currentTime` to 0
        // once a track finishes, so a position of 0 is treated as "finished"
        // in addition to "within 0.1s of the end" — confirm against the docs.
        let position = player.currentTime
        if position <= 0 || position >= duration - 0.1 {
            stopPlayback()
            progress = 0
            player.currentTime = 0
        } else {
            // Stopped mid-track by something else: keep the position.
            pausePlayback()
            progress = position
        }
    }

    /// Pauses playback, keeping the current position.
    private func pausePlayback() {
        player?.pause()
        isPlaying = false
        cancelTimer()
    }

    /// Stops playback and the progress timer.
    private func stopPlayback() {
        player?.stop()
        isPlaying = false
        cancelTimer()
    }

    /// Invalidates and clears the progress timer, if any.
    private func cancelTimer() {
        timer?.invalidate()
        timer = nil
    }

    /// Formats a duration in seconds as `m:ss`.
    /// Non-finite or negative values are rendered as `0:00` instead of
    /// trapping in the `Int` conversion.
    private func formatTime(_ time: Double) -> String {
        guard time.isFinite, time > 0,
              let totalSeconds = Int(exactly: time.rounded(.down))
        else { return "0:00" }
        return String(format: "%d:%02d", totalSeconds / 60, totalSeconds % 60)
    }
}
|
||||
|
||||
// MARK: - Audio Player List View
|
||||
|
||||
/// A view that displays one `InlineAudioPlayerView` per inline audio file.
@MainActor
public struct InlineAudioList: View {
    let audioFiles: [InlineAudioParser.InlineAudio]

    public init(audioFiles: [InlineAudioParser.InlineAudio]) {
        self.audioFiles = audioFiles
    }

    public var body: some View {
        ForEach(rows) { row in
            InlineAudioPlayerView(audioPath: row.audio.path, displayName: row.audio.displayName)
        }
    }

    /// Pairs an audio file with an identity that survives re-parsing.
    private struct Row: Identifiable {
        let id: String
        let audio: InlineAudioParser.InlineAudio
    }

    /// Rows keyed by position and path rather than by `InlineAudio.id`.
    ///
    /// `InlineAudio.id` is a new `UUID` for every instance, so keying the
    /// `ForEach` on it gives every player a new identity whenever the message
    /// text is parsed again, which discards the player's `@State` (loaded
    /// audio, playback position). Position + path is stable for unchanged
    /// text and still unique when the same path appears twice.
    private var rows: [Row] {
        audioFiles.enumerated().map { entry in
            Row(id: "\(entry.offset):\(entry.element.path)", audio: entry.element)
        }
    }
}
|
||||
|
||||
// MARK: - Preview
|
||||
|
||||
#if DEBUG
/// Xcode preview showing the player for one regular path and one path that
/// is expected to be missing (to exercise the load-error appearance).
struct InlineAudioPlayerView_Previews: PreviewProvider {
    static var previews: some View {
        let samplePlayer = InlineAudioPlayerView(
            audioPath: "/tmp/test.mp3",
            displayName: "voice-message.mp3")
        let missingPlayer = InlineAudioPlayerView(
            audioPath: "/nonexistent/path.opus",
            displayName: "missing-file.opus")

        return VStack(spacing: 16) {
            samplePlayer
            missingPlayer
        }
        .padding()
        .frame(width: 320)
    }
}
#endif
|
||||
@ -0,0 +1,123 @@
|
||||
import MoltbotChatUI
|
||||
import Foundation
|
||||
import Testing
|
||||
|
||||
/// Unit tests for `InlineAudioParser.parse(_:)`: detection of `MEDIA:` audio
/// paths and the text that remains after they are removed.
@Suite struct InlineAudioParserTests {
    @Test func parsesEmptyText() {
        let result = InlineAudioParser.parse("")
        #expect(result.cleaned.isEmpty)
        #expect(result.audioFiles.isEmpty)
    }

    @Test func parsesTextWithoutMediaPaths() {
        let text = "Hello, this is a regular message without any media."
        let result = InlineAudioParser.parse(text)
        #expect(result.cleaned == text)
        #expect(result.audioFiles.isEmpty)
    }

    @Test func detectsMP3MediaPath() {
        let text = "Here is your audio: MEDIA:/tmp/voice-12345.mp3"
        let result = InlineAudioParser.parse(text)
        #expect(result.cleaned == "Here is your audio:")
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/tmp/voice-12345.mp3")
        #expect(result.audioFiles[0].displayName == "voice-12345.mp3")
    }

    @Test func detectsOpusMediaPath() {
        let text = "Voice message: MEDIA:/var/data/message.opus"
        let result = InlineAudioParser.parse(text)
        #expect(result.cleaned == "Voice message:")
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/var/data/message.opus")
    }

    @Test func detectsM4AMediaPath() {
        let text = "MEDIA:/path/to/audio.m4a is ready"
        let result = InlineAudioParser.parse(text)
        #expect(result.cleaned == "is ready")
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/path/to/audio.m4a")
    }

    @Test func detectsOGGMediaPath() {
        let text = "Listen: MEDIA:/files/sound.ogg"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/files/sound.ogg")
    }

    @Test func detectsWAVMediaPath() {
        let text = "Audio clip: MEDIA:/recordings/clip.wav"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/recordings/clip.wav")
    }

    /// The parser's extension list also contains aac and flac; cover them so
    /// every advertised format has at least one test.
    @Test func detectsAACAndFLACMediaPaths() {
        let result = InlineAudioParser.parse("MEDIA:/tmp/a.aac and MEDIA:/tmp/b.flac")
        #expect(result.audioFiles.count == 2)
        #expect(result.audioFiles[0].path == "/tmp/a.aac")
        #expect(result.audioFiles[1].path == "/tmp/b.flac")
        #expect(!result.cleaned.contains("MEDIA:"))
    }

    @Test func detectsMultipleMediaPaths() {
        let text = """
        Here are two audio files:
        First: MEDIA:/tmp/voice1.mp3
        Second: MEDIA:/tmp/voice2.opus
        Enjoy!
        """
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 2)
        #expect(result.audioFiles[0].path == "/tmp/voice1.mp3")
        #expect(result.audioFiles[1].path == "/tmp/voice2.opus")
        #expect(result.cleaned.contains("Here are two audio files:"))
        #expect(result.cleaned.contains("Enjoy!"))
        #expect(!result.cleaned.contains("MEDIA:"))
    }

    @Test func handlesMediaPathWithDashesAndUnderscores() {
        let text = "MEDIA:/path/to/voice-message_2024-01-15.mp3"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.audioFiles[0].path == "/path/to/voice-message_2024-01-15.mp3")
    }

    @Test func isCaseInsensitive() {
        // The prefix is matched regardless of letter case.
        for text in ["media:/tmp/test.mp3", "Media:/tmp/test.mp3", "MEDIA:/tmp/test.mp3"] {
            #expect(InlineAudioParser.parse(text).audioFiles.count == 1)
        }
    }

    @Test func ignoresNonAudioMediaPaths() {
        // The parser should only detect audio extensions
        let text = "Image: MEDIA:/tmp/photo.jpg"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.isEmpty)
        #expect(result.cleaned == text) // Text unchanged for non-audio
    }

    @Test func preservesTextAroundMediaPath() {
        let text = "Before MEDIA:/tmp/audio.mp3 After"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        // Removing the token leaves the spaces on both sides of it; accept
        // either the resulting double space or a collapsed single space.
        #expect(result.cleaned == "Before  After" || result.cleaned == "Before After")
    }

    @Test func handlesMediaPathAtStartOfLine() {
        let text = "MEDIA:/tmp/audio.mp3\nSome text after"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.cleaned.contains("Some text after"))
    }

    @Test func handlesMediaPathAtEndOfLine() {
        let text = "Check this out: MEDIA:/tmp/audio.mp3"
        let result = InlineAudioParser.parse(text)
        #expect(result.audioFiles.count == 1)
        #expect(result.cleaned == "Check this out:")
    }
}
|
||||
Loading…
Reference in New Issue
Block a user