From 2fffbe22748b0bc717f4474cff976a4253385045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Monnom?= Date: Fri, 4 Oct 2024 15:42:07 -0700 Subject: [PATCH] use rtc.combine_audio_frames (#841) --- .changeset/breezy-houses-remember.md | 5 ++ livekit-agents/livekit/agents/utils/audio.py | 84 +------------------- 2 files changed, 8 insertions(+), 81 deletions(-) create mode 100644 .changeset/breezy-houses-remember.md diff --git a/.changeset/breezy-houses-remember.md b/.changeset/breezy-houses-remember.md new file mode 100644 index 000000000..59bb0889c --- /dev/null +++ b/.changeset/breezy-houses-remember.md @@ -0,0 +1,5 @@ +--- +"livekit-agents": patch +--- + +use rtc.combine_audio_frames diff --git a/livekit-agents/livekit/agents/utils/audio.py b/livekit-agents/livekit/agents/utils/audio.py index 33ab8571f..1497aee67 100644 --- a/livekit-agents/livekit/agents/utils/audio.py +++ b/livekit-agents/livekit/agents/utils/audio.py @@ -7,89 +7,11 @@ from ..log import logger +# deprecated aliases AudioBuffer = Union[List[rtc.AudioFrame], rtc.AudioFrame] - -def combine_frames(buffer: AudioBuffer) -> rtc.AudioFrame: - """ - Combines one or more `rtc.AudioFrame` objects into a single `rtc.AudioFrame`. - - This function concatenates the audio data from multiple frames, ensuring that - all frames have the same sample rate and number of channels. It efficiently - merges the data by preallocating the necessary memory and copying the frame - data without unnecessary reallocations. - - Args: - buffer (AudioBuffer): A single `rtc.AudioFrame` or a list of `rtc.AudioFrame` - objects to be combined. - - Returns: - rtc.AudioFrame: A new `rtc.AudioFrame` containing the combined audio data. - - Raises: - ValueError: If the buffer is empty. - ValueError: If frames have differing sample rates. - ValueError: If frames have differing numbers of channels. - - Example: - >>> frame1 = rtc.AudioFrame( - ... data=b"\x01\x02", sample_rate=48000, num_channels=2, samples_per_channel=1 - ... ) - >>> frame2 = rtc.AudioFrame( - ... data=b"\x03\x04", sample_rate=48000, num_channels=2, samples_per_channel=1 - ... ) - >>> combined_frame = combine_frames([frame1, frame2]) - >>> combined_frame.data - b'\x01\x02\x03\x04' - >>> combined_frame.sample_rate - 48000 - >>> combined_frame.num_channels - 2 - >>> combined_frame.samples_per_channel - 2 - """ - if not isinstance(buffer, list): - return buffer - - if not buffer: - raise ValueError("buffer is empty") - - sample_rate = buffer[0].sample_rate - num_channels = buffer[0].num_channels - - total_data_length = 0 - total_samples_per_channel = 0 - - for frame in buffer: - if frame.sample_rate != sample_rate: - raise ValueError( - f"Sample rate mismatch: expected {sample_rate}, got {frame.sample_rate}" - ) - - if frame.num_channels != num_channels: - raise ValueError( - f"Channel count mismatch: expected {num_channels}, got {frame.num_channels}" - ) - - total_data_length += len(frame.data) - total_samples_per_channel += frame.samples_per_channel - - data = bytearray(total_data_length) - offset = 0 - for frame in buffer: - frame_data = frame.data.cast("b") - data[offset : offset + len(frame_data)] = frame_data - offset += len(frame_data) - - return rtc.AudioFrame( - data=data, - sample_rate=sample_rate, - num_channels=num_channels, - samples_per_channel=total_samples_per_channel, - ) - - -merge_frames = combine_frames +combine_frames = rtc.combine_audio_frames +merge_frames = rtc.combine_audio_frames class AudioByteStream: