Skip to content

Commit

Permalink
use rtc.combine_audio_frames (#841)
Browse files Browse the repository at this point in the history
  • Loading branch information
theomonnom authored Oct 4, 2024
1 parent ab0f446 commit 2fffbe2
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 81 deletions.
5 changes: 5 additions & 0 deletions .changeset/breezy-houses-remember.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"livekit-agents": patch
---

use rtc.combine_audio_frames
84 changes: 3 additions & 81 deletions livekit-agents/livekit/agents/utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,89 +7,11 @@

from ..log import logger

# deprecated aliases
# An audio buffer is either a single frame or a list of frames.
AudioBuffer = Union[List[rtc.AudioFrame], rtc.AudioFrame]


def combine_frames(buffer: AudioBuffer) -> rtc.AudioFrame:
    r"""Combine one or more `rtc.AudioFrame` objects into a single frame.

    The audio data of every frame is concatenated in order. All frames must
    share the same sample rate and number of channels. The output buffer is
    allocated once at its final size and each frame's bytes are copied into it.

    Args:
        buffer: A single `rtc.AudioFrame` or a list of `rtc.AudioFrame`
            objects to be combined. A single frame (anything that is not a
            list) is returned unchanged.

    Returns:
        A new `rtc.AudioFrame` containing the combined audio data, or the
        input itself when it is not a list.

    Raises:
        ValueError: If the buffer is an empty list.
        ValueError: If frames have differing sample rates.
        ValueError: If frames have differing numbers of channels.

    Example (assumes 16-bit samples, i.e. 2 bytes per sample per channel):
        >>> frame1 = rtc.AudioFrame(
        ...     data=b"\x01\x00\x02\x00",
        ...     sample_rate=48000,
        ...     num_channels=2,
        ...     samples_per_channel=1,
        ... )
        >>> frame2 = rtc.AudioFrame(
        ...     data=b"\x03\x00\x04\x00",
        ...     sample_rate=48000,
        ...     num_channels=2,
        ...     samples_per_channel=1,
        ... )
        >>> combined_frame = combine_frames([frame1, frame2])
        >>> bytes(combined_frame.data)
        b'\x01\x00\x02\x00\x03\x00\x04\x00'
        >>> combined_frame.sample_rate
        48000
        >>> combined_frame.num_channels
        2
        >>> combined_frame.samples_per_channel
        2
    """
    if not isinstance(buffer, list):
        return buffer

    if not buffer:
        raise ValueError("buffer is empty")

    sample_rate = buffer[0].sample_rate
    num_channels = buffer[0].num_channels

    total_data_length = 0
    total_samples_per_channel = 0

    for frame in buffer:
        if frame.sample_rate != sample_rate:
            raise ValueError(
                f"Sample rate mismatch: expected {sample_rate}, got {frame.sample_rate}"
            )

        if frame.num_channels != num_channels:
            raise ValueError(
                f"Channel count mismatch: expected {num_channels}, got {frame.num_channels}"
            )

        # `nbytes` is the size in bytes whatever the view's item format is.
        # `len()` counts items, which under-sizes the buffer for multi-byte
        # sample formats and forces the bytearray to grow during the copy.
        total_data_length += frame.data.nbytes
        total_samples_per_channel += frame.samples_per_channel

    data = bytearray(total_data_length)
    offset = 0
    for frame in buffer:
        # View the samples as raw bytes so they can be copied into the bytearray.
        frame_data = frame.data.cast("b")
        end = offset + len(frame_data)
        data[offset:end] = frame_data
        offset = end

    return rtc.AudioFrame(
        data=data,
        sample_rate=sample_rate,
        num_channels=num_channels,
        samples_per_channel=total_samples_per_channel,
    )


# `merge_frames` is bound to the same callable as `combine_frames`.
merge_frames = combine_frames
# NOTE(review): this text comes from a rendered diff; the assignment above looks like
# the removed side and the two below the added side — confirm against the committed
# file. Read top to bottom, both names end up bound to `rtc.combine_audio_frames`.
combine_frames = rtc.combine_audio_frames
merge_frames = rtc.combine_audio_frames


class AudioByteStream:
Expand Down

0 comments on commit 2fffbe2

Please sign in to comment.