From 2fffbe22748b0bc717f4474cff976a4253385045 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20Monnom?= <theo.8bits@gmail.com>
Date: Fri, 4 Oct 2024 15:42:07 -0700
Subject: [PATCH] use rtc.combine_audio_frames (#841)

---
 .changeset/breezy-houses-remember.md         |  5 ++
 livekit-agents/livekit/agents/utils/audio.py | 84 +-------------------
 2 files changed, 8 insertions(+), 81 deletions(-)
 create mode 100644 .changeset/breezy-houses-remember.md

diff --git a/.changeset/breezy-houses-remember.md b/.changeset/breezy-houses-remember.md
new file mode 100644
index 000000000..59bb0889c
--- /dev/null
+++ b/.changeset/breezy-houses-remember.md
@@ -0,0 +1,5 @@
+---
+"livekit-agents": patch
+---
+
+Make `combine_frames` and `merge_frames` aliases of `rtc.combine_audio_frames` and remove the local implementation
diff --git a/livekit-agents/livekit/agents/utils/audio.py b/livekit-agents/livekit/agents/utils/audio.py
index 33ab8571f..1497aee67 100644
--- a/livekit-agents/livekit/agents/utils/audio.py
+++ b/livekit-agents/livekit/agents/utils/audio.py
@@ -7,89 +7,11 @@
 
 from ..log import logger
 
+# deprecated aliases; prefer rtc.combine_audio_frames over combine_frames/merge_frames
 AudioBuffer = Union[List[rtc.AudioFrame], rtc.AudioFrame]
 
-
-def combine_frames(buffer: AudioBuffer) -> rtc.AudioFrame:
-    """
-    Combines one or more `rtc.AudioFrame` objects into a single `rtc.AudioFrame`.
-
-    This function concatenates the audio data from multiple frames, ensuring that
-    all frames have the same sample rate and number of channels. It efficiently
-    merges the data by preallocating the necessary memory and copying the frame
-    data without unnecessary reallocations.
-
-    Args:
-        buffer (AudioBuffer): A single `rtc.AudioFrame` or a list of `rtc.AudioFrame`
-            objects to be combined.
-
-    Returns:
-        rtc.AudioFrame: A new `rtc.AudioFrame` containing the combined audio data.
-
-    Raises:
-        ValueError: If the buffer is empty.
-        ValueError: If frames have differing sample rates.
-        ValueError: If frames have differing numbers of channels.
-
-    Example:
-        >>> frame1 = rtc.AudioFrame(
-        ...     data=b"\x01\x02", sample_rate=48000, num_channels=2, samples_per_channel=1
-        ... )
-        >>> frame2 = rtc.AudioFrame(
-        ...     data=b"\x03\x04", sample_rate=48000, num_channels=2, samples_per_channel=1
-        ... )
-        >>> combined_frame = combine_frames([frame1, frame2])
-        >>> combined_frame.data
-        b'\x01\x02\x03\x04'
-        >>> combined_frame.sample_rate
-        48000
-        >>> combined_frame.num_channels
-        2
-        >>> combined_frame.samples_per_channel
-        2
-    """
-    if not isinstance(buffer, list):
-        return buffer
-
-    if not buffer:
-        raise ValueError("buffer is empty")
-
-    sample_rate = buffer[0].sample_rate
-    num_channels = buffer[0].num_channels
-
-    total_data_length = 0
-    total_samples_per_channel = 0
-
-    for frame in buffer:
-        if frame.sample_rate != sample_rate:
-            raise ValueError(
-                f"Sample rate mismatch: expected {sample_rate}, got {frame.sample_rate}"
-            )
-
-        if frame.num_channels != num_channels:
-            raise ValueError(
-                f"Channel count mismatch: expected {num_channels}, got {frame.num_channels}"
-            )
-
-        total_data_length += len(frame.data)
-        total_samples_per_channel += frame.samples_per_channel
-
-    data = bytearray(total_data_length)
-    offset = 0
-    for frame in buffer:
-        frame_data = frame.data.cast("b")
-        data[offset : offset + len(frame_data)] = frame_data
-        offset += len(frame_data)
-
-    return rtc.AudioFrame(
-        data=data,
-        sample_rate=sample_rate,
-        num_channels=num_channels,
-        samples_per_channel=total_samples_per_channel,
-    )
-
-
-merge_frames = combine_frames
+combine_frames = rtc.combine_audio_frames
+merge_frames = rtc.combine_audio_frames
 
 
 class AudioByteStream: