pipecat-ai · Vaibhav159 · Jan 16, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -51,6 +51,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added `examples/foundational/26d-gemini-multimodal-live-text.py` which is
   using Gemini as TEXT modality and using another TTS provider for TTS process.
 
+- Added the `text_chunker` utility and used it in `ElevenLabsTTSService` to
+  send text in chunks, following the ElevenLabs docs, which recommend sending
+  text word by word. This is also a possible fix for #983.
+
 ### Changed
 
 - Modified `OpenAIAssistantContextAggregator` to support controlled completions

diff --git a/src/pipecat/services/elevenlabs.py b/src/pipecat/services/elevenlabs.py
@@ -29,6 +29,7 @@
 from pipecat.services.ai_services import WordTTSService
 from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
+from pipecat.utils.string import text_chunker
 
 # See .env.example for ElevenLabs configuration needed
 try:
@@ -407,7 +408,9 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
                     self._started = True
                     self._cumulative_time = 0
 
-                await self._send_text(text)
+                for text_chunk in text_chunker(text):
+                    # Ref: https://elevenlabs.io/docs/developer-guides/reducing-latency#3-use-the-input-streaming-websocket
+                    await self._send_text(text_chunk)
                 await self.start_tts_usage_metrics(text)
             except Exception as e:
                 logger.error(f"{self} error sending message: {e}")

diff --git a/src/pipecat/utils/string.py b/src/pipecat/utils/string.py
@@ -5,6 +5,7 @@
 #
 
 import re
+import typing
 
 ENDOFSENTENCE_PATTERN_STR = r"""
     (?<![A-Z])       # Negative lookbehind: not preceded by an uppercase letter (e.g., "U.S.A.")
@@ -23,3 +24,21 @@
 def match_endofsentence(text: str) -> int:
     match = ENDOFSENTENCE_PATTERN.search(text.rstrip())
     return match.end() if match else 0
+
+
+def text_chunker(chunks: str) -> str:  # NOTE(review): uses `yield`, so it returns a generator of str
+    """Yield `chunks` in pieces that end at a splitter character, each ending with a space."""
+    splitters = (".", ",", "?", "!", ";", ":", "—", "-", "(", ")", "[", "]", "}", " ")
+    buffer = ""  # text collected since the last yielded piece
+    for text in chunks:  # a str argument is iterated one character at a time
+        if buffer.endswith(splitters):  # buffered text already ends on a boundary
+            yield buffer if buffer.endswith(" ") else buffer + " "
+            buffer = text  # the current item starts the next piece
+        elif text.startswith(splitters):  # current item opens with a boundary character
+            output = buffer + text[0]  # keep the boundary character with the piece before it
+            yield output if output.endswith(" ") else output + " "
+            buffer = text[1:]  # the rest of the item starts the next piece
+        else:
+            buffer += text
+    if buffer != "":
+        yield buffer + " "  # flush whatever is left after the last item