playai: enable streaming TTS (#1340)

Co-authored-by: jayesh <[email protected]>
livekit · Jan 10, 2025 · 5eb41e9
1 parent 2b6fb18
commit 5eb41e9
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 9 deletions.
diff --git a/.changeset/nice-stingrays-occur.md b/.changeset/nice-stingrays-occur.md
@@ -0,0 +1,5 @@
+---
+"livekit-plugins-playai": patch
+---
+
+playai: enable streaming TTS
diff --git a/livekit-plugins/livekit-plugins-playai/livekit/plugins/playai/tts.py b/livekit-plugins/livekit-plugins-playai/livekit/plugins/playai/tts.py
@@ -61,7 +61,7 @@ def __init__(
 
         super().__init__(
             capabilities=tts.TTSCapabilities(
-                streaming=False,
+                streaming=True,
             ),
             sample_rate=sample_rate,
             num_channels=1,
@@ -89,11 +89,9 @@ def __init__(
             word_tokenizer=word_tokenizer,
         )
 
-        # Initialize client
-        self._client = PlayHTAsyncClient(
-            user_id=user_id,
-            api_key=api_key,
-        )
+        self._api_key = api_key
+        self._user_id = user_id
+
         self._streams = weakref.WeakSet[SynthesizeStream]()
 
     def update_options(
@@ -159,7 +157,8 @@ def __init__(
         opts: _Options,
     ) -> None:
         super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
-        self._client = tts._client
+        self._api_key = tts._api_key
+        self._user_id = tts._user_id
         self._opts = opts
         self._config = self._opts.tts_options
         self._mp3_decoder = utils.codecs.Mp3StreamDecoder()
@@ -169,6 +168,10 @@ async def _run(self) -> None:
         bstream = utils.audio.AudioByteStream(
             sample_rate=self._config.sample_rate, num_channels=NUM_CHANNELS
         )
+        self._client = PlayHTAsyncClient(
+            user_id=self._user_id,
+            api_key=self._api_key,
+        )
 
         try:
             async for chunk in self._client.tts(
@@ -202,7 +205,8 @@ def __init__(
         opts: _Options,
     ):
         super().__init__(tts=tts, conn_options=conn_options)
-        self._client = tts._client
+        self._api_key = tts._api_key
+        self._user_id = tts._user_id
         self._opts = opts
         self._config = self._opts.tts_options
         self._segments_ch = utils.aio.Chan[tokenize.WordStream]()
@@ -231,8 +235,13 @@ def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
                 last_frame = None
 
         input_task = asyncio.create_task(self._tokenize_input())
+
         try:
             text_stream = await self._create_text_stream()
+            self._client = PlayHTAsyncClient(
+                user_id=self._user_id,
+                api_key=self._api_key,
+            )
             async for chunk in self._client.stream_tts_input(
                 text_stream=text_stream,
                 options=self._config,
@@ -247,11 +256,12 @@ def _send_last_frame(*, segment_id: str, is_final: bool) -> None:
                 _send_last_frame(segment_id=segment_id, is_final=False)
                 last_frame = frame
             _send_last_frame(segment_id=segment_id, is_final=True)
+
         except Exception as e:
             raise APIConnectionError() from e
         finally:
             await utils.aio.gracefully_cancel(input_task)
-            self._client.close()
+            await self._client.close()
 
     @utils.log_exceptions(logger=logger)
     async def _tokenize_input(self):