diff --git a/.changeset/nice-stingrays-occur.md b/.changeset/nice-stingrays-occur.md new file mode 100644 index 000000000..715784430 --- /dev/null +++ b/.changeset/nice-stingrays-occur.md @@ -0,0 +1,5 @@ +--- +"livekit-plugins-playai": patch +--- + +playai: enable streaming TTS diff --git a/livekit-plugins/livekit-plugins-playai/livekit/plugins/playai/tts.py b/livekit-plugins/livekit-plugins-playai/livekit/plugins/playai/tts.py index 464f3f418..a8ac42fd0 100644 --- a/livekit-plugins/livekit-plugins-playai/livekit/plugins/playai/tts.py +++ b/livekit-plugins/livekit-plugins-playai/livekit/plugins/playai/tts.py @@ -61,7 +61,7 @@ def __init__( super().__init__( capabilities=tts.TTSCapabilities( - streaming=False, + streaming=True, ), sample_rate=sample_rate, num_channels=1, @@ -89,11 +89,9 @@ def __init__( word_tokenizer=word_tokenizer, ) - # Initialize client - self._client = PlayHTAsyncClient( - user_id=user_id, - api_key=api_key, - ) + self._api_key = api_key + self._user_id = user_id + self._streams = weakref.WeakSet[SynthesizeStream]() def update_options( @@ -159,7 +157,8 @@ def __init__( opts: _Options, ) -> None: super().__init__(tts=tts, input_text=input_text, conn_options=conn_options) - self._client = tts._client + self._api_key = tts._api_key + self._user_id = tts._user_id self._opts = opts self._config = self._opts.tts_options self._mp3_decoder = utils.codecs.Mp3StreamDecoder() @@ -169,6 +168,10 @@ async def _run(self) -> None: bstream = utils.audio.AudioByteStream( sample_rate=self._config.sample_rate, num_channels=NUM_CHANNELS ) + self._client = PlayHTAsyncClient( + user_id=self._user_id, + api_key=self._api_key, + ) try: async for chunk in self._client.tts( @@ -202,7 +205,8 @@ def __init__( opts: _Options, ): super().__init__(tts=tts, conn_options=conn_options) - self._client = tts._client + self._api_key = tts._api_key + self._user_id = tts._user_id self._opts = opts self._config = self._opts.tts_options self._segments_ch = utils.aio.Chan[tokenize.WordStream]() @@ -231,8 +235,13 @@ def _send_last_frame(*, segment_id: str, is_final: bool) -> None: last_frame = None input_task = asyncio.create_task(self._tokenize_input()) + try: text_stream = await self._create_text_stream() + self._client = PlayHTAsyncClient( + user_id=self._user_id, + api_key=self._api_key, + ) async for chunk in self._client.stream_tts_input( text_stream=text_stream, options=self._config, @@ -247,11 +256,12 @@ def _send_last_frame(*, segment_id: str, is_final: bool) -> None: _send_last_frame(segment_id=segment_id, is_final=False) last_frame = frame _send_last_frame(segment_id=segment_id, is_final=True) + except Exception as e: raise APIConnectionError() from e finally: await utils.aio.gracefully_cancel(input_task) - self._client.close() + await self._client.close() @utils.log_exceptions(logger=logger) async def _tokenize_input(self):