diff --git a/examples/voice-pipeline-agent/custom_pronunciation.py b/examples/voice-pipeline-agent/custom_pronunciation.py
index e6ff7cd52..eed16ef1f 100644
--- a/examples/voice-pipeline-agent/custom_pronunciation.py
+++ b/examples/voice-pipeline-agent/custom_pronunciation.py
@@ -4,7 +4,7 @@
 from dotenv import load_dotenv
 from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli, llm, tokenize
-from livekit.agents.voice_assistant import VoiceAssistant
+from livekit.agents.pipeline import VoicePipelineAgent
 from livekit.plugins import cartesia, deepgram, openai, silero
 
 load_dotenv()
@@ -21,7 +21,7 @@ async def entrypoint(ctx: JobContext):
     await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
 
-    def _before_tts_cb(assistant: VoiceAssistant, text: str | AsyncIterable[str]):
+    def _before_tts_cb(agent: VoicePipelineAgent, text: str | AsyncIterable[str]):
         # The TTS is incorrectly pronouncing "LiveKit", so we'll replace it with a phonetic
         # spelling
         return tokenize.utils.replace_words(
@@ -32,7 +32,7 @@ def _before_tts_cb(assistant: VoiceAssistant, text: str | AsyncIterable[str]):
     # recognized with the STT
     deepgram_stt = deepgram.STT(keywords=[("LiveKit", 3.5)])
 
-    assistant = VoiceAssistant(
+    agent = VoicePipelineAgent(
         vad=silero.VAD.load(),
         stt=deepgram_stt,
         llm=openai.LLM(),
@@ -40,9 +40,9 @@ def _before_tts_cb(assistant: VoiceAssistant, text: str | AsyncIterable[str]):
         chat_ctx=initial_ctx,
         before_tts_cb=_before_tts_cb,
     )
-    assistant.start(ctx.room)
+    agent.start(ctx.room)
 
-    await assistant.say("Hey, LiveKit is awesome!", allow_interruptions=True)
+    await agent.say("Hey, LiveKit is awesome!", allow_interruptions=True)
 
 
 if __name__ == "__main__":
diff --git a/examples/voice-pipeline-agent/function_calling_weather.py b/examples/voice-pipeline-agent/function_calling_weather.py
index 82155cce1..978294114 100644
--- a/examples/voice-pipeline-agent/function_calling_weather.py
+++ b/examples/voice-pipeline-agent/function_calling_weather.py
@@ -11,7 +11,7 @@
     cli,
     llm,
 )
-from livekit.agents.voice_assistant import VoiceAssistant
+from livekit.agents.pipeline import VoicePipelineAgent
 from livekit.plugins import deepgram, openai, silero
 
 load_dotenv()
@@ -61,7 +61,7 @@ async def entrypoint(ctx: JobContext):
         role="system",
     )
     participant = await ctx.wait_for_participant()
-    assistant = VoiceAssistant(
+    agent = VoicePipelineAgent(
         vad=ctx.proc.userdata["vad"],
         stt=deepgram.STT(),
         llm=openai.LLM(),
@@ -70,8 +70,8 @@ async def entrypoint(ctx: JobContext):
         chat_ctx=initial_chat_ctx,
     )
     # Start the assistant. This will automatically publish a microphone track and listen to the participant.
-    assistant.start(ctx.room, participant)
-    await assistant.say(
+    agent.start(ctx.room, participant)
+    await agent.say(
         "Hello from the weather station. Would you like to know the weather? If so, tell me your location."
     )
diff --git a/examples/voice-pipeline-agent/minimal_assistant.py b/examples/voice-pipeline-agent/minimal_assistant.py
index c1aec2a44..e5ea9b64a 100644
--- a/examples/voice-pipeline-agent/minimal_assistant.py
+++ b/examples/voice-pipeline-agent/minimal_assistant.py
@@ -11,7 +11,7 @@
     cli,
     llm,
 )
-from livekit.agents.voice_assistant import VoiceAssistant
+from livekit.agents.pipeline import VoicePipelineAgent
 from livekit.plugins import deepgram, openai, silero
 
 load_dotenv()
@@ -43,7 +43,7 @@ async def entrypoint(ctx: JobContext):
     # use a model optimized for telephony
     dg_model = "nova-2-phonecall"
 
-    assistant = VoiceAssistant(
+    agent = VoicePipelineAgent(
         vad=ctx.proc.userdata["vad"],
         stt=deepgram.STT(model=dg_model),
         llm=openai.LLM(),
@@ -51,24 +51,24 @@ async def entrypoint(ctx: JobContext):
         chat_ctx=initial_ctx,
     )
 
-    assistant.start(ctx.room, participant)
+    agent.start(ctx.room, participant)
 
     # listen to incoming chat messages, only required if you'd like the agent to
     # answer incoming messages from Chat
     chat = rtc.ChatManager(ctx.room)
 
     async def answer_from_text(txt: str):
-        chat_ctx = assistant.chat_ctx.copy()
+        chat_ctx = agent.chat_ctx.copy()
         chat_ctx.append(role="user", text=txt)
-        stream = assistant.llm.chat(chat_ctx=chat_ctx)
-        await assistant.say(stream)
+        stream = agent.llm.chat(chat_ctx=chat_ctx)
+        await agent.say(stream)
 
     @chat.on("message_received")
     def on_chat_received(msg: rtc.ChatMessage):
         if msg.message:
             asyncio.create_task(answer_from_text(msg.message))
 
-    await assistant.say("Hey, how can I help you today?", allow_interruptions=True)
+    await agent.say("Hey, how can I help you today?", allow_interruptions=True)
 
 
 if __name__ == "__main__":
diff --git a/examples/voice-pipeline-agent/save_chatctx.py b/examples/voice-pipeline-agent/save_chatctx.py
index d6b1b6ac6..67c53f4ff 100644
--- a/examples/voice-pipeline-agent/save_chatctx.py
+++ b/examples/voice-pipeline-agent/save_chatctx.py
@@ -5,7 +5,7 @@
 from dotenv import load_dotenv
 from livekit import rtc
 from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli, llm
-from livekit.agents.voice_assistant import VoiceAssistant
+from livekit.agents.pipeline import VoicePipelineAgent
 from livekit.plugins import deepgram, openai, silero
 
 load_dotenv()
@@ -22,24 +22,24 @@ async def entrypoint(ctx: JobContext):
     await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
 
-    assistant = VoiceAssistant(
+    agent = VoicePipelineAgent(
         vad=silero.VAD.load(),
         stt=deepgram.STT(),
         llm=openai.LLM(),
         tts=openai.TTS(),
         chat_ctx=initial_ctx,
     )
-    assistant.start(ctx.room)
+    agent.start(ctx.room)
 
     # listen to incoming chat messages, only required if you'd like the agent to
     # answer incoming messages from Chat
     chat = rtc.ChatManager(ctx.room)
 
     async def answer_from_text(txt: str):
-        chat_ctx = assistant.chat_ctx.copy()
+        chat_ctx = agent.chat_ctx.copy()
         chat_ctx.append(role="user", text=txt)
-        stream = assistant.llm.chat(chat_ctx=chat_ctx)
-        await assistant.say(stream)
+        stream = agent.llm.chat(chat_ctx=chat_ctx)
+        await agent.say(stream)
 
     @chat.on("message_received")
     def on_chat_received(msg: rtc.ChatMessage):
@@ -48,7 +48,7 @@ def on_chat_received(msg: rtc.ChatMessage):
 
     log_queue = asyncio.Queue()
 
-    @assistant.on("user_speech_committed")
+    @agent.on("user_speech_committed")
     def on_user_speech_committed(msg: llm.ChatMessage):
         # convert string lists to strings, drop images
         if isinstance(msg.content, list):
@@ -57,7 +57,7 @@ def on_user_speech_committed(msg: llm.ChatMessage):
         )
         log_queue.put_nowait(f"[{datetime.now()}] USER:\n{msg.content}\n\n")
 
-    @assistant.on("agent_speech_committed")
+    @agent.on("agent_speech_committed")
     def on_agent_speech_committed(msg: llm.ChatMessage):
         log_queue.put_nowait(f"[{datetime.now()}] AGENT:\n{msg.content}\n\n")
 
@@ -77,7 +77,7 @@ async def finish_queue():
     ctx.add_shutdown_callback(finish_queue)
 
-    await assistant.say("Hey, how can I help you today?", allow_interruptions=True)
+    await agent.say("Hey, how can I help you today?", allow_interruptions=True)
 
 
 if __name__ == "__main__":
diff --git a/examples/voice-pipeline-agent/simple-rag/assistant.py b/examples/voice-pipeline-agent/simple-rag/assistant.py
index 1bbcda056..8baf551b1 100644
--- a/examples/voice-pipeline-agent/simple-rag/assistant.py
+++ b/examples/voice-pipeline-agent/simple-rag/assistant.py
@@ -1,7 +1,7 @@
 import pickle
 
 from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli, llm
-from livekit.agents.voice_assistant import VoiceAssistant
+from livekit.agents.pipeline import VoicePipelineAgent
 from livekit.plugins import deepgram, openai, rag, silero
 
 annoy_index = rag.annoy.AnnoyIndex.load("vdb_data")  # see build_data.py
@@ -12,7 +12,9 @@
 
 
 async def entrypoint(ctx: JobContext):
-    async def _enrich_with_rag(assistant: VoiceAssistant, chat_ctx: llm.ChatContext):
+    async def _enrich_with_rag(
+        assistant: VoicePipelineAgent, chat_ctx: llm.ChatContext
+    ):
         # locate the last user message and use it to query the RAG model
         # to get the most relevant paragraph
         # then provide that as additional context to the LLM
@@ -40,7 +42,7 @@ async def _enrich_with_rag(assistant: VoiceAssistant, chat_ctx: llm.ChatContext)
     await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
 
-    assistant = VoiceAssistant(
+    assistant = VoicePipelineAgent(
         chat_ctx=initial_ctx,
         vad=silero.VAD.load(),
         stt=deepgram.STT(),
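For anyone trying out the renamed API, below is a minimal, self-contained sketch of what an example looks like after this change. It is modeled on save_chatctx.py above; the system-prompt text and the cli.run_app bootstrap under __main__ are assumptions based on the examples in this directory, while the import path, constructor arguments, start(), and say() calls are taken verbatim from the diff.

from dotenv import load_dotenv

from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli, llm
from livekit.agents.pipeline import VoicePipelineAgent  # new import path
from livekit.plugins import deepgram, openai, silero

load_dotenv()


async def entrypoint(ctx: JobContext):
    # assumed system prompt; each real example defines its own
    initial_ctx = llm.ChatContext().append(
        role="system",
        text="You are a voice assistant created by LiveKit. Keep answers short.",
    )
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

    # constructor arguments are unchanged from VoiceAssistant; only the
    # class name and its module moved
    agent = VoicePipelineAgent(
        vad=silero.VAD.load(),
        stt=deepgram.STT(),
        llm=openai.LLM(),
        tts=openai.TTS(),
        chat_ctx=initial_ctx,
    )
    agent.start(ctx.room)
    await agent.say("Hey, how can I help you today?", allow_interruptions=True)


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))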