Merge branch 'main' into theo/more-metrics-errors
theomonnom committed Oct 28, 2024
2 parents 259818a + ca3e47a commit 44c519a
Showing 12 changed files with 101 additions and 25 deletions.
5 changes: 5 additions & 0 deletions .changeset/eleven-items-love.md
@@ -0,0 +1,5 @@
---
"livekit-plugins-openai": patch
---

Groq integration with Whisper-compatible STT endpoints
15 changes: 15 additions & 0 deletions .changeset/hot-toys-wash.md
@@ -0,0 +1,15 @@
---
"livekit-plugins-elevenlabs": patch
"livekit-plugins-anthropic": patch
"livekit-plugins-cartesia": patch
"livekit-plugins-deepgram": patch
"livekit-plugins-browser": patch
"livekit-plugins-google": patch
"livekit-plugins-openai": patch
"livekit-plugins-playht": patch
"livekit-plugins-silero": patch
"livekit-plugins-azure": patch
"livekit-agents": patch
---

pipelineagent: expose timing metrics & api errors wip
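The new timing metrics are delivered through the agent's "metrics_collected" event (see the emit call in pipeline_agent.py below). A minimal consumer sketch, not part of this commit, assuming an already-constructed VoicePipelineAgent instance named `agent`:

    def on_metrics_collected(metrics: dict) -> None:
        # Field names come from PipelineEOUMetrics in livekit/agents/pipeline/metrics.py in this diff.
        if metrics.get("type") == "eou_metrics":
            print(
                f"turn {metrics['sequence_id']}: "
                f"end_of_utterance_delay={metrics['end_of_utterance_delay']:.3f}s, "
                f"transcription_delay={metrics['transcription_delay']:.3f}s"
            )

    agent.on("metrics_collected", on_metrics_collected)  # `agent` is assumed to exist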
5 changes: 5 additions & 0 deletions .changeset/serious-worms-tap.md
@@ -0,0 +1,5 @@
---
"livekit-agents": patch
---

Fix stack dump on room shutdown
5 changes: 5 additions & 0 deletions livekit-agents/livekit/agents/ipc/job_main.py
@@ -7,6 +7,7 @@
import pickle
import queue
import socket
import sys
import threading
from dataclasses import dataclass
from typing import Any, Callable, Optional
@@ -47,6 +48,10 @@ def _forward_logs(self):

def emit(self, record: logging.LogRecord) -> None:
try:
# Check if Python is shutting down
if sys.is_finalizing():
return

# from https://github.com/python/cpython/blob/91b7f2e7f6593acefda4fa860250dd87d6f849bf/Lib/logging/handlers.py#L1453
msg = self.format(record)
record = copy.copy(record)
16 changes: 13 additions & 3 deletions livekit-agents/livekit/agents/pipeline/metrics.py
@@ -25,21 +25,31 @@ class PipelineSTTMetrics(STTMetrics, TypedDict):
class PipelineEOUMetrics(TypedDict):
type: Literal["eou_metrics"]
sequence_id: str
"""Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics."""

timestamp: float
duration: float
"""Timestamp of when the event was recorded."""

end_of_utterance_delay: float
"""Amount of time between the end of speech from VAD and the decision to end the user's turn."""

transcription_delay: float
"""time it took to get the transcript after the end of the user's speech
could be 0 if the transcript was already available"""
"""Time taken to obtain the transcript after the end of the user's speech.
May be 0 if the transcript was already available.
"""


class PipelineLLMMetrics(LLMMetrics, TypedDict):
type: Literal["llm_metrics"]
sequence_id: str
"""Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics."""


class PipelineTTSMetrics(TTSMetrics, TypedDict):
type: Literal["tts_metrics"]
sequence_id: str
"""Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics."""


class PipelineVADMetrics(VADMetrics, TypedDict):
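Every payload above carries the same sequence_id for one agent turn, so related STT, LLM, TTS, and EOU metrics can be grouped after the fact. A minimal sketch, not part of this commit, assuming the metrics arrive as the plain dicts defined above:

    from collections import defaultdict

    turns: dict[str, list[dict]] = defaultdict(list)

    def group_by_turn(metrics: dict) -> None:
        # Collect all metrics that share a sequence_id, i.e. belong to one agent turn.
        seq = metrics.get("sequence_id")
        if seq is not None:
            turns[seq].append(metrics)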
2 changes: 1 addition & 1 deletion livekit-agents/livekit/agents/pipeline/pipeline_agent.py
@@ -935,7 +935,7 @@ def _validate_reply_if_possible(self) -> None:
"type": "eou_metrics",
"timestamp": time.time(),
"sequence_id": self._pending_agent_reply.id,
"duration": time_since_last_speech,
"end_of_utterance_delay": time_since_last_speech,
"transcription_delay": transcription_delay,
}
self.emit("metrics_collected", eou_metrics)
2 changes: 1 addition & 1 deletion livekit-agents/livekit/agents/stt/stt.py
@@ -13,7 +13,7 @@


class STTMetrics(TypedDict):
request_id: str # can be empty when just emitting usage (in streaming mode)
request_id: str # can be empty when just emitting usage (in streaming mode)
timestamp: float
duration: float
label: str
12 changes: 6 additions & 6 deletions livekit-agents/livekit/agents/vad.py
@@ -14,7 +14,7 @@

class VADMetrics(TypedDict):
timestamp: float
inference_duration_avg: float
inference_duration_total: float
inference_count: int
label: str

@@ -114,24 +114,24 @@ async def _main_task(self) -> None: ...
async def _metrics_monitor_task(self, event_aiter: AsyncIterable[VADEvent]) -> None:
"""Task used to collect metrics"""

inference_duration_sum = 0.0
inference_duration_total = 0.0
inference_count = 0

async for ev in event_aiter:
if ev.type == VADEventType.INFERENCE_DONE:
inference_duration_sum += ev.inference_duration
inference_duration_total += ev.inference_duration
inference_count += 1

if inference_count >= 1 / self._vad.capabilities.update_interval:
vad_metrics: VADMetrics = {
"timestamp": time.time(),
"inference_duration_avg": inference_duration_sum
/ inference_count,
"inference_duration_total": inference_duration_total,
"inference_count": inference_count,
"label": self._vad._label,
}
self._vad.emit("metrics_collected", vad_metrics)
inference_duration_sum = 0.0

inference_duration_total = 0.0
inference_count = 0

def push_frame(self, frame: rtc.AudioFrame) -> None:
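Since VADMetrics now reports a running total instead of a pre-computed average, consumers that still want the mean can derive it from the two fields emitted above. A minimal sketch, assuming a VADMetrics dict named `m`:

    def avg_inference_duration(m: dict) -> float:
        # Per-inference average, guarded against a zero count.
        return m["inference_duration_total"] / max(m["inference_count"], 1)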
@@ -473,7 +473,7 @@ void DevRenderer::Run() {
}

// Render the browser tex
ImGui::Image((void*)(intptr_t)data.texture_id,
ImGui::Image((ImTextureID)(intptr_t)data.texture_id,
ImVec2((float)data.view_width, (float)data.view_height));
}
ImGui::End();
@@ -70,6 +70,10 @@
"gemma2-9b-it",
]

GroqAudioModels = Literal[
"whisper-large-v3", "distil-whisper-large-v3-en", "whisper-large-v3-turbo"
]

DeepSeekChatModels = Literal[
"deepseek-coder",
"deepseek-chat",
@@ -16,6 +16,7 @@

import dataclasses
import io
import os
import wave
from dataclasses import dataclass

@@ -26,14 +27,14 @@

import openai

from .models import WhisperModels
from .models import GroqAudioModels, WhisperModels


@dataclass
class _STTOptions:
language: str
detect_language: bool
model: WhisperModels
model: WhisperModels | str


class STT(stt.STT):
@@ -42,7 +43,7 @@ def __init__(
*,
language: str = "en",
detect_language: bool = False,
model: WhisperModels = "whisper-1",
model: WhisperModels | str = "whisper-1",
base_url: str | None = None,
api_key: str | None = None,
client: openai.AsyncClient | None = None,
@@ -80,6 +81,38 @@ def __init__(
),
)

@staticmethod
def with_groq(
*,
model: GroqAudioModels | str = "whisper-large-v3-turbo",
api_key: str | None = None,
base_url: str | None = "https://api.groq.com/openai/v1",
client: openai.AsyncClient | None = None,
language: str = "en",
detect_language: bool = False,
) -> STT:
"""
Create a new instance of Groq STT.
``api_key`` must be set to your Groq API key, either using the argument or by setting
the ``GROQ_API_KEY`` environmental variable.
"""

# Use environment variable if API key is not provided
api_key = api_key or os.environ.get("GROQ_API_KEY")
if api_key is None:
raise ValueError("Groq API key is required")

# Instantiate and return a configured STT instance
return STT(
model=model,
api_key=api_key,
base_url=base_url,
client=client,
language=language,
detect_language=detect_language,
)

def _sanitize_options(self, *, language: str | None = None) -> _STTOptions:
config = dataclasses.replace(self._opts)
config.language = language or config.language
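A minimal usage sketch for the new Groq constructor shown above, not part of this commit; it assumes GROQ_API_KEY is exported and uses the usual livekit-plugins import path:

    import os

    from livekit.plugins import openai

    assert os.environ.get("GROQ_API_KEY"), "GROQ_API_KEY must be set"

    # Defaults mirror the diff above: Groq's OpenAI-compatible base_url and whisper-large-v3-turbo.
    groq_stt = openai.STT.with_groq(model="whisper-large-v3-turbo", language="en")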
19 changes: 9 additions & 10 deletions tests/test_llm.py
@@ -1,7 +1,6 @@
from __future__ import annotations

import asyncio
import uuid
from enum import Enum
from typing import Annotated, Callable, Optional

@@ -90,15 +89,15 @@ def test_hashable_typeinfo():

LLMS: list[Callable[[], llm.LLM]] = [
lambda: openai.LLM(),
lambda: openai.beta.AssistantLLM(
assistant_opts=openai.beta.AssistantOptions(
create_options=openai.beta.AssistantCreateOptions(
name=f"test-{uuid.uuid4()}",
instructions="You are a basic assistant",
model="gpt-4o",
)
)
),
# lambda: openai.beta.AssistantLLM(
# assistant_opts=openai.beta.AssistantOptions(
# create_options=openai.beta.AssistantCreateOptions(
# name=f"test-{uuid.uuid4()}",
# instructions="You are a basic assistant",
# model="gpt-4o",
# )
# )
# ),
# anthropic.LLM(),
]

