Merge branch 'livekit:main' into main
tinalenguyen authored Dec 31, 2024
2 parents 10c17ab + 1ab8d88 commit b718710
Showing 135 changed files with 2,880 additions and 811 deletions.
5 changes: 0 additions & 5 deletions .changeset/famous-points-tickle.md

This file was deleted.

9 changes: 0 additions & 9 deletions .changeset/great-lizards-pump.md

This file was deleted.

5 changes: 0 additions & 5 deletions .changeset/strange-snakes-hug.md

This file was deleted.

9 changes: 0 additions & 9 deletions .changeset/tiny-papayas-film.md

This file was deleted.

5 changes: 0 additions & 5 deletions .changeset/warm-pillows-grow.md

This file was deleted.

1 change: 1 addition & 0 deletions .github/workflows/ci.yml
@@ -80,4 +80,5 @@ jobs:
-p livekit.plugins.azure \
-p livekit.plugins.anthropic \
-p livekit.plugins.fal \
-p livekit.plugins.playai \
-p livekit.plugins.assemblyai
10 changes: 5 additions & 5 deletions .github/workflows/publish-package.yml
@@ -26,8 +26,7 @@ jobs:
with:
submodules: true
lfs: true
env:
GITHUB_TOKEN: ${{ secrets.CHANGESETS_PUSH_PAT }}
ssh-key: ${{ secrets.CHANGESETS_PUSH_DEPLOY_KEY }}

- uses: pnpm/action-setup@v4
- name: Use Node.js 20
@@ -84,7 +83,7 @@ jobs:
uses: livekit/agents/.github/workflows/build-package.yml@main
with:
package: ${{ matrix.package.name }}
artifact_name: python-package-distributions
artifact_name: python-package-dist-${{matrix.package.name}}

publish:
needs:
@@ -98,8 +97,9 @@ jobs:
- name: Download all the dists
uses: actions/download-artifact@v4
with:
name: python-package-distributions
path: dist/
path: dist
pattern: python-package-dist-*
merge-multiple: true

- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
35 changes: 12 additions & 23 deletions .github/workflows/tests.yml
@@ -18,15 +18,15 @@ jobs:
strategy:
fail-fast: false
matrix:
os:
[
macos-14-large,
os: [
# disabled Intel Macs due to pytorch 2.3+ not supporting it
# macos-14-large,
macos-14,
windows-2019,
ubuntu-20.04,
namespace-profile-default-arm64,
]
python_version: ["3.12"]
python_version: ["3.9", "3.12"]
test_group: ["base"]
include:
# Include llm, stt, and tts tests only on Ubuntu 20.04 with Python 3.9
@@ -60,11 +60,8 @@ jobs:
${{ runner.os }}-cache
- uses: actions/setup-python@v5
# brew will install python as part of ffmpeg install on MacOS
# installing system Python could cause a conflict with `Could not symlink bin/idle3`
if: ${{ matrix.os != 'macos-14-large' }}
with:
python-version: "3.12"
python-version: ${{ matrix.python_version }}
cache: "pip"

- name: Install ffmpeg (Linux)
@@ -80,6 +77,7 @@
sudo dpkg -i libssl1.1_1.1.1-1ubuntu2.1_arm64.deb
sudo dpkg -i libssl-dev_1.1.1-1ubuntu2.1_arm64.deb
- name: Install ffmpeg (macOS)
if: ${{ startsWith(matrix.os, 'macos') }}
run: brew install ffmpeg
@@ -91,20 +89,9 @@ jobs:
- name: Install packages
shell: bash
run: |
pip3 install pytest pytest-asyncio pytest-timeout './livekit-agents[codecs]' psutil
pip3 install -r ./tests/test-requirements.txt
pip3 install ./livekit-agents \
./livekit-plugins/livekit-plugins-openai \
./livekit-plugins/livekit-plugins-deepgram \
./livekit-plugins/livekit-plugins-google \
./livekit-plugins/livekit-plugins-nltk \
./livekit-plugins/livekit-plugins-silero \
./livekit-plugins/livekit-plugins-elevenlabs \
./livekit-plugins/livekit-plugins-cartesia \
./livekit-plugins/livekit-plugins-azure \
./livekit-plugins/livekit-plugins-anthropic \
./livekit-plugins/livekit-plugins-assemblyai \
./livekit-plugins/livekit-plugins-fal
pip install pytest pytest-asyncio pytest-timeout './livekit-agents[codecs]' psutil
pip install -r ./tests/test-requirements.txt
./livekit-plugins/install_local.sh
- name: Run tests
shell: bash
@@ -123,6 +110,8 @@ jobs:
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
ASSEMBLYAI_API_KEY: ${{ secrets.ASSEMBLYAI_API_KEY }}
FAL_KEY: ${{ secrets.FAL_KEY }}
PLAYHT_API_KEY: ${{ secrets.PLAYHT_API_KEY }}
PLAYHT_USER_ID: ${{ secrets.PLAYHT_USER_ID }}
GOOGLE_APPLICATION_CREDENTIALS: google.json
PYTEST_ADDOPTS: "--color=yes"
working-directory: tests
@@ -131,7 +120,7 @@ jobs:
case "${{ matrix.test_group }}" in
base)
test_files="test_aio.py test_tokenizer.py test_vad.py test_ipc.py test_tts_fallback.py test_stt_fallback.py test_message_change.py"
test_files="test_aio.py test_tokenizer.py test_vad.py test_ipc.py test_tts_fallback.py test_stt_fallback.py test_message_change.py test_build_func_desc.py test_create_func.py"
;;
llm)
test_files="test_llm.py"
2 changes: 1 addition & 1 deletion README.md
@@ -122,7 +122,7 @@ Documentation on the framework and how to use it can be found [here](https://doc
| Voice agent using the new OpenAI Realtime API | [demo](https://playground.livekit.io) | [code](https://github.com/livekit-examples/realtime-playground) |
| Super fast voice agent using Cerebras hosted Llama 3.1 | [demo](https://cerebras.vercel.app) | [code](https://github.com/dsa/fast-voice-assistant/) |
| Voice agent using Cartesia's Sonic model | [demo](https://cartesia-assistant.vercel.app/) | [code](https://github.com/livekit-examples/cartesia-voice-agent) |
| Agent that looks up the current weather via function call | N/A | [code](https://github.com/livekit-examples/cartesia-voice-agent) |
| Agent that looks up the current weather via function call | N/A | [code](https://github.com/livekit/agents/blob/main/examples/voice-pipeline-agent/function_calling_weather.py) |
| Voice Agent using Gemini 2.0 Flash | N/A | [code](https://github.com/livekit-examples/voice-pipeline-agent/gemini_voice_agent.py) |
| Voice agent with custom turn-detection model | N/A | [code](https://github.com/livekit/agents/blob/main/examples/voice-pipeline-agent/turn_detector.py) |
| Voice agent that performs a RAG-based lookup | N/A | [code](https://github.com/livekit/agents/tree/main/examples/voice-pipeline-agent/simple-rag) |
41 changes: 41 additions & 0 deletions examples/hive-moderation-agent/README.md
@@ -0,0 +1,41 @@
# LiveKit realtime moderation agent using Hive

This agent performs visual moderation of every participant's video in a room. Moderation is done with the Visual Content Moderation model from [Hive](https://thehive.ai) [[docs](https://docs.thehive.ai/docs/visual-content-moderation#visual-content-moderation)].

## Prerequisites

Before running this agent, you'll need:

1. A LiveKit Cloud project (or a self-hosted LiveKit server).
2. An API key from Hive to access the above-mentioned model.

## Configuration

Currently, this agent is configured entirely through constants in the `agent.py` source code and through environment variables.

### Environment Variables

| configuration | description | example value |
|---------------|-------------|---------------|
| `LIVEKIT_URL` | Your LiveKit URL | `wss://test-abc123de.livekit.cloud` |
| `LIVEKIT_API_KEY` | Your LiveKit API key | |
| `LIVEKIT_API_SECRET` | Your LiveKit API secret | |
| `HIVE_API_KEY` | The API key from Hive to access the `Visual Content Moderation` model | `abc1deFgHIjK23KLMNOp45QrsTuv6wx8` |
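
For local development, these can live in a `.env` file next to `agent.py` (the agent calls `load_dotenv()` on startup). The values below are placeholders:

```
LIVEKIT_URL=wss://test-abc123de.livekit.cloud
LIVEKIT_API_KEY=<your LiveKit API key>
LIVEKIT_API_SECRET=<your LiveKit API secret>
HIVE_API_KEY=<your Hive API key>
```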

### Code

| configuration | description | example value |
|---------------|-------------|---------------|
| `MOD_FRAME_INTERVAL` | Minimum number of seconds to wait between frames | 5.0 |
| `HIVE_HEADERS` | The headers to send with every request to the Hive API | `{}` |
| `CONFIDENCE_THRESHOLD` | The minimum score Hive's moderation class must meet before it is considered a problem | 0.9 |
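
These are plain module-level constants near the top of `agent.py`, so tuning them is an edit-and-restart change. The snippet below mirrors how they appear in the example source (comments added here for orientation):

```
# top of agent.py -- tune moderation behavior here
MOD_FRAME_INTERVAL = 5.0      # seconds to wait between moderated frames
HIVE_HEADERS = {
    "Authorization": f"Token {os.getenv('HIVE_API_KEY')}",
    "accept": "application/json",
}
CONFIDENCE_THRESHOLD = 0.9    # minimum Hive score treated as a positive detection
```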

## Running

Run this code like you would any other [LiveKit agent](https://docs.livekit.io/agents/build/anatomy/#starting-the-worker):

```
python3 agent.py start
```

Once running, the agent will join all new LiveKit rooms by default and begin moderation.
163 changes: 163 additions & 0 deletions examples/hive-moderation-agent/agent.py
@@ -0,0 +1,163 @@
"""
LiveKit agent that connects to a room and performs visual moderation on the video
of all participants using the Visual Content Moderation model from Hive
(https://docs.thehive.ai/docs/visual-content-moderation#visual-content-moderation).
The agent periodically sends a frame from the participant's video to Hive's API
for a moderation check. If the results of that check show a confidence score
of 0.9 or higher for any of the positive classes, it logs the result and adds a
message to the room's chat. This can easily be extended to take additional
actions like removing a participant or ending a livestream, etc.
"""

import asyncio
import logging
import os
import time
from io import BytesIO

import aiohttp
from dotenv import load_dotenv
from hive_data_classes import HiveResponse, from_dict
from livekit import agents, rtc
from PIL import Image

load_dotenv()

MOD_FRAME_INTERVAL = 5.0 # check 1 frame every 5 seconds
"""
How often to check a frame (in seconds)
"""

HIVE_HEADERS = {
"Authorization": f"Token {os.getenv('HIVE_API_KEY')}",
"accept": "application/json",
}
"""
The default headers included with every request to thehive.ai
"""

CONFIDENCE_THRESHOLD = 0.9
"""
The threshold level for scores returned by thehive.ai. See details in this doc:
https://docs.thehive.ai/docs/visual-content-moderation#choosing-thresholds-for-visual-moderation
"""


logger = logging.getLogger("hive-moderation-agent")
logger.setLevel(logging.INFO)


async def request_fnc(req: agents.JobRequest):
"""
The request handler for the agent. We use this to set the name of the
agent that is displayed to users
"""
# accept the job request and name the agent participant so users know what this is
await req.accept(
name="Moderator",
identity="hive-moderator",
)


async def entrypoint(ctx: agents.JobContext):
"""
The entrypoint of the agent. This is called every time the moderator
agent joins a room.
"""

# connect to the room and automatically subscribe to all participants' video
await ctx.connect(auto_subscribe=agents.AutoSubscribe.VIDEO_ONLY)
chat = rtc.ChatManager(ctx.room)

@ctx.room.on("track_subscribed")
def on_track_subscribed(
track: rtc.Track,
_publication: rtc.TrackPublication,
participant: rtc.RemoteParticipant,
):
"""
Event handler for video tracks. We automatically subscribe to all video
tracks when a participant joins the room. This event is triggered
once we have completed subscription to that video track.
This creates a background task to process frames from each track.
"""
asyncio.create_task(process_track(participant, track))

async def process_track(participant: rtc.RemoteParticipant, track: rtc.VideoTrack):
"""
This function is running in a background task once for each video track
(i.e., once for each participant). It handles processing a frame
from the video once every MOD_FRAME_INTERVAL seconds.
"""

video_stream = rtc.VideoStream(track)
last_processed_time = 0
async for frame in video_stream:
current_time = time.time()
if (current_time - last_processed_time) >= MOD_FRAME_INTERVAL:
last_processed_time = current_time
await check_frame(participant, frame)

async def check_frame(participant: rtc.RemoteParticipant, frame: rtc.VideoFrame):
"""
Uses thehive.ai API to check the frame for any classifications we care about
"""

# get the current frame and convert to png format
argb_frame = frame.frame.convert(rtc.VideoBufferType.RGBA)
image = Image.frombytes(
"RGBA", (argb_frame.width, argb_frame.height), argb_frame.data
)
buffer = BytesIO()
image.save(buffer, format="PNG")
buffer.seek(0) # reset buffer position to beginning after writing

data = aiohttp.FormData()
data.add_field("image", buffer, filename="image.png", content_type="image/png")

# submit the image to Hive
logger.info("submitting image to hive")
async with aiohttp.ClientSession() as session:
async with session.post(
"https://api.thehive.ai/api/v2/task/sync",
headers=HIVE_HEADERS,
data=data,
) as response:
response.raise_for_status()
response_dict = await response.json()
hive_response: HiveResponse = from_dict(HiveResponse, response_dict)
if (
hive_response.code == 200
and len(hive_response.status) > 0
and len(hive_response.status[0].response.output) > 0
):
results = hive_response.status[0].response.output[0].classes
# filter to anything with a confidence score > threshold
for mod_class in results:
if mod_class.class_[0:4] == "yes_":
# TODO: should also include "general_nsfw" class
if mod_class.score >= CONFIDENCE_THRESHOLD:
class_name = mod_class.class_[4:]
message = (
'FOUND %s for participant "%s" (confidence score: %0.3f)'
% (
class_name,
participant.identity,
mod_class.score,
)
)
logger.info(message)
await chat.send_message(message)

await ctx.wait_for_participant()
await chat.send_message(
"I'm a moderation agent,"
"I will detect and notify you of all inappropriate material in your video stream"
)


if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)

agents.cli.run_app(agents.WorkerOptions(entrypoint, request_fnc=request_fnc))
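
The `hive_data_classes` module imported at the top of `agent.py` is not included in this diff. Purely as a reading aid, the sketch below shows one plausible shape for it, inferred from how `agent.py` accesses the parsed response (`code`, `status[0].response.output[0].classes`, `class_`, `score`); the class and helper definitions are assumptions, not the actual module.

```
# hypothetical hive_data_classes.py, inferred from usage in agent.py
from dataclasses import dataclass, fields, is_dataclass
from typing import List


@dataclass
class ModClass:
    class_: str   # e.g. "yes_nsfw"; Hive's "class" key renamed to avoid the Python keyword
    score: float


@dataclass
class ModOutput:
    classes: List[ModClass]


@dataclass
class ModResponse:
    output: List[ModOutput]


@dataclass
class Status:
    response: ModResponse


@dataclass
class HiveResponse:
    code: int
    status: List[Status]


def from_dict(cls, data):
    """Recursively build a dataclass instance from a plain dict (simplified)."""
    if not is_dataclass(cls) or not isinstance(data, dict):
        return data
    kwargs = {}
    for f in fields(cls):
        key = "class" if f.name == "class_" else f.name
        value = data.get(key)
        item_type = getattr(f.type, "__args__", [None])[0]
        if isinstance(value, list) and item_type is not None:
            value = [from_dict(item_type, v) for v in value]
        else:
            value = from_dict(f.type, value)
        kwargs[f.name] = value
    return cls(**kwargs)
```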