Skip to content

Commit

Permalink
workflow test
Browse files Browse the repository at this point in the history
  • Loading branch information
rafleze committed May 7, 2024
1 parent 48f3f42 commit 5cf4be4
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 0 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# CI workflow: lint and test the package on every push.
name: Python package

on: [push]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so YAML never reads a version such as 3.10 as the float 3.1.
        python-version: ["3.11"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install -r requirements/tests.txt
      - name: Check with black and ruff
        # Check-only modes: CI must fail on violations rather than rewrite
        # files in a checkout that is thrown away afterwards.
        run: |
          python3 -m black --check .
          python3 -m ruff check .
      - name: Test with coverage
        # Measure this repository (matches `source = ["."]` in pyproject.toml).
        run: |
          python -m pytest --cache-clear --cov=. --cov-report=xml
      - name: "Upload coverage to Codecov"
        uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: true
          # NOTE(review): v4 uploads need a CODECOV_TOKEN repository secret;
          # confirm it is configured before merging.
          token: ${{ secrets.CODECOV_TOKEN }}
Empty file added __init__.py
Empty file.
18 changes: 18 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Formatter: black targets Python 3.11 syntax.
[tool.black]
target-version = ["py311"]

# Coverage report: fail when total coverage is below 100% and list the
# line numbers that were not executed.
[tool.coverage.report]
fail_under = 100
show_missing = true

# Coverage measurement: branch coverage over the repository root, with the
# virtualenv, the tests themselves and package __init__ files excluded.
[tool.coverage.run]
branch = true
concurrency = ["multiprocessing"]
disable_warnings = ["no-data-collected"]
omit = [
".venv/*",
"tests/*",
# NOTE(review): no file with this name is visible in this commit (the module
# added here is transcriber.py) - confirm this entry is still wanted.
"youtube_to_text.py",
"*/__init__.py",
]
source = ["."]
6 changes: 6 additions & 0 deletions requirements/tests.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Pinned lint/test tooling, installed in CI with
# `pip install -r requirements/tests.txt`.
# NOTE(review): pydantic is not imported by the visible tests - confirm it
# belongs in this file.
black==24.4.2
coverage==7.2.7
pydantic==2.7.1
pytest==8.2.0
pytest-cov==5.0.0
ruff==0.4.3
Empty file added tests/__init__.py
Empty file.
1 change: 1 addition & 0 deletions tests/media/youtube/test.mp4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test
76 changes: 76 additions & 0 deletions tests/test_transcriber.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pytest
from unittest import TestCase
from unittest.mock import patch
from youtube2text.transcriber import transcribe


@patch("youtube2text.transcriber.YouTube")
@patch("youtube2text.transcriber.WhisperModel")
def test_transcribe_with_settings(mock_whisper_model, mock_youtube, tmp_path):
    """Test that transcribe returns the model output and cleans up the download."""
    # transcribe() deletes the downloaded file, so the fake download lives in
    # pytest's per-test temporary directory instead of a tracked fixture path.
    audio_path = tmp_path / "test.mp4"
    audio_path.write_text("test")

    streams = mock_youtube.return_value.streams
    first_stream = streams.filter.return_value.first.return_value
    first_stream.download.return_value = str(audio_path)

    expected = [
        {"text": "Hello", "start": 0.0, "end": 1.0},
        {"text": "World", "start": 1.0, "end": 2.0},
    ]
    mock_whisper_model.return_value.transcribe.return_value = expected
    settings = {
        "model_size_or_path": "large-v3",
        "device": "cpu",
        "compute_type": "int8",
    }

    result = transcribe(
        "https://www.youtube.com/watch?v=Jo07YIB3HBU", language="it", settings=settings
    )

    assert result == expected
    # The settings must be forwarded to the model constructor.
    mock_whisper_model.assert_called_once_with(
        "large-v3", device="cpu", compute_type="int8"
    )
    # The downloaded audio must not be left behind.
    assert not audio_path.exists()


@patch("youtube2text.transcriber.YouTube")
def test_transcribe_without_settings(mock_youtube):
    """Test that transcribe rejects empty settings before downloading anything."""
    # Matching on the message keeps an unrelated failure from passing the test.
    with pytest.raises(Exception, match="No configuration found"):
        transcribe(
            "https://www.youtube.com/watch?v=Jo07YIB3HBU",
            language="it",
            settings={},
        )
    # The settings check runs first, so no download may be attempted.
    mock_youtube.assert_not_called()


@patch("youtube2text.transcriber.YouTube")
def test_transcribe_with_download_exception(mock_youtube):
    """Test that a failed download propagates out of transcribe."""
    # Make the mocked stream's download() raise.
    streams = mock_youtube.return_value.streams
    first_stream = streams.filter.return_value.first.return_value
    first_stream.download.side_effect = Exception("Download failed")

    settings = {
        "model_size_or_path": "large-v3",
        "device": "cpu",
        "compute_type": "int8",
    }
    # Matching on the message proves the download error itself surfaced,
    # not some other failure.
    with pytest.raises(Exception, match="Download failed"):
        transcribe(
            "https://www.youtube.com/watch?v=Jo07YIB3HBU",
            language="it",
            settings=settings,
        )
43 changes: 43 additions & 0 deletions transcriber.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Transcribe a youtube video."""

from pytube import YouTube
from faster_whisper import WhisperModel
import tempfile
import os


def transcribe(link, language="en", settings=None):
    """Download the audio of a YouTube video and transcribe it.

    Args:
        link: URL of the video, passed to ``pytube.YouTube``.
        language: Language code forwarded to ``WhisperModel.transcribe``.
        settings: Mapping with the optional keys ``model_size_or_path``
            (default ``"large-v3"``), ``device`` (default ``"cpu"``) and
            ``compute_type`` (default ``"int8"``). Must be non-empty.

    Returns:
        Whatever ``WhisperModel.transcribe`` returns, unmodified.

    Raises:
        ValueError: If ``settings`` is missing or empty.
        Exception: Any error raised while downloading or transcribing
            propagates unchanged.
    """
    if not settings:
        raise ValueError("No configuration found for Youtube2Text.")
    model_size_or_path = settings.get("model_size_or_path", "large-v3")
    device = settings.get("device", "cpu")
    compute_type = settings.get("compute_type", "int8")

    # A download failure propagates as-is; nothing exists to clean up yet.
    stream = YouTube(link).streams.filter(only_audio=True).first()
    filename = stream.download("media/youtube")

    temp_file_path = None
    try:
        # Copy the audio into a temp file; both handles are closed before
        # the model opens the copy.
        with open(filename, "rb") as audio, tempfile.NamedTemporaryFile(
            delete=False
        ) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(audio.read())
        model = WhisperModel(
            model_size_or_path, device=device, compute_type=compute_type
        )
        return model.transcribe(temp_file_path, language=language)
    finally:
        # Remove both files even when model loading or transcription fails.
        for path in (temp_file_path, filename):
            if path is not None and os.path.exists(path):
                os.remove(path)


# Example usage (kept commented out so that importing this module has no
# side effects):
#
# settings = {
# "model_size_or_path": "large-v3",
# "device": "cpu",
# "compute_type": "int8",
# }
# x, _ = transcribe("https://www.youtube.com/watch?v=Jo07YIB3HBU", language="it", settings=settings)

# for el in x:
# print(el.text)

0 comments on commit 5cf4be4

Please sign in to comment.