workflow test

rafleze · May 7, 2024 · 5cf4be4 · 5cf4be4
1 parent 48f3f42
commit 5cf4be4
Show file tree

Hide file tree

Showing 8 changed files with 177 additions and 0 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,33 @@
+# CI workflow: lint the code base and run the test suite with coverage on every push.
+name: Python package
+
+on: [push]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so YAML keeps versions as strings (an unquoted 3.10 would be read as 3.1).
+        python-version: ["3.11"]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        pip install -r requirements.txt
+        pip install -r requirements/tests.txt
+    - name: Check with black and ruff
+      # Check-only modes: fail the job on violations instead of rewriting files on the runner.
+      run: |
+        python -m black --check .
+        python -m ruff check .
+    - name: Test with coverage
+      # Coverage source "." matches `source = ["."]` under [tool.coverage.run] in pyproject.toml.
+      run: |
+        python -m pytest --cache-clear --cov=. --cov-report=xml
+    - name: "Upload coverage to Codecov"
+      uses: codecov/codecov-action@v4
+      with:
+        fail_ci_if_error: true
+        files: coverage.xml
+        # NOTE(review): v4 uploads expect a CODECOV_TOKEN repository secret -- confirm it is configured.
+        token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/__init__.py b/__init__.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,18 @@
+[tool.black]
+target-version = ["py311"]
+
+[tool.coverage.report]
+fail_under = 100
+show_missing = true
+
+[tool.coverage.run]
+branch = true
+concurrency = ["multiprocessing"]
+disable_warnings = ["no-data-collected"]
+omit = [
+    ".venv/*",
+    "tests/*",
+    "youtube_to_text.py",
+    "*/__init__.py",
+]
+source = ["."]
diff --git a/requirements/tests.txt b/requirements/tests.txt
@@ -0,0 +1,6 @@
+black==24.4.2
+coverage==7.2.7
+pydantic==2.7.1
+pytest==8.2.0
+pytest-cov==5.0.0
+ruff==0.4.3
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/media/youtube/test.mp4 b/tests/media/youtube/test.mp4
@@ -0,0 +1 @@
+test
diff --git a/tests/test_transcriber.py b/tests/test_transcriber.py
@@ -0,0 +1,76 @@
+import pytest
+from unittest import TestCase
+from unittest.mock import patch
+from youtube2text.transcriber import transcribe
+
+
+@patch("youtube2text.transcriber.YouTube")
+@patch("youtube2text.transcriber.WhisperModel")
+def test_transcribe_with_settings(mock_whisper_model, mock_youtube):
+    """Test transcribe function with settings."""
+
+    with open("tests/media/youtube/test.mp4", "w") as f:
+        f.write("test")
+
+    mock_youtube.return_value.streams.filter.return_value.first.return_value.download.return_value = (
+        "tests/media/youtube/test.mp4"
+    )
+    mock_whisper_model.return_value.transcribe.return_value = [
+        {"text": "Hello", "start": 0.0, "end": 1.0},
+        {"text": "World", "start": 1.0, "end": 2.0},
+    ]
+    settings = {
+        "model_size_or_path": "large-v3",
+        "device": "cpu",
+        "compute_type": "int8",
+    }
+    result = transcribe(
+        "https://www.youtube.com/watch?v=Jo07YIB3HBU", language="it", settings=settings
+    )
+    TestCase().assertListEqual(
+        result,
+        [
+            {"text": "Hello", "start": 0.0, "end": 1.0},
+            {"text": "World", "start": 1.0, "end": 2.0},
+        ],
+    )
+
+
+@patch("youtube2text.transcriber.YouTube")
+def test_transcribe_without_settings(mock_youtube):
+    """Test transcribe function with settings."""
+
+    with open("tests/media/youtube/test.mp4", "w") as f:
+        f.write("test")
+
+    mock_youtube.return_value.streams.filter.return_value.first.return_value.download.return_value = (
+        "tests/media/youtube/test.mp4"
+    )
+    settings = {}
+    with pytest.raises(Exception):
+        transcribe(
+            "https://www.youtube.com/watch?v=Jo07YIB3HBU",
+            language="it",
+            settings=settings,
+        )
+
+
+@patch("youtube2text.transcriber.YouTube")
+def test_transcribe_with_download_exception(mock_youtube):
+    """Test transcribe function with settings."""
+
+    # mock youtube stream filter first download as exception
+    mock_youtube.return_value.streams.filter.return_value.first.return_value.download.side_effect = Exception(
+        "Download failed"
+    )
+    settings = {
+        "model_size_or_path": "large-v3",
+        "device": "cpu",
+        "compute_type": "int8",
+    }
+    with pytest.raises(Exception):
+        transcribe(
+            "https://www.youtube.com/watch?v=Jo07YIB3HBU",
+            language="it",
+            settings=settings,
+        )
diff --git a/transcriber.py b/transcriber.py
@@ -0,0 +1,43 @@
+"""Transcribe a youtube video."""
+
+from pytube import YouTube
+from faster_whisper import WhisperModel
+import tempfile
+import os
+
+
+def transcribe(link, language="en", settings={}):
+    """Transcribe a youtube video."""
+    if not settings:
+        raise Exception("No configuration found for Youtube2Text.")
+    model_size_or_path = settings.get("model_size_or_path", "large-v3")
+    device = settings.get("device", "cpu")
+    compute_type = settings.get("compute_type", "int8")
+    try:
+        yt = YouTube(link)
+        filename = yt.streams.filter(only_audio=True).first().download("media/youtube")
+    except Exception as e:
+        raise e
+    else:
+        audio = open(filename, "rb")
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            temp_file.write(audio.read())
+            temp_file_path = temp_file.name
+            model = WhisperModel(
+                model_size_or_path, device=device, compute_type=compute_type
+            )
+            result = model.transcribe(temp_file_path, language=language)
+            os.remove(temp_file_path)
+            os.remove(filename)
+            return result
+
+
+# Example usage:
+#
+# settings = {
+#     "model_size_or_path": "large-v3",
+#     "device": "cpu",
+#     "compute_type": "int8",
+# }
+# x, _ = transcribe("https://www.youtube.com/watch?v=Jo07YIB3HBU", language="it", settings=settings)
+#
+# for el in x:
+#     print(el.text)