Run some automatic tests with GitHub Actions (#68)

This commit is contained in:
Guillaume Klein
2023-03-22 20:50:03 +01:00
committed by GitHub
parent 52264f2277
commit 66efd02bd0
9 changed files with 143 additions and 2 deletions

62
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,62 @@
name: CI

# Run checks on pushes to master (and version tags) and on PRs targeting master.
on:
  push:
    branches:
      - master
    tags:
      - "v*"
  pull_request:
    branches:
      - master

jobs:
  # Static checks: formatting (Black), import order (isort), lint (Flake8).
  check-code-format:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is read as a string, not a float
          # (unquoted 3.10 would otherwise collapse to 3.1).
          python-version: "3.8"

      - name: Install module
        run: |
          pip install wheel
          pip install .[dev] --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Check code format with Black
        run: |
          black --check .

      - name: Check imports order with isort
        run: |
          isort --check-only .

      - name: Check code style with Flake8
        # Run even if an earlier check step already failed, so all
        # formatting/lint feedback is reported in one CI run.
        if: ${{ always() }}
        run: |
          flake8 .

  # Functional tests with pytest against a CPU-only PyTorch build.
  run-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"

      - name: Install module
        run: |
          pip install wheel
          pip install .[dev] --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Run pytest
        run: |
          pytest -v tests/test.py

View File

@@ -1,2 +1,9 @@
from faster_whisper.audio import decode_audio
from faster_whisper.transcribe import WhisperModel
from faster_whisper.utils import format_timestamp
# Public API of the faster_whisper package: the names re-exported above and
# picked up by `from faster_whisper import *`.
__all__ = [
"decode_audio",
"WhisperModel",
"format_timestamp",
]

View File

@@ -1,7 +1,7 @@
import numpy as np
# Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py
# Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py # noqa: E501
class FeatureExtractor:
def __init__(
self,

View File

@@ -2,7 +2,7 @@ def format_timestamp(
seconds: float,
always_include_hours: bool = False,
decimal_marker: str = ".",
):
) -> str:
assert seconds >= 0, "non-negative timestamp expected"
milliseconds = round(seconds * 1000.0)

9
setup.cfg Normal file
View File

@@ -0,0 +1,9 @@
[flake8]
max-line-length = 100
# E203 (whitespace before ':') and W503 (line break before binary operator)
# conflict with Black's formatting, so they are disabled.
# NOTE: continuation values below must stay indented — configparser treats
# unindented lines as new keys.
ignore =
    E203,
    W503,

[isort]
# Keep isort compatible with Black's import formatting.
profile=black
lines_between_types=1

View File

@@ -23,6 +23,13 @@ setup(
install_requires=install_requires,
extras_require={
"conversion": conversion_requires,
"dev": conversion_requires
+ [
"black==23.*",
"flake8==6.*",
"isort==5.*",
"pytest==7.*",
],
},
packages=find_packages(),
)

31
tests/conftest.py Normal file
View File

@@ -0,0 +1,31 @@
import os
import ctranslate2
import pytest
@pytest.fixture
def data_dir():
    """Absolute path of the ``data`` directory that sits next to this file."""
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(tests_dir, "data")
@pytest.fixture
def jfk_path(data_dir):
    """Path to the JFK sample audio clip used by the transcription tests."""
    audio_name = "jfk.flac"
    return os.path.join(data_dir, audio_name)
@pytest.fixture(scope="session")
def tiny_model_dir(tmp_path_factory):
    """Convert the "tiny" Whisper model once per test session and return its path."""
    output_dir = str(tmp_path_factory.mktemp("data") / "model")
    convert_model("tiny", output_dir)
    return output_dir
def convert_model(size, output_dir):
    """Download the Hugging Face Whisper model of the given size and convert it
    to the CTranslate2 format (float16 weights) in ``output_dir``."""
    model_name = "openai/whisper-%s" % size
    converter = ctranslate2.converters.TransformersConverter(
        model_name,
        copy_files=["tokenizer.json"],
        load_as_float16=True,
    )
    converter.convert(output_dir, quantization="float16")

BIN
tests/data/jfk.flac Normal file

Binary file not shown.

25
tests/test.py Normal file
View File

@@ -0,0 +1,25 @@
from faster_whisper import WhisperModel
def test_transcribe(tiny_model_dir, jfk_path):
    """End-to-end transcription of the JFK clip, with word-level timestamps."""
    model = WhisperModel(tiny_model_dir)
    segments, info = model.transcribe(jfk_path, word_timestamps=True)

    # Language detection and clip metadata.
    assert info.language == "en"
    assert info.language_probability > 0.9
    assert info.duration == 11

    # The clip should produce exactly one segment with the known text.
    segments = list(segments)
    assert len(segments) == 1
    segment = segments[0]

    expected_text = (
        " And so my fellow Americans ask not what your country can do for you, "
        "ask what you can do for your country."
    )
    assert segment.text == expected_text

    # Word timestamps must be consistent with the segment they belong to.
    assert segment.text == "".join(word.word for word in segment.words)
    assert segment.start == segment.words[0].start
    assert segment.end == segment.words[-1].end