Run some automatic tests with GitHub Actions (#68)

This commit is contained in:
Guillaume Klein
2023-03-22 20:50:03 +01:00
committed by GitHub
parent 52264f2277
commit 66efd02bd0
9 changed files with 143 additions and 2 deletions

62
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,62 @@
name: CI

# Run checks on pushes to master (and version tags) and on PRs targeting master.
on:
  push:
    branches:
      - master
    tags:
      - "v*"
  pull_request:
    branches:
      - master

jobs:
  # Static checks: formatting (Black), import order (isort), lint (Flake8).
  check-code-format:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is read as a string, not a float
          # (unquoted 3.10 would otherwise collapse to 3.1).
          python-version: "3.8"

      - name: Install module
        run: |
          pip install wheel
          pip install .[dev] --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Check code format with Black
        run: |
          black --check .

      - name: Check imports order with isort
        run: |
          isort --check-only .

      - name: Check code style with Flake8
        # Run even if an earlier check step already failed, so all
        # formatting/lint feedback is reported in one CI run.
        if: ${{ always() }}
        run: |
          flake8 .

  # Functional tests with pytest against a CPU-only PyTorch build.
  run-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"

      - name: Install module
        run: |
          pip install wheel
          pip install .[dev] --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Run pytest
        run: |
          pytest -v tests/test.py

View File

@@ -1,2 +1,9 @@
from faster_whisper.audio import decode_audio
from faster_whisper.transcribe import WhisperModel
from faster_whisper.utils import format_timestamp
# Public API of the faster_whisper package: the names re-exported above and
# picked up by `from faster_whisper import *`.
__all__ = [
"decode_audio",
"WhisperModel",
"format_timestamp",
]

View File

@@ -1,7 +1,7 @@
import numpy as np
# Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py
# Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py # noqa: E501
class FeatureExtractor:
def __init__(
self,

View File

@@ -2,7 +2,7 @@ def format_timestamp(
seconds: float,
always_include_hours: bool = False,
decimal_marker: str = ".",
):
) -> str:
assert seconds >= 0, "non-negative timestamp expected"
milliseconds = round(seconds * 1000.0)

9
setup.cfg Normal file
View File

@@ -0,0 +1,9 @@
[flake8]
max-line-length = 100
# E203 (whitespace before ':') and W503 (line break before binary operator)
# conflict with Black's formatting, so they are disabled.
# NOTE: continuation values below must stay indented — configparser treats
# unindented lines as new keys.
ignore =
    E203,
    W503,

[isort]
# Keep isort compatible with Black's import formatting.
profile=black
lines_between_types=1

View File

@@ -23,6 +23,13 @@ setup(
install_requires=install_requires,
extras_require={
"conversion": conversion_requires,
"dev": conversion_requires
+ [
"black==23.*",
"flake8==6.*",
"isort==5.*",
"pytest==7.*",
],
},
packages=find_packages(),
)

31
tests/conftest.py Normal file
View File

@@ -0,0 +1,31 @@
import os
import ctranslate2
import pytest
@pytest.fixture
def data_dir():
    """Absolute path of the ``data`` directory that sits next to this file."""
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(tests_dir, "data")
@pytest.fixture
def jfk_path(data_dir):
    """Path to the JFK sample audio clip used by the transcription tests."""
    audio_name = "jfk.flac"
    return os.path.join(data_dir, audio_name)
@pytest.fixture(scope="session")
def tiny_model_dir(tmp_path_factory):
    """Convert the "tiny" Whisper model once per test session and return its path."""
    output_dir = str(tmp_path_factory.mktemp("data") / "model")
    convert_model("tiny", output_dir)
    return output_dir
def convert_model(size, output_dir):
    """Download the Hugging Face Whisper model of the given size and convert it
    to the CTranslate2 format (float16 weights) in ``output_dir``."""
    model_name = "openai/whisper-%s" % size
    converter = ctranslate2.converters.TransformersConverter(
        model_name,
        copy_files=["tokenizer.json"],
        load_as_float16=True,
    )
    converter.convert(output_dir, quantization="float16")

BIN
tests/data/jfk.flac Normal file

Binary file not shown.

25
tests/test.py Normal file
View File

@@ -0,0 +1,25 @@
from faster_whisper import WhisperModel
def test_transcribe(tiny_model_dir, jfk_path):
    """End-to-end transcription of the JFK clip, with word-level timestamps."""
    model = WhisperModel(tiny_model_dir)
    segments, info = model.transcribe(jfk_path, word_timestamps=True)

    # Language detection and clip metadata.
    assert info.language == "en"
    assert info.language_probability > 0.9
    assert info.duration == 11

    # The clip should produce exactly one segment with the known text.
    segments = list(segments)
    assert len(segments) == 1
    segment = segments[0]

    expected_text = (
        " And so my fellow Americans ask not what your country can do for you, "
        "ask what you can do for your country."
    )
    assert segment.text == expected_text

    # Word timestamps must be consistent with the segment they belong to.
    assert segment.text == "".join(word.word for word in segment.words)
    assert segment.start == segment.words[0].start
    assert segment.end == segment.words[-1].end