Automatically download converted models from the Hugging Face Hub (#70)

* Automatically download converted models from the Hugging Face Hub

* Remove unused import

* Remove non needed requirements in dev mode

* Remove extra index URL when pip install in CI

* Allow downloading to a specific directory

* Update docstring

* Add argument to disable the progress bars

* Fix typo in docstring
This commit is contained in:
Guillaume Klein
2023-03-24 10:55:55 +01:00
committed by GitHub
parent 523ae2180f
commit de7682a2f0
10 changed files with 105 additions and 53 deletions

View File

@@ -1,6 +1,5 @@
import os
import ctranslate2
import pytest
@@ -12,20 +11,3 @@ def data_dir():
@pytest.fixture
def jfk_path(data_dir):
    # Absolute path to the JFK sample audio clip inside the shared test data
    # directory (provided by the `data_dir` fixture).
    return os.path.join(data_dir, "jfk.flac")
@pytest.fixture(scope="session")
def tiny_model_dir(tmp_path_factory):
    # Session-scoped so the model is converted only once and shared by all
    # tests — presumably because the CTranslate2 conversion is slow.
    model_path = str(tmp_path_factory.mktemp("data") / "model")
    convert_model("tiny", model_path)
    return model_path
def convert_model(size, output_dir):
    """Convert an OpenAI Whisper model to the CTranslate2 format.

    Args:
        size: Whisper model size suffix (e.g. "tiny"), appended to the
            Hugging Face model id "openai/whisper-<size>".
        output_dir: Directory where the converted model files are written.
    """
    # f-string instead of dated %-formatting; resulting id is identical.
    name = f"openai/whisper-{size}"
    ctranslate2.converters.TransformersConverter(
        name,
        copy_files=["tokenizer.json"],  # tokenizer is needed alongside the weights
        load_as_float16=True,
    ).convert(output_dir, quantization="float16")

View File

@@ -1,8 +1,8 @@
from faster_whisper import WhisperModel
def test_transcribe(tiny_model_dir, jfk_path):
    model = WhisperModel(tiny_model_dir)
def test_transcribe(jfk_path):
    # NOTE(review): this span is a rendered diff — the definition above is the
    # pre-change version (local converted model); this one replaces it and
    # loads the "tiny" model by name (auto-downloaded per this commit).
    model = WhisperModel("tiny")
    segments, info = model.transcribe(jfk_path, word_timestamps=True)
    # The JFK clip is English speech, so language detection must report "en".
    assert info.language == "en"

17
tests/test_utils.py Normal file
View File

@@ -0,0 +1,17 @@
import os
from faster_whisper import download_model
def test_download_model(tmpdir):
    """Downloading into an explicit directory yields real files, not symlinks."""
    target = str(tmpdir.join("model"))
    returned_dir = download_model("tiny", output_dir=target)

    # The function must honor the requested output directory.
    assert returned_dir == target
    assert os.path.isdir(returned_dir)
    assert not os.path.islink(returned_dir)

    # Every downloaded entry must be a real file/dir, not a cache symlink.
    entries = (
        os.path.join(returned_dir, name) for name in os.listdir(returned_dir)
    )
    assert not any(os.path.islink(entry) for entry in entries)