Add V3 Support (#578)
* Add V3 Support * update conversion example --------- Co-authored-by: oscaarjs <oscar.johansson@conversy.se>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import zlib
|
||||
|
||||
from inspect import signature
|
||||
from typing import BinaryIO, Iterable, List, NamedTuple, Optional, Tuple, Union
|
||||
|
||||
import ctranslate2
|
||||
@@ -92,8 +94,8 @@ class WhisperModel:
|
||||
|
||||
Args:
|
||||
model_size_or_path: Size of the model to use (tiny, tiny.en, base, base.en,
|
||||
small, small.en, medium, medium.en, large-v1, large-v2, or large), a path to a converted
|
||||
model directory, or a CTranslate2-converted Whisper model ID from the Hugging Face Hub.
|
||||
small, small.en, medium, medium.en, large-v1, large-v2, large-v3, or large), a path to a
|
||||
converted model directory, or a CTranslate2-converted Whisper model ID from the HF Hub.
|
||||
When a size or a model ID is configured, the converted model is downloaded
|
||||
from the Hugging Face Hub.
|
||||
device: Device to use for computation ("cpu", "cuda", "auto").
|
||||
@@ -142,7 +144,8 @@ class WhisperModel:
|
||||
"openai/whisper-tiny" + ("" if self.model.is_multilingual else ".en")
|
||||
)
|
||||
|
||||
self.feature_extractor = FeatureExtractor()
|
||||
self.feat_kwargs = self._get_feature_kwargs(model_path)
|
||||
self.feature_extractor = FeatureExtractor(**self.feat_kwargs)
|
||||
self.num_samples_per_token = self.feature_extractor.hop_length * 2
|
||||
self.frames_per_second = (
|
||||
self.feature_extractor.sampling_rate // self.feature_extractor.hop_length
|
||||
@@ -159,6 +162,22 @@ class WhisperModel:
|
||||
"""The languages supported by the model."""
|
||||
return list(_LANGUAGE_CODES) if self.model.is_multilingual else ["en"]
|
||||
|
||||
def _get_feature_kwargs(self, model_path) -> dict:
|
||||
preprocessor_config_file = os.path.join(model_path, "preprocessor_config.json")
|
||||
config = {}
|
||||
if os.path.isfile(preprocessor_config_file):
|
||||
try:
|
||||
with open(preprocessor_config_file, "r", encoding="utf-8") as json_file:
|
||||
config = json.load(json_file)
|
||||
valid_keys = signature(FeatureExtractor.__init__).parameters.keys()
|
||||
config = {k: v for k, v in config.items() if k in valid_keys}
|
||||
except json.JSONDecodeError as e:
|
||||
self.logger.warning(
|
||||
"Could not load preprocessor_config.json: %s", str(e)
|
||||
)
|
||||
|
||||
return config
|
||||
|
||||
def transcribe(
|
||||
self,
|
||||
audio: Union[str, BinaryIO, np.ndarray],
|
||||
|
||||
Reference in New Issue
Block a user