From e94711bb5cee175d8165a22fd9ce03ef295c7170 Mon Sep 17 00:00:00 2001
From: Guillaume Klein
Date: Thu, 14 Sep 2023 17:42:02 +0200
Subject: [PATCH] Add property WhisperModel.supported_languages (#476)

* Expose function supported_languages

* Make it a method
---
 faster_whisper/transcribe.py | 7 ++++++-
 tests/test_transcribe.py     | 5 +++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index 9053d3c..86187fc 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -11,7 +11,7 @@ import tokenizers
 
 from faster_whisper.audio import decode_audio
 from faster_whisper.feature_extractor import FeatureExtractor
-from faster_whisper.tokenizer import Tokenizer
+from faster_whisper.tokenizer import _LANGUAGE_CODES, Tokenizer
 from faster_whisper.utils import download_model, format_timestamp, get_logger
 from faster_whisper.vad import (
     SpeechTimestampsMap,
@@ -154,6 +154,11 @@ class WhisperModel:
         self.time_precision = 0.02
         self.max_length = 448
 
+    @property
+    def supported_languages(self) -> List[str]:
+        """The languages supported by the model."""
+        return list(_LANGUAGE_CODES) if self.model.is_multilingual else ["en"]
+
     def transcribe(
         self,
         audio: Union[str, BinaryIO, np.ndarray],
diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
index ca8d5a9..d30a0fb 100644
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -3,6 +3,11 @@ import os
 from faster_whisper import WhisperModel, decode_audio
 
 
+def test_supported_languages():
+    model = WhisperModel("tiny.en")
+    assert model.supported_languages == ["en"]
+
+
 def test_transcribe(jfk_path):
     model = WhisperModel("tiny")
     segments, info = model.transcribe(jfk_path, word_timestamps=True)