diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index 9053d3c..86187fc 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -11,7 +11,7 @@ import tokenizers
 
 from faster_whisper.audio import decode_audio
 from faster_whisper.feature_extractor import FeatureExtractor
-from faster_whisper.tokenizer import Tokenizer
+from faster_whisper.tokenizer import _LANGUAGE_CODES, Tokenizer
 from faster_whisper.utils import download_model, format_timestamp, get_logger
 from faster_whisper.vad import (
     SpeechTimestampsMap,
@@ -154,6 +154,11 @@ class WhisperModel:
         self.time_precision = 0.02
         self.max_length = 448
 
+    @property
+    def supported_languages(self) -> List[str]:
+        """The languages supported by the model."""
+        return list(_LANGUAGE_CODES) if self.model.is_multilingual else ["en"]
+
     def transcribe(
         self,
         audio: Union[str, BinaryIO, np.ndarray],
diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
index ca8d5a9..d30a0fb 100644
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -3,6 +3,11 @@ import os
 from faster_whisper import WhisperModel, decode_audio
 
 
+def test_supported_languages():
+    model = WhisperModel("tiny.en")
+    assert model.supported_languages == ["en"]
+
+
 def test_transcribe(jfk_path):
     model = WhisperModel("tiny")
     segments, info = model.transcribe(jfk_path, word_timestamps=True)
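
Usage note (not part of the diff): a minimal sketch of how the new property behaves once the change above is applied. The "tiny" and "tiny.en" model names are the same checkpoints the tests already use; no ordering of the multilingual code list is assumed.

from faster_whisper import WhisperModel

# English-only checkpoints report just "en".
english_only = WhisperModel("tiny.en")
print(english_only.supported_languages)  # ["en"]

# Multilingual checkpoints return every code from _LANGUAGE_CODES,
# i.e. all Whisper language codes such as "en", "fr", "de", ...
multilingual = WhisperModel("tiny")
print(multilingual.supported_languages)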