Add property WhisperModel.supported_languages (#476)

* Expose function supported_languages

* Make it a method
This commit is contained in:
Guillaume Klein
2023-09-14 17:42:02 +02:00
committed by GitHub
parent 0048844f54
commit e94711bb5c
2 changed files with 11 additions and 1 deletions

View File

@@ -11,7 +11,7 @@ import tokenizers
from faster_whisper.audio import decode_audio from faster_whisper.audio import decode_audio
from faster_whisper.feature_extractor import FeatureExtractor from faster_whisper.feature_extractor import FeatureExtractor
from faster_whisper.tokenizer import Tokenizer from faster_whisper.tokenizer import _LANGUAGE_CODES, Tokenizer
from faster_whisper.utils import download_model, format_timestamp, get_logger from faster_whisper.utils import download_model, format_timestamp, get_logger
from faster_whisper.vad import ( from faster_whisper.vad import (
SpeechTimestampsMap, SpeechTimestampsMap,
@@ -154,6 +154,11 @@ class WhisperModel:
self.time_precision = 0.02 self.time_precision = 0.02
self.max_length = 448 self.max_length = 448
@property
def supported_languages(self) -> List[str]:
    """Language codes this model can handle.

    Returns a new list built from ``_LANGUAGE_CODES`` when
    ``self.model.is_multilingual`` is truthy, and ``["en"]`` otherwise.
    """
    # English-only checkpoints short-circuit before touching the code table.
    if not self.model.is_multilingual:
        return ["en"]
    return list(_LANGUAGE_CODES)
def transcribe( def transcribe(
self, self,
audio: Union[str, BinaryIO, np.ndarray], audio: Union[str, BinaryIO, np.ndarray],

View File

@@ -3,6 +3,11 @@ import os
from faster_whisper import WhisperModel, decode_audio from faster_whisper import WhisperModel, decode_audio
def test_supported_languages():
    """The ``tiny.en`` model must report ``["en"]`` as its supported languages."""
    english_only = WhisperModel("tiny.en")
    reported = english_only.supported_languages
    assert reported == ["en"]
def test_transcribe(jfk_path): def test_transcribe(jfk_path):
model = WhisperModel("tiny") model = WhisperModel("tiny")
segments, info = model.transcribe(jfk_path, word_timestamps=True) segments, info = model.transcribe(jfk_path, word_timestamps=True)