Check that the language code is valid

2023-02-21 12:10:54 +01:00
parent a98a2eeec4
commit f5c9f15c2c
1 changed file with 5 additions and 2 deletions
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -107,8 +107,9 @@ class WhisperModel:
        Arguments:
          input_file: Path to the input file or a file-like object.
-          language: The language spoken in the audio. If not set, the language will be
+          language: The language spoken in the audio. It should be a language code such
-            detected in the first 30 seconds of audio.
+            as "en" or "fr". If not set, the language will be detected in the first 30 seconds
+            of audio.
          task: Task to execute (transcribe or translate).
          beam_size: Beam size to use for decoding.
          best_of: Number of candidates when sampling with non-zero temperature.
@@ -152,6 +153,8 @@ class WhisperModel:
                language_token, language_probability = results[0][0]
                language = language_token[2:-2]
        else:
+            if self.tokenizer.token_to_id("<|%s|>" % language) is None:
+                raise ValueError("%s is not a valid language code" % language)
            language_probability = 1
        options = TranscriptionOptions(