Raise a more explicit error message for English-only models
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import collections
|
||||
import os
|
||||
import zlib
|
||||
|
||||
import ctranslate2
|
||||
@@ -66,6 +67,19 @@ class WhisperModel:
|
||||
intra_threads=cpu_threads,
|
||||
)
|
||||
|
||||
with open(os.path.join(model_path, "vocabulary.txt")) as vocab_file:
|
||||
vocab_size = sum(1 for _ in vocab_file)
|
||||
|
||||
is_multilingual = vocab_size == 51865
|
||||
if not is_multilingual:
|
||||
raise NotImplementedError(
|
||||
"English-only models are currently not supported. "
|
||||
"The underlying CTranslate2 implementation makes some assumptions about "
|
||||
"the prompt format that are not compatible with English-only models. "
|
||||
"This will be improved in a future version. "
|
||||
"Please use a multilingual model for now."
|
||||
)
|
||||
|
||||
self.feature_extractor = FeatureExtractor()
|
||||
self.tokenizer = tokenizers.Tokenizer.from_pretrained("openai/whisper-tiny")
|
||||
self.eot_id = self.tokenizer.token_to_id("<|endoftext|>")
|
||||
|
||||
Reference in New Issue
Block a user