diff --git a/faster_whisper/tokenizer.py b/faster_whisper/tokenizer.py index efe22a3..7417a86 100644 --- a/faster_whisper/tokenizer.py +++ b/faster_whisper/tokenizer.py @@ -135,6 +135,9 @@ class Tokenizer: current_tokens = [] unicode_offset += len(decoded) + if unicode_offset >= len(decoded_full): + break + return words, word_tokens def split_tokens_on_spaces(