diff --git a/faster_whisper/tokenizer.py b/faster_whisper/tokenizer.py index 7417a86..efe22a3 100644 --- a/faster_whisper/tokenizer.py +++ b/faster_whisper/tokenizer.py @@ -135,9 +135,6 @@ class Tokenizer: current_tokens = [] unicode_offset += len(decoded) - if unicode_offset >= len(decoded_full): - break - return words, word_tokens def split_tokens_on_spaces(