From 9fa198907309c64b8b3a11c16b1388a7abc60319 Mon Sep 17 00:00:00 2001 From: Guillaume Klein Date: Tue, 4 Apr 2023 10:25:41 +0200 Subject: [PATCH] Revert "Prevent out of range error in method split_tokens_on_unicode" This reverts commit 36160c1e7ed39c4787b5fe2ea390587a3edaebc5. --- faster_whisper/tokenizer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/faster_whisper/tokenizer.py b/faster_whisper/tokenizer.py index 7417a86..efe22a3 100644 --- a/faster_whisper/tokenizer.py +++ b/faster_whisper/tokenizer.py @@ -135,9 +135,6 @@ class Tokenizer: current_tokens = [] unicode_offset += len(decoded) - if unicode_offset >= len(decoded_full): - break - return words, word_tokens def split_tokens_on_spaces(