From 36160c1e7ed39c4787b5fe2ea390587a3edaebc5 Mon Sep 17 00:00:00 2001 From: Guillaume Klein Date: Tue, 4 Apr 2023 10:17:56 +0200 Subject: [PATCH] Prevent out of range error in method split_tokens_on_unicode --- faster_whisper/tokenizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/faster_whisper/tokenizer.py b/faster_whisper/tokenizer.py index efe22a3..7417a86 100644 --- a/faster_whisper/tokenizer.py +++ b/faster_whisper/tokenizer.py @@ -135,6 +135,9 @@ class Tokenizer: current_tokens = [] unicode_offset += len(decoded) + if unicode_offset >= len(decoded_full): + break + return words, word_tokens def split_tokens_on_spaces(