Revert "Prevent out of range error in method split_tokens_on_unicode"

This reverts commit 36160c1e7e.
This commit is contained in:
Guillaume Klein
2023-04-04 10:25:41 +02:00
parent 36160c1e7e
commit 9fa1989073

View File

@@ -135,9 +135,6 @@ class Tokenizer:
current_tokens = []
unicode_offset += len(decoded)
if unicode_offset >= len(decoded_full):
break
return words, word_tokens
def split_tokens_on_spaces(