Revert "Prevent out of range error in method split_tokens_on_unicode"
This reverts commit 36160c1e7e.
This commit is contained in:
@@ -135,9 +135,6 @@ class Tokenizer:
|
||||
current_tokens = []
|
||||
unicode_offset += len(decoded)
|
||||
|
||||
if unicode_offset >= len(decoded_full):
|
||||
break
|
||||
|
||||
return words, word_tokens
|
||||
|
||||
def split_tokens_on_spaces(
|
||||
|
||||
Reference in New Issue
Block a user