Revert "Prevent out of range error in method split_tokens_on_unicode"
This reverts commit 36160c1e7e.
This commit is contained in:
@@ -135,9 +135,6 @@ class Tokenizer:
|
|||||||
current_tokens = []
|
current_tokens = []
|
||||||
unicode_offset += len(decoded)
|
unicode_offset += len(decoded)
|
||||||
|
|
||||||
if unicode_offset >= len(decoded_full):
|
|
||||||
break
|
|
||||||
|
|
||||||
return words, word_tokens
|
return words, word_tokens
|
||||||
|
|
||||||
def split_tokens_on_spaces(
|
def split_tokens_on_spaces(
|
||||||
|
|||||||
Reference in New Issue
Block a user