Word timing tweaks (#616)

This commit is contained in:
trungkienbkhn
2023-12-13 18:38:44 +07:00
committed by GitHub
parent 65094b779e
commit 19329a3611

View File

@@ -908,6 +908,13 @@ class WhisperModel:
words, word_tokens = tokenizer.split_to_word_tokens(
text_tokens + [tokenizer.eot]
)
if len(word_tokens) <= 1:
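# Return early when the only token is EOT (no actual words to align).
# With no word tokens, the padded boundaries become a float array:
# >>> np.pad([], (1, 0))
# array([0.])
# Looking up jump_times with float indices then crashes with an error like
# IndexError: arrays used as indices must be of integer (or boolean) type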
return []
word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0))
if len(word_boundaries) <= 1:
return []