Word timing tweaks (#616)
This commit is contained in:
@@ -908,6 +908,13 @@ class WhisperModel:
|
||||
words, word_tokens = tokenizer.split_to_word_tokens(
|
||||
text_tokens + [tokenizer.eot]
|
||||
)
|
||||
if len(word_tokens) <= 1:
|
||||
# return on eot only
|
||||
# >>> np.pad([], (1, 0))
|
||||
# array([0.])
|
||||
# This results in crashes when we look up jump_times with a float index, like
|
||||
# IndexError: arrays used as indices must be of integer (or boolean) type
|
||||
return []
|
||||
word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0))
|
||||
if len(word_boundaries) <= 1:
|
||||
return []
|
||||
|
||||
Reference in New Issue
Block a user