fix all_tokens handling that caused more repetitions and discrepancy in JSON (#1060)

This commit is contained in:
Jong Wook Kim
2023-03-08 18:34:07 -05:00
committed by GitHub
parent aac47c9834
commit 38f2f4d99d
3 changed files with 14 additions and 11 deletions

View File

@@ -290,7 +290,7 @@ def add_word_timestamps(
if len(segments) == 0:
return
-    text_tokens = [t for segment in segments for t in segment["tokens"]]
+    text_tokens = [t for s in segments for t in s["tokens"] if t < tokenizer.eot]
alignment = find_alignment(model, tokenizer, text_tokens, mel, num_frames, **kwargs)
merge_punctuations(alignment, prepend_punctuations, append_punctuations)