From 8340e04dc6e9d009080658e41f12fc3ac565ae7b Mon Sep 17 00:00:00 2001 From: Guillaume Klein Date: Tue, 25 Apr 2023 15:54:31 +0200 Subject: [PATCH] Assign words to the speech chunk with the greatest coverage (#180) --- faster_whisper/transcribe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index d89dfeb..a54b9c6 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -796,7 +796,8 @@ def restore_speech_timestamps( words = [] for word in segment.words: # Ensure the word start and end times are resolved to the same chunk. - chunk_index = ts_map.get_chunk_index(word.start) + middle = (word.start + word.end) / 2 + chunk_index = ts_map.get_chunk_index(middle) word = word._replace( start=ts_map.get_original_time(word.start, chunk_index), end=ts_map.get_original_time(word.end, chunk_index),