Assign words to the speech chunk with the greatest coverage (#180)

2023-04-25 15:54:31 +02:00
parent 8cf5d5a4b3
commit 8340e04dc6
1 changed file with 2 additions and 1 deletion
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -796,7 +796,8 @@ def restore_speech_timestamps(
            words = []
            for word in segment.words:
                # Ensure the word start and end times are resolved to the same chunk.
-                chunk_index = ts_map.get_chunk_index(word.start)
+                middle = (word.start + word.end) / 2
+                chunk_index = ts_map.get_chunk_index(middle)
                word = word._replace(
                    start=ts_map.get_original_time(word.start, chunk_index),
                    end=ts_map.get_original_time(word.end, chunk_index),