From 8340e04dc6e9d009080658e41f12fc3ac565ae7b Mon Sep 17 00:00:00 2001
From: Guillaume Klein <guillaumekln@users.noreply.github.com>
Date: Tue, 25 Apr 2023 15:54:31 +0200
Subject: [PATCH] Assign words to the speech chunk with the greatest coverage
 (#180)

---
 faster_whisper/transcribe.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index d89dfeb..a54b9c6 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -796,7 +796,8 @@ def restore_speech_timestamps(
             words = []
             for word in segment.words:
                 # Ensure the word start and end times are resolved to the same chunk.
-                chunk_index = ts_map.get_chunk_index(word.start)
+                middle = (word.start + word.end) / 2
+                chunk_index = ts_map.get_chunk_index(middle)
                 word = word._replace(
                     start=ts_map.get_original_time(word.start, chunk_index),
                     end=ts_map.get_original_time(word.end, chunk_index),