From 01ef12a6a0a56e63dbf5327dea263b1fd253ab3c Mon Sep 17 00:00:00 2001
From: Guillaume Klein
Date: Tue, 7 Mar 2023 10:05:04 +0100
Subject: [PATCH] Do not ignore last segment ending with one timestamp

See https://github.com/openai/whisper/commit/eab8d920edf3947294c466f3912c24ed4b191264
---
 faster_whisper/transcribe.py | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index a99d6bc..032bebd 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -292,6 +292,15 @@ class WhisperModel:
             ]
 
             if len(consecutive_timestamps) > 0:
+                ended_with_single_timestamp = (
+                    len(tokens) >= 2
+                    and tokens[-2] < self.timestamp_begin_id
+                    and tokens[-1] >= self.timestamp_begin_id
+                )
+
+                if ended_with_single_timestamp:
+                    consecutive_timestamps.append(len(tokens))
+
                 last_slice = 0
                 for i, current_slice in enumerate(consecutive_timestamps):
                     sliced_tokens = tokens[last_slice:current_slice]
@@ -306,19 +315,19 @@
                         time_offset + end_timestamp_position * self.time_precision
                     )
 
-                    last_in_window = i + 1 == len(consecutive_timestamps)
-
-                    # Include the last timestamp so that all tokens are included in a segment.
-                    if last_in_window:
-                        sliced_tokens.append(tokens[current_slice])
-
                     yield start_time, end_time, sliced_tokens
                     last_slice = current_slice
 
-                last_timestamp_position = (
-                    tokens[last_slice - 1] - self.timestamp_begin_id
-                )
-                offset += last_timestamp_position * self.input_stride
+                if ended_with_single_timestamp:
+                    # single timestamp at the end means no speech after the last timestamp.
+                    offset += segment.shape[-1]
+                else:
+                    # otherwise, ignore the unfinished segment and seek to the last timestamp
+                    last_timestamp_position = (
+                        tokens[last_slice - 1] - self.timestamp_begin_id
+                    )
+                    offset += last_timestamp_position * self.input_stride
+
                 all_tokens.extend(tokens[: last_slice + 1])
 
             else: