From b0022b3283232b2b9f19262360cd80ec9975aeb4 Mon Sep 17 00:00:00 2001 From: "Fernando O. Gallego" Date: Wed, 12 Apr 2023 00:06:03 +0200 Subject: [PATCH] Update decoding.py (#1155) * Update decoding.py Following the suggestions of @Jeronymous in https://github.com/openai/whisper/pull/914 and https://github.com/openai/whisper/discussions/924, this change fixes the endless-loop problem. * Removed a blank line and whitespace in empty lines. * Suggested changes according to the linter --------- Co-authored-by: Jong Wook Kim --- whisper/decoding.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/whisper/decoding.py b/whisper/decoding.py index 81cd845..2592ba9 100644 --- a/whisper/decoding.py +++ b/whisper/decoding.py @@ -471,6 +471,13 @@ class ApplyTimestampRules(LogitFilter): # timestamps shouldn't decrease; forbid timestamp tokens smaller than the last logits[k, self.tokenizer.timestamp_begin : timestamps[-1]] = -np.inf + # to force that timestamps are strictly increasing + if last_was_timestamp and not penultimate_was_timestamp: + timestamp_last = timestamps[-1] + else: + timestamp_last = timestamps[-1] + 1 + logits[k, self.tokenizer.timestamp_begin : timestamp_last] = -np.inf + if tokens.shape[1] == self.sample_begin: # suppress generating non-timestamp tokens at the beginning logits[:, : self.tokenizer.timestamp_begin] = -np.inf