diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index 39d25d5..a685775 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -483,9 +483,6 @@ class WhisperModel:
 
             seek += segment_size
 
-            if not options.condition_on_previous_text or temperature > 0.5:
-                prompt_reset_since = len(all_tokens)
-
             if options.word_timestamps:
                 self.add_word_timestamps(
                     current_segments,
@@ -538,6 +535,9 @@ class WhisperModel:
                 ),
             )
 
+            if not options.condition_on_previous_text or temperature > 0.5:
+                prompt_reset_since = len(all_tokens)
+
     def encode(self, features: np.ndarray) -> ctranslate2.StorageView:
         # When the model is running on multiple GPUs, the encoder output should be moved
         # to the CPU since we don't know which GPU will handle the next job.