From 16141e65d902e5aee1737d39110d05c927208c0a Mon Sep 17 00:00:00 2001
From: trungkienbkhn <trungkienbkhn123@gmail.com>
Date: Thu, 29 Feb 2024 23:08:28 +0700
Subject: [PATCH 1/4] Add pad_or_trim function to handle segment before
 encoding (#705)

---
 faster_whisper/audio.py      | 15 +++++++++++++++
 faster_whisper/transcribe.py |  3 ++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/faster_whisper/audio.py b/faster_whisper/audio.py
index 3190619..a597fd8 100644
--- a/faster_whisper/audio.py
+++ b/faster_whisper/audio.py
@@ -102,3 +102,18 @@ def _resample_frames(frames, resampler):
     # Add None to flush the resampler.
     for frame in itertools.chain(frames, [None]):
         yield from resampler.resample(frame)
+
+
+def pad_or_trim(array, length: int, *, axis: int = -1):
+    """
+    Pad with zeros or trim the array to `length` along `axis`, as expected by the encoder.
+    """
+    if array.shape[axis] > length:
+        array = array.take(indices=range(length), axis=axis)
+
+    if array.shape[axis] < length:
+        pad_widths = [(0, 0)] * array.ndim
+        pad_widths[axis] = (0, length - array.shape[axis])
+        array = np.pad(array, pad_widths)
+
+    return array
diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index c1ea390..bce84d2 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -11,7 +11,7 @@ import ctranslate2
 import numpy as np
 import tokenizers
 
-from faster_whisper.audio import decode_audio
+from faster_whisper.audio import decode_audio, pad_or_trim
 from faster_whisper.feature_extractor import FeatureExtractor
 from faster_whisper.tokenizer import _LANGUAGE_CODES, Tokenizer
 from faster_whisper.utils import download_model, format_timestamp, get_end, get_logger
@@ -492,6 +492,7 @@ class WhisperModel:
             )
             segment = features[:, seek : seek + segment_size]
             segment_duration = segment_size * self.feature_extractor.time_per_frame
+            segment = pad_or_trim(segment, self.feature_extractor.nb_max_frames)
 
             if self.logger.isEnabledFor(logging.DEBUG):
                 self.logger.debug(

From 09cd57e7f32f6bf1396a0f2fe469e46355ba3258 Mon Sep 17 00:00:00 2001
From: Gabriel F <gabriel.rolfsen@gmail.com>
Date: Thu, 29 Feb 2024 13:08:58 -0300
Subject: [PATCH 2/4] Fix typo 'ditil' (#721)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 570cd66..e81e3a2 100644
--- a/README.md
+++ b/README.md
@@ -160,7 +160,7 @@ segments, _ = model.transcribe("audio.mp3")
 segments = list(segments)  # The transcription will actually run here.
 ```
 ### Faster-distil-whisper
-For usage of `faster-ditil-whisper`, please refer to: https://github.com/guillaumekln/faster-whisper/issues/533
+For usage of `faster-distil-whisper`, please refer to: https://github.com/guillaumekln/faster-whisper/issues/533
 
 ```python
 model_size = "distil-large-v2"

From 5090cc9d0d3048731d63b4e2fe4bf7cd73ccbfdc Mon Sep 17 00:00:00 2001
From: Purfview <69023953+Purfview@users.noreply.github.com>
Date: Thu, 29 Feb 2024 16:59:32 +0000
Subject: [PATCH 3/4] Fix window end heuristic for
 hallucination_silence_threshold (#706)

Removes the wishful heuristic causing more issues than it's fixing.

Same as https://github.com/openai/whisper/pull/2043

Example of the issue: https://github.com/openai/whisper/pull/1838#issuecomment-1960041500
---
 faster_whisper/transcribe.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index bce84d2..d3d5deb 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -661,14 +661,6 @@ class WhisperModel:
                 # skip silence before possible hallucinations
                 if options.hallucination_silence_threshold is not None:
                     threshold = options.hallucination_silence_threshold
-                    if not single_timestamp_ending:
-                        last_word_end = get_end(current_segments)
-                        if last_word_end is not None and last_word_end > time_offset:
-                            remaining_duration = window_end_time - last_word_end
-                            if remaining_duration > threshold:
-                                seek = round(last_word_end * self.frames_per_second)
-                            else:
-                                seek = previous_seek + segment_size
 
                     # if first segment might be a hallucination, skip leading silence
                     first_segment = next_words_segment(current_segments)

From a342b028b7e875465dca14c15533484ccbc9f725 Mon Sep 17 00:00:00 2001
From: trungkienbkhn <trungkienbkhn123@gmail.com>
Date: Fri, 1 Mar 2024 17:32:12 +0700
Subject: [PATCH 4/4] Bump version to 1.0.1 (#725)

---
 faster_whisper/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/faster_whisper/version.py b/faster_whisper/version.py
index 01ad014..3b64d12 100644
--- a/faster_whisper/version.py
+++ b/faster_whisper/version.py
@@ -1,3 +1,3 @@
 """Version information."""
 
-__version__ = "1.0.0"
+__version__ = "1.0.1"