Improve language detection when using clip_timestamps (#867)

2024-07-01 17:12:45 +08:00
parent 8d400e9870
commit 8862bee1f8
1 changed files with 18 additions and 7 deletions
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -370,16 +370,27 @@ class WhisperModel:
                    or language_detection_segments < 1
                ):
                    language_detection_segments = 1
-                seek = 0
-                detected_language_info = {}
+                start_timestamp = (
+                    float(clip_timestamps.split(",")[0])
+                    if isinstance(clip_timestamps, str)
+                    else clip_timestamps[0]
+                )
                content_frames = (
                    features.shape[-1] - self.feature_extractor.nb_max_frames
                )
-                while (
-                    seek <= content_frames
-                    and seek
-                    < self.feature_extractor.nb_max_frames * language_detection_segments
-                ):
+                seek = (
+                    int(start_timestamp * self.frames_per_second)
+                    if start_timestamp * self.frames_per_second < content_frames
+                    else 0
+                )
+                end_frames = min(
+                    seek
+                    + self.feature_extractor.nb_max_frames
+                    * language_detection_segments,
+                    content_frames,
+                )
+                detected_language_info = {}
+                while seek < end_frames:
                    segment = features[
                        :, seek : seek + self.feature_extractor.nb_max_frames
                    ]