diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index 34cd271..337cc42 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -280,6 +280,7 @@ class WhisperModel: clip_timestamps: Union[str, List[float]] Comma-separated list start,end,start,end,... timestamps (in seconds) of clips to process. The last end timestamp defaults to the end of the file. + vad_filter will be ignored if clip_timestamps is used. hallucination_silence_threshold: Optional[float] When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected @@ -305,7 +306,7 @@ class WhisperModel: "Processing audio with duration %s", format_timestamp(duration) ) - if vad_filter: + if vad_filter and clip_timestamps == "0": if vad_parameters is None: vad_parameters = VadOptions() elif isinstance(vad_parameters, dict):