diff --git a/faster_whisper/vad.py b/faster_whisper/vad.py index 080795d..cf14d5c 100644 --- a/faster_whisper/vad.py +++ b/faster_whisper/vad.py @@ -20,7 +20,7 @@ def get_speech_timestamps( max_speech_duration_s: float = float("inf"), min_silence_duration_ms: int = 2000, window_size_samples: int = 1024, - speech_pad_ms: int = 200, + speech_pad_ms: int = 400, ) -> List[dict]: """This method is used for splitting long audios into speech chunks using silero VAD. diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py index 5406535..8bebd2a 100644 --- a/tests/test_transcribe.py +++ b/tests/test_transcribe.py @@ -32,7 +32,7 @@ def test_vad(jfk_path): segments, _ = model.transcribe( jfk_path, vad_filter=True, - vad_parameters=dict(min_silence_duration_ms=500), + vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200), ) segments = list(segments)