Support VAD filter (#95)

* Support VAD filter
* Generalize function collect_samples
* Define AudioSegment class
* Only pass prompt and prefix to the first chunk
* Add dict argument vad_parameters
* Fix isort format
* Rename method
* Update README
* Add shortcut when the chunk offset is 0
* Reword readme
* Fix end property
* Concatenate the speech chunks
* Cleanup diff
* Increase default speech pad
* Update README
* Increase default speech pad
2023-04-03 17:22:48 +02:00
parent b4c1c57781
commit 19698c95f8
9 changed files with 370 additions and 0 deletions
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -27,6 +27,27 @@ def test_transcribe(jfk_path):
    assert segment.end == segment.words[-1].end


+def test_vad(jfk_path):
+    model = WhisperModel("tiny")
+    segments, _ = model.transcribe(
+        jfk_path,
+        vad_filter=True,
+        vad_parameters=dict(min_silence_duration_ms=500),
+    )
+    segments = list(segments)
+
+    assert len(segments) == 1
+    segment = segments[0]
+
+    assert segment.text == (
+        " And so my fellow Americans ask not what your country can do for you, "
+        "ask what you can do for your country."
+    )
+
+    assert 0 < segment.start < 1
+    assert 10 < segment.end < 11
+
+
 def test_stereo_diarization(data_dir):
    model = WhisperModel("tiny")