Support VAD filter (#95)

* Support VAD filter

* Generalize function collect_samples

* Define AudioSegment class

* Only pass prompt and prefix to the first chunk

* Add dict argument vad_parameters

* Fix isort format

* Rename method

* Update README

* Add shortcut when the chunk offset is 0

* Reword readme

* Fix end property

* Concatenate the speech chunks

* Cleanup diff

* Increase default speech pad

* Update README

* Increase default speech pad
This commit is contained in:
Guillaume Klein
2023-04-03 17:22:48 +02:00
committed by GitHub
parent b4c1c57781
commit 19698c95f8
9 changed files with 370 additions and 0 deletions

View File

@@ -27,6 +27,27 @@ def test_transcribe(jfk_path):
assert segment.end == segment.words[-1].end
def test_vad(jfk_path):
    """Transcribe the JFK sample with the VAD filter enabled.

    The run uses the "tiny" model with ``min_silence_duration_ms=500`` and
    must yield exactly one segment carrying the full sentence, starting
    within the first second and ending between seconds 10 and 11.
    """
    expected_text = (
        " And so my fellow Americans ask not what your country can do for you, "
        "ask what you can do for your country."
    )
    vad_options = {"min_silence_duration_ms": 500}

    model = WhisperModel("tiny")
    results, _ = model.transcribe(
        jfk_path,
        vad_filter=True,
        vad_parameters=vad_options,
    )

    # The transcription result is consumed once into a list before checking.
    collected = list(results)
    assert len(collected) == 1

    only_segment = collected[0]
    assert only_segment.text == expected_text
    assert 0 < only_segment.start < 1
    assert 10 < only_segment.end < 11
def test_stereo_diarization(data_dir):
model = WhisperModel("tiny")