Prepend prefix tokens with the initial timestamp token (#358)

This commit is contained in:
Guillaume Klein
2023-07-18 15:22:39 +02:00
committed by GitHub
parent 2a37390fed
commit 0e051a5b77
3 changed files with 21 additions and 1 deletions

View File

@@ -34,6 +34,24 @@ def test_transcribe(jfk_path):
assert segment.end == segment.words[-1].end
def test_prefix_with_timestamps(jfk_path):
    """Check transcription of the JFK sample when a text prefix is supplied.

    The test expects exactly one segment whose text begins with the prefix,
    whose start time is 0 and whose end time lies strictly between 10 and 11.
    """
    expected_text = (
        " And so my fellow Americans ask not what your country can do for you, "
        "ask what you can do for your country."
    )

    whisper = WhisperModel("tiny")
    # The second value returned by transcribe() is not needed here.
    results, _ = whisper.transcribe(jfk_path, prefix="And so my fellow Americans")

    # Collect the returned segments into a list so they can be counted and indexed.
    collected = list(results)
    assert len(collected) == 1

    only_segment = collected[0]
    assert only_segment.text == expected_text

    # The segment must start at the very beginning of the audio.
    assert only_segment.start == 0
    assert 10 < only_segment.end < 11
def test_vad(jfk_path):
model = WhisperModel("tiny")
segments, info = model.transcribe(