* transcribe: return all language probabilities if requested. If return_all_language_probs is True, the TranscriptionInfo structure will have a list of tuples reflecting all language probabilities as returned by the model. * transcribe: fix docstring * transcribe: remove return_all_lang_probs parameter
77 lines
2.3 KiB
Python
77 lines
2.3 KiB
Python
import os
|
|
|
|
from faster_whisper import WhisperModel, decode_audio
|
|
|
|
|
|
def test_transcribe(jfk_path):
    """Transcribe the JFK sample with word timestamps and verify the output.

    Checks the language metadata on the returned info object, that the first
    entry of ``all_language_probs`` agrees with that metadata, and that the
    single resulting segment is consistent with its word-level timestamps.

    ``jfk_path`` is presumably a pytest fixture pointing at the JFK audio
    sample; confirm against the test configuration.
    """
    whisper = WhisperModel("tiny")
    segment_iter, info = whisper.transcribe(jfk_path, word_timestamps=True)

    # Language metadata reported for the whole input.
    assert info.all_language_probs is not None
    assert info.language == "en"
    assert info.language_probability > 0.9
    assert info.duration == 11

    # The leading entry of the full probability list should agree with the
    # language/probability pair already exposed on the info object.
    best_language, best_score = info.all_language_probs[0]
    assert info.language == best_language
    assert abs(info.language_probability - best_score) < 1e-16

    # Materialize the segments before inspecting them.
    transcribed = list(segment_iter)
    assert len(transcribed) == 1

    only_segment = transcribed[0]
    expected_text = (
        " And so my fellow Americans ask not what your country can do for you, "
        "ask what you can do for your country."
    )
    assert only_segment.text == expected_text

    # Word-level output must reassemble into the segment text and share the
    # segment's outer time boundaries.
    words = only_segment.words
    assert only_segment.text == "".join(w.word for w in words)
    assert only_segment.start == words[0].start
    assert only_segment.end == words[-1].end
|
|
|
|
|
|
def test_vad(jfk_path):
    """Transcribe the JFK sample with the VAD filter enabled.

    Verifies the single segment's text, that its start and end fall in the
    expected one-second windows, and that the custom VAD parameters passed to
    ``transcribe`` are reported back on ``info.vad_options``.

    ``jfk_path`` is presumably a pytest fixture pointing at the JFK audio
    sample; confirm against the test configuration.
    """
    vad_settings = {"min_silence_duration_ms": 500, "speech_pad_ms": 200}

    whisper = WhisperModel("tiny")
    segment_iter, info = whisper.transcribe(
        jfk_path,
        vad_filter=True,
        vad_parameters=vad_settings,
    )
    transcribed = list(segment_iter)

    assert len(transcribed) == 1
    only_segment = transcribed[0]

    expected_text = (
        " And so my fellow Americans ask not what your country can do for you, "
        "ask what you can do for your country."
    )
    assert only_segment.text == expected_text

    # Segment boundaries with the VAD filter applied.
    assert 0 < only_segment.start < 1
    assert 10 < only_segment.end < 11

    # The options given to transcribe() must be echoed back on the info object.
    reported = info.vad_options
    assert reported.min_silence_duration_ms == 500
    assert reported.speech_pad_ms == 200
|
|
|
|
|
|
def test_stereo_diarization(data_dir):
    """Transcribe each channel of a stereo recording independently.

    The stereo file is decoded with ``split_stereo=True`` into two channels,
    and each channel is expected to yield a different sentence.

    ``data_dir`` is presumably a pytest fixture naming the directory that
    holds ``stereo_diarization.wav``; confirm against the test configuration.
    """
    whisper = WhisperModel("tiny")

    stereo_file = os.path.join(data_dir, "stereo_diarization.wav")
    left, right = decode_audio(stereo_file, split_stereo=True)

    def transcribe_channel(channel):
        # Concatenate every segment's text and trim surrounding whitespace.
        channel_segments, _ = whisper.transcribe(channel)
        return "".join(part.text for part in channel_segments).strip()

    left_text = transcribe_channel(left)
    assert left_text == (
        "He began a confused complaint against the wizard, "
        "who had vanished behind the curtain on the left."
    )

    right_text = transcribe_channel(right)
    assert right_text == "The horizon seems extremely distant."
|