Added audio duration after VAD to TranscriptionInfo object (#445)
* Added VAD-removed audio duration to TranscriptionInfo object. Along with the duration of the original audio, this commit adds the seconds of audio removed by the VAD to the returned info object.
* Changed naming to duration_after_vad. Instead of the property returning the audio duration removed, it now returns the final duration after the VAD. If vad_filter is False or if it doesn't remove any audio, the original duration is returned.
This commit is contained in:
@@ -69,6 +69,7 @@ class TranscriptionInfo(NamedTuple):
|
||||
language: str
|
||||
language_probability: float
|
||||
duration: float
|
||||
duration_after_vad: float
|
||||
all_language_probs: Optional[List[Tuple[str, float]]]
|
||||
transcription_options: TranscriptionOptions
|
||||
vad_options: VadOptions
|
||||
@@ -249,6 +250,7 @@ class WhisperModel:
|
||||
audio = decode_audio(audio, sampling_rate=sampling_rate)
|
||||
|
||||
duration = audio.shape[0] / sampling_rate
|
||||
duration_after_vad = duration
|
||||
|
||||
self.logger.info(
|
||||
"Processing audio with duration %s", format_timestamp(duration)
|
||||
@@ -261,10 +263,11 @@ class WhisperModel:
|
||||
vad_parameters = VadOptions(**vad_parameters)
|
||||
speech_chunks = get_speech_timestamps(audio, vad_parameters)
|
||||
audio = collect_chunks(audio, speech_chunks)
|
||||
duration_after_vad = audio.shape[0] / sampling_rate
|
||||
|
||||
self.logger.info(
|
||||
"VAD filter removed %s of audio",
|
||||
format_timestamp(duration - (audio.shape[0] / sampling_rate)),
|
||||
format_timestamp(duration - duration_after_vad),
|
||||
)
|
||||
|
||||
if self.logger.isEnabledFor(logging.DEBUG):
|
||||
@@ -352,6 +355,7 @@ class WhisperModel:
|
||||
language=language,
|
||||
language_probability=language_probability,
|
||||
duration=duration,
|
||||
duration_after_vad=duration_after_vad,
|
||||
transcription_options=options,
|
||||
vad_options=vad_parameters,
|
||||
all_language_probs=all_language_probs,
|
||||
|
||||
Reference in New Issue
Block a user