From e87fbf8a496699ce79ed6a4d2e52af389eed7197 Mon Sep 17 00:00:00 2001 From: MinorJinx <47308878+MinorJinx@users.noreply.github.com> Date: Thu, 31 Aug 2023 10:19:48 -0500 Subject: [PATCH] Added audio duration after VAD to TranscriptionInfo object (#445) * Added VAD removed audio duration to TranscriptionInfo object Along with the duration of the original audio, this commit adds the seconds of audio removed by the VAD to the returned info obj * Changing naming for duration_after_vad Instead of the property returning the audio duration removed, it now returns the final duration after the vad. If vad_filter is False or if it doesn't remove any audio, the original duration is returned. --- faster_whisper/transcribe.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index ba55adc..7ff27d2 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -69,6 +69,7 @@ class TranscriptionInfo(NamedTuple): language: str language_probability: float duration: float + duration_after_vad: float all_language_probs: Optional[List[Tuple[str, float]]] transcription_options: TranscriptionOptions vad_options: VadOptions @@ -249,6 +250,7 @@ class WhisperModel: audio = decode_audio(audio, sampling_rate=sampling_rate) duration = audio.shape[0] / sampling_rate + duration_after_vad = duration self.logger.info( "Processing audio with duration %s", format_timestamp(duration) @@ -261,10 +263,11 @@ class WhisperModel: vad_parameters = VadOptions(**vad_parameters) speech_chunks = get_speech_timestamps(audio, vad_parameters) audio = collect_chunks(audio, speech_chunks) + duration_after_vad = audio.shape[0] / sampling_rate self.logger.info( "VAD filter removed %s of audio", - format_timestamp(duration - (audio.shape[0] / sampling_rate)), + format_timestamp(duration - duration_after_vad), ) if self.logger.isEnabledFor(logging.DEBUG): @@ -352,6 +355,7 @@ class WhisperModel: language=language, 
language_probability=language_probability, duration=duration, + duration_after_vad=duration_after_vad, transcription_options=options, vad_options=vad_parameters, all_language_probs=all_language_probs,