From 8246479fda33f3d8eefeaadd8106065ec56b2607 Mon Sep 17 00:00:00 2001 From: Guillaume Klein Date: Mon, 27 Mar 2023 10:19:22 +0200 Subject: [PATCH] Ignore the invalid audio frames (#82) --- faster_whisper/audio.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/faster_whisper/audio.py b/faster_whisper/audio.py index 4cc1564..b6f5709 100644 --- a/faster_whisper/audio.py +++ b/faster_whisper/audio.py @@ -36,6 +36,7 @@ def decode_audio(input_file: Union[str, BinaryIO], sampling_rate: int = 16000): with av.open(input_file, metadata_errors="ignore") as container: frames = container.decode(audio=0) + frames = _ignore_invalid_frames(frames) frames = _group_frames(frames, 500000) frames = _resample_frames(frames, resampler) @@ -50,6 +51,18 @@ def decode_audio(input_file: Union[str, BinaryIO], sampling_rate: int = 16000): return audio.astype(np.float32) / 32768.0 +def _ignore_invalid_frames(frames): + iterator = iter(frames) + + while True: + try: + yield next(iterator) + except StopIteration: + break + except av.error.InvalidDataError: + continue + + def _group_frames(frames, num_samples=None): fifo = av.audio.fifo.AudioFifo()