Files
faster-whisper/faster_whisper/audio.py
Guillaume Klein 5216d52d94 Initial commit
2023-02-11 10:21:19 +01:00

37 lines
968 B
Python

import av
import numpy as np
def decode_audio(input_file, sampling_rate=16000):
    """Decodes the audio.

    Audio stream 0 of the input is decoded, converted to mono signed 16-bit
    samples at ``sampling_rate``, and then rescaled to floating point.

    Args:
      input_file: Path to the input file or a file-like object.
      sampling_rate: Resample the audio to this sample rate.

    Returns:
      A 1-D float32 Numpy array holding the samples divided by 32768. The
      array is empty when no audio samples could be decoded.
    """
    fifo = av.audio.fifo.AudioFifo()
    resampler = av.audio.resampler.AudioResampler(
        format="s16",
        layout="mono",
        rate=sampling_rate,
    )

    with av.open(input_file) as container:
        # Decode and resample each audio frame.
        for frame in container.decode(audio=0):
            # NOTE(review): presumably drops the source timestamp so that
            # inconsistent pts values are not propagated downstream - confirm.
            frame.pts = None
            for new_frame in resampler.resample(frame):
                fifo.write(new_frame)

        # Flush the resampler.
        for new_frame in resampler.resample(None):
            fifo.write(new_frame)

    # Pull everything that was buffered out as a single frame.
    frame = fifo.read()

    # The FIFO yields None instead of a frame when nothing was written to it
    # (e.g. an empty audio stream); return an empty result instead of failing
    # with an AttributeError on None.
    if frame is None:
        return np.zeros(0, dtype=np.float32)

    # Convert s16 back to f32.
    return frame.to_ndarray().flatten().astype(np.float32) / 32768.0