Initial commit
This commit is contained in:
36
faster_whisper/audio.py
Normal file
36
faster_whisper/audio.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import av
|
||||
import numpy as np
|
||||
|
||||
|
||||
def decode_audio(input_file, sampling_rate=16000):
|
||||
"""Decodes the audio.
|
||||
|
||||
Args:
|
||||
input_file: Path to the input file or a file-like object.
|
||||
sampling_rate: Resample the audio to this sample rate.
|
||||
|
||||
Returns:
|
||||
A float32 Numpy array.
|
||||
"""
|
||||
fifo = av.audio.fifo.AudioFifo()
|
||||
resampler = av.audio.resampler.AudioResampler(
|
||||
format="s16",
|
||||
layout="mono",
|
||||
rate=sampling_rate,
|
||||
)
|
||||
|
||||
with av.open(input_file) as container:
|
||||
# Decode and resample each audio frame.
|
||||
for frame in container.decode(audio=0):
|
||||
frame.pts = None
|
||||
for new_frame in resampler.resample(frame):
|
||||
fifo.write(new_frame)
|
||||
|
||||
# Flush the resampler.
|
||||
for new_frame in resampler.resample(None):
|
||||
fifo.write(new_frame)
|
||||
|
||||
frame = fifo.read()
|
||||
|
||||
# Convert s16 back to f32.
|
||||
return frame.to_ndarray().flatten().astype(np.float32) / 32768.0
|
||||
Reference in New Issue
Block a user