Initial commit

2023-02-11 10:21:19 +01:00
commit 5216d52d94
9 changed files with 658 additions and 0 deletions
--- a/faster_whisper/audio.py
+++ b/faster_whisper/audio.py
@@ -0,0 +1,36 @@
+import av
+import numpy as np
+
+
+def decode_audio(input_file, sampling_rate=16000):
+    """Decodes audio stream 0 of the input into mono float32 samples.
+
+    Args:
+      input_file: Path to the input file or a file-like object.
+      sampling_rate: Resample the audio to this sample rate (in Hz).
+
+    Returns:
+      A flattened float32 Numpy array of the s16 samples divided by 32768.
+    """
+    fifo = av.audio.fifo.AudioFifo()
+    resampler = av.audio.resampler.AudioResampler(
+        format="s16",
+        layout="mono",
+        rate=sampling_rate,
+    )
+
+    with av.open(input_file) as container:
+        # Decode stream 0 frame by frame and queue the resampled output.
+        for frame in container.decode(audio=0):
+            frame.pts = None  # NOTE(review): presumably avoids pts checks — confirm
+            for new_frame in resampler.resample(frame):
+                fifo.write(new_frame)
+
+        # Passing None flushes the samples still held by the resampler.
+        for new_frame in resampler.resample(None):
+            fifo.write(new_frame)
+
+    frame = fifo.read()  # NOTE(review): may be None if nothing was written — confirm
+
+    # Convert s16 back to f32: dividing by 2**15 maps int16 into [-1.0, 1.0).
+    return frame.to_ndarray().flatten().astype(np.float32) / 32768.0