Accept the audio waveform as an input to transcribe() (#21)

2023-02-28 19:01:31 +01:00
parent ed32002aea
commit 873992623c
1 changed file with 7 additions and 5 deletions
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -93,7 +93,7 @@ class WhisperModel:

    def transcribe(
        self,
-        input_file: Union[str, BinaryIO],
+        audio: Union[str, BinaryIO, np.ndarray],
        language: Optional[str] = None,
        task: str = "transcribe",
        beam_size: int = 5,
@@ -119,7 +119,7 @@ class WhisperModel:
        """Transcribes an input file.

        Arguments:
-          input_file: Path to the input file or a file-like object.
+          audio: Path to the input file (or a file-like object), or the audio waveform.
          language: The language spoken in the audio. It should be a language code such
            as "en" or "fr". If not set, the language will be detected in the first 30 seconds
            of audio.
@@ -152,9 +152,11 @@ class WhisperModel:
            - a generator over transcribed segments
            - an instance of AudioInfo
        """
+        if not isinstance(audio, np.ndarray):
            audio = decode_audio(
-            input_file, sampling_rate=self.feature_extractor.sampling_rate
+                audio, sampling_rate=self.feature_extractor.sampling_rate
            )
+
        features = self.feature_extractor(audio)

        if language is None: