Accept the audio waveform as an input to transcribe() (#21)

This commit is contained in:
Guillaume Klein
2023-02-28 19:01:31 +01:00
committed by GitHub
parent ed32002aea
commit 873992623c

View File

@@ -93,7 +93,7 @@ class WhisperModel:
     def transcribe(
         self,
-        input_file: Union[str, BinaryIO],
+        audio: Union[str, BinaryIO, np.ndarray],
         language: Optional[str] = None,
         task: str = "transcribe",
         beam_size: int = 5,
@@ -119,7 +119,7 @@ class WhisperModel:
         """Transcribes an input file.

         Arguments:
-          input_file: Path to the input file or a file-like object.
+          audio: Path to the input file (or a file-like object), or the audio waveform.
           language: The language spoken in the audio. It should be a language code such
             as "en" or "fr". If not set, the language will be detected in the first 30 seconds
             of audio.
@@ -152,9 +152,11 @@ class WhisperModel:
           - a generator over transcribed segments
           - an instance of AudioInfo
         """
-        audio = decode_audio(
-            input_file, sampling_rate=self.feature_extractor.sampling_rate
-        )
+        if not isinstance(audio, np.ndarray):
+            audio = decode_audio(
+                audio, sampling_rate=self.feature_extractor.sampling_rate
+            )

         features = self.feature_extractor(audio)

         if language is None: