Accept the audio waveform as an input to transcribe() (#21)

This commit is contained in:
Guillaume Klein
2023-02-28 19:01:31 +01:00
committed by GitHub
parent ed32002aea
commit 873992623c

View File

@@ -93,7 +93,7 @@ class WhisperModel:
def transcribe(
self,
input_file: Union[str, BinaryIO],
audio: Union[str, BinaryIO, np.ndarray],
language: Optional[str] = None,
task: str = "transcribe",
beam_size: int = 5,
@@ -119,7 +119,7 @@ class WhisperModel:
"""Transcribes an input file.
Arguments:
input_file: Path to the input file or a file-like object.
audio: Path to the input file (or a file-like object), or the audio waveform.
language: The language spoken in the audio. It should be a language code such
as "en" or "fr". If not set, the language will be detected in the first 30 seconds
of audio.
@@ -152,9 +152,11 @@ class WhisperModel:
- a generator over transcribed segments
- an instance of AudioInfo
"""
if not isinstance(audio, np.ndarray):
audio = decode_audio(
input_file, sampling_rate=self.feature_extractor.sampling_rate
audio, sampling_rate=self.feature_extractor.sampling_rate
)
features = self.feature_extractor(audio)
if language is None: