Add pad_or_trim function to handle segment before encoding (#705)
This commit is contained in:
@@ -11,7 +11,7 @@ import ctranslate2
|
||||
import numpy as np
|
||||
import tokenizers
|
||||
|
||||
-from faster_whisper.audio import decode_audio
|
||||
+from faster_whisper.audio import decode_audio, pad_or_trim
|
||||
from faster_whisper.feature_extractor import FeatureExtractor
|
||||
from faster_whisper.tokenizer import _LANGUAGE_CODES, Tokenizer
|
||||
from faster_whisper.utils import download_model, format_timestamp, get_end, get_logger
|
||||
@@ -492,6 +492,7 @@ class WhisperModel:
|
||||
)
|
||||
segment = features[:, seek : seek + segment_size]
|
||||
segment_duration = segment_size * self.feature_extractor.time_per_frame
|
||||
+segment = pad_or_trim(segment, self.feature_extractor.nb_max_frames)
|
||||
|
||||
if self.logger.isEnabledFor(logging.DEBUG):
|
||||
self.logger.debug(
|
||||
|
||||
Reference in New Issue
Block a user