Prepend prefix tokens with the initial timestamp token (#358)

This commit is contained in:
Guillaume Klein
2023-07-18 15:22:39 +02:00
committed by GitHub
parent 2a37390fed
commit 0e051a5b77
3 changed files with 21 additions and 1 deletion

View File

@@ -686,6 +686,8 @@ class WhisperModel:
prefix_tokens = tokenizer.encode(" " + prefix.strip())
if len(prefix_tokens) >= self.max_length // 2:
prefix_tokens = prefix_tokens[: self.max_length // 2 - 1]
if not without_timestamps:
prompt.append(tokenizer.timestamp_begin)
prompt.extend(prefix_tokens)
return prompt