Suppress some special tokens when the default set is not used
This commit is contained in:
@@ -33,10 +33,22 @@ class Tokenizer:
|
||||
self.language = None
|
||||
self.language_code = "en"
|
||||
|
||||
@cached_property
def transcribe(self) -> int:
    """Id that ``self.tokenizer`` assigns to the ``<|transcribe|>`` token.

    The lookup is delegated to ``self.tokenizer.token_to_id``; the
    ``cached_property`` decorator is expected to memoize the result per
    instance.
    """
    transcribe_id = self.tokenizer.token_to_id("<|transcribe|>")
    return transcribe_id
|
||||
|
||||
@cached_property
def translate(self) -> int:
    """Id that ``self.tokenizer`` assigns to the ``<|translate|>`` token.

    The lookup is delegated to ``self.tokenizer.token_to_id``; the
    ``cached_property`` decorator is expected to memoize the result per
    instance.
    """
    translate_id = self.tokenizer.token_to_id("<|translate|>")
    return translate_id
|
||||
|
||||
@cached_property
def sot(self) -> int:
    """Id that ``self.tokenizer`` assigns to ``<|startoftranscript|>``.

    The lookup is delegated to ``self.tokenizer.token_to_id``; the
    ``cached_property`` decorator is expected to memoize the result per
    instance.
    """
    sot_id = self.tokenizer.token_to_id("<|startoftranscript|>")
    return sot_id
|
||||
|
||||
@cached_property
def sot_lm(self) -> int:
    """Id that ``self.tokenizer`` assigns to the ``<|startoflm|>`` token.

    The lookup is delegated to ``self.tokenizer.token_to_id``; the
    ``cached_property`` decorator is expected to memoize the result per
    instance.
    """
    sot_lm_id = self.tokenizer.token_to_id("<|startoflm|>")
    return sot_lm_id
|
||||
|
||||
@cached_property
def sot_prev(self) -> int:
    """Id that ``self.tokenizer`` assigns to the ``<|startofprev|>`` token.

    The lookup is delegated to ``self.tokenizer.token_to_id``; the
    ``cached_property`` decorator is expected to memoize the result per
    instance.
    """
    sot_prev_id = self.tokenizer.token_to_id("<|startofprev|>")
    return sot_prev_id
|
||||
|
||||
Reference in New Issue
Block a user