New PR for Faster Whisper: Batching Support, Speed Boosts, and Quality Enhancements (#856)

Batching Support, Speed Boosts, and Quality Enhancements

---------

Co-authored-by: Hargun Mujral <83234565+hargunmujral@users.noreply.github.com>
Co-authored-by: MahmoudAshraf97 <hassouna97.ma@gmail.com>
This commit is contained in:
Jilt Sebastian
2024-07-18 11:48:52 +02:00
committed by GitHub
parent fbcf58bf98
commit eb8390233c
13 changed files with 1697 additions and 423 deletions

View File

@@ -1,5 +1,6 @@
import argparse
import json
import os
from datasets import load_dataset
from evaluate import load
@@ -26,7 +27,9 @@ dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming
# define the evaluation metric
wer_metric = load("wer")
normalizer = EnglishTextNormalizer(json.load(open("normalizer.json")))
# Locate normalizer.json next to this script (via __file__) rather than
# relative to the current working directory, and close the file handle as
# soon as the JSON has been parsed.
with open(os.path.join(os.path.dirname(__file__), "normalizer.json"), "r") as f:
    normalizer = EnglishTextNormalizer(json.load(f))
def inference(batch):