New PR for Faster Whisper: Batching Support, Speed Boosts, and Quality Enhancements (#856)

Batching Support, Speed Boosts, and Quality Enhancements

---------

Co-authored-by: Hargun Mujral <83234565+hargunmujral@users.noreply.github.com>
Co-authored-by: MahmoudAshraf97 <hassouna97.ma@gmail.com>
2024-07-18 11:48:52 +02:00
parent fbcf58bf98
commit eb8390233c
13 changed files with 1697 additions and 423 deletions
--- a/benchmark/wer_benchmark.py
+++ b/benchmark/wer_benchmark.py
@@ -1,5 +1,6 @@
 import argparse
 import json
+import os

 from datasets import load_dataset
 from evaluate import load
@@ -26,7 +27,9 @@ dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming

 # define the evaluation metric
 wer_metric = load("wer")
-normalizer = EnglishTextNormalizer(json.load(open("normalizer.json")))
+
+with open(os.path.join(os.path.dirname(__file__), "normalizer.json"), "r") as f:
+    normalizer = EnglishTextNormalizer(json.load(f))


 def inference(batch):