Add benchmarking logic for memory, wer and speed (#773)
BIN  benchmark/benchmark.m4a  Normal file
Binary file not shown.
94  benchmark/memory_benchmark.py  Normal file
@@ -0,0 +1,94 @@
import argparse
import time

from typing import Callable

import py3nvml.py3nvml as nvml

from memory_profiler import memory_usage
from utils import MyThread, get_logger, inference

logger = get_logger("faster-whisper")
parser = argparse.ArgumentParser(description="Memory benchmark")
parser.add_argument(
    "--gpu_memory", action="store_true", help="Measure GPU memory usage"
)
parser.add_argument("--device-index", type=int, default=0, help="GPU device index")
parser.add_argument(
    "--interval",
    type=float,
    default=0.5,
    help="Interval at which measurements are collected",
)
args = parser.parse_args()
device_idx = args.device_index
interval = args.interval


def measure_memory(func: Callable[[], None]):
    if args.gpu_memory:
        logger.info(
            "Measuring maximum GPU memory usage on GPU device."
            " Make sure to not have additional processes running on the same GPU."
        )
        # init nvml
        nvml.nvmlInit()
        handle = nvml.nvmlDeviceGetHandleByIndex(device_idx)
        gpu_name = nvml.nvmlDeviceGetName(handle)
        gpu_memory_limit = nvml.nvmlDeviceGetMemoryInfo(handle).total >> 20
        gpu_power_limit = nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000.0
        info = {"gpu_memory_usage": [], "gpu_power_usage": []}

        def _get_gpu_info():
            while True:
                info["gpu_memory_usage"].append(
                    nvml.nvmlDeviceGetMemoryInfo(handle).used >> 20
                )
                info["gpu_power_usage"].append(
                    nvml.nvmlDeviceGetPowerUsage(handle) / 1000
                )
                time.sleep(interval)

                if stop:
                    break

            return info

        stop = False
        thread = MyThread(_get_gpu_info, params=())
        thread.start()
        func()
        stop = True
        thread.join()
        result = thread.get_result()

        # shutdown nvml
        nvml.nvmlShutdown()
        max_memory_usage = max(result["gpu_memory_usage"])
        max_power_usage = max(result["gpu_power_usage"])
        print("GPU name: %s" % gpu_name)
        print("GPU device index: %s" % device_idx)
        print(
            "Maximum GPU memory usage: %dMiB / %dMiB (%.2f%%)"
            % (
                max_memory_usage,
                gpu_memory_limit,
                (max_memory_usage / gpu_memory_limit) * 100,
            )
        )
        print(
            "Maximum GPU power usage: %dW / %dW (%.2f%%)"
            % (
                max_power_usage,
                gpu_power_limit,
                (max_power_usage / gpu_power_limit) * 100,
            )
        )
    else:
        logger.info("Measuring maximum increase of memory usage.")
        max_usage = memory_usage(func, max_usage=True, interval=interval)
        print("Maximum increase of RAM memory usage: %d MiB" % max_usage)


if __name__ == "__main__":
    measure_memory(inference)
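A minimal sketch of the memory_profiler call pattern used in the CPU branch above (not part of the committed files; the toy allocate() workload and the 0.1 s interval are illustrative):

from memory_profiler import memory_usage


def allocate():
    data = [0] * 10_000_000  # a list of ten million entries, held until return
    return len(data)


# memory_usage runs the callable, samples the process's memory every `interval`
# seconds, and with max_usage=True returns the peak reading in MiB
peak = memory_usage(allocate, max_usage=True, interval=0.1)
print("Peak memory while running allocate(): %d MiB" % peak)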
1742  benchmark/normalizer.json  Normal file
File diff suppressed because it is too large
6  benchmark/requirements.benchmark.txt  Normal file
@@ -0,0 +1,6 @@
transformers
jiwer
evaluate
datasets
memory_profiler
py3nvml
31  benchmark/speed_benchmark.py  Normal file
@@ -0,0 +1,31 @@
import argparse
import timeit

from typing import Callable

from utils import inference

parser = argparse.ArgumentParser(description="Speed benchmark")
parser.add_argument(
    "--repeat",
    type=int,
    default=3,
    help="Times an experiment will be run.",
)
args = parser.parse_args()


def measure_speed(func: Callable[[], None]):
    # as written in https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat,
    # min should be taken rather than the average
    runtimes = timeit.repeat(
        func,
        repeat=args.repeat,
        number=10,
    )
    print(runtimes)
    print("Min execution time: %.3fs" % (min(runtimes) / 10.0))


if __name__ == "__main__":
    measure_speed(inference)
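A minimal illustration of the timeit.repeat semantics relied on above (not part of the committed files): each entry in runtimes is the total wall time of number consecutive calls, so dividing the minimum by number gives the best per-call time.

import timeit

# three repetitions, each timing 10 consecutive calls of a toy workload
runtimes = timeit.repeat(lambda: sum(range(1000)), repeat=3, number=10)
print(runtimes)
print("Min execution time: %.6fs" % (min(runtimes) / 10.0))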
39  benchmark/utils.py  Normal file
@@ -0,0 +1,39 @@
import logging

from threading import Thread
from typing import Optional

from faster_whisper import WhisperModel

model_path = "large-v3"
model = WhisperModel(model_path, device="cuda")


def inference():
    segments, info = model.transcribe("benchmark.m4a", language="fr")
    for segment in segments:
        print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))


def get_logger(name: Optional[str] = None) -> logging.Logger:
    formatter = logging.Formatter("%(levelname)s: %(message)s")
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger


class MyThread(Thread):
    def __init__(self, func, params):
        super(MyThread, self).__init__()
        self.func = func
        self.params = params
        self.result = None

    def run(self):
        self.result = self.func(*self.params)

    def get_result(self):
        return self.result
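A minimal usage sketch of the MyThread helper (not part of the committed files). It assumes the snippet is run from the benchmark directory so that utils is importable; note that utils.py builds a WhisperModel at import time, so even this toy example needs a working faster-whisper/CUDA setup.

from utils import MyThread


def count_to(n):
    return sum(range(n))


# unlike a plain threading.Thread, MyThread stores the callable's return value
# so it can be read back after join()
thread = MyThread(count_to, params=(1000,))
thread.start()
thread.join()
print(thread.get_result())  # 499500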
61  benchmark/wer_benchmark.py  Normal file
@@ -0,0 +1,61 @@
import argparse
import json

from datasets import load_dataset
from evaluate import load
from tqdm import tqdm
from transformers.models.whisper.english_normalizer import EnglishTextNormalizer

from faster_whisper import WhisperModel

parser = argparse.ArgumentParser(description="WER benchmark")
parser.add_argument(
    "--audio_numb",
    type=int,
    default=None,
    help="Specify the number of validation audio files in the dataset."
    " Set to None to retrieve all audio files.",
)
args = parser.parse_args()

model_path = "large-v3"
model = WhisperModel(model_path, device="cuda")

# load the dataset with streaming mode
dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming=True)

# define the evaluation metric
wer_metric = load("wer")
normalizer = EnglishTextNormalizer(json.load(open("normalizer.json")))


def inference(batch):
    batch["transcription"] = []
    for sample in batch["audio"]:
        segments, info = model.transcribe(sample["array"], language="en")
        batch["transcription"].append("".join([segment.text for segment in segments]))
    batch["reference"] = batch["text"]
    return batch


dataset = dataset.map(function=inference, batched=True, batch_size=16)

all_transcriptions = []
all_references = []

# iterate over the dataset and run inference
for i, result in tqdm(enumerate(dataset), desc="Evaluating..."):
    all_transcriptions.append(result["transcription"])
    all_references.append(result["reference"])
    if args.audio_numb and i == (args.audio_numb - 1):
        break

# normalize predictions and references
all_transcriptions = [normalizer(transcription) for transcription in all_transcriptions]
all_references = [normalizer(reference) for reference in all_references]

# compute the WER metric
wer = 100 * wer_metric.compute(
    predictions=all_transcriptions, references=all_references
)
print("WER: %.3f" % wer)
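A minimal sketch of the evaluate WER metric used above (not part of the committed files; the toy strings are illustrative):

from evaluate import load

wer_metric = load("wer")

# the prediction drops one word ("there") from a three-word reference,
# i.e. one deletion, so the word error rate is 1/3
score = wer_metric.compute(
    predictions=["hello world"],
    references=["hello there world"],
)
print("WER: %.3f" % (100 * score))  # 33.333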