"""Memory benchmark for faster-whisper: measures peak GPU memory/power or RAM usage."""
import argparse
|
|
import time
|
|
|
|
from typing import Callable
|
|
|
|
import py3nvml.py3nvml as nvml
|
|
|
|
from memory_profiler import memory_usage
|
|
from utils import MyThread, get_logger, inference
|
|
|
|
# Module-level logger shared by this benchmark script.
logger = get_logger("faster-whisper")

# Command-line interface: pick GPU vs. RAM measurement mode, the GPU
# device to monitor, and how often samples are collected.
parser = argparse.ArgumentParser(description="Memory benchmark")
parser.add_argument(
    "--gpu_memory", action="store_true", help="Measure GPU memory usage"
)
parser.add_argument("--device-index", type=int, default=0, help="GPU device index")
parser.add_argument(
    "--interval",
    type=float,
    default=0.5,
    help="Interval at which measurements are collected",
)
args = parser.parse_args()

# Frequently used options hoisted into module-level names; the sampler
# thread in measure_memory() reads `interval` directly.
device_idx = args.device_index
interval = args.interval
|
|
|
|
|
|
def measure_memory(func: Callable[[], None]) -> None:
    """Run *func* once and report its peak resource usage.

    With ``--gpu_memory`` set, the selected GPU's memory use (MiB) and power
    draw (W) are sampled every ``interval`` seconds in a background thread
    while *func* runs, and the maxima are printed. Otherwise the maximum
    increase of host RAM is measured with ``memory_profiler``.

    Args:
        func: Zero-argument callable to benchmark (e.g. an inference run).
    """
    if args.gpu_memory:
        logger.info(
            "Measuring maximum GPU memory usage on GPU device."
            " Make sure to not have additional processes running on the same GPU."
        )
        # init nvml
        nvml.nvmlInit()
        handle = nvml.nvmlDeviceGetHandleByIndex(device_idx)
        gpu_name = nvml.nvmlDeviceGetName(handle)
        gpu_memory_limit = nvml.nvmlDeviceGetMemoryInfo(handle).total >> 20  # bytes -> MiB
        gpu_power_limit = nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000.0  # mW -> W
        info = {"gpu_memory_usage": [], "gpu_power_usage": []}

        def _get_gpu_info():
            # Sample memory (MiB) and power (W) until the enclosing scope
            # flips `stop`; the closure reads the flag's latest value.
            while True:
                info["gpu_memory_usage"].append(
                    nvml.nvmlDeviceGetMemoryInfo(handle).used >> 20
                )
                info["gpu_power_usage"].append(
                    nvml.nvmlDeviceGetPowerUsage(handle) / 1000
                )
                time.sleep(interval)

                if stop:
                    break

            return info

        stop = False
        thread = MyThread(_get_gpu_info, params=())
        thread.start()
        try:
            func()
        finally:
            # Always stop the sampler and release NVML — without this,
            # an exception in func() would leave the monitor thread
            # spinning forever and nvmlShutdown() unreached.
            stop = True
            thread.join()
            # shutdown nvml
            nvml.nvmlShutdown()
        result = thread.get_result()

        max_memory_usage = max(result["gpu_memory_usage"])
        max_power_usage = max(result["gpu_power_usage"])
        print("GPU name: %s" % gpu_name)
        print("GPU device index: %s" % device_idx)
        print(
            "Maximum GPU memory usage: %dMiB / %dMiB (%.2f%%)"
            % (
                max_memory_usage,
                gpu_memory_limit,
                (max_memory_usage / gpu_memory_limit) * 100,
            )
        )
        print(
            "Maximum GPU power usage: %dW / %dW (%.2f%%)"
            % (
                max_power_usage,
                gpu_power_limit,
                (max_power_usage / gpu_power_limit) * 100,
            )
        )
    else:
        logger.info("Measuring maximum increase of memory usage.")
        # max_usage=True returns the peak increment rather than a time series.
        max_usage = memory_usage(func, max_usage=True, interval=interval)
        print("Maximum increase of RAM memory usage: %d MiB" % max_usage)
|
|
|
|
|
|
if __name__ == "__main__":
    # Benchmark the `inference` workload imported from utils, in the mode
    # selected by the command-line flags parsed above.
    measure_memory(inference)