Compare commits

...

5 Commits

6 changed files with 164 additions and 52 deletions

.dockerignore Normal file (+1)

@@ -0,0 +1 @@
/venv

@@ -1,7 +1,7 @@
 FROM docker.io/nvidia/cuda:12.0.0-cudnn8-runtime-ubuntu22.04
 RUN apt-get update && \
-    apt-get install -y ffmpeg python3 python3-pip && \
+    apt-get install -y ffmpeg python3 python3-pip git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

@@ -4,4 +4,5 @@ uvicorn[standard]
 whisper_ctranslate2
 opencc
 prometheus-fastapi-instrumentator
+git+https://git.yongyuancv.cn/heimoshuiyu/faster-whisper@prompt
 pydub

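The requirement added above installs faster-whisper straight from the `prompt` branch of the heimoshuiyu fork, which is presumably why the image build above now installs git (pip needs it for VCS requirements) and why the regenerated pin list below resolves faster-whisper to a fixed commit of that repository. As an optional runtime sanity check, the sketch below reads the PEP 610 direct_url.json record that pip writes for VCS installs; it is illustrative only and not part of the change.

# Illustrative check (not part of the diff): a git-installed faster-whisper
# carries a direct_url.json with the repository URL and pinned commit,
# while a PyPI wheel returns None here.
from importlib.metadata import distribution

dist = distribution("faster-whisper")
print(dist.version)
print(dist.read_text("direct_url.json"))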

@@ -1,48 +1,141 @@
-annotated-types==0.7.0
-anyio==4.4.0
-av==12.2.0
-certifi==2024.7.4
-cffi==1.16.0
-charset-normalizer==3.3.2
-click==8.1.7
-coloredlogs==15.0.1
-ctranslate2==4.3.1
-fastapi==0.111.0
-faster-whisper==1.0.3
-filelock==3.15.4
-flatbuffers==24.3.25
-fsspec==2024.6.1
-h11==0.14.0
-httptools==0.6.1
-huggingface-hub==0.23.4
-humanfriendly==10.0
-idna==3.7
-mpmath==1.3.0
-numpy==1.26.4
-onnxruntime==1.18.1
-OpenCC==1.1.7
-packaging==24.1
-prometheus-client==0.18.0
-prometheus-fastapi-instrumentator==7.0.0
-protobuf==5.27.2
-pycparser==2.22
-pydantic==2.8.2
-pydantic_core==2.20.1
-pydub==0.25.1
-python-dotenv==1.0.1
-python-multipart==0.0.9
-PyYAML==6.0.1
-requests==2.32.3
-sniffio==1.3.1
-sounddevice==0.4.7
-starlette==0.37.2
-sympy==1.12.1
-tokenizers==0.19.1
-tqdm==4.66.4
-typing_extensions==4.12.2
-urllib3==2.2.2
-uvicorn==0.30.1
-uvloop==0.19.0
-watchfiles==0.22.0
-websockets==12.0
-whisper-ctranslate2==0.4.5
+aiohappyeyeballs==2.4.0
+aiohttp==3.10.5
+aiosignal==1.3.1
+alembic==1.13.2
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.4.0
+asteroid-filterbanks==0.4.0
+async-timeout==4.0.3
+attrs==24.2.0
+audioread==3.0.1
+certifi==2024.8.30
+cffi==1.17.0
+charset-normalizer==3.3.2
+click==8.1.7
+coloredlogs==15.0.1
+colorlog==6.8.2
+contourpy==1.3.0
+ctranslate2==4.3.1
+cycler==0.12.1
+decorator==5.1.1
+docopt==0.6.2
+einops==0.8.0
+exceptiongroup==1.2.2
+fastapi==0.112.2
+faster-whisper @ git+https://git.yongyuancv.cn/heimoshuiyu/faster-whisper@6e42088656e63c4222b8a193e8c783accb2a1df7
+filelock==3.15.4
+flatbuffers==24.3.25
+fonttools==4.53.1
+frozenlist==1.4.1
+fsspec==2024.6.1
+greenlet==3.0.3
+h11==0.14.0
+httptools==0.6.1
+huggingface-hub==0.24.6
+humanfriendly==10.0
+HyperPyYAML==1.2.2
+idna==3.8
+Jinja2==3.1.4
+joblib==1.4.2
+julius==0.2.7
+kiwisolver==1.4.7
+lazy_loader==0.4
+librosa==0.10.2.post1
+lightning==2.4.0
+lightning-utilities==0.11.7
+llvmlite==0.43.0
+Mako==1.3.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+networkx==3.3
+numba==0.60.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.6.68
+nvidia-nvtx-cu12==12.1.105
+omegaconf==2.3.0
+onnxruntime==1.19.2
+OpenCC==1.1.9
+optuna==4.0.0
+packaging==24.1
+pandas==2.2.2
+pillow==10.4.0
+platformdirs==4.2.2
+pooch==1.8.2
+primePy==1.3
+prometheus-fastapi-instrumentator==7.0.0
+prometheus_client==0.20.0
+protobuf==5.28.0
+pyannote.audio==3.3.1
+pyannote.core==5.0.0
+pyannote.database==5.1.0
+pyannote.metrics==3.2.1
+pyannote.pipeline==3.0.1
+pycparser==2.22
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytorch-lightning==2.4.0
+pytorch-metric-learning==2.6.0
+pytz==2024.1
+PyYAML==6.0.2
+requests==2.32.3
+rich==13.8.0
+ruamel.yaml==0.18.6
+ruamel.yaml.clib==0.2.8
+scikit-learn==1.5.1
+scipy==1.14.1
+semver==3.0.2
+sentencepiece==0.2.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+sortedcontainers==2.4.0
+sounddevice==0.5.0
+soundfile==0.12.1
+soxr==0.5.0.post1
+speechbrain==1.0.1
+SQLAlchemy==2.0.33
+starlette==0.38.4
+sympy==1.13.2
+tabulate==0.9.0
+tensorboardX==2.6.2.2
+threadpoolctl==3.5.0
+tokenizers==0.20.0
+torch==2.4.0
+torch-audiomentations==0.11.1
+torch-pitch-shift==1.2.4
+torchaudio==2.4.0
+torchmetrics==1.4.1
+tqdm==4.66.5
+triton==3.0.0
+typer==0.12.5
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.2
+uvicorn==0.30.6
+uvloop==0.20.0
+watchfiles==0.24.0
+websockets==13.0.1
+whisper-ctranslate2==0.4.5
+yarl==1.9.8

start-podman.sh Executable file (+11)

@@ -0,0 +1,11 @@
#!/bin/bash
podman run -d --name whisper-fastapi \
--restart unless-stopped \
--name whisper-fastapi \
-v ~/.cache/huggingface:/root/.cache/huggingface \
--device nvidia.com/gpu=all --security-opt=label=disable \
--gpus all \
-p 5000:5000 \
docker.io/heimoshuiyu/whisper-fastapi:latest \
--model large-v2
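Once the container above is running (the large-v2 model can take a while to download and load on first start), a small poll against the published port confirms the service is reachable. The sketch below assumes the host port is 5000 as mapped above and that prometheus-fastapi-instrumentator is exposed at its default /metrics path; both are assumptions, not something the diff guarantees.

# Minimal smoke test for the container started by start-podman.sh.
# Assumptions (not part of the diff): host port 5000, default /metrics path.
import time

import requests

for _ in range(30):  # keep retrying while the model is still loading
    try:
        resp = requests.get("http://localhost:5000/metrics", timeout=5)
        resp.raise_for_status()
        print("service is up:", resp.text.splitlines()[0])
        break
    except requests.RequestException:
        time.sleep(2)
else:
    raise SystemExit("whisper-fastapi did not become reachable on port 5000")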

@@ -78,7 +78,7 @@ def tsv_writer(generator: Generator[dict[str, Any], Any, None]):
     for i, segment in enumerate(generator):
         start_time = str(round(1000 * segment["start"]))
         end_time = str(round(1000 * segment["end"]))
-        text = segment["text"]
+        text = segment["text"].strip()
         yield f"{start_time}\t{end_time}\t{text}\n"
@@ -90,7 +90,7 @@ def srt_writer(generator: Generator[dict[str, Any], Any, None]):
         end_time = format_timestamp(
             segment["end"], decimal_marker=",", always_include_hours=True
         )
-        text = segment["text"]
+        text = segment["text"].strip()
         yield f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
@@ -99,7 +99,7 @@ def vtt_writer(generator: Generator[dict[str, Any], Any, None]):
     for i, segment in enumerate(generator):
         start_time = format_timestamp(segment["start"])
         end_time = format_timestamp(segment["end"])
-        text = segment["text"]
+        text = segment["text"].strip()
         yield f"{start_time} --> {end_time}\n{text}\n\n"
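The only functional change in the three writer hunks above is the added .strip(): Whisper segment text typically starts with a leading space, which previously ended up at the front of every TSV, SRT, and VTT line. A tiny standalone illustration with hypothetical sample data (simplified; the real writers also format timestamps):

# Illustrative only, not the app's code.
segment = {"start": 0.0, "end": 2.5, "text": " Hello there."}

old_text = segment["text"]          # ' Hello there.'  -> line starts with a stray space
new_text = segment["text"].strip()  # 'Hello there.'   -> clean subtitle line

print(f"{round(1000 * segment['start'])}\t{round(1000 * segment['end'])}\t{new_text}")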
@@ -159,6 +159,8 @@ def stream_builder(
 @app.websocket("/k6nele/status")
 @app.websocket("/konele/status")
+@app.websocket("/v1/k6nele/status")
+@app.websocket("/v1/konele/status")
 async def konele_status(
     websocket: WebSocket,
 ):
@@ -169,6 +171,8 @@ async def konele_status(
 @app.websocket("/k6nele/ws")
 @app.websocket("/konele/ws")
+@app.websocket("/v1/k6nele/ws")
+@app.websocket("/v1/konele/ws")
 async def konele_ws(
     websocket: WebSocket,
     task: Literal["transcribe", "translate"] = "transcribe",
@@ -233,6 +237,8 @@ async def konele_ws(
 @app.post("/k6nele/post")
 @app.post("/konele/post")
+@app.post("/v1/k6nele/post")
+@app.post("/v1/konele/post")
 async def translateapi(
     request: Request,
     task: Literal["transcribe", "translate"] = "transcribe",
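The route hunks above add /v1/... aliases by stacking extra decorators on the existing handlers, so the legacy konele/k6nele paths and the new versioned paths all dispatch to the same coroutine. A minimal standalone sketch of that FastAPI pattern (demo handler name and body are hypothetical, not the app's actual code):

from fastapi import FastAPI

app = FastAPI()


# Each decorator registers its own route; all of them point at this one handler.
@app.post("/k6nele/post")
@app.post("/konele/post")
@app.post("/v1/k6nele/post")
@app.post("/v1/konele/post")
async def translateapi_demo():
    return {"status": "ok"}  # the real handler runs the transcription task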