Compare commits

...

5 Commits

6 changed files with 164 additions and 52 deletions

.dockerignore Normal file (+1)

@@ -0,0 +1 @@
/venv

@@ -1,7 +1,7 @@
 FROM docker.io/nvidia/cuda:12.0.0-cudnn8-runtime-ubuntu22.04
 RUN apt-get update && \
-    apt-get install -y ffmpeg python3 python3-pip && \
+    apt-get install -y ffmpeg python3 python3-pip git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

@@ -4,4 +4,5 @@ uvicorn[standard]
 whisper_ctranslate2
 opencc
 prometheus-fastapi-instrumentator
+git+https://git.yongyuancv.cn/heimoshuiyu/faster-whisper@prompt
 pydub

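The requirement added above installs faster-whisper straight from the `prompt` branch of the heimoshuiyu fork, which is presumably why the image build above now installs git (pip needs it for VCS requirements) and why the regenerated pin list below resolves faster-whisper to a fixed commit of that repository. As an optional runtime sanity check, the sketch below reads the PEP 610 direct_url.json record that pip writes for VCS installs; it is illustrative only and not part of the change.

# Illustrative check (not part of the diff): a git-installed faster-whisper
# carries a direct_url.json with the repository URL and pinned commit,
# while a PyPI wheel returns None here.
from importlib.metadata import distribution

dist = distribution("faster-whisper")
print(dist.version)
print(dist.read_text("direct_url.json"))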

@@ -1,48 +1,141 @@
-annotated-types==0.7.0
-anyio==4.4.0
-av==12.2.0
-certifi==2024.7.4
-cffi==1.16.0
-charset-normalizer==3.3.2
-click==8.1.7
-coloredlogs==15.0.1
-ctranslate2==4.3.1
-fastapi==0.111.0
-faster-whisper==1.0.3
-filelock==3.15.4
-flatbuffers==24.3.25
-fsspec==2024.6.1
-h11==0.14.0
-httptools==0.6.1
-huggingface-hub==0.23.4
-humanfriendly==10.0
-idna==3.7
-mpmath==1.3.0
-numpy==1.26.4
-onnxruntime==1.18.1
-OpenCC==1.1.7
-packaging==24.1
-prometheus-client==0.18.0
-prometheus-fastapi-instrumentator==7.0.0
-protobuf==5.27.2
-pycparser==2.22
-pydantic==2.8.2
-pydantic_core==2.20.1
-pydub==0.25.1
-python-dotenv==1.0.1
-python-multipart==0.0.9
-PyYAML==6.0.1
-requests==2.32.3
-sniffio==1.3.1
-sounddevice==0.4.7
-starlette==0.37.2
-sympy==1.12.1
-tokenizers==0.19.1
-tqdm==4.66.4
-typing_extensions==4.12.2
-urllib3==2.2.2
-uvicorn==0.30.1
-uvloop==0.19.0
-watchfiles==0.22.0
-websockets==12.0
-whisper-ctranslate2==0.4.5
+aiohappyeyeballs==2.4.0
+aiohttp==3.10.5
+aiosignal==1.3.1
+alembic==1.13.2
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.4.0
+asteroid-filterbanks==0.4.0
+async-timeout==4.0.3
+attrs==24.2.0
+audioread==3.0.1
+certifi==2024.8.30
+cffi==1.17.0
+charset-normalizer==3.3.2
+click==8.1.7
+coloredlogs==15.0.1
+colorlog==6.8.2
+contourpy==1.3.0
+ctranslate2==4.3.1
+cycler==0.12.1
+decorator==5.1.1
+docopt==0.6.2
+einops==0.8.0
+exceptiongroup==1.2.2
+fastapi==0.112.2
+faster-whisper @ git+https://git.yongyuancv.cn/heimoshuiyu/faster-whisper@6e42088656e63c4222b8a193e8c783accb2a1df7
+filelock==3.15.4
+flatbuffers==24.3.25
+fonttools==4.53.1
+frozenlist==1.4.1
+fsspec==2024.6.1
+greenlet==3.0.3
+h11==0.14.0
+httptools==0.6.1
+huggingface-hub==0.24.6
+humanfriendly==10.0
+HyperPyYAML==1.2.2
+idna==3.8
+Jinja2==3.1.4
+joblib==1.4.2
+julius==0.2.7
+kiwisolver==1.4.7
+lazy_loader==0.4
+librosa==0.10.2.post1
+lightning==2.4.0
+lightning-utilities==0.11.7
+llvmlite==0.43.0
+Mako==1.3.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+networkx==3.3
+numba==0.60.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.6.68
+nvidia-nvtx-cu12==12.1.105
+omegaconf==2.3.0
+onnxruntime==1.19.2
+OpenCC==1.1.9
+optuna==4.0.0
+packaging==24.1
+pandas==2.2.2
+pillow==10.4.0
+platformdirs==4.2.2
+pooch==1.8.2
+primePy==1.3
+prometheus-fastapi-instrumentator==7.0.0
+prometheus_client==0.20.0
+protobuf==5.28.0
+pyannote.audio==3.3.1
+pyannote.core==5.0.0
+pyannote.database==5.1.0
+pyannote.metrics==3.2.1
+pyannote.pipeline==3.0.1
+pycparser==2.22
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytorch-lightning==2.4.0
+pytorch-metric-learning==2.6.0
+pytz==2024.1
+PyYAML==6.0.2
+requests==2.32.3
+rich==13.8.0
+ruamel.yaml==0.18.6
+ruamel.yaml.clib==0.2.8
+scikit-learn==1.5.1
+scipy==1.14.1
+semver==3.0.2
+sentencepiece==0.2.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+sortedcontainers==2.4.0
+sounddevice==0.5.0
+soundfile==0.12.1
+soxr==0.5.0.post1
+speechbrain==1.0.1
+SQLAlchemy==2.0.33
+starlette==0.38.4
+sympy==1.13.2
+tabulate==0.9.0
+tensorboardX==2.6.2.2
+threadpoolctl==3.5.0
+tokenizers==0.20.0
+torch==2.4.0
+torch-audiomentations==0.11.1
+torch-pitch-shift==1.2.4
+torchaudio==2.4.0
+torchmetrics==1.4.1
+tqdm==4.66.5
+triton==3.0.0
+typer==0.12.5
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.2
+uvicorn==0.30.6
+uvloop==0.20.0
+watchfiles==0.24.0
+websockets==13.0.1
+whisper-ctranslate2==0.4.5
+yarl==1.9.8

start-podman.sh Executable file (+11)

@@ -0,0 +1,11 @@
#!/bin/bash
podman run -d --name whisper-fastapi \
--restart unless-stopped \
--name whisper-fastapi \
-v ~/.cache/huggingface:/root/.cache/huggingface \
--device nvidia.com/gpu=all --security-opt=label=disable \
--gpus all \
-p 5000:5000 \
docker.io/heimoshuiyu/whisper-fastapi:latest \
--model large-v2
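Once the container above is running (the large-v2 model can take a while to download and load on first start), a small poll against the published port confirms the service is reachable. The sketch below assumes the host port is 5000 as mapped above and that prometheus-fastapi-instrumentator is exposed at its default /metrics path; both are assumptions, not something the diff guarantees.

# Minimal smoke test for the container started by start-podman.sh.
# Assumptions (not part of the diff): host port 5000, default /metrics path.
import time

import requests

for _ in range(30):  # keep retrying while the model is still loading
    try:
        resp = requests.get("http://localhost:5000/metrics", timeout=5)
        resp.raise_for_status()
        print("service is up:", resp.text.splitlines()[0])
        break
    except requests.RequestException:
        time.sleep(2)
else:
    raise SystemExit("whisper-fastapi did not become reachable on port 5000")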

@@ -78,7 +78,7 @@ def tsv_writer(generator: Generator[dict[str, Any], Any, None]):
     for i, segment in enumerate(generator):
         start_time = str(round(1000 * segment["start"]))
         end_time = str(round(1000 * segment["end"]))
-        text = segment["text"]
+        text = segment["text"].strip()
         yield f"{start_time}\t{end_time}\t{text}\n"
@@ -90,7 +90,7 @@ def srt_writer(generator: Generator[dict[str, Any], Any, None]):
         end_time = format_timestamp(
             segment["end"], decimal_marker=",", always_include_hours=True
         )
-        text = segment["text"]
+        text = segment["text"].strip()
         yield f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
@@ -99,7 +99,7 @@ def vtt_writer(generator: Generator[dict[str, Any], Any, None]):
     for i, segment in enumerate(generator):
         start_time = format_timestamp(segment["start"])
         end_time = format_timestamp(segment["end"])
-        text = segment["text"]
+        text = segment["text"].strip()
         yield f"{start_time} --> {end_time}\n{text}\n\n"
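The only functional change in the three writer hunks above is the added .strip(): Whisper segment text typically starts with a leading space, which previously ended up at the front of every TSV, SRT, and VTT line. A tiny standalone illustration with hypothetical sample data (simplified; the real writers also format timestamps):

# Illustrative only, not the app's code.
segment = {"start": 0.0, "end": 2.5, "text": " Hello there."}

old_text = segment["text"]          # ' Hello there.'  -> line starts with a stray space
new_text = segment["text"].strip()  # 'Hello there.'   -> clean subtitle line

print(f"{round(1000 * segment['start'])}\t{round(1000 * segment['end'])}\t{new_text}")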
@@ -159,6 +159,8 @@ def stream_builder(
 @app.websocket("/k6nele/status")
 @app.websocket("/konele/status")
+@app.websocket("/v1/k6nele/status")
+@app.websocket("/v1/konele/status")
 async def konele_status(
     websocket: WebSocket,
 ):
@@ -169,6 +171,8 @@ async def konele_status(
 @app.websocket("/k6nele/ws")
 @app.websocket("/konele/ws")
+@app.websocket("/v1/k6nele/ws")
+@app.websocket("/v1/konele/ws")
 async def konele_ws(
     websocket: WebSocket,
     task: Literal["transcribe", "translate"] = "transcribe",
@@ -233,6 +237,8 @@ async def konele_ws(
 @app.post("/k6nele/post")
 @app.post("/konele/post")
+@app.post("/v1/k6nele/post")
+@app.post("/v1/konele/post")
 async def translateapi(
     request: Request,
     task: Literal["transcribe", "translate"] = "transcribe",
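The route hunks above add /v1/... aliases by stacking extra decorators on the existing handlers, so the legacy konele/k6nele paths and the new versioned paths all dispatch to the same coroutine. A minimal standalone sketch of that FastAPI pattern (demo handler name and body are hypothetical, not the app's actual code):

from fastapi import FastAPI

app = FastAPI()


# Each decorator registers its own route; all of them point at this one handler.
@app.post("/k6nele/post")
@app.post("/konele/post")
@app.post("/v1/k6nele/post")
@app.post("/v1/konele/post")
async def translateapi_demo():
    return {"status": "ok"}  # the real handler runs the transcription task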