Compare commits
13 Commits
d86ed9be69
...
v1.0.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
72a8c736e3
|
|||
|
b4fb0f217b
|
|||
|
1a5dbc65e0
|
|||
|
ea8fc74ed2
|
|||
|
c6948654a4
|
|||
|
ffefb2f09e
|
|||
|
1c8a685e9e
|
|||
|
ed1e51fefa
|
|||
|
042800721d
|
|||
|
f71ef945db
|
|||
|
1c93201250
|
|||
|
2ecdc4e607
|
|||
|
204ccb8f3d
|
1
.dockerignore
Normal file
1
.dockerignore
Normal file
@@ -0,0 +1 @@
|
||||
/venv
|
||||
19
Dockerfile
Normal file
19
Dockerfile
Normal file
@@ -0,0 +1,19 @@
|
||||
FROM docker.io/nvidia/cuda:12.0.0-cudnn8-runtime-ubuntu22.04

# ffmpeg is needed for audio decoding; git is needed because requirements.txt
# installs a package directly from a git URL.
RUN apt-get update && \
    apt-get install -y ffmpeg python3 python3-pip git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements.txt alone first so the dependency-install layer is cached
# across source-code changes.
COPY requirements.txt .

RUN pip3 install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 5000

# Start whisper_fastapi.py
ENTRYPOINT ["python3", "whisper_fastapi.py"]
|
||||
@@ -4,4 +4,5 @@ uvicorn[standard]
|
||||
whisper_ctranslate2
|
||||
opencc
|
||||
prometheus-fastapi-instrumentator
|
||||
git+https://github.com/heimoshuiyu/faster-whisper@prompt
|
||||
pydub
|
||||
|
||||
@@ -1,48 +1,49 @@
|
||||
annotated-types==0.7.0
|
||||
anyio==4.4.0
|
||||
av==12.2.0
|
||||
certifi==2024.7.4
|
||||
cffi==1.16.0
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
coloredlogs==15.0.1
|
||||
ctranslate2==4.3.1
|
||||
fastapi==0.111.0
|
||||
faster-whisper==1.0.3
|
||||
filelock==3.15.4
|
||||
flatbuffers==24.3.25
|
||||
fsspec==2024.6.1
|
||||
h11==0.14.0
|
||||
httptools==0.6.1
|
||||
huggingface-hub==0.23.4
|
||||
humanfriendly==10.0
|
||||
idna==3.7
|
||||
mpmath==1.3.0
|
||||
numpy==1.26.4
|
||||
onnxruntime==1.18.1
|
||||
OpenCC==1.1.7
|
||||
packaging==24.1
|
||||
prometheus-client==0.18.0
|
||||
prometheus-fastapi-instrumentator==7.0.0
|
||||
protobuf==5.27.2
|
||||
pycparser==2.22
|
||||
pydantic==2.8.2
|
||||
pydantic_core==2.20.1
|
||||
pydub==0.25.1
|
||||
python-dotenv==1.0.1
|
||||
python-multipart==0.0.9
|
||||
PyYAML==6.0.1
|
||||
requests==2.32.3
|
||||
sniffio==1.3.1
|
||||
sounddevice==0.4.7
|
||||
starlette==0.37.2
|
||||
sympy==1.12.1
|
||||
tokenizers==0.19.1
|
||||
tqdm==4.66.4
|
||||
typing_extensions==4.12.2
|
||||
urllib3==2.2.2
|
||||
uvicorn==0.30.1
|
||||
uvloop==0.19.0
|
||||
watchfiles==0.22.0
|
||||
websockets==12.0
|
||||
whisper-ctranslate2==0.4.5
|
||||
annotated-types==0.7.0
|
||||
anyio==4.4.0
|
||||
av==12.3.0
|
||||
certifi==2024.8.30
|
||||
cffi==1.17.1
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
coloredlogs==15.0.1
|
||||
ctranslate2==4.4.0
|
||||
exceptiongroup==1.2.2
|
||||
fastapi==0.114.1
|
||||
faster-whisper @ git+https://github.com/heimoshuiyu/faster-whisper@28a4d11a736d8cdeb4655ee5d7e4b4e7ae5ec8e0
|
||||
filelock==3.16.0
|
||||
flatbuffers==24.3.25
|
||||
fsspec==2024.9.0
|
||||
h11==0.14.0
|
||||
httptools==0.6.1
|
||||
huggingface-hub==0.24.6
|
||||
humanfriendly==10.0
|
||||
idna==3.8
|
||||
mpmath==1.3.0
|
||||
numpy==2.1.1
|
||||
onnxruntime==1.19.2
|
||||
OpenCC==1.1.9
|
||||
packaging==24.1
|
||||
prometheus-fastapi-instrumentator==7.0.0
|
||||
prometheus_client==0.20.0
|
||||
protobuf==5.28.0
|
||||
pycparser==2.22
|
||||
pydantic==2.9.1
|
||||
pydantic_core==2.23.3
|
||||
pydub==0.25.1
|
||||
python-dotenv==1.0.1
|
||||
python-multipart==0.0.9
|
||||
PyYAML==6.0.2
|
||||
requests==2.32.3
|
||||
sniffio==1.3.1
|
||||
sounddevice==0.5.0
|
||||
starlette==0.38.5
|
||||
sympy==1.13.2
|
||||
tokenizers==0.20.0
|
||||
tqdm==4.66.5
|
||||
typing_extensions==4.12.2
|
||||
urllib3==2.2.2
|
||||
uvicorn==0.30.6
|
||||
uvloop==0.20.0
|
||||
watchfiles==0.24.0
|
||||
websockets==13.0.1
|
||||
whisper-ctranslate2==0.4.5
|
||||
|
||||
10
start-docker.sh
Executable file
10
start-docker.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash
# Launch the whisper-fastapi container on the NVIDIA GPU, exposing port 5000.
# The HuggingFace cache directory is mounted so downloaded models persist
# across container restarts.
# Fix: the original passed --name whisper-fastapi twice; the duplicate is removed.

docker run -d --name whisper-fastapi \
	--restart unless-stopped \
	-v ~/.cache/huggingface:/root/.cache/huggingface \
	--gpus all \
	-p 5000:5000 \
	docker.io/heimoshuiyu/whisper-fastapi:latest \
	--model large-v2
|
||||
11
start-podman.sh
Executable file
11
start-podman.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Launch the whisper-fastapi container with podman, exposing port 5000.
# --device nvidia.com/gpu=all uses CDI to pass GPUs through; the SELinux
# label is disabled so the container can access the mounted cache.
# Fix: the original passed --name whisper-fastapi twice; the duplicate is removed.
# NOTE(review): --gpus all looks redundant with the CDI --device flag
# (podman's --gpus is a docker-compat alias) — kept for behavior parity,
# but confirm whether one of the two can be dropped.

podman run -d --name whisper-fastapi \
	--restart unless-stopped \
	-v ~/.cache/huggingface:/root/.cache/huggingface \
	--device nvidia.com/gpu=all --security-opt=label=disable \
	--gpus all \
	-p 5000:5000 \
	docker.io/heimoshuiyu/whisper-fastapi:latest \
	--model large-v2
|
||||
@@ -22,6 +22,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
from src.whisper_ctranslate2.whisper_ctranslate2 import Transcribe
|
||||
from src.whisper_ctranslate2.writers import format_timestamp
|
||||
from faster_whisper.transcribe import Segment, TranscriptionInfo
|
||||
import opencc
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
@@ -36,8 +37,9 @@ args = parser.parse_args()
|
||||
app = FastAPI()
|
||||
# Instrument your app with default metrics and expose the metrics
|
||||
Instrumentator().instrument(app).expose(app, endpoint="/konele/metrics")
|
||||
ccc = opencc.OpenCC("t2s.json")
|
||||
|
||||
print("Loading model...")
|
||||
print(f"Loading model to device {args.device}...")
|
||||
transcriber = Transcribe(
|
||||
model_path=args.model,
|
||||
device=args.device,
|
||||
@@ -47,7 +49,7 @@ transcriber = Transcribe(
|
||||
cache_directory=args.cache_dir,
|
||||
local_files_only=args.local_files_only,
|
||||
)
|
||||
print("Model loaded!")
|
||||
print(f"Model loaded to device {transcriber.model.model.device}")
|
||||
|
||||
|
||||
# allow all cors
|
||||
@@ -76,7 +78,7 @@ def tsv_writer(generator: Generator[dict[str, Any], Any, None]):
|
||||
for i, segment in enumerate(generator):
|
||||
start_time = str(round(1000 * segment["start"]))
|
||||
end_time = str(round(1000 * segment["end"]))
|
||||
text = segment["text"]
|
||||
text = segment["text"].strip()
|
||||
yield f"{start_time}\t{end_time}\t{text}\n"
|
||||
|
||||
|
||||
@@ -88,7 +90,7 @@ def srt_writer(generator: Generator[dict[str, Any], Any, None]):
|
||||
end_time = format_timestamp(
|
||||
segment["end"], decimal_marker=",", always_include_hours=True
|
||||
)
|
||||
text = segment["text"]
|
||||
text = segment["text"].strip()
|
||||
yield f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
|
||||
|
||||
|
||||
@@ -97,7 +99,7 @@ def vtt_writer(generator: Generator[dict[str, Any], Any, None]):
|
||||
for i, segment in enumerate(generator):
|
||||
start_time = format_timestamp(segment["start"])
|
||||
end_time = format_timestamp(segment["end"])
|
||||
text = segment["text"]
|
||||
text = segment["text"].strip()
|
||||
yield f"{start_time} --> {end_time}\n{text}\n\n"
|
||||
|
||||
|
||||
@@ -142,6 +144,8 @@ def stream_builder(
|
||||
data = segment._asdict()
|
||||
if data.get('words') is not None:
|
||||
data["words"] = [i._asdict() for i in data["words"]]
|
||||
if info.language == "zh":
|
||||
data["text"] = ccc.convert(data["text"])
|
||||
yield data
|
||||
|
||||
info_dict = info._asdict()
|
||||
@@ -155,6 +159,8 @@ def stream_builder(
|
||||
|
||||
@app.websocket("/k6nele/status")
|
||||
@app.websocket("/konele/status")
|
||||
@app.websocket("/v1/k6nele/status")
|
||||
@app.websocket("/v1/konele/status")
|
||||
async def konele_status(
|
||||
websocket: WebSocket,
|
||||
):
|
||||
@@ -165,6 +171,8 @@ async def konele_status(
|
||||
|
||||
@app.websocket("/k6nele/ws")
|
||||
@app.websocket("/konele/ws")
|
||||
@app.websocket("/v1/k6nele/ws")
|
||||
@app.websocket("/v1/konele/ws")
|
||||
async def konele_ws(
|
||||
websocket: WebSocket,
|
||||
task: Literal["transcribe", "translate"] = "transcribe",
|
||||
@@ -178,15 +186,11 @@ async def konele_ws(
|
||||
# convert lang code format (eg. en-US to en)
|
||||
lang = lang.split("-")[0]
|
||||
|
||||
print("WebSocket client connected, lang is", lang)
|
||||
print("content-type is", content_type)
|
||||
data = b""
|
||||
while True:
|
||||
try:
|
||||
data += await websocket.receive_bytes()
|
||||
print("Received data:", len(data), data[-10:])
|
||||
if data[-3:] == b"EOS":
|
||||
print("End of speech")
|
||||
break
|
||||
except:
|
||||
break
|
||||
@@ -219,7 +223,6 @@ async def konele_ws(
|
||||
result = build_json_result(generator, info)
|
||||
|
||||
text = result.get("text", "")
|
||||
print("result", text)
|
||||
|
||||
await websocket.send_json(
|
||||
{
|
||||
@@ -234,6 +237,8 @@ async def konele_ws(
|
||||
|
||||
@app.post("/k6nele/post")
|
||||
@app.post("/konele/post")
|
||||
@app.post("/v1/k6nele/post")
|
||||
@app.post("/v1/konele/post")
|
||||
async def translateapi(
|
||||
request: Request,
|
||||
task: Literal["transcribe", "translate"] = "transcribe",
|
||||
@@ -242,14 +247,12 @@ async def translateapi(
|
||||
vad_filter: bool = False,
|
||||
):
|
||||
content_type = request.headers.get("Content-Type", "")
|
||||
print("downloading request file", content_type)
|
||||
|
||||
# convert lang code format (eg. en-US to en)
|
||||
lang = lang.split("-")[0]
|
||||
|
||||
splited = [i.strip() for i in content_type.split(",") if "=" in i]
|
||||
info = {k: v for k, v in (i.split("=") for i in splited)}
|
||||
print(info)
|
||||
|
||||
channels = int(info.get("channels", "1"))
|
||||
rate = int(info.get("rate", "16000"))
|
||||
@@ -283,7 +286,6 @@ async def translateapi(
|
||||
result = build_json_result(generator, info)
|
||||
|
||||
text = result.get("text", "")
|
||||
print("result", text)
|
||||
|
||||
return {
|
||||
"status": 0,
|
||||
|
||||
Reference in New Issue
Block a user