Compare commits

...

4 Commits

Author SHA1 Message Date
bd2c6b95cf update faster-whisper 2024-11-28 18:52:00 +08:00
0e46bd91d4 format code 2024-11-21 22:45:02 +08:00
99272b230f Upgrade Dependency 2024-11-21 22:44:49 +08:00
3c01a76405 Convert Traditional Chinese to Simplified Chinese 2024-11-21 22:44:27 +08:00
3 changed files with 25 additions and 22 deletions

View File

@@ -4,5 +4,5 @@ uvicorn[standard]
whisper_ctranslate2 whisper_ctranslate2
opencc opencc
prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator
git+https://github.com/SYSTRAN/faster-whisper@be9fb36ed356b9e299b125de6ee91862e0ac9038 git+https://github.com/heimoshuiyu/faster-whisper@a759f5f48f5ef5b79461a6461966eafe9df088a9
pydub pydub

View File

@@ -9,7 +9,7 @@ coloredlogs==15.0.1
ctranslate2==4.5.0 ctranslate2==4.5.0
exceptiongroup==1.2.2 exceptiongroup==1.2.2
fastapi==0.115.5 fastapi==0.115.5
faster-whisper @ git+https://github.com/SYSTRAN/faster-whisper@be9fb36ed356b9e299b125de6ee91862e0ac9038 faster-whisper @ git+https://github.com/heimoshuiyu/faster-whisper@a759f5f48f5ef5b79461a6461966eafe9df088a9
filelock==3.16.1 filelock==3.16.1
flatbuffers==24.3.25 flatbuffers==24.3.25
fsspec==2024.10.0 fsspec==2024.10.0
@@ -20,15 +20,15 @@ humanfriendly==10.0
idna==3.10 idna==3.10
mpmath==1.3.0 mpmath==1.3.0
numpy==2.1.3 numpy==2.1.3
onnxruntime==1.20.0 onnxruntime==1.20.1
OpenCC==1.1.9 OpenCC==1.1.9
packaging==24.2 packaging==24.2
prometheus-fastapi-instrumentator==7.0.0 prometheus-fastapi-instrumentator==7.0.0
prometheus_client==0.21.0 prometheus_client==0.21.0
protobuf==5.28.3 protobuf==5.28.3
pycparser==2.22 pycparser==2.22
pydantic==2.9.2 pydantic==2.10.1
pydantic_core==2.23.4 pydantic_core==2.27.1
pydub==0.25.1 pydub==0.25.1
python-dotenv==1.0.1 python-dotenv==1.0.1
python-multipart==0.0.17 python-multipart==0.0.17
@@ -36,14 +36,14 @@ PyYAML==6.0.2
requests==2.32.3 requests==2.32.3
sniffio==1.3.1 sniffio==1.3.1
sounddevice==0.5.1 sounddevice==0.5.1
starlette==0.41.2 starlette==0.41.3
sympy==1.13.3 sympy==1.13.3
tokenizers==0.20.3 tokenizers==0.20.3
tqdm==4.67.0 tqdm==4.67.0
typing_extensions==4.12.2 typing_extensions==4.12.2
urllib3==2.2.3 urllib3==2.2.3
uvicorn==0.32.0 uvicorn==0.32.1
uvloop==0.21.0 uvloop==0.21.0
watchfiles==0.24.0 watchfiles==0.24.0
websockets==14.1 websockets==14.1
whisper-ctranslate2==0.4.7 whisper-ctranslate2==0.4.8

View File

@@ -1,6 +1,6 @@
import sys
import dataclasses import dataclasses
import faster_whisper import faster_whisper
import tqdm
import json import json
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
import wave import wave
@@ -21,12 +21,16 @@ from fastapi import (
WebSocket, WebSocket,
) )
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from src.whisper_ctranslate2.whisper_ctranslate2 import Transcribe
from src.whisper_ctranslate2.writers import format_timestamp from src.whisper_ctranslate2.writers import format_timestamp
from faster_whisper.transcribe import Segment, TranscriptionInfo from faster_whisper.transcribe import Segment, TranscriptionInfo
import opencc import opencc
from prometheus_fastapi_instrumentator import Instrumentator from prometheus_fastapi_instrumentator import Instrumentator
# redirect print to stderr
_print = print
def print(*args, **kwargs):
_print(*args, file=sys.stderr, **kwargs)
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--host", default="0.0.0.0", type=str) parser.add_argument("--host", default="0.0.0.0", type=str)
parser.add_argument("--port", default=5000, type=int) parser.add_argument("--port", default=5000, type=int)
@@ -95,7 +99,7 @@ def srt_writer(generator: Generator[Segment, Any, None]):
def vtt_writer(generator: Generator[Segment, Any, None]): def vtt_writer(generator: Generator[Segment, Any, None]):
yield "WEBVTT\n\n" yield "WEBVTT\n\n"
for i, segment in enumerate(generator): for _, segment in enumerate(generator):
start_time = format_timestamp(segment.start) start_time = format_timestamp(segment.start)
end_time = format_timestamp(segment.end) end_time = format_timestamp(segment.end)
text = segment.text.strip() text = segment.text.strip()
@@ -107,15 +111,16 @@ class JsonResult(TranscriptionInfo):
segments: list[Segment] segments: list[Segment]
text: str text: str
def build_json_result( def build_json_result(
generator: Iterable[Segment], generator: Iterable[Segment],
info: TranscriptionInfo, info: TranscriptionInfo,
) -> JsonResult: ) -> JsonResult:
segments = [i for i in generator] segments = [i for i in generator]
return JsonResult( return JsonResult(
text="\n".join(i.text for i in segments), text="\n".join(i.text for i in segments),
segments=segments, segments=segments,
**dataclasses.asdict(info) **dataclasses.asdict(info),
) )
@@ -140,14 +145,12 @@ def stream_builder(
"Detected language '%s' with probability %f" "Detected language '%s' with probability %f"
% (info.language, info.language_probability) % (info.language, info.language_probability)
) )
def wrap(): def wrap():
last_pos = 0 for segment in segments:
with tqdm.tqdm(total=info.duration, unit="seconds", disable=True) as pbar: if info.language == "zh":
for segment in segments: segment.text = ccc.convert(segment.text)
start, end, text = segment.start, segment.end, segment.text yield segment
pbar.update(end - last_pos)
last_pos = end
yield segment
return wrap(), info return wrap(), info
@@ -303,9 +306,9 @@ async def transcription(
""" """
if not task: if not task:
if request.url.path == '/v1/audio/transcriptions': if request.url.path == "/v1/audio/transcriptions":
task = "transcribe" task = "transcribe"
elif request.url.path == '/v1/audio/translations': elif request.url.path == "/v1/audio/translations":
task = "translate" task = "translate"
else: else:
raise HTTPException(400, "task parameter is required") raise HTTPException(400, "task parameter is required")