"""Matrix bot that answers chat-bot messages with a synthesized audio clip.

Every text message whose sender id starts with ``@chatgpt-bot`` is sent to the
Google Cloud Text-to-Speech REST endpoint, the resulting OGG/Opus audio is
uploaded to the homeserver, and an ``m.audio`` event is posted in a thread
rooted at the original message.
"""
import asyncio
import base64
import json
import os
from io import BytesIO

import aiohttp
from langdetect import detect
from nio import MatrixRoom, RoomMessageText

from bot import Bot, print

url = f"https://texttospeech.googleapis.com/v1/text:synthesize?key={os.environ['GOOGLE_TTS_API_KEY']}"

client = Bot(
    os.environ["BOT_TTS_HOMESERVER"],
    os.environ["BOT_TTS_USER"],
    os.environ["MATRIX_CHAIN_DEVICE"],
    os.environ["BOT_TTS_ACCESS_TOKEN"],
)

# Voice selection keyed by the language code returned by ``langdetect.detect``.
_VOICES = {
    "zh-cn": {
        "languageCode": "cmn-cn",
        "name": "cmn-CN-Wavenet-B",
    },
    "en": {"languageCode": "en-US", "name": "en-US-Neural2-F"},
    "ja": {"languageCode": "ja-JP", "name": "ja-JP-Neural2-B"},
}
# Language used when the detected one has no entry in ``_VOICES``.
_FALLBACK_LANG = "en"

# Message bodies of this length or longer are shortened in the audio caption.
_CAPTION_LIMIT = 20
# Number of leading characters kept when a caption is shortened.
_CAPTION_PREFIX = 16


async def tts(text: str) -> bytes:
    """Synthesize *text* to speech and return the raw OGG/Opus bytes.

    The voice is chosen from the language detected in *text*; languages
    without a configured voice use the English one.

    Raises:
        RuntimeError: if the service response carries no ``audioContent``
            (for example when the API answers with an error payload).
    """
    voice = _VOICES.get(detect(text), _VOICES[_FALLBACK_LANG])
    payload = {
        "input": {"text": text},
        "voice": voice,
        "audioConfig": {"audioEncoding": "OGG_OPUS", "speakingRate": 1.39},
    }
    headers = {"content-type": "application/json"}

    async with aiohttp.ClientSession() as session:
        async with session.post(url, data=json.dumps(payload), headers=headers) as resp:
            data = await resp.json()

    audio_content = data.get("audioContent")
    if audio_content is None:
        # Without this guard b64decode(None) fails with an opaque TypeError.
        raise RuntimeError(
            f"text-to-speech response has no audioContent: {data.get('error', data)!r}"
        )
    return base64.b64decode(audio_content)


# NOTE(review): decorator names (including "handel_no_gpt") are spelled as this
# file has always referenced them on the Bot instance; their order is kept.
@client.ignore_self_message
@client.handel_no_gpt
@client.log_message
@client.with_typing
@client.change_event_id_to_root_id
@client.replace_command_mark
@client.safe_try
async def message_callback(room: MatrixRoom, event: RoomMessageText) -> None:
    """Post a spoken version of *event* as an audio reply in its thread.

    Messages whose sender id does not start with ``@chatgpt-bot`` are ignored.
    """
    if not event.sender.startswith("@chatgpt-bot"):
        return

    audio = await tts(event.body)

    # Upload the clip first; the message event only references its content URI.
    resp, _ = await client.upload(BytesIO(audio), "audio/ogg", filesize=len(audio))

    # Short bodies are used verbatim as the caption; longer ones are cut to
    # their first characters followed by an ellipsis.
    if len(event.body) < _CAPTION_LIMIT:
        caption = event.body
    else:
        caption = event.body[:_CAPTION_PREFIX] + "..."

    content = {
        "msgtype": "m.audio",
        "body": caption,
        "info": {"mimetype": "audio/ogg", "size": len(audio)},
        "url": resp.content_uri,
        "m.relates_to": {
            "rel_type": "m.thread",
            "event_id": event.event_id,
        },
    }
    await client.room_send(room.room_id, message_type="m.room.message", content=content)


client.add_event_callback(message_callback, RoomMessageText)

asyncio.run(client.sync_forever())