import asyncio
import io
import os

import openai
from nio import DownloadError, MatrixRoom, RoomMessageAudio, RoomMessageFile

from bot import Bot, print

client = Bot(
    os.environ["BOT_WHISPER_HOMESERVER"],
    os.environ["BOT_WHISPER_USER"],
    os.environ["MATRIX_CHAIN_DEVICE"],
    os.environ["BOT_WHISPER_ACCESS_TOKEN"],
)

# "Welcome to the matrix chain whisper plugin. I transcribe voice messages in the
# room into text; if a voice message is too long, I reply with a text file instead."
client.welcome_message = (
    """欢迎使用 matrix chain whisper 插件,我能将房间中的语音消息转换成文字发出,如果语音过长,我会用文件形式发出"""
)


@client.message_callback_common_wrapper
async def message_callback(room: MatrixRoom, event: RoomMessageAudio):
    print("received message")
    print(event.flattened())
    # voice messages longer than 5 minutes are handled like plain audio files
    if event.flattened().get("content.info.duration", 0) > 1000 * 60 * 5:
        return await message_file(room, event)
    if event.source.get("content", {}).get("org.matrix.msc1767.audio") is None:
        # not an MSC1767 voice message: handle it as an audio file
        return await message_file(room, event)

    resp = await client.download(event.url)
    if isinstance(resp, DownloadError):
        return
    filelikeobj = io.BytesIO(resp.body)
    filelikeobj.name = "matrixaudio.ogg"

    # get prompt: most recent thread memories, capped at 3039 tokens
    rows = await client.db.fetch_all(
        query="""select content from (
            select role, content,
                   sum(token) over (partition by root order by id desc) as total_token
            from memories
            where root = :event_id
            order by id
        ) as sub
        where total_token < 3039;""",
        values={"event_id": event.event_id},
    )
    prompt = "".join([i[0] for i in rows])

    # no memory: fall back to the room's configured system message and examples
    if not prompt:
        db_result = await client.db.fetch_all(
            query="select system, examples from room_configs where room = :room_id;",
            values={"room_id": room.room_id},
        )
        if len(db_result) > 0:
            systemMessageContent = db_result[0][0]
            examples = [
                m.get("content", "") for m in db_result[0][1] if m.get("example")
            ]
            while "" in examples:
                examples.remove("")
            if systemMessageContent:
                prompt += systemMessageContent + "\n\n"
            if len(examples) > 0:
                prompt += "\n\n".join(examples)
    print("initial_prompt", prompt)

    # transcribe with the pre-1.0 openai.Audio API; "large-v2" and the segmented
    # response assume a Whisper-compatible endpoint configured elsewhere
    result = openai.Audio.transcribe(file=filelikeobj, model="large-v2", prompt=prompt)
    result = "\n".join([i.text for i in result["segments"]])
    print(event.sender, result)

    # reply in a thread under the original voice message
    await client.room_send(
        room.room_id,
        "m.room.message",
        {
            "body": result,
            "msgtype": "m.text",
            "m.relates_to": {
                "rel_type": "m.thread",
                "event_id": event.event_id,
            },
        },
    )


client.add_event_callback(message_callback, RoomMessageAudio)

ALLOWED_EXTENSIONS = {
    "mp3",
    "mp4",
    "mpeg",
    "mpga",
    "m4a",
    "wav",
    "webm",
    "3gp",
    "flac",
    "ogg",
    "mkv",
}


def allowed_file(mimetype):
    return "/" in mimetype and mimetype.rsplit("/", 1)[1].lower() in ALLOWED_EXTENSIONS


def get_txt_filename(filename):
    return filename + ".txt"


async def message_file(room: MatrixRoom, event: RoomMessageFile):
    print("received file")
    if not allowed_file(event.flattened().get("content.info.mimetype", "")):
        print("not allowed file", event.body)
        raise Exception("not allowed file")

    resp = await client.download(event.url)
    if isinstance(resp, DownloadError):
        return
    filelikeobj = io.BytesIO(resp.body)
    filelikeobj.name = event.body

    # get prompt: most recent thread memories, capped at 3039 tokens
    rows = await client.db.fetch_all(
        query="""select content from (
            select role, content,
                   sum(token) over (partition by root order by id desc) as total_token
            from memories
            where root = :event_id
            order by id
        ) as sub
        where total_token < 3039;""",
        values={"event_id": event.event_id},
    )
    prompt = "".join([i[0] for i in rows])
    print("initial_prompt", prompt)

    result = openai.Audio.transcribe(file=filelikeobj, model="large-v2", prompt=prompt)
    result = "\n".join([i.text for i in result["segments"]])
    print(event.sender, result)

    # package the transcript as a .txt file named after the original upload
    resultfilelike = io.BytesIO(result.encode())
    resultfilelike.name = get_txt_filename(event.body)
    resultfileSize = len(result.encode())
    # upload the transcript and reply with a file event in the same thread
    uploadResp, _ = await client.upload(
        resultfilelike, content_type="text/plain", filesize=resultfileSize
    )
    print("uri", uploadResp.content_uri)
    await client.room_send(
        room.room_id,
        "m.room.message",
        {
            "body": resultfilelike.name,
            "filename": resultfilelike.name,
            "msgtype": "m.file",
            "info": {
                "mimetype": "text/plain",
                "size": resultfileSize,
            },
            "m.relates_to": {
                "rel_type": "m.thread",
                "event_id": event.event_id,
            },
            "url": uploadResp.content_uri,
        },
    )


asyncio.run(client.sync_forever())