From 16f44693650fe69c0c7eadaf097ae2cd9aa07a29 Mon Sep 17 00:00:00 2001
From: heimoshuiyu
Date: Mon, 23 Sep 2024 15:13:33 +0800
Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4embedding=E7=AD=96=E7=95=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

只处理2周内的新闻,利用索引、时间降序
---
 cucyuqing/cmd/embedding.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/cucyuqing/cmd/embedding.py b/cucyuqing/cmd/embedding.py
index ddbbf86..2e6ff24 100644
--- a/cucyuqing/cmd/embedding.py
+++ b/cucyuqing/cmd/embedding.py
@@ -177,7 +177,14 @@ async def do_update():
     while True:
         async with get_cur() as cur:
             # 这里选择 embedding_updated_at is null 使用索引避免全表扫描
-            await cur.execute("SELECT id, title, content from risk_news where embedding_updated_at is null limit 1000")
+            await cur.execute("""
+                SELECT id, title, content
+                from risk_news
+                where embedding_updated_at is null
+                and time > now() - interval '14 day'
+                order by time desc
+                limit 1000
+            """)
             docs = await cur.fetchall()
 
             # 循环出口