调整embedding策略

只处理最近 2 周内的新闻,查询时利用索引,并按时间降序排序
This commit is contained in:
2024-09-23 15:13:33 +08:00
parent 341435e603
commit 16f4469365

View File

@@ -177,7 +177,14 @@ async def do_update():
while True:
async with get_cur() as cur:
# 这里选择 embedding_updated_at is null 使用索引避免全表扫描
await cur.execute("SELECT id, title, content from risk_news where embedding_updated_at is null limit 1000")
await cur.execute("""
SELECT id, title, content
from risk_news
where embedding_updated_at is null
and time > now() - interval '14 day'
order by time desc
limit 1000
""")
docs = await cur.fetchall()
# 循环出口