调整embedding策略
只处理2周内的新闻，利用索引、时间降序
This commit is contained in:
@@ -177,7 +177,14 @@ async def do_update():
     while True:
         async with get_cur() as cur:
             # 这里选择 embedding_updated_at is null 使用索引避免全表扫描
-            await cur.execute("SELECT id, title, content from risk_news where embedding_updated_at is null limit 1000")
+            await cur.execute("""
+                SELECT id, title, content
+                from risk_news
+                where embedding_updated_at is null
+                and time > now() - interval '14 day'
+                order by time desc
+                limit 1000
+            """)
             docs = await cur.fetchall()

             # 循环出口
||||
Reference in New Issue
Block a user