调整embedding策略
只处理最近两周内的新闻;查询利用索引,并按时间降序排列
This commit is contained in:
@@ -177,7 +177,14 @@ async def do_update():
|
|||||||
while True:
|
while True:
|
||||||
async with get_cur() as cur:
|
async with get_cur() as cur:
|
||||||
# 这里选择 embedding_updated_at is null 使用索引避免全表扫描
|
# 这里选择 embedding_updated_at is null 使用索引避免全表扫描
|
||||||
await cur.execute("SELECT id, title, content from risk_news where embedding_updated_at is null limit 1000")
|
await cur.execute("""
|
||||||
|
SELECT id, title, content
|
||||||
|
from risk_news
|
||||||
|
where embedding_updated_at is null
|
||||||
|
and time > now() - interval '14 day'
|
||||||
|
order by time desc
|
||||||
|
limit 1000
|
||||||
|
""")
|
||||||
docs = await cur.fetchall()
|
docs = await cur.fetchall()
|
||||||
|
|
||||||
# 循环出口
|
# 循环出口
|
||||||
|
|||||||
Reference in New Issue
Block a user