从ES获取的结果中过滤掉\x00
This commit is contained in:
@@ -66,6 +66,8 @@ async def fetch(interval: ESInterval, size=1000) -> AsyncIterable[dict]:
|
||||
f'用时 {int(duration)} 秒,获取到 {len(docs)} 条数据,最早时间 {parse_unixtime(docs[0]["crawled_at"])},最晚时间 {parse_unixtime(docs[-1]["crawled_at"])}'
|
||||
)
|
||||
for d in docs:
|
||||
d['title'] = d['title'].replace('\x00', '')
|
||||
d['content'] = d['content'].replace('\x00', '')
|
||||
yield d
|
||||
# 如果当前时间段的数据量 = size 说明还有数据,继续请求
|
||||
# 这里使用递归
|
||||
|
||||
Reference in New Issue
Block a user