diff --git a/cucyuqing/cmd/es-sync.py b/cucyuqing/cmd/es-sync.py index 660f6a4..59e3f28 100644 --- a/cucyuqing/cmd/es-sync.py +++ b/cucyuqing/cmd/es-sync.py @@ -66,6 +66,8 @@ async def fetch(interval: ESInterval, size=1000) -> AsyncIterable[dict]: f'用时 {int(duration)} 秒,获取到 {len(docs)} 条数据,最早时间 {parse_unixtime(docs[0]["crawled_at"])},最晚时间 {parse_unixtime(docs[-1]["crawled_at"])}' ) for d in docs: + d['title'] = d['title'].replace('\x00', '') + d['content'] = d['content'].replace('\x00', '') yield d # 如果当前时间度的数据量 = size 说明还有数据,继续请求 # 这里使用递归