From fff2a32d7eb8369fc79d489fc5432ffbfaf3df2f Mon Sep 17 00:00:00 2001 From: heimoshuiyu Date: Fri, 20 Sep 2024 15:20:19 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BB=8EES=E8=8E=B7=E5=8F=96=E7=9A=84=E7=BB=93?= =?UTF-8?q?=E6=9E=9C=E4=B8=AD=E8=BF=87=E6=BB=A4=E6=8E=89\x00?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cucyuqing/cmd/es-sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cucyuqing/cmd/es-sync.py b/cucyuqing/cmd/es-sync.py index 660f6a4..59e3f28 100644 --- a/cucyuqing/cmd/es-sync.py +++ b/cucyuqing/cmd/es-sync.py @@ -66,6 +66,8 @@ async def fetch(interval: ESInterval, size=1000) -> AsyncIterable[dict]: f'用时 {int(duration)} 秒,获取到 {len(docs)} 条数据,最早时间 {parse_unixtime(docs[0]["crawled_at"])},最晚时间 {parse_unixtime(docs[-1]["crawled_at"])}' ) for d in docs: + d['title'] = d['title'].replace('\x00', '') + d['content'] = d['content'].replace('\x00', '') yield d # 如果当前时间度的数据量 = size 说明还有数据,继续请求 # 这里使用递归