asdf
This commit is contained in:
422
4. 遍历网页.ipynb
Normal file
422
4. 遍历网页.ipynb
Normal file
@@ -0,0 +1,422 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "bc76e623-3b53-459c-83a7-1c190ef8486e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Requirement already satisfied: selenium in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (4.25.0)\n",
|
||||||
|
"Requirement already satisfied: urllib3<3,>=1.26 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from urllib3[socks]<3,>=1.26->selenium) (2.2.2)\n",
|
||||||
|
"Requirement already satisfied: trio~=0.17 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from selenium) (0.26.2)\n",
|
||||||
|
"Requirement already satisfied: trio-websocket~=0.9 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from selenium) (0.11.1)\n",
|
||||||
|
"Requirement already satisfied: certifi>=2021.10.8 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from selenium) (2024.7.4)\n",
|
||||||
|
"Requirement already satisfied: typing_extensions~=4.9 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from selenium) (4.12.2)\n",
|
||||||
|
"Requirement already satisfied: websocket-client~=1.8 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from selenium) (1.8.0)\n",
|
||||||
|
"Requirement already satisfied: attrs>=23.2.0 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from trio~=0.17->selenium) (23.2.0)\n",
|
||||||
|
"Requirement already satisfied: sortedcontainers in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from trio~=0.17->selenium) (2.4.0)\n",
|
||||||
|
"Requirement already satisfied: idna in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from trio~=0.17->selenium) (3.7)\n",
|
||||||
|
"Requirement already satisfied: outcome in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from trio~=0.17->selenium) (1.3.0.post0)\n",
|
||||||
|
"Requirement already satisfied: sniffio>=1.3.0 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from trio~=0.17->selenium) (1.3.1)\n",
|
||||||
|
"Requirement already satisfied: wsproto>=0.14 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from trio-websocket~=0.9->selenium) (1.2.0)\n",
|
||||||
|
"Requirement already satisfied: pysocks!=1.5.7,<2.0,>=1.5.6 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from urllib3[socks]<3,>=1.26->selenium) (1.7.1)\n",
|
||||||
|
"Requirement already satisfied: h11<1,>=0.9.0 in /home/hmsy/.conda/envs/python311/lib/python3.11/site-packages (from wsproto>=0.14->trio-websocket~=0.9->selenium) (0.14.0)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 安装依赖(如果已经安装过了可以跳过)\n",
|
||||||
|
"!pip install selenium"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 40,
|
||||||
|
"id": "1fd79faf-f138-41fa-9519-7bc72b407afb",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from selenium import webdriver\n",
|
||||||
|
"from selenium.webdriver.common.by import By\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"# 创建一个新的 Chrome 浏览器会话\n",
|
||||||
|
"driver = webdriver.Chrome()\n",
|
||||||
|
"\n",
|
||||||
|
"# 让浏览器打开一个网页\n",
|
||||||
|
"driver.get('https://sou.chinanews.com/')\n",
|
||||||
|
"driver.implicitly_wait(3) # 设置隐式等待时间为3秒\n",
|
||||||
|
"\n",
|
||||||
|
"# 找到搜索框,输入法文本\n",
|
||||||
|
"input = driver.find_element(By.XPATH, '//*[@id=\"q\"]')\n",
|
||||||
|
"input.send_keys('初音未来')\n",
|
||||||
|
"\n",
|
||||||
|
"# 找到搜索按钮,点击按钮\n",
|
||||||
|
"search = driver.find_element(By.XPATH, '//button[@class=\"searchBtn\"]')\n",
|
||||||
|
"search.click()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 41,
|
||||||
|
"id": "876319e9-a10c-47ee-9ef9-ac0e03a39d82",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"['https://www.chinanews.com.cn/cj/2023/08-28/10068093.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/sh/2023/07-26/10049675.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/sh/2023/06-05/10019224.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/sh/2023/06-01/10017432.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/cj/2023/05-20/10010862.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/cul/2022/12-15/9915069.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/cj/2022/10-26/9880366.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/cj/2022/09-07/9847169.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/cj/2022/07-05/9795608.shtml',\n",
|
||||||
|
" 'https://www.chinanews.com.cn/cj/2022/07-05/9795607.shtml']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 41,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 遍历符合条件的 XPATH,寻找所有 新闻标题 的 URL,并保存在 links 列表中\n",
|
||||||
|
"links = []\n",
|
||||||
|
"for element in driver.find_elements(By.XPATH, '//div[@class=\"news_title\"]/a'):\n",
|
||||||
|
" links.append(element.get_attribute('href'))\n",
|
||||||
|
"links"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "e0823aa9-1aa9-43d1-bdb0-d8a9a130a70f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 跳转到该网页\n",
|
||||||
|
"driver.get(links[0])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"id": "1cc5210c-a61a-48ba-9938-3f10449ff784",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"AI创作新风潮:影视业,拥抱AI新机遇\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 解析标题\n",
|
||||||
|
"title = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[2]/h1').text.strip()\n",
|
||||||
|
"print(title)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "9877f965-f606-49bd-bd8e-ee578b9b90cb",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 解析正文\n",
|
||||||
|
"content = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[2]/div[4]/div[2]').text.strip()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 21,
|
||||||
|
"id": "f001d905-abef-47df-9c43-8d87267553c1",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"2023年08月28日 04:03\n",
|
||||||
|
"人民日报海外版\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 解析 时间 和 来源\n",
|
||||||
|
"text = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[2]/div[2]').text.strip()\n",
|
||||||
|
"tuples = text.split('\\n', 1)[0].split('来源:')\n",
|
||||||
|
"date = tuples[0].strip()\n",
|
||||||
|
"source = tuples[1].strip()\n",
|
||||||
|
"print(date)\n",
|
||||||
|
"print(source)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 37,
|
||||||
|
"id": "1ffb2586-753c-4fb8-8f42-30c122a2e8b4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"2022年09月07日 19:55 中国新闻网\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 针对 旧版 的网页\n",
|
||||||
|
"title = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/h1').text.strip()\n",
|
||||||
|
"content = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[5]').text.strip()\n",
|
||||||
|
"text = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[3]/div').text.strip()\n",
|
||||||
|
"date = tuples[0].strip()\n",
|
||||||
|
"source = tuples[1].strip()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "e53bc9b7-a103-48ae-ab6f-d574ecd7687b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 31,
|
||||||
|
"id": "866f8acc-37d7-4f25-ba2b-62bb8baae94f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'https://www.chinanews.com.cn/cj/2022/07-05/9795608.shtml'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 31,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"driver.current_url"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "b7c28cdf-40f2-466d-a684-cefc0e34a1e4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 合并以上的代码\n",
|
||||||
|
"data = []\n",
|
||||||
|
"for link in links:\n",
|
||||||
|
" # 跳转到该网页\n",
|
||||||
|
" driver.get(link)\n",
|
||||||
|
" try:\n",
|
||||||
|
" # 解析标题\n",
|
||||||
|
" title = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[2]/h1').text.strip()\n",
|
||||||
|
" # 解析正文\n",
|
||||||
|
" content = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[2]/div[4]/div[2]').text.strip()\n",
|
||||||
|
" # 解析 时间 和 来源\n",
|
||||||
|
" text = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[2]/div[2]').text.strip()\n",
|
||||||
|
" tuples = text.split('\\n', 1)[0].split('来源:')\n",
|
||||||
|
" date = tuples[0].strip()\n",
|
||||||
|
" source = tuples[1].strip()\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" # 如果上面的代码报错了,说明可能是旧版网页,使用以下的代码进行解析\n",
|
||||||
|
" title = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/h1').text.strip()\n",
|
||||||
|
" content = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[5]').text.strip()\n",
|
||||||
|
" text = driver.find_element(By.XPATH, '//*[@id=\"cont_1_1_2\"]/div[3]/div').text.strip()\n",
|
||||||
|
" date = tuples[0].strip()\n",
|
||||||
|
" source = tuples[1].strip()\n",
|
||||||
|
" \n",
|
||||||
|
" # 保存所有数据到一个二维列表\n",
|
||||||
|
" data.append([title, date, source, content])\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 44,
|
||||||
|
"id": "a6d8861c-6b1d-494d-bc8a-bf9310317776",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>标题</th>\n",
|
||||||
|
" <th>时间</th>\n",
|
||||||
|
" <th>来源</th>\n",
|
||||||
|
" <th>正文</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>AI创作新风潮:影视业,拥抱AI新机遇</td>\n",
|
||||||
|
" <td>2023年08月28日 04:03</td>\n",
|
||||||
|
" <td>人民日报海外版</td>\n",
|
||||||
|
" <td>前不久,一部以元宇宙为概念的国潮微短剧《神女杂货铺》在某视频平台播出,讲述了一个现代女孩穿越...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>雨中跪地救人的“二次元小姐姐” 是位喜欢动漫的苏州医生</td>\n",
|
||||||
|
" <td>2023年07月26日 02:23</td>\n",
|
||||||
|
" <td>扬子晚报</td>\n",
|
||||||
|
" <td>7月21日,在上海某漫展场馆外,一名年轻男子突然在雨中晕厥倒地,这一幕,恰好被一位穿cosp...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>首批AI克隆明星上线,不只是娱乐业“躺赚”</td>\n",
|
||||||
|
" <td>2023年06月05日 01:40</td>\n",
|
||||||
|
" <td>新京报</td>\n",
|
||||||
|
" <td>现实中偶像与粉丝互动被AI复制到虚拟空间中,虚实边界被进一步打破。\\n花30元就可以和网红明...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>“10后”的流行密语你能对上几个?</td>\n",
|
||||||
|
" <td>2023年06月01日 09:51</td>\n",
|
||||||
|
" <td>羊城晚报</td>\n",
|
||||||
|
" <td>羊城晚报记者 秦小杰\\n作为互联网新生代,“10后”的小学生有哪些流行“密语”?喜欢什么样的...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>(经济观察)虚拟数字人“现身”各行各业 释放可观商业价值</td>\n",
|
||||||
|
" <td>2023年05月20日 09:37</td>\n",
|
||||||
|
" <td>中国新闻网</td>\n",
|
||||||
|
" <td>中新社上海5月20日电 (谢梦圆)近期,多个品牌启用虚拟形象作为代言人、社交平台AI博主大受...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>5</th>\n",
|
||||||
|
" <td>网络热梗也能成为热门IP IP如何吸引Z世代?</td>\n",
|
||||||
|
" <td>2022年12月15日 01:00</td>\n",
|
||||||
|
" <td>北京青年报</td>\n",
|
||||||
|
" <td>随着网络文化的发展,新时代IP内容也随之扩展创新,不仅涵盖动漫、影视、游戏、潮玩,甚至一个符...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>6</th>\n",
|
||||||
|
" <td>玩具市场迎来多元需求 成年人“入坑”潮流玩具</td>\n",
|
||||||
|
" <td>2022年10月26日 15:31</td>\n",
|
||||||
|
" <td>北京青年报</td>\n",
|
||||||
|
" <td>一年一度的双11来临,潮流玩具市场再度成为各大电商平台必争之地,玩具市场迎来更多元的市场需求...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>7</th>\n",
|
||||||
|
" <td>越来越多场景应用 “数字人”走进大众生活</td>\n",
|
||||||
|
" <td>2022年09月07日 19:55</td>\n",
|
||||||
|
" <td>中国新闻网</td>\n",
|
||||||
|
" <td>中新网北京9月7日电 (中新财经 吴家驹)从“初音未来”到“洛天依”再到“嘉然”,近年来,“...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>8</th>\n",
|
||||||
|
" <td>爱的是“皮”还是“魂”?虚拟偶像凭什么“圈粉”</td>\n",
|
||||||
|
" <td>2022年09月07日 19:55</td>\n",
|
||||||
|
" <td>中国新闻网</td>\n",
|
||||||
|
" <td>虚拟偶像深受当下年轻人的欢迎。艾媒咨询调研显示,中国虚拟人爱好者中,19岁至30岁之间的年轻...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>9</th>\n",
|
||||||
|
" <td>唱歌跳舞的“皮套人”?这个千亿级生意没那么简单</td>\n",
|
||||||
|
" <td>2022年09月07日 19:55</td>\n",
|
||||||
|
" <td>中国新闻网</td>\n",
|
||||||
|
" <td>近日,一条微博热搜将人们的视线拉回到了虚拟偶像的身上,一名来自美国的虚拟主播在短短两小时内吸...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" 标题 时间 来源 \\\n",
|
||||||
|
"0 AI创作新风潮:影视业,拥抱AI新机遇 2023年08月28日 04:03 人民日报海外版 \n",
|
||||||
|
"1 雨中跪地救人的“二次元小姐姐” 是位喜欢动漫的苏州医生 2023年07月26日 02:23 扬子晚报 \n",
|
||||||
|
"2 首批AI克隆明星上线,不只是娱乐业“躺赚” 2023年06月05日 01:40 新京报 \n",
|
||||||
|
"3 “10后”的流行密语你能对上几个? 2023年06月01日 09:51 羊城晚报 \n",
|
||||||
|
"4 (经济观察)虚拟数字人“现身”各行各业 释放可观商业价值 2023年05月20日 09:37 中国新闻网 \n",
|
||||||
|
"5 网络热梗也能成为热门IP IP如何吸引Z世代? 2022年12月15日 01:00 北京青年报 \n",
|
||||||
|
"6 玩具市场迎来多元需求 成年人“入坑”潮流玩具 2022年10月26日 15:31 北京青年报 \n",
|
||||||
|
"7 越来越多场景应用 “数字人”走进大众生活 2022年09月07日 19:55 中国新闻网 \n",
|
||||||
|
"8 爱的是“皮”还是“魂”?虚拟偶像凭什么“圈粉” 2022年09月07日 19:55 中国新闻网 \n",
|
||||||
|
"9 唱歌跳舞的“皮套人”?这个千亿级生意没那么简单 2022年09月07日 19:55 中国新闻网 \n",
|
||||||
|
"\n",
|
||||||
|
" 正文 \n",
|
||||||
|
"0 前不久,一部以元宇宙为概念的国潮微短剧《神女杂货铺》在某视频平台播出,讲述了一个现代女孩穿越... \n",
|
||||||
|
"1 7月21日,在上海某漫展场馆外,一名年轻男子突然在雨中晕厥倒地,这一幕,恰好被一位穿cosp... \n",
|
||||||
|
"2 现实中偶像与粉丝互动被AI复制到虚拟空间中,虚实边界被进一步打破。\\n花30元就可以和网红明... \n",
|
||||||
|
"3 羊城晚报记者 秦小杰\\n作为互联网新生代,“10后”的小学生有哪些流行“密语”?喜欢什么样的... \n",
|
||||||
|
"4 中新社上海5月20日电 (谢梦圆)近期,多个品牌启用虚拟形象作为代言人、社交平台AI博主大受... \n",
|
||||||
|
"5 随着网络文化的发展,新时代IP内容也随之扩展创新,不仅涵盖动漫、影视、游戏、潮玩,甚至一个符... \n",
|
||||||
|
"6 一年一度的双11来临,潮流玩具市场再度成为各大电商平台必争之地,玩具市场迎来更多元的市场需求... \n",
|
||||||
|
"7 中新网北京9月7日电 (中新财经 吴家驹)从“初音未来”到“洛天依”再到“嘉然”,近年来,“... \n",
|
||||||
|
"8 虚拟偶像深受当下年轻人的欢迎。艾媒咨询调研显示,中国虚拟人爱好者中,19岁至30岁之间的年轻... \n",
|
||||||
|
"9 近日,一条微博热搜将人们的视线拉回到了虚拟偶像的身上,一名来自美国的虚拟主播在短短两小时内吸... "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 44,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 循环结束后,将data转为 DataFrame 并保存到 csv\n",
|
||||||
|
"import pandas\n",
|
||||||
|
"df = pandas.DataFrame(data, columns=['标题', '时间', '来源', '正文'])\n",
|
||||||
|
"df.to_csv('news.csv')\n",
|
||||||
|
"\n",
|
||||||
|
"# 显示df\n",
|
||||||
|
"df"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
790
5. Requests 动态页面.ipynb
Normal file
790
5. Requests 动态页面.ipynb
Normal file
@@ -0,0 +1,790 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "4719acdf-1017-4e90-b0cf-9126d52187a9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# curlconverter.com"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 20,
|
||||||
|
"id": "c896da51-8c3a-441e-8c4b-fb6c09aaff71",
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[{'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来,UNDEAD',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>,UNDEAD',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q9.itc.cn/images01/20240928/85d64a5e103549a38ede8db30745ae94.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 812565548,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q9.itc.cn/images01/20240928/85d64a5e103549a38ede8db30745ae94.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q9.itc.cn/images01/20240928/85d64a5e103549a38ede8db30745ae94.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1727537683000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来,UNDEAD',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>,UNDEAD',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来,UNDEAD'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/812565548_120122317'},\n",
|
||||||
|
" {'authorId': '532686',\n",
|
||||||
|
" 'authorName': '次元岛',\n",
|
||||||
|
" 'authorNameHL': '次元岛',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_79,y_60,w_526,h_526/images/20190624/f4e8e5dbd6a44cf5ba77cf56a8a8c9df.jpeg',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': \"角色名:初音未来CN:安凉公主殿下生日快乐wwෆ(˶''ᵕ''˶)ෆ图片授权来源:次元岛…\",\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': \" 角色名:<b>初音未来</b> CN:安凉 公主殿下生日快乐ww ෆ( ˶''ᵕ''˶)ෆ 图片授权来源:次元岛…\",\n",
|
||||||
|
" 'briefHL': \"角色名:<b>初音未来</b>CN:安凉公主殿下生日快乐wwෆ(˶''ᵕ''˶)ෆ图片授权来源:次元岛…\",\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q5.itc.cn/q_70/images03/20240906/f90b9a4ec23e4f59b88dd51aff5206d2.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 806861635,\n",
|
||||||
|
" 'imageNews': True,\n",
|
||||||
|
" 'images': '[\"//q5.itc.cn/q_70/images03/20240906/f90b9a4ec23e4f59b88dd51aff5206d2.jpeg\",\"//q5.itc.cn/q_70/images03/20240906/20ee37b7d51442c9b0989403d126b26f.jpeg\",\"//q4.itc.cn/q_70/images03/20240906/4306ade7b9a0420a834dc3e6b45ebc9a.jpeg\",\"//q7.itc.cn/q_70/images03/20240906/a42ae5a2fb344dbe8f08f950ce7ab134.jpeg\",\"//q3.itc.cn/q_70/images03/20240906/935915519f284b5faf7cb157b3feb1c8.jpeg\",\"//q1.itc.cn/q_70/images03/20240906/fb1542f48886445292b9889ec650d339.jpeg\",\"//q4.itc.cn/q_70/images03/20240906/4d855fa2228e4a9ea136d13a48b1806c.jpeg\",\"//q4.itc.cn/q_70/images03/20240906/5baf5f19c1034f059c330ce276ef8a7f.jpeg\",\"//q0.itc.cn/q_70/images03/20240906/e0bec6d6d5df4c2694d7b87b5d4836d3.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q5.itc.cn/q_70/images03/20240906/f90b9a4ec23e4f59b88dd51aff5206d2.jpeg',\n",
|
||||||
|
" '//q5.itc.cn/q_70/images03/20240906/20ee37b7d51442c9b0989403d126b26f.jpeg',\n",
|
||||||
|
" '//q4.itc.cn/q_70/images03/20240906/4306ade7b9a0420a834dc3e6b45ebc9a.jpeg',\n",
|
||||||
|
" '//q7.itc.cn/q_70/images03/20240906/a42ae5a2fb344dbe8f08f950ce7ab134.jpeg',\n",
|
||||||
|
" '//q3.itc.cn/q_70/images03/20240906/935915519f284b5faf7cb157b3feb1c8.jpeg',\n",
|
||||||
|
" '//q1.itc.cn/q_70/images03/20240906/fb1542f48886445292b9889ec650d339.jpeg',\n",
|
||||||
|
" '//q4.itc.cn/q_70/images03/20240906/4d855fa2228e4a9ea136d13a48b1806c.jpeg',\n",
|
||||||
|
" '//q4.itc.cn/q_70/images03/20240906/5baf5f19c1034f059c330ce276ef8a7f.jpeg',\n",
|
||||||
|
" '//q0.itc.cn/q_70/images03/20240906/e0bec6d6d5df4c2694d7b87b5d4836d3.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1725609435000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': '初音未来17周年',\n",
|
||||||
|
" 'titleHL': '<b>初音未来</b>17周年',\n",
|
||||||
|
" 'tkd': {'desc': \" 角色名:初音未来 CN:安凉 公主殿下生日快乐ww ෆ( ˶''ᵕ''˶)ෆ 图片授权来源:次元岛…\",\n",
|
||||||
|
" 'titleSeo': '初音未来17周年_安凉_ww_公主'},\n",
|
||||||
|
" 'type': 2,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/806861635_532686'},\n",
|
||||||
|
" {'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来五人组,Unveiled',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>五人组,Unveiled',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q2.itc.cn/images01/20241023/5dd140dbde83449da519e1fde36392ed.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 819573079,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q2.itc.cn/images01/20241023/5dd140dbde83449da519e1fde36392ed.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q2.itc.cn/images01/20241023/5dd140dbde83449da519e1fde36392ed.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1729690905000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来五人组,Unveiled',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>五人组,Unveiled',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来五人组,Unveiled'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/819573079_120122317'},\n",
|
||||||
|
" {'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来,单色骑士',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>,单色骑士',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q4.itc.cn/images01/20240925/15fac091615e4414a8464584bc781b17.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 811660644,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q4.itc.cn/images01/20240925/15fac091615e4414a8464584bc781b17.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q4.itc.cn/images01/20240925/15fac091615e4414a8464584bc781b17.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1727266715000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来,单色骑士',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>,单色骑士',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来,单色骑士'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/811660644_120122317'},\n",
|
||||||
|
" {'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来,SHOW',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>,SHOW',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q6.itc.cn/images01/20240810/9747fd8e91df442bad0884992c48298f.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 799888119,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q6.itc.cn/images01/20240810/9747fd8e91df442bad0884992c48298f.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q6.itc.cn/images01/20240810/9747fd8e91df442bad0884992c48298f.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1723271453000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来,SHOW',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>,SHOW',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来,SHOW'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/799888119_120122317'},\n",
|
||||||
|
" {'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来,卡哇伊',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>,卡哇伊',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q7.itc.cn/images01/20240811/4f19e40cf4f14aea8949409ed3bb11c1.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 800129858,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q7.itc.cn/images01/20240811/4f19e40cf4f14aea8949409ed3bb11c1.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q7.itc.cn/images01/20240811/4f19e40cf4f14aea8949409ed3bb11c1.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1723388073000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来,卡哇伊',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>,卡哇伊',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来,卡哇伊'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/800129858_120122317'},\n",
|
||||||
|
" {'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来,MelticHell',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>,MelticHell',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q2.itc.cn/images01/20241013/cbdd5e2ccfa54bd09958df3eb7605b43.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 816244183,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q2.itc.cn/images01/20241013/cbdd5e2ccfa54bd09958df3eb7605b43.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q2.itc.cn/images01/20241013/cbdd5e2ccfa54bd09958df3eb7605b43.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1728832935000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来,MelticHell',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>,MelticHell',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来,Meltic Hell'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/816244183_120122317'},\n",
|
||||||
|
" {'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来,情感风车',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>,情感风车',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q2.itc.cn/images01/20240901/90104834c6db474f944a0c4eab49171a.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 805468248,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q2.itc.cn/images01/20240901/90104834c6db474f944a0c4eab49171a.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q2.itc.cn/images01/20240901/90104834c6db474f944a0c4eab49171a.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1725175701000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来,情感风车',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>,情感风车',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来,情感风车'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/805468248_120122317'},\n",
|
||||||
|
" {'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来,RPG',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>,RPG',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q2.itc.cn/images01/20240818/63f12d9b98954d75ab030228164044ed.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 801797229,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q2.itc.cn/images01/20240818/63f12d9b98954d75ab030228164044ed.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q2.itc.cn/images01/20240818/63f12d9b98954d75ab030228164044ed.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1723994416000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来,RPG',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>,RPG',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来,RPG'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/801797229_120122317'},\n",
|
||||||
|
" {'authorId': '120122317',\n",
|
||||||
|
" 'authorName': '断断续续的灰暗',\n",
|
||||||
|
" 'authorNameHL': '断断续续的灰暗',\n",
|
||||||
|
" 'authorPic': '',\n",
|
||||||
|
" 'authorUrl': '',\n",
|
||||||
|
" 'author_header': '//5b0988e595225.cdn.sohucs.com/a_auto,c_cut,x_0,y_40,w_378,h_378/images/20190326/294963d73b2b4fdd92e562891df3a7e9.png',\n",
|
||||||
|
" 'bigCover': '',\n",
|
||||||
|
" 'brief': 'MMD初音未来,Go-Getters',\n",
|
||||||
|
" 'briefAlg': '',\n",
|
||||||
|
" 'briefAlgHL': '',\n",
|
||||||
|
" 'briefHL': 'MMD<b>初音未来</b>,Go-Getters',\n",
|
||||||
|
" 'content': '',\n",
|
||||||
|
" 'cover': '//q0.itc.cn/images01/20240914/557237d353d244dd873e01c0e0f907f5.jpeg',\n",
|
||||||
|
" 'finalScore': 0,\n",
|
||||||
|
" 'hasHeadVideo': False,\n",
|
||||||
|
" 'id': 809054528,\n",
|
||||||
|
" 'imageNews': False,\n",
|
||||||
|
" 'images': '[\"//q0.itc.cn/images01/20240914/557237d353d244dd873e01c0e0f907f5.jpeg\"]',\n",
|
||||||
|
" 'imagesList': ['//q0.itc.cn/images01/20240914/557237d353d244dd873e01c0e0f907f5.jpeg'],\n",
|
||||||
|
" 'insertType': 'not',\n",
|
||||||
|
" 'original': '0',\n",
|
||||||
|
" 'passport': '',\n",
|
||||||
|
" 'postTime': 1726322159000,\n",
|
||||||
|
" 'publicTime': 0,\n",
|
||||||
|
" 'pv': 0,\n",
|
||||||
|
" 'rankScore': 0,\n",
|
||||||
|
" 'resourceType': 1,\n",
|
||||||
|
" 'scm': '1019.20001.0.0.0',\n",
|
||||||
|
" 'scoreMap': {},\n",
|
||||||
|
" 'size': 0,\n",
|
||||||
|
" 'status': 4,\n",
|
||||||
|
" 'subType': '',\n",
|
||||||
|
" 'title': 'MMD初音未来,Go-Getters',\n",
|
||||||
|
" 'titleHL': 'MMD<b>初音未来</b>,Go-Getters',\n",
|
||||||
|
" 'tkd': {'desc': '', 'titleSeo': 'MMD初音未来,Go-Getters'},\n",
|
||||||
|
" 'type': 4,\n",
|
||||||
|
" 'url': 'https://www.sohu.com/a/809054528_120122317'}]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 20,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"import requests\n",
|
||||||
|
"\n",
|
||||||
|
"# 定义获取数据的函数\n",
|
||||||
|
"def get(f):\n",
|
||||||
|
" cookies = {\n",
|
||||||
|
" 'SUV': '1682600488353n7ed0n',\n",
|
||||||
|
" 'gidinf': 'x099980108ee16eb903bf9c3a0002c6c45a6126fa0ce',\n",
|
||||||
|
" '__bid_n': '187c2cdf5124a9a1094207',\n",
|
||||||
|
" 'FPTOKEN': '1Mt2B+q4ms2SmDH0RjHas02rR3wqOchNJg4OJnkgx809EgEUOjpnKG+QmQQlssyVk0njeniH4+/iAx1hMsEp3ikNwLcFH7wGBidjr2JVmt6IZ/vH4wGJXp2rf6qa8O7ItDipl3MK3C5f8o1HewrQfrhTJl/kwOoyIIxjCnmpLh+KFMe56vAkc3QXrmEo0PjxfoR0FKM/hMeTFKbmpb10GQ9gNtGaVazm0Mak/cP51Yjc6zXg5k8Pshk6KsJyh68uG6Apan1nm5OnH9z+UHCMzfI1tX8PdLLC0tZKpAA5yYbTjnMCUJiDgHyDzIRuXRJV9L0Sqdm3In7qspvgT0yK8dcOA/FX6Ftxre+izqYifswkFu47RUeaks49cU3iRgil+GaldYgvPrseHsHbHopwgA==|NtNVk52T90o323e3k7qaAnqOwNsKBY+7kxKUu+iRrbU=|10|ea23f8f7dd46deccfb869d38e8709ecb',\n",
|
||||||
|
" 't': '1729738002359',\n",
|
||||||
|
" 'IPLOC': 'CN',\n",
|
||||||
|
" 'cityIpLocation': '120.230.118.166',\n",
|
||||||
|
" 'reqtype': 'pc',\n",
|
||||||
|
" '_dfp': '5hsxjNh8Kh6BGaB/hxL0ivahlxLemvXQraPfbMmNgeA=',\n",
|
||||||
|
" 'clt': '1729737995',\n",
|
||||||
|
" 'cld': '20241024104635',\n",
|
||||||
|
" 'arialoadData': 'false',\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" headers = {\n",
|
||||||
|
" 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0',\n",
|
||||||
|
" 'Accept': 'application/json, text/plain, */*',\n",
|
||||||
|
" 'Accept-Language': 'en-US,en;q=0.5',\n",
|
||||||
|
" # 'Accept-Encoding': 'gzip, deflate, br, zstd',\n",
|
||||||
|
" 'Referer': 'https://search.sohu.com/?queryType=outside&keyword=%E5%88%9D%E9%9F%B3%E6%9C%AA%E6%9D%A5&spm=smpc.home.0.0.1729737994965ei0ArwS_1467',\n",
|
||||||
|
" 'clientType': '1',\n",
|
||||||
|
" 'referPath': 'https://www.sohu.com/',\n",
|
||||||
|
" 'requestId': '1729738071936c1NFydh',\n",
|
||||||
|
" 'referSpm': 'smpc.home.0.0.1729737994965ei0ArwS_1467',\n",
|
||||||
|
" 'refererPath': '/',\n",
|
||||||
|
" 'pvId': '1729738002239XgA6S9q',\n",
|
||||||
|
" 'Connection': 'keep-alive',\n",
|
||||||
|
" # 'Cookie': 'SUV=1682600488353n7ed0n; gidinf=x099980108ee16eb903bf9c3a0002c6c45a6126fa0ce; __bid_n=187c2cdf5124a9a1094207; FPTOKEN=1Mt2B+q4ms2SmDH0RjHas02rR3wqOchNJg4OJnkgx809EgEUOjpnKG+QmQQlssyVk0njeniH4+/iAx1hMsEp3ikNwLcFH7wGBidjr2JVmt6IZ/vH4wGJXp2rf6qa8O7ItDipl3MK3C5f8o1HewrQfrhTJl/kwOoyIIxjCnmpLh+KFMe56vAkc3QXrmEo0PjxfoR0FKM/hMeTFKbmpb10GQ9gNtGaVazm0Mak/cP51Yjc6zXg5k8Pshk6KsJyh68uG6Apan1nm5OnH9z+UHCMzfI1tX8PdLLC0tZKpAA5yYbTjnMCUJiDgHyDzIRuXRJV9L0Sqdm3In7qspvgT0yK8dcOA/FX6Ftxre+izqYifswkFu47RUeaks49cU3iRgil+GaldYgvPrseHsHbHopwgA==|NtNVk52T90o323e3k7qaAnqOwNsKBY+7kxKUu+iRrbU=|10|ea23f8f7dd46deccfb869d38e8709ecb; t=1729738002359; IPLOC=CN; cityIpLocation=120.230.118.166; reqtype=pc; _dfp=5hsxjNh8Kh6BGaB/hxL0ivahlxLemvXQraPfbMmNgeA=; clt=1729737995; cld=20241024104635; arialoadData=false',\n",
|
||||||
|
" 'Sec-Fetch-Dest': 'empty',\n",
|
||||||
|
" 'Sec-Fetch-Mode': 'cors',\n",
|
||||||
|
" 'Sec-Fetch-Site': 'same-origin',\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" params = {\n",
|
||||||
|
" 'keyword': '初音未来',\n",
|
||||||
|
" 'terminalType': 'pc',\n",
|
||||||
|
" 'spm-pre': 'smpc.csrpage.0.0.1729738002239XgA6S9q',\n",
|
||||||
|
" 'SUV': '1682600488353n7ed0n',\n",
|
||||||
|
" 'from': f,\n",
|
||||||
|
" 'size': '10',\n",
|
||||||
|
" 'searchType': 'news',\n",
|
||||||
|
" 'queryType': 'outside',\n",
|
||||||
|
" 'queryId': '17297380710005Yid007',\n",
|
||||||
|
" 'pvId': '1729738002239XgA6S9q',\n",
|
||||||
|
" 'refer': 'https%3A//www.sohu.com/',\n",
|
||||||
|
" 'spm': 'smpc.csrpage.0.0.1729738002239XgA6S9q',\n",
|
||||||
|
" 'maxL': '15',\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" response = requests.get('https://search.sohu.com/search/meta', params=params, cookies=cookies, headers=headers)\n",
|
||||||
|
" news = response.json()['data']['news']\n",
|
||||||
|
" return news\n",
|
||||||
|
"get(0)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 22,
|
||||||
|
"id": "0de84f22-ec09-4e20-8dd7-6ca2322d1adf",
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[datetime.datetime(2024, 9, 28, 23, 34, 43), 'MMD初音未来,UNDEAD', 'https://www.sohu.com/a/812565548_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 6, 15, 57, 15), '初音未来17周年', 'https://www.sohu.com/a/806861635_532686']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 23, 21, 41, 45), 'MMD初音未来五人组,Unveiled', 'https://www.sohu.com/a/819573079_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 25, 20, 18, 35), 'MMD初音未来,单色骑士', 'https://www.sohu.com/a/811660644_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 10, 14, 30, 53), 'MMD初音未来,SHOW', 'https://www.sohu.com/a/799888119_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 11, 22, 54, 33), 'MMD初音未来,卡哇伊', 'https://www.sohu.com/a/800129858_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 13, 23, 22, 15), 'MMD初音未来,MelticHell', 'https://www.sohu.com/a/816244183_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 1, 15, 28, 21), 'MMD初音未来,情感风车', 'https://www.sohu.com/a/805468248_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 18, 23, 20, 16), 'MMD初音未来,RPG', 'https://www.sohu.com/a/801797229_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 14, 21, 55, 59), 'MMD初音未来,Go-Getters', 'https://www.sohu.com/a/809054528_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 18, 23, 58, 48), 'MMD初音未来,叭噗', 'https://www.sohu.com/a/817974411_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 2, 20, 44, 58), 'MMD初音未来6人组,热风', 'https://www.sohu.com/a/813525870_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 13, 22, 59, 23), 'MMD初音未来,倾诉迷魂', 'https://www.sohu.com/a/800642732_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 20, 21, 21, 34), 'MMD初音未来,拜拜呀呆', 'https://www.sohu.com/a/810397011_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 11, 23, 59, 16), '初音未来,成了巴西人民的自由女神', 'https://www.sohu.com/a/808201244_628730']\n",
|
||||||
|
"[datetime.datetime(2024, 7, 28, 22, 4, 25), 'MMD初音未来,恋爱哲学', 'https://www.sohu.com/a/796798684_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 15, 11, 36, 32), '纪念系列45周年高达宣布将联动初音未来', 'https://www.sohu.com/a/816700739_258858']\n",
|
||||||
|
"[datetime.datetime(2024, 7, 31, 21, 43, 59), 'MMD25时,初音未来,心灵烙印', 'https://www.sohu.com/a/797613538_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 5, 9, 41, 14), '韩国妹COS初音未来超可爱', 'https://www.sohu.com/a/806469511_114822']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 24, 23, 33, 3), 'MMD初音未来,猫尾草之歌', 'https://www.sohu.com/a/803409803_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 12, 16, 23, 7), 'MMD初音未来,在黎明前一跃而起', 'https://www.sohu.com/a/815933102_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 8, 12, 28, 58), 'MMD初音未来,甜甜圈洞short', 'https://www.sohu.com/a/807255356_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 29, 14, 40, 43), '世嘉“折扣季”初音未来等游戏大促来袭!', 'https://www.sohu.com/a/804684003_114822']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 16, 11, 20, 41), 'steam上198元!初音未来音乐舞蹈游戏上架!', 'https://www.sohu.com/a/809348262_114822']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 28, 22, 55, 49), 'MMD初音未来,晓山瑞希,宵崎奏,孜然炉火', 'https://www.sohu.com/a/804480099_120122317']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 5, 14, 33, 11), '我去,初音未来', 'https://www.sohu.com/a/798669629_121978531']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 16, 10, 48, 54), '《初音未来ProjectDIVAMEGA39’s+》Steam国区永降,198元', 'https://www.sohu.com/a/809329371_114760']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 2, 0, 55, 5), '初音未来绝美“五线谱双马尾”造型重现!梦幻感十足,引发网友疯狂预购', 'https://www.sohu.com/a/797914936_100136455']\n",
|
||||||
|
"[datetime.datetime(2024, 7, 26, 9, 44, 13), '萤火虫漫展百位初音未来游场来啦!', 'https://www.sohu.com/a/796188957_492199']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 26, 11, 41, 11), '初音未来:右肩的蝶', 'https://www.sohu.com/a/811829063_120214166']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 22, 10, 48, 23), '《蓦然回首》电影宣发惹争议高达联动初音未来|二次元的一周', 'https://www.sohu.com/a/818934933_400919']\n",
|
||||||
|
"[datetime.datetime(2024, 7, 30, 14, 4, 38), '初音未来演唱会“开”到快手,独家直播引8304万人次观看', 'https://www.sohu.com/a/797213099_339728']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 5, 15, 29, 49), '孙尚香初音未来联动首爆,裴擒虎迎来FMVP传说皮肤,李白玩家笑了', 'https://www.sohu.com/a/806558731_99974395']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 3, 0, 0, 30), '寡姐上身初音未来同款,这个工具把次元壁给干碎了。。。', 'https://www.sohu.com/a/805809883_355019']\n",
|
||||||
|
"[datetime.datetime(2024, 7, 30, 11, 40, 42), '《世界计划:破碎的世界与不能唱歌的未来》官宣制作初音未来主演', 'https://www.sohu.com/a/797220452_114822']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 20, 21, 10, 10), '初音未来登陆Twitter,Digiral6账号揭秘', 'https://www.sohu.com/a/818433829_122014422']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 30, 19, 57, 53), '《初音未来:缤纷舞台》国服预约开始啦!', 'https://www.sohu.com/a/813108136_121980188']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 1, 9, 5, 51), 'PSN214港币,音游《初音未来ProjectDIVAFTDX》5折史低', 'https://www.sohu.com/a/805391494_114760']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 5, 11, 44, 34), '韩国妹COS初音未来超可爱', 'https://www.sohu.com/a/806504111_211762']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 5, 10, 53, 21), '韩妹COS初音未来:丝袜胶衣大展偶像身材', 'https://www.sohu.com/a/806482198_120850052']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 6, 19, 40, 4), '初音未来:不同国家画师笔下的特色造型', 'https://www.sohu.com/a/806926738_120889838']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 9, 15, 40, 40), '世界计划彩色舞台!feat.初音未来入坑教程来啦!', 'https://www.sohu.com/a/814952487_122050252']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 11, 16, 21, 33), '初音未来压泡面人偶:樱花未来2025版即将推出', 'https://www.sohu.com/a/808092315_120889838']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 29, 15, 51, 22), '世嘉“折扣季”初音未来等游戏大促来袭!', 'https://www.sohu.com/a/804688857_211762']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 25, 17, 11, 57), '【24-cv-08758】HATSUNEMIKU初音未来发起维权,暂未TRO!', 'https://www.sohu.com/a/811606822_122001311']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 25, 19, 14, 2), '【24-cv-8758】速看!日本动漫人气歌手“初音未来”商标维权!', 'https://www.sohu.com/a/811647248_121439307']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 3, 16, 39, 58), '世界计划彩色舞台!feat.初音未来太好玩!汉化教程', 'https://www.sohu.com/a/813680488_122050161']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 17, 19, 45, 23), '玛特宇宙:初音未来Live将于12月举行!', 'https://www.sohu.com/a/817537998_121628232']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 8, 15, 49, 40), '世界计划彩色舞台!feat.初音未来,提示通信错误,如何解决?', 'https://www.sohu.com/a/814629757_122050227']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 29, 22, 20, 57), '我以后不会再画涩涩的初音未来了', 'https://www.sohu.com/a/804802670_121981185']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 10, 18, 54), '限时秒杀!FURYU天猫官方旗舰店即将开业,初音未来、《RE:从零开始》超值福袋低至33折,9月12日10点开抢!', 'https://www.sohu.com/a/807854871_639898']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 23, 13, 20, 10), '星之迟迟cos蕾姆+初音未来+空之境界写真', 'https://www.sohu.com/a/803042507_121984665']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 8, 14, 56, 5), '世界计划彩色舞台!feat.初音未来四周年资讯大放送', 'https://www.sohu.com/a/814609255_122050252']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 8, 14, 56, 58), '超火的《世界计划彩色舞台!feat.初音未来》下载游玩教程', 'https://www.sohu.com/a/814610002_122050252']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 11, 8, 41, 23), '初音未来黑丝COS,这颜值和身材太绝了,可惜丝袜破了', 'https://www.sohu.com/a/799998139_121936213']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 1, 10, 20, 25), 'PSN214港币,音游《初音未来ProjectDIVAFTDX》5折史低', 'https://www.sohu.com/a/805408714_211762']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 11, 11, 18, 14), '“我去,初音未来!”Twitter:rnrqkq123(Octosoup)', 'https://www.sohu.com/a/808009495_122014422']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 23, 20, 36, 6), '初音未来COS:这美腿与过膝黑胶袜,性感身材真是太绝了!', 'https://www.sohu.com/a/803167085_120889838']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 29, 15, 50, 8), '24-cv-08758,当心!HatsuneMiku初音未来商标TRO风暴再来袭', 'https://www.sohu.com/a/812721398_121196590']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 8, 15, 1, 8), '世界计划彩色舞台!feat.初音未来引继码如何生成和使用?', 'https://www.sohu.com/a/814610909_122050252']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 30, 17, 48, 36), '初音未来:缤纷舞台新手入门攻略,桃心云手机挂机开启音乐之旅', 'https://www.sohu.com/a/813071840_121932100']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 26, 17, 52, 21), '【24-cv-08758】二次元顶流”初音未来“商标维权又来了!', 'https://www.sohu.com/a/811952812_122010985']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 30, 13, 44, 4), '日本动漫人气偶像“初音未来”发起商标维权案件号:24-cv-08758', 'https://www.sohu.com/a/812985173_100203300']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 19, 15, 57, 39), '7,998架无人机夜空再现初音未来MV打破健力士记录', 'https://www.sohu.com/a/818137991_506163']\n",
|
||||||
|
"[datetime.datetime(2024, 8, 16, 19, 53, 13), '玛特宇宙:数字偶像与虚拟艺术家——初音未来引领的虚拟革命!', 'https://www.sohu.com/a/801388737_121628232']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 3, 13, 27, 9), '在沃尔玛打工的初音未来Twitter:RumblyF(🪷SpringTanuki🪷)', 'https://www.sohu.com/a/805942812_122014422']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 30, 14, 24, 32), '初音未来:缤纷舞台有电脑版吗?电脑版安装攻略带你轻松上手!', 'https://www.sohu.com/a/812996250_121953768']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 8, 15, 35, 20), '世界计划彩色舞台!feat.初音未来显示安装失败要怎么办才好?', 'https://www.sohu.com/a/814623828_122052153']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 16, 13, 16, 45), '初音未来音乐舞蹈游戏登陆Steam!仅售198元,你还不快入手?', 'https://www.sohu.com/a/809363809_362225']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 8, 15, 34, 46), '16世界计划彩色舞台!feat.初音未来,卡页面进不去怎么办啊?!', 'https://www.sohu.com/a/814623815_122052153']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 1, 10, 29, 15), '初音未来大折扣!PSN港服《ProjectDIVAFTDX》5折优惠,音游迷怎能错过?', 'https://www.sohu.com/a/805408718_362225']\n",
|
||||||
|
"[datetime.datetime(2024, 10, 8, 19, 10, 47), '无视一切的恋爱,宅男当年曾娶初音未来领结婚证,如今怎么样了?', 'https://www.sohu.com/a/814623848_121166535']\n",
|
||||||
|
"[datetime.datetime(2024, 7, 30, 15, 19, 15), '《世界计划:破碎的世界与不能唱歌的未来》官宣制作初音未来主演', 'https://www.sohu.com/a/797235003_211762']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 22, 19, 56, 47), '还记得和初音未来结婚的男子吗?一年过去了,他们的现状怎么样了?', 'https://www.sohu.com/a/810770579_121166539']\n",
|
||||||
|
"[datetime.datetime(2024, 9, 2, 8, 39, 4), '神っぽいな(像神一样呐)|初音ミク(初音未来)|揉揉酱自制小提琴谱|五线谱|乐谱', 'https://www.sohu.com/a/805585736_120879343']\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from datetime import datetime\n",
|
||||||
|
"import time\n",
|
||||||
|
"data = []\n",
|
||||||
|
"\n",
|
||||||
|
"for i in range(10):\n",
|
||||||
|
" for n in get(i * 10):\n",
|
||||||
|
" title = n['title']\n",
|
||||||
|
" brief = n['brief']\n",
|
||||||
|
" url = n['url']\n",
|
||||||
|
" \n",
|
||||||
|
" # 给定的时间戳\n",
|
||||||
|
" timestamp = n['postTime']\n",
|
||||||
|
" \n",
|
||||||
|
" # 将时间戳从毫秒转换为秒\n",
|
||||||
|
" timestamp_in_seconds = timestamp / 1000\n",
|
||||||
|
" \n",
|
||||||
|
" # 转换为日期时间对象\n",
|
||||||
|
" date_time = datetime.fromtimestamp(timestamp_in_seconds)\n",
|
||||||
|
" print([date_time, title, url])\n",
|
||||||
|
" data.append([date_time, title, url, brief])\n",
|
||||||
|
"\n",
|
||||||
|
" time.sleep(1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"id": "73da2eaf-edc7-49df-be42-1211a70e6819",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>时间</th>\n",
|
||||||
|
" <th>标题</th>\n",
|
||||||
|
" <th>URL</th>\n",
|
||||||
|
" <th>摘要</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>2024-09-28 23:34:43</td>\n",
|
||||||
|
" <td>MMD初音未来,UNDEAD</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/812565548_120122317</td>\n",
|
||||||
|
" <td>MMD初音未来,UNDEAD</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>2024-09-06 15:57:15</td>\n",
|
||||||
|
" <td>初音未来17周年</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/806861635_532686</td>\n",
|
||||||
|
" <td>角色名:初音未来CN:安凉公主殿下生日快乐wwෆ(˶''ᵕ''˶)ෆ图片授权来源:次元岛…</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>2024-10-23 21:41:45</td>\n",
|
||||||
|
" <td>MMD初音未来五人组,Unveiled</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/819573079_120122317</td>\n",
|
||||||
|
" <td>MMD初音未来五人组,Unveiled</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>2024-09-25 20:18:35</td>\n",
|
||||||
|
" <td>MMD初音未来,单色骑士</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/811660644_120122317</td>\n",
|
||||||
|
" <td>MMD初音未来,单色骑士</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>2024-08-10 14:30:53</td>\n",
|
||||||
|
" <td>MMD初音未来,SHOW</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/799888119_120122317</td>\n",
|
||||||
|
" <td>MMD初音未来,SHOW</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>70</th>\n",
|
||||||
|
" <td>2024-09-01 10:29:15</td>\n",
|
||||||
|
" <td>初音未来大折扣!PSN港服《ProjectDIVAFTDX》5折优惠,音游迷怎能错过?</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/805408718_362225</td>\n",
|
||||||
|
" <td>《初音未来ProjectDIVAFutureToneDX》不仅是一款音乐游戏,更是一次全面的...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>71</th>\n",
|
||||||
|
" <td>2024-10-08 19:10:47</td>\n",
|
||||||
|
" <td>无视一切的恋爱,宅男当年曾娶初音未来领结婚证,如今怎么样了?</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/814623848_121166535</td>\n",
|
||||||
|
" <td>在日本,一男子却做出更加惊人之举,因为他的爱情已经跨越了种族、肤色甚至是虚实,和一位虚拟人物...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>72</th>\n",
|
||||||
|
" <td>2024-07-30 15:19:15</td>\n",
|
||||||
|
" <td>《世界计划:破碎的世界与不能唱歌的未来》官宣制作初音未来主演</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/797235003_211762</td>\n",
|
||||||
|
" <td>初音未来主演的剧场版动画《世界计划:破碎的世界与不能唱歌的未来》现已正式开始制作,并且预告和...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>73</th>\n",
|
||||||
|
" <td>2024-09-22 19:56:47</td>\n",
|
||||||
|
" <td>还记得和初音未来结婚的男子吗?一年过去了,他们的现状怎么样了?</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/810770579_121166539</td>\n",
|
||||||
|
" <td>现在社会,科技已经深入到生活每一个细节,大到国家重量级项目,小到家中一个小小的扫地机器人,不...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>74</th>\n",
|
||||||
|
" <td>2024-09-02 08:39:04</td>\n",
|
||||||
|
" <td>神っぽいな(像神一样呐)|初音ミク(初音未来)|揉揉酱自制小提琴谱|五线谱|乐谱</td>\n",
|
||||||
|
" <td>https://www.sohu.com/a/805585736_120879343</td>\n",
|
||||||
|
" <td>这首歌曲是二次元音乐人匹诺曹P的《神芽na(像神一样呐)》,歌词讽刺现代社会中对“神性”的盲...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>75 rows × 4 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" 时间 标题 \\\n",
|
||||||
|
"0 2024-09-28 23:34:43 MMD初音未来,UNDEAD \n",
|
||||||
|
"1 2024-09-06 15:57:15 初音未来17周年 \n",
|
||||||
|
"2 2024-10-23 21:41:45 MMD初音未来五人组,Unveiled \n",
|
||||||
|
"3 2024-09-25 20:18:35 MMD初音未来,单色骑士 \n",
|
||||||
|
"4 2024-08-10 14:30:53 MMD初音未来,SHOW \n",
|
||||||
|
".. ... ... \n",
|
||||||
|
"70 2024-09-01 10:29:15 初音未来大折扣!PSN港服《ProjectDIVAFTDX》5折优惠,音游迷怎能错过? \n",
|
||||||
|
"71 2024-10-08 19:10:47 无视一切的恋爱,宅男当年曾娶初音未来领结婚证,如今怎么样了? \n",
|
||||||
|
"72 2024-07-30 15:19:15 《世界计划:破碎的世界与不能唱歌的未来》官宣制作初音未来主演 \n",
|
||||||
|
"73 2024-09-22 19:56:47 还记得和初音未来结婚的男子吗?一年过去了,他们的现状怎么样了? \n",
|
||||||
|
"74 2024-09-02 08:39:04 神っぽいな(像神一样呐)|初音ミク(初音未来)|揉揉酱自制小提琴谱|五线谱|乐谱 \n",
|
||||||
|
"\n",
|
||||||
|
" URL \\\n",
|
||||||
|
"0 https://www.sohu.com/a/812565548_120122317 \n",
|
||||||
|
"1 https://www.sohu.com/a/806861635_532686 \n",
|
||||||
|
"2 https://www.sohu.com/a/819573079_120122317 \n",
|
||||||
|
"3 https://www.sohu.com/a/811660644_120122317 \n",
|
||||||
|
"4 https://www.sohu.com/a/799888119_120122317 \n",
|
||||||
|
".. ... \n",
|
||||||
|
"70 https://www.sohu.com/a/805408718_362225 \n",
|
||||||
|
"71 https://www.sohu.com/a/814623848_121166535 \n",
|
||||||
|
"72 https://www.sohu.com/a/797235003_211762 \n",
|
||||||
|
"73 https://www.sohu.com/a/810770579_121166539 \n",
|
||||||
|
"74 https://www.sohu.com/a/805585736_120879343 \n",
|
||||||
|
"\n",
|
||||||
|
" 摘要 \n",
|
||||||
|
"0 MMD初音未来,UNDEAD \n",
|
||||||
|
"1 角色名:初音未来CN:安凉公主殿下生日快乐wwෆ(˶''ᵕ''˶)ෆ图片授权来源:次元岛… \n",
|
||||||
|
"2 MMD初音未来五人组,Unveiled \n",
|
||||||
|
"3 MMD初音未来,单色骑士 \n",
|
||||||
|
"4 MMD初音未来,SHOW \n",
|
||||||
|
".. ... \n",
|
||||||
|
"70 《初音未来ProjectDIVAFutureToneDX》不仅是一款音乐游戏,更是一次全面的... \n",
|
||||||
|
"71 在日本,一男子却做出更加惊人之举,因为他的爱情已经跨越了种族、肤色甚至是虚实,和一位虚拟人物... \n",
|
||||||
|
"72 初音未来主演的剧场版动画《世界计划:破碎的世界与不能唱歌的未来》现已正式开始制作,并且预告和... \n",
|
||||||
|
"73 现在社会,科技已经深入到生活每一个细节,大到国家重量级项目,小到家中一个小小的扫地机器人,不... \n",
|
||||||
|
"74 这首歌曲是二次元音乐人匹诺曹P的《神芽na(像神一样呐)》,歌词讽刺现代社会中对“神性”的盲... \n",
|
||||||
|
"\n",
|
||||||
|
"[75 rows x 4 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 26,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas\n",
|
||||||
|
"df = pandas.DataFrame(data, columns=['时间', '标题', 'URL', '摘要'])\n",
|
||||||
|
"df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"id": "e1146fad-d792-4d0b-9761-ddae876b86f8",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df.to_csv('souhu.csv')"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
1040
6. 大模型分析.ipynb
Normal file
1040
6. 大模型分析.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user