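# Scraping Xiachufang (下厨房) is fairly straightforward overall; the only catch is that the search URL may answer with a 302 redirect, which has to be detected and followed.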
import requests
from lxml import etree
import re
import json
import csv
import pandas as pd
import hashlib
# Module-level accumulator: one list per scraped recipe, appended to by the
# scraping loop and dumped to CSV at the end of the script.
product_lists = []
def down_load(url):
    """Fetch a listing page, following a 302 redirect by hand.

    The first request is sent with ``allow_redirects=False`` so that the
    redirect target can be captured: the script later appends paging
    parameters to the URL returned here.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(final_url, tree)`` tuple where ``final_url`` is the redirect
        target when the server answered 302 (otherwise ``url`` itself) and
        ``tree`` is the result of ``etree.HTML`` on the page body.
    """
    site_root = "http://www.xiachufang.com"
    headers1 = {
        "Connection": "keep-alive",
        "Cookie": "bid=YTzZGjuP; gr_user_id=97f998e4-aa93-486a-99f8-854c784f6bcd; BAIDU_SSP_lcr=https://www.baidu.com/link?url=Pwq6pphyjxk3oQRJg05jjUGiwtLjL7FBVFdqA8oFqpZoXc0hIxm9p0sa3fyWRSSs&wd=&eqid=c9c1816400092dd0000000035c218bdd; Hm_lvt_ecd4feb5c351cc02583045a5813b5142=1545271479,1545702369; __utmc=177678124; __utmz=177678124.1545702370.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; gr_session_id_8187ff886f0929da=3a10677b-5117-45e8-9de4-d80fc87ffc01; gr_session_id_da48e7b9eb89482489897fc1e45e98b6=3f128e3a-5f5c-42fd-9c45-e0e1ddeb97b2; _ga=GA1.2.377109310.1545271479; _gid=GA1.2.1629322193.1545716866; __utma=177678124.377109310.1545271479.1545702370.1545717020.3; __utmt=1; gr_session_id_8187ff886f0929da_3a10677b-5117-45e8-9de4-d80fc87ffc01=true; Hm_lpvt_ecd4feb5c351cc02583045a5813b5142=1545717163; __utmb=177678124.4.10.1545717020",
        "Host": "www.xiachufang.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
    }

    html = requests.get(url=url, headers=headers1, allow_redirects=False)
    print(html.status_code)

    # A 302 without a Location header cannot be followed manually; let it
    # fall through to the generic path instead of raising KeyError.
    location = html.headers.get("location")
    if html.status_code == 302 and location:
        # Location may be relative ("/category/...") or already absolute;
        # only prepend the site root in the relative case.
        if location.startswith(("http://", "https://")):
            new_id_url = location
        else:
            new_id_url = site_root + location
        print(new_id_url)
        new_html = requests.get(url=new_id_url, headers=headers1).text
        return new_id_url, etree.HTML(new_html)

    print("++++++++++++++++")
    print(url)
    if html.status_code == 200:
        # The body is already in hand; do not download the same page twice.
        return url, etree.HTML(html.text)
    # Any other status: retry once and let requests follow redirects itself.
    return url, etree.HTML(requests.get(url=url, headers=headers1).text)
def down_load1(url):
    """Download ``url`` with a desktop-Safari User-Agent and parse it.

    Args:
        url: Address of the page to download.

    Returns:
        The result of ``etree.HTML`` applied to the response text.
    """
    request_headers = {
        "Connection": "keep-alive",
        "Cookie": "bid=YTzZGjuP; gr_user_id=97f998e4-aa93-486a-99f8-854c784f6bcd; BAIDU_SSP_lcr=https://www.baidu.com/link?url=Pwq6pphyjxk3oQRJg05jjUGiwtLjL7FBVFdqA8oFqpZoXc0hIxm9p0sa3fyWRSSs&wd=&eqid=c9c1816400092dd0000000035c218bdd; Hm_lvt_ecd4feb5c351cc02583045a5813b5142=1545271479,1545702369; __utmc=177678124; __utmz=177678124.1545702370.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; gr_session_id_8187ff886f0929da=3a10677b-5117-45e8-9de4-d80fc87ffc01; gr_session_id_da48e7b9eb89482489897fc1e45e98b6=3f128e3a-5f5c-42fd-9c45-e0e1ddeb97b2; _ga=GA1.2.377109310.1545271479; _gid=GA1.2.1629322193.1545716866; __utma=177678124.377109310.1545271479.1545702370.1545717020.3; __utmt=1; gr_session_id_8187ff886f0929da_3a10677b-5117-45e8-9de4-d80fc87ffc01=true; Hm_lpvt_ecd4feb5c351cc02583045a5813b5142=1545717163; __utmb=177678124.4.10.1545717020",
        "Host": "www.xiachufang.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    }
    response = requests.get(url=url, headers=request_headers)
    page_source = response.text
    return etree.HTML(page_source)
def down_load2(url):
    """Fetch a page and hand back its parsed HTML tree.

    Redirects are followed by ``requests`` (its default behaviour); the
    request carries the fixed cookie and desktop-Safari User-Agent below.

    Args:
        url: Address of the page to download.

    Returns:
        The result of ``etree.HTML`` applied to the response text.
    """
    browser_headers = {
        "Connection": "keep-alive",
        "Cookie": "bid=YTzZGjuP; gr_user_id=97f998e4-aa93-486a-99f8-854c784f6bcd; BAIDU_SSP_lcr=https://www.baidu.com/link?url=Pwq6pphyjxk3oQRJg05jjUGiwtLjL7FBVFdqA8oFqpZoXc0hIxm9p0sa3fyWRSSs&wd=&eqid=c9c1816400092dd0000000035c218bdd; Hm_lvt_ecd4feb5c351cc02583045a5813b5142=1545271479,1545702369; __utmc=177678124; __utmz=177678124.1545702370.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; gr_session_id_8187ff886f0929da=3a10677b-5117-45e8-9de4-d80fc87ffc01; gr_session_id_da48e7b9eb89482489897fc1e45e98b6=3f128e3a-5f5c-42fd-9c45-e0e1ddeb97b2; _ga=GA1.2.377109310.1545271479; _gid=GA1.2.1629322193.1545716866; __utma=177678124.377109310.1545271479.1545702370.1545717020.3; __utmt=1; gr_session_id_8187ff886f0929da_3a10677b-5117-45e8-9de4-d80fc87ffc01=true; Hm_lpvt_ecd4feb5c351cc02583045a5813b5142=1545717163; __utmb=177678124.4.10.1545717020",
        "Host": "www.xiachufang.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    }
    body = requests.get(url=url, headers=browser_headers).text
    tree = etree.HTML(body)
    return tree
# XPath selecting every recipe entry (<li>) on a search/category listing page.
_LISTING_ITEMS_XPATH = (
    '//div[contains(@class,"pure-u-3-4")]//div[@class="normal-recipe-list"]'
    '//ul[@class="list"]/li'
)

# Column titles of the output CSV, in the same order as the rows appended to
# ``product_lists``: keyword, author, author info, recipe name, recipe link,
# content, rating, total "cooked" count, 7-day "cooked" count, favourites,
# creation time.
_CSV_HEADER = ["关键词", "作者", "作者信息", "产品名称", "产品链接", "内容", "产品评分", "做过人数", "7天做过", "产品收藏", "创建时间"]


def _flatten_text(node):
    """Return the full text of ``node`` with spaces and newlines removed."""
    return node.xpath('string(.)').strip().replace(" ", "").replace("\n", "")


def _scrape_listing(listing_html, key_word):
    """Append one row to ``product_lists`` for every recipe on a listing page.

    For each entry this reads the summary fields from the listing itself,
    downloads the author page (``down_load2``) and the recipe page
    (``down_load1``), and stores the combined row.

    Args:
        listing_html: Parsed listing page (lxml element).
        key_word: Search keyword the listing belongs to; stored in each row.

    Raises:
        IndexError: If a mandatory element is missing from a page.
    """
    # Iterate over the <li> nodes directly and query relative to each one,
    # instead of re-running the full listing XPath with ``li[k]`` per field.
    for item in listing_html.xpath(_LISTING_ITEMS_XPATH):
        print("**************")
        product_href = item.xpath('.//p[@class="name"]/a/@href')[0]
        product_url = "http://www.xiachufang.com" + product_href
        print(product_url)
        product_name = item.xpath('.//p[@class="name"]/a/text()')[0].strip()

        # Rating and 7-day count are optional on the listing.
        star_nodes = item.xpath('.//p[@class="stats"]/span[1]/text()')
        product_star = star_nodes[0] if star_nodes else "暂无"
        make_nodes = item.xpath('.//p[@class="stats"]/span[@class="bold score"]/text()')
        product_make = make_nodes[0] if make_nodes else 0

        product_author = item.xpath('.//p[@class="author"]/a[1]/text()')[0]
        product_author_url = "http://www.xiachufang.com" + item.xpath('.//p[@class="author"]/a[1]/@href')[0]

        person_html = down_load2(product_author_url)
        person_info = _flatten_text(person_html.xpath('//div[@class="gray-font"]/div[1]')[0])
        print(person_info)
        print(product_href)
        print(product_name)

        # The original indexed the parsed tree with [1] before querying it.
        # Every query below is an absolute ("//") XPath, which lxml evaluates
        # against the whole document, so the tree is used directly.
        detil_html = down_load1(product_url)
        product_save = detil_html.xpath('//div[@class="pv"]/text()')[0]
        product_content = _flatten_text(detil_html.xpath('//div[contains(@class,"recipe-show")]')[0])
        product_time = detil_html.xpath('//div[@class="time"]/span/text()')[0]
        product_meke_all = detil_html.xpath('//div[contains(@class,"cooked")]/span[@class="number"]/text()')[0]
        print(product_meke_all)

        product_lists.append(
            [key_word, product_author, person_info, product_name, product_url, product_content, product_star,
             product_meke_all, product_make, product_save, product_time])


def _write_results(path):
    """Write the header row and everything in ``product_lists`` to ``path``."""
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f, dialect="excel")
        writer.writerow(_CSV_HEADER)
        writer.writerows(product_lists)


# Keywords come from the first column of the CSV; row 0 is skipped.
word_pd = pd.read_csv(r"C:\Users\Lavector\Desktop\百事小红书\ele1.csv", engine='python', header=None).values.tolist()
print(word_pd)

for keyword_row in word_pd[1:]:
    key_word = keyword_row[0]
    print(key_word)
    url1 = "http://www.xiachufang.com/search/?keyword={}".format(key_word)
    try:
        # down_load returns (final_url, tree); final_url differs from url1
        # when the search was redirected (e.g. to a category page).
        product_html_all = down_load(url1)
        listing_url, product_html = product_html_all
        print(len(product_html.xpath(_LISTING_ITEMS_XPATH)))
        _scrape_listing(product_html, key_word)
        print(product_html[0])
        print("++++++++")

        # NOTE(review): the original indentation was lost; paging is assumed
        # to run once per keyword after the first page — confirm.
        # Category URLs carry no query string yet, search URLs already do.
        page_prefix = "?page={}" if "category" in listing_url else "&page={}"
        for page_number in (2, 3):
            next_page_url = listing_url + page_prefix.format(str(page_number))
            _scrape_listing(down_load2(next_page_url), key_word)
        # Scraping of per-recipe comments was prototyped here as commented-out
        # code and is not performed.
    except Exception as exc:
        # Best effort: a failure abandons the current keyword only. Report the
        # reason and checkpoint what has been collected so far. Unlike the
        # former bare ``except``, Ctrl+C / SystemExit now stop the script.
        print("scraping failed for keyword {!r}: {!r}".format(key_word, exc))
        _write_results("下厨房61.csv")

print(product_lists)
print(len(product_lists))
_write_results("下厨房62.csv")