# Status: the text has not been scraped successfully yet; still working on it.
import requests
from lxml import etree
# Fetch one reader page and print the text nodes of the first nine <span>
# children of the element whose id is "renderTargetContent".

# Page to fetch.
link='https://weread.qq.com/web/reader/92d32410715db32941cb665'

# Request headers: a logged-in session cookie plus a desktop browser User-Agent.
# NOTE(review): the cookie below is a hard-coded session credential; it should
# be loaded from the environment or a private config file instead of being
# committed with the source.
hd={'Cookie':'pgv_pvi=2308313088; RK=6GxVm0pNOQ; ptcz=a97a0f28f53c36a08e66bcb57cc3e0ff97ac9050441c7aff2acfaf119fe0ec77; pgv_pvid=8513433666; pac_uid=0_5dc7a8db1f07f; tvfe_boss_uuid=eca051f5ab3a8f09; mobileUV=1_16e93667a12_2a38e; wr_vid=207232559; wr_rt=web%40HjGdy9XbOWO5w8~SAUb_WL; wr_pf=1; wr_gender=1; wr_name=Heisenberg; wr_avatar=https%3A%2F%2Fsiteproxy.ruqli.workers.dev%3A443%2Fhttp%2Fthirdwx.qlogo.cn%2Fmmopen%2Fvi_32%2F4CKQlZonX0c9h8JM16ZPJRfSWNTGibJZ2sHj4IIziciam8RUFhN7sv1NGjI5RhEiaHIURztLnBJXmesF4B1GzicBT2g%2F132; wr_theme=dark; ptui_loginuin=635891216@qq.com; wr_skey=9sgHVtg0; wr_localvid=d0a31207c5a1e2f22de0421'
,'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'}

# A timeout keeps the script from hanging forever on a stalled connection.
r=requests.get(link,headers=hd,timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
r.raise_for_status()
# Optional: uncomment to guess the encoding from the body if the printed text
# comes out garbled because of an encoding mismatch.
#r.encoding=r.apparent_encoding
html=etree.HTML(r.text)

# XPath prefix/suffix; the 1-based span index is inserted between them.
path1='//*[@id="renderTargetContent"]/span['
path2=']/text()'
# NOTE(review): if every list printed below is empty, the reader content is
# presumably rendered client-side and absent from the static HTML - verify
# by inspecting r.text.
for i in range(1,10):
    title_list=html.xpath(path1+str(i)+path2)
    # Print a 0-based counter next to the text nodes found for span i.
    print(i-1,title_list)