# Movie-listing scraper script (requests + lxml).
#
# Visible flow:
#   1. Fetch the index page at `url` and parse it with lxml.
#   2. Read the `value` attribute of every <option> under the <select> named
#      'select'; each value is treated as a link to one listing page.
#   3. Open the CSV output file for writing (UTF-8).
#   4. For each listing page: resolve it against `url`, fetch it, decode the
#      body as GBK, and collect the `title` and `href` attributes of the
#      table/tr/td/b/a anchors.
#   5. For each (title, href) pair: take the first text found between the
#      《 》 brackets in the title, resolve the href against `url`, fetch that
#      detail page and parse it as GBK.
#
# NOTE(review): the statements below have lost their line breaks and
# indentation, so this line is not valid Python as it stands.
# NOTE(review): the visible text ends inside the `try:` body; its
# except/finally clause and any code that writes to or closes `f` are not
# shown here.
# NOTE(review): the URL and XPath literals contain a
# `` segment that looks like a URL-rewriting proxy
# artifact (presumably the XPaths were `//select[...]` and `//table/...`) --
# confirm against the original source before running.
# NOTE(review): `f` is opened without a `with` block and no close is visible
# in this portion.
# NOTE(review): the inner loop rebinds `mov_name`/`mov_url` (the lists being
# zipped), and the loop bodies rebind `response`/`html_content`/`root`.
# NOTE(review): `requests.get` is called without a timeout; `os` is imported
# but not used in the visible portion.
import requests,os from lxml import etree from urllib.parse import urljoin import re pattern = re.compile(r"《(.*?)》") url = 'https://www.dy2018.com/html/gndy/dyzz/index.html' response = requests.get(url) html_content = response.text root = etree.HTML(html_content) all_pages = root.xpath("//select[@name='select']/option/@value") f = open('小调网.csv','w',encoding = 'utf8') for page in all_pages: page = urljoin(url,page) response = requests.get(page) response.encoding = 'gbk' html_content = response.text root = etree.HTML(html_content) mov_name = root.xpath('//table/tr/td/b/a/@title') mov_url = root.xpath('//table/tr/td/b/a/@href') for mov_name,mov_url in zip(mov_name,mov_url): try: result = pattern.findall(mov_name)[0] href = urljoin(url,mov_url) response = requests.get(href) response.encoding= 'gbk' html_content = response.text root = etree.HTML(html_content)