<div class="daohang-org"><span>AA管理部</span></div>
<div class="daohang-org"><span>BB管理部</span></div>
<div class="daohang-org"><a>CC管理部</a></div>
<div class="test-org"><span>DD管理部</span></div>
# !/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import urllib
import cookielib
import json
import httplib
import re
import requests
from lxml import etree
import StringIO
import time
# Parse the saved page 'test02.html' and print the text of every <span> that
# is a direct child of a <div> whose class attribute is exactly "daohang-org".
import io

# io.open decodes to text while reading and works the same on Python 2 and 3;
# the with-block guarantees the handle is closed even if reading fails.
with io.open('test02.html', 'r', encoding='utf-8') as f:
    fd1 = f.read()

htmlEmt = etree.HTML(fd1)
# The path selects only <span> children, so <a> children and <div>s carrying
# a different class value are not part of the result.
result = htmlEmt.xpath('//div[@class="daohang-org"]/span')
for x in result:
    # x.text is the text directly inside the element (None when it has none).
    print(x.text)
Python 爬取指定 class 的标签内容
最新推荐文章于 2024-12-17 10:54:28 发布