# 15.1 屏幕抓取 (Screen Scraping)
# Screen-scraping example: download a page and print every link matched by
# the pattern below as "name (url)".
import re

try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    from urllib import urlopen  # Python 2 location

# Captures (href, link text) of an anchor that directly follows "<h3><a ...>".
# The quantifiers are non-greedy so each group is as short as possible.
p = re.compile('<h3><a .*?><a .*? href="(.*?)">(.*?)</a>')

# NOTE(review): this URL looks like it was rewritten by a proxy/mirror; it is
# kept unchanged here -- confirm the intended target.
response = urlopen("http://www.baidu.com")
try:
    text = response.read()
finally:
    # Release the connection even if the read fails.
    response.close()

# On Python 3 read() returns bytes, which a str pattern cannot search;
# on Python 2 it is already str and is left untouched.
if not isinstance(text, str):
    text = text.decode('utf-8', 'replace')

for url, name in p.findall(text):
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print('%s (%s)' % (name, url))
# Screen-scraping example: download a page and print every link matched by
# the pattern below as "name (url)".
import re

try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    from urllib import urlopen  # Python 2 location

# Captures (href, link text) of an anchor that directly follows "<h3><a ...>".
# The quantifiers are non-greedy so each group is as short as possible.
p = re.compile('<h3><a .*?><a .*? href="(.*?)">(.*?)</a>')

# NOTE(review): this URL looks like it was rewritten by a proxy/mirror; it is
# kept unchanged here -- confirm the intended target.
response = urlopen("http://www.baidu.com")
try:
    text = response.read()
finally:
    # Release the connection even if the read fails.
    response.close()

# On Python 3 read() returns bytes, which a str pattern cannot search;
# on Python 2 it is already str and is left untouched.
if not isinstance(text, str):
    text = text.decode('utf-8', 'replace')

for url, name in p.findall(text):
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print('%s (%s)' % (name, url))