# test_11.py
# Handle cookies: capture the cookies a response sets and print each
# one as "name=value".
import http.cookiejar, urllib.request

cookie = http.cookiejar.CookieJar()
# An opener built with HTTPCookieProcessor stores response cookies in `cookie`.
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open('http://www.baidu.com')
for item in cookie:
    print(item.name + "=" + item.value)
# test_12.py
# Handle cookies and persist them to a text file in Mozilla/Netscape format.
import http.cookiejar, urllib.request

filename = 'cookies.txt'
# MozillaCookieJar writes a cookies.txt-compatible file via save().
cookie = http.cookiejar.MozillaCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open('http://www.baidu.com')
# ignore_discard keeps session cookies; ignore_expires keeps expired ones.
cookie.save(ignore_discard=True, ignore_expires=True)
# test_13.py
# Handle cookies and persist them in libwww-perl (LWP) format instead of
# the Mozilla/Netscape cookies.txt format.
import http.cookiejar, urllib.request

filename = 'cookies.txt'
# LWPCookieJar stores cookies in the libwww-perl "Set-Cookie3" file format.
cookie = http.cookiejar.LWPCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open('http://www.baidu.com')
# ignore_discard keeps session cookies; ignore_expires keeps expired ones.
cookie.save(ignore_discard=True, ignore_expires=True)
# test_14.py
# Load previously saved cookies from a libwww-perl (LWP) format file and
# reuse them for a request.
import http.cookiejar, urllib.request

cookie = http.cookiejar.LWPCookieJar()
# The load flags mirror the flags used when the file was saved, so session
# and expired cookies are read back as well.
cookie.load('cookies.txt', ignore_discard=True, ignore_expires=True)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open('http://www.baidu.com')
print(response.read().decode('utf-8'))
# test_15.py
# Exception handling: open a page that does not exist and report the reason.
from urllib import request, error

try:
    response = request.urlopen('https://cuiqingcai.com/index.htm')
except error.URLError as e:
    # URLError is the base class, so this also catches HTTPError (404 here).
    print(e.reason)
# test_16.py
# Catch HTTPError specifically to inspect its extra attributes:
# the reason, the status code, and the response headers.
from urllib import request, error

try:
    response = request.urlopen('https://cuiqingcai.com/index.htm')
except error.HTTPError as e:
    print(e.reason, e.code, e.headers, sep='\n')
# test_17.py
# Layered exception handling: catch the more specific HTTPError first,
# fall back to the broader URLError, and use `else` for the success path.
from urllib import request, error

try:
    response = request.urlopen('https://cuiqingcai.com/index.htm')
except error.HTTPError as e:
    # HTTPError is a subclass of URLError, so it must be caught first.
    print(e.reason, e.code, e.headers, sep='\n')
except error.URLError as e:
    print(e.reason)
else:
    print('Request Successfully')
# test_18.py
# Timeout handling: an unreasonably small timeout forces a URLError whose
# `reason` is a socket.timeout instance.
import socket
import urllib.request
import urllib.error

try:
    response = urllib.request.urlopen('https://www.baidu.com', timeout=0.01)
except urllib.error.URLError as e:
    print(type(e.reason))
    # Distinguish a timeout from other connection failures.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
test_19.py
# 利用urlparse方法进行URL的解析
from urllib.parse import urlparse
result = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
print(type(result), result)
test_20.py
# 利用urlparse方法进行URL的解析
from urllib.parse import urlparse
result = urlparse('www.baidu.com/index.html;user?id=5#comment', scheme='https')
print(result)
print()
result = urlparse('http://www.baidu.com/index.html;user?id=5#comment', scheme='https')
print(result)
result = urlparse('http://www.baidu.com/index.html;user?id=5#comment', allow_fragments=False)
print(result)
result = urlparse('http://www.baidu.com/index.html#comment', allow_fragments=False)
print(result)
print("__"*10)
result = urlparse('http://www.baidu.com/index.html#comment', allow_fragments=False)
print(result.scheme, result[0], result.netloc, result[1], sep='\n')