使用urllib (中)

这个系列的Python脚本展示了如何使用urllib模块处理HTTP Cookies(包括将Cookies保存到文件以及从文件读取)、处理HTTP和URL相关的异常,以及使用urlparse解析URL。例如,test_11.py演示了创建CookieJar,test_14.py展示了读取LWP格式Cookies文件,test_18.py展示了处理超时异常,test_19.py和test_20.py演示了urlparse的用法。其余脚本涵盖了不同的异常处理和Cookie操作。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

3.1-使用urllib

test_11.py

# Cookie handling: collect the cookies a server sets into an in-memory CookieJar.
import http.cookiejar
import urllib.request

cookie = http.cookiejar.CookieJar()
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
# The cookie processor fills the jar while the response is being opened, so
# the jar can be inspected inside the block; the `with` closes the connection.
with opener.open('http://www.baidu.com') as response:
    for item in cookie:
        # f-string formatting also copes with a cookie whose value is None.
        print(f"{item.name}={item.value}")

test_12.py

# Cookie handling: save the received cookies to a text file in the
# Mozilla/Netscape cookies.txt format.
import http.cookiejar
import urllib.request

filename = 'cookies.txt'
cookie = http.cookiejar.MozillaCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
# Only the cookies are of interest; the `with` closes the connection.
with opener.open('http://www.baidu.com') as response:
    pass
# ignore_discard / ignore_expires: also write session cookies and cookies
# that have already expired.
cookie.save(ignore_discard=True, ignore_expires=True)

test_13.py

# Cookie handling: save the received cookies to a text file in the
# libwww-perl (LWP) "Set-Cookie3" format. Swap in MozillaCookieJar to write
# the Mozilla cookies.txt format instead.
import http.cookiejar
import urllib.request

filename = 'cookies.txt'
cookie = http.cookiejar.LWPCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
# Only the cookies are of interest; the `with` closes the connection.
with opener.open('http://www.baidu.com') as response:
    pass
# ignore_discard / ignore_expires: also write session cookies and cookies
# that have already expired.
cookie.save(ignore_discard=True, ignore_expires=True)

test_14.py

# Cookie handling: load cookies from an LWP-format file and send them with
# a request.
import http.cookiejar
import urllib.request

cookie = http.cookiejar.LWPCookieJar()
# The file must already exist in LWP format; load() raises otherwise.
cookie.load('cookies.txt', ignore_discard=True, ignore_expires=True)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
# The `with` closes the connection once the body has been read.
with opener.open('http://www.baidu.com') as response:
    print(response.read().decode('utf-8'))

test_15.py

# Exception handling: request a page that does not exist and report why
# the request failed.
from urllib import request, error

try:
    response = request.urlopen('https://cuiqingcai.com/index.htm')
except error.URLError as e:
    # URLError is the base class of urllib's request errors (HTTPError
    # included); `reason` describes the cause.
    print(e.reason)
else:
    # Nothing is read from a successful response; release the connection.
    response.close()

test_16.py

# Exception handling: catch HTTPError to inspect the HTTP status details.
# Only HTTP-level failures are handled here; other URLErrors (e.g. no
# network) still propagate.
from urllib import request, error

try:
    response = request.urlopen('https://cuiqingcai.com/index.htm')
except error.HTTPError as e:
    # reason: status text, code: HTTP status code, headers: response headers.
    print(e.reason, e.code, e.headers, sep='\n')
else:
    # Nothing is read from a successful response; release the connection.
    response.close()

test_17.py

# Exception handling: catch the specific HTTPError first, then fall back to
# its base class URLError, and report success otherwise.
from urllib import request, error

try:
    response = request.urlopen('https://cuiqingcai.com/index.htm')
except error.HTTPError as e:
    # HTTPError is a subclass of URLError, so it must be listed first.
    print(e.reason, e.code, e.headers, sep='\n')
except error.URLError as e:
    print(e.reason)
else:
    print('Request Successfully')
    # Nothing is read from the response; release the connection.
    response.close()

test_18.py

# Exception handling: detect a timeout by using a deliberately tiny timeout.
import socket
import urllib.error
import urllib.request

try:
    response = urllib.request.urlopen('https://www.baidu.com', timeout=0.01)
except urllib.error.URLError as e:
    # Failures while connecting/sending are wrapped in URLError; the
    # underlying exception is available as `reason`.
    print(type(e.reason))
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
except socket.timeout:
    # A timeout that fires while waiting for the response is raised
    # directly rather than wrapped in URLError.
    print('TIME OUT')
else:
    # The request unexpectedly beat the timeout; release the connection.
    response.close()

test_19.py

# Parse a URL with urlparse into its six components:
# scheme://netloc/path;params?query#fragment
from urllib.parse import urlparse

result = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
# The result is a ParseResult named tuple.
print(type(result), result)

test_20.py

# Parse URLs with urlparse: effect of the `scheme` and `allow_fragments`
# arguments, and the two ways of reading fields from the result.
from urllib.parse import urlparse

# No scheme in the URL: `scheme` supplies the default. Without a leading
# "//" the host is not recognised as netloc and ends up in the path.
result = urlparse('www.baidu.com/index.html;user?id=5#comment', scheme='https')
print(result)
print()
# The URL carries its own scheme, so the `scheme` default is ignored.
result = urlparse('http://www.baidu.com/index.html;user?id=5#comment', scheme='https')
print(result)

# allow_fragments=False: "#comment" is no longer split off; it stays part
# of the preceding component (here the query).
result = urlparse('http://www.baidu.com/index.html;user?id=5#comment', allow_fragments=False)
print(result)

# With no params or query present, the fragment text stays in the path.
result = urlparse('http://www.baidu.com/index.html#comment', allow_fragments=False)
print(result)

print("__"*10)
# ParseResult is a named tuple: fields are readable by attribute or index.
print(result.scheme, result[0], result.netloc, result[1], sep='\n')
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值