raise TypeError('to_bytes must receive a str or bytes ' … TypeError: to_bytes must receive a str or bytes object, got NoneType

博客讲述了在Scrapy设置代理学习过程中日志报错的问题,报错信息为“TypeError: to_bytes must receive a str or bytes object, got NoneType”。起初以为是代理问题,后发现问题出在“request.meta['proxy']”行,解决方案是自定义类时继承自带中间件,之后不继承也不报错。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

在学习 Scrapy 设置代理的过程中,日志出现如下报错:

raise TypeError('to_bytes must receive a str or bytes '
TypeError: to_bytes must receive a str or bytes object, got NoneType

原代码

# Set the proxy
class MyProxyDownloaderMiddleware:
    """Middleware that tags every request with one fixed proxy URL."""

    def process_request(self, request, spider):
        """Store the proxy URL under ``request.meta['proxy']``.

        ``spider`` is accepted but not used. Returns ``None`` implicitly.
        """
        proxy_url = 'http://************:******@114.239.124.157:18761'
        request.meta.update(proxy=proxy_url)

后面仔细研究了一下

一开始以为是代理的问题,后面直接找了个专属ip发现不是代理的问题

问题出在request.meta['proxy'] 这一行

解决方案:

自定义中间件类时,继承 Scrapy 自带的 HttpProxyMiddleware 中间件就解决了

而且偶然发现之后就算不继承好像也不会报错了(什么奇葩bug...)

from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
# Set the proxy
class MyProxyDownloaderMiddleware(HttpProxyMiddleware):
    """Proxy middleware that forces one fixed proxy URL onto every request.

    The class inherits from ``HttpProxyMiddleware`` so that the stock proxy
    handling can be reused instead of being silently replaced.
    """

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` and hand the request to the parent.

        Overriding ``process_request`` without calling the parent would make
        the inheritance a no-op for request processing.
        NOTE(review): the parent implementation is expected to move the
        ``user:password@`` part of the URL into a ``Proxy-Authorization``
        header -- confirm against the installed Scrapy version.
        """
        request.meta['proxy'] = 'http://************:******@114.239.124.157:18761'
        return super().process_request(request, spider)

import struct
import socket
import io
import os

# The project-local ``inout`` module is optional for this file: ``InitIO`` is
# redefined further down and ``InOutException`` is not referenced by any code
# visible here.  Guarding the import keeps the module loadable when ``inout``
# is missing.
# NOTE(review): if this file itself is ``inout.py`` the import is circular.
try:
    from inout import InitIO, InOutException
except ImportError:
    class InOutException(Exception):
        """Fallback used when the project-local ``inout`` module is absent."""


# Byte width of every fixed-size integer tag (the tags are struct format chars).
_SIZE_INFO = {'B': 1, 'H': 2, 'L': 4, 'Q': 8}

# Exclusive upper bound accepted for the length prefix of a str/bytes payload.
_MAX_STR_SIZE = 65536


def _read_bytes(data, size):
    """Split ``size`` bytes off the front of ``data``; return (head, rest)."""
    return data[:size], data[size:]


def _read_file(stream, size):
    """Read up to ``size`` bytes from a file object; return (chunk, stream)."""
    return stream.read(size), stream


def _read_socket(sock, size):
    """Read ``size`` bytes from a socket, looping over short ``recv`` results.

    A single ``recv`` may return fewer bytes than requested, so keep reading
    until the request is satisfied or the peer closes the connection.
    """
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks), sock


# Checked in order with isinstance(), so subclasses (e.g. io.BytesIO or the
# object returned by open(..., 'rb')) are matched as well.
_READERS = (
    (bytes, _read_bytes),
    (io.IOBase, _read_file),
    (socket.socket, _read_socket),
)


def _reader_for(source):
    """Return the ``reader(source, size) -> (chunk, source)`` for ``source``.

    Raises:
        KeyError: for unsupported source types (the exception type the
            original dict lookup raised, kept for compatibility).
    """
    for kind, reader in _READERS:
        if isinstance(source, kind):
            return reader
    raise KeyError(type(source))


def _decode_value(read, error_prefix):
    """Decode one tagged value using ``read(size) -> bytes``.

    Returns ``None`` when ``read(1)`` yields nothing (end of input).
    ``error_prefix`` is the message prefix used for an unknown tag.
    """
    btag = read(1)
    if not btag:
        return None
    tag = btag.decode('utf-8')
    if tag in _SIZE_INFO:
        return struct.unpack('!' + tag, read(_SIZE_INFO[tag]))[0]
    if tag in ('s', 'c'):
        size = _decode_value(read, error_prefix)
        if not isinstance(size, int):
            # Truncated input or a non-integer length prefix.
            raise TypeError('invalid length field: ' + repr(size))
        if size >= _MAX_STR_SIZE:
            raise ValueError('length too long:' + str(size))
        payload = read(size)
        return payload if tag == 's' else payload.decode('utf-8')
    raise TypeError(error_prefix + tag)


# Chapter 4, section 4.3
def int_to_nbyte(n):
    """Encode a non-negative integer as a one-byte tag plus its value.

    The tag is the struct format character of the narrowest unsigned type
    that holds ``n`` (``B``, ``H``, ``L`` or ``Q``); the value follows in
    network byte order.

    Raises:
        struct.error: if ``n`` is negative or needs more than 8 bytes.
    """
    if n < (1 << 8):
        tag = 'B'
    elif n < (1 << 16):
        tag = 'H'
    elif n < (1 << 32):
        tag = 'L'
    else:
        tag = 'Q'
    return tag.encode('UTF-8') + struct.pack('!' + tag, n)


# Revised nbyte_to_int
def nbyte_to_int(source):
    """Decode one tagged integer from bytes, a file object or a socket.

    Returns:
        ``(value, source)`` where ``source`` is the unread remainder for
        bytes input, or the same file/socket object otherwise.

    Raises:
        TypeError: if the tag byte is not one of ``B``, ``H``, ``L``, ``Q``.
        KeyError: if ``source`` is of an unsupported type.
    """
    reader = _reader_for(source)
    btag, source = reader(source, 1)
    tag = btag.decode('utf-8')
    if tag not in _SIZE_INFO:
        raise TypeError('Invalid type:' + tag)
    bnum, source = reader(source, _SIZE_INFO[tag])
    return struct.unpack('!' + tag, bnum)[0], source


# Chapter 4, section 4.4
def bignum_to_bytes(n):
    """Encode a non-negative integer as little-endian base-128 groups.

    Every byte carries 7 payload bits; the high bit is set on all bytes
    except the last one.  Zero is encoded as a single ``0x00`` byte so that
    it still occupies a terminator byte in a stream.

    Raises:
        ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError('bignum_to_bytes requires a non-negative integer: ' + str(n))
    out = bytearray()
    while True:
        low = n % 128
        n >>= 7
        if n:
            out.append(low + 128)
        else:
            out.append(low)
            return bytes(out)


def bytes_to_bignum(bs):
    """Decode an integer written by ``bignum_to_bytes``.

    Decoding stops after the first byte whose high bit is clear; any bytes
    after it are ignored.  Empty input decodes to 0.
    """
    result = 0
    exp = 0
    for b in bs:
        result += (b % 128) << exp
        exp += 7
        if b & (1 << 7) == 0:
            break
    return result


def str_to_nbyte(s):
    """Encode ``bytes`` (tag ``s``) or ``str`` (tag ``c``, UTF-8) with a length prefix."""
    if isinstance(s, bytes):
        tag, s_byte = 's', s
    else:
        tag, s_byte = 'c', s.encode('utf-8')
    return tag.encode('utf-8') + int_to_nbyte(len(s_byte)) + s_byte


def nbyte_to_str(source):
    """Decode one ``s``/``c`` tagged value from bytes, a file object or a socket.

    Returns only the decoded value (``bytes`` for tag ``s``, ``str`` for tag
    ``c``); the remaining source is not returned.

    Raises:
        TypeError: if the tag byte is neither ``s`` nor ``c``.
        KeyError: if ``source`` is of an unsupported type.
    """
    reader = _reader_for(source)
    btag, source = reader(source, 1)
    tag = btag.decode('utf-8')
    if tag not in ('s', 'c'):
        raise TypeError('invalid tag:' + tag)
    size, source = nbyte_to_int(source)
    bstr, source = reader(source, size)
    return bstr if tag == 's' else bstr.decode('utf-8')


# ------------------------------------------------------------
# Combines int_to_nbyte and str_to_nbyte
def data_to_nbyte(n):
    """Encode an ``int``, ``bytes`` or ``str`` value in the tagged format.

    Raises:
        TypeError: if ``n`` is of any other type.
    """
    if isinstance(n, int):
        return int_to_nbyte(n)
    if isinstance(n, (bytes, str)):
        return str_to_nbyte(n)
    raise TypeError('invalid type: ' + str(type(n)))


# Combines nbyte_to_int and nbyte_to_str
def nbyte_to_data(source):
    """Decode one tagged value from bytes, a file object or a socket.

    Returns:
        ``(value, source)``; ``value`` is ``None`` when the input is
        exhausted before a tag byte could be read.

    Raises:
        TypeError: on an unknown tag or an invalid length prefix.
        ValueError: if a str/bytes length prefix is 65536 or more.
        KeyError: if ``source`` is of an unsupported type.
    """
    reader = _reader_for(source)

    def read(size):
        # Thread the (possibly shrinking) source through successive reads.
        nonlocal source
        chunk, source = reader(source, size)
        return chunk

    result = _decode_value(read, 'Invalid type:')
    return result, source


# Chapter 5, section 5.2
FILE_SUCCESS_TAG = b'FILEGOOD'
FILE_FALL_TAG = b'FILEFALL'
FILE_ABORT_TAG = b'FILEABRT'

# Chapter 5: example of binary-standard tags.
# Names that are assigned again in the ASCII group below end up with the
# ASCII value; only names unique to this group keep the binary value.
FILE_BEGIN_TAG = b'\x14\x12\n\x0e\x01\x01'
FILE_END_TAG = b'\x14\x12\n\x0e\x01\x02'
FILE_SIZE_TAG = b'\x14\x12\n\x0e\x01\x11'
# NOTE(review): looks like a misspelling of FILE_NAME_TAG (same value); kept
# because removing a module-level name could break importers.
FILE_TAME_TAG = b'\x14\x12\n\x0e\x01\x12'
FILE_NAME_TAG = b'\x14\x12\n\x0e\x01\x12'
FILE_CONTENT_TAG = b'\x14\x12\n\x0e\x01\x13'
# NOTE(review): presumably meant FILE_FAIL_TAG; kept as-is for compatibility.
FILE_FALL_TAG = b'\x14\x12\n\x0e\x00\x01'
FILE_TAG_SIZE = len(FILE_BEGIN_TAG)

# ASCII tags; these are the definitions in effect after import.
FILE_BEGIN_TAG = b'FILEBEG0'
FILE_END_TAG = b'FILEEND0'
FILE_SIZE_TAG = b'FILESIZE'
FILE_NAME_TAG = b'FILENAME'
FILE_CONTENT_TAG = b'FILEDATA'
FILE_BLOCKS_TAG = b'FILEBLKS'
FILE_SUCCESS_TAG = b'FILEGOOD'
FILE_FAIL_TAG = b'FILEFAIL'
FILE_ABORT_TAG = b'FILEABRT'
FILE_TAG_SIZE = len(FILE_BEGIN_TAG)


# Section 5.4 revision
class INOUT:
    """Base class for tagged-value I/O on top of a raw handle.

    Subclasses override ``read_handle`` / ``write_handle`` / ``close_handle``;
    the defaults read nothing, print what is written, and return the handle.
    """

    def __init__(self, handle):
        self.handle = handle

    def data_to_nbyte(self, n):
        """Encode ``n`` (int, bytes or str); raises TypeError otherwise."""
        return data_to_nbyte(n)

    def nbyte_to_data(self):
        """Decode one tagged value via ``read_raw``; ``None`` at end of input."""
        return _decode_value(self.read_raw, 'invalid type:')

    def read(self):
        """Read and decode one tagged value."""
        return self.nbyte_to_data()

    def write(self, d):
        """Encode ``d`` and write the resulting bytes."""
        byte_data = self.data_to_nbyte(d)
        self.write_raw(byte_data)

    def read_raw(self, n):
        """Read ``n`` raw bytes from the underlying handle."""
        return self.read_handle(n)

    def write_raw(self, d):
        """Write raw bytes to the underlying handle."""
        return self.write_handle(d)

    def close(self):
        """Close the underlying handle via ``close_handle``."""
        return self.close_handle()

    def read_handle(self, n):
        """Default reader: always reports end of input."""
        return b''

    def write_handle(self, d):
        """Default writer: print the bytes and return their length."""
        print(d)
        return len(d)

    def close_handle(self):
        """Default close: return the handle unchanged."""
        return self.handle


def InitIO(handle):
    """Wrap ``handle`` in the matching INOUT-style class, or return ``None``.

    NOTE(review): ``StringIO``, ``FileIO`` and ``NEtworkIO`` are not defined
    in the visible part of this file; they must be provided elsewhere before
    this function is called with a supported handle.
    """
    if isinstance(handle, bytes):
        return StringIO(handle)
    if isinstance(handle, io.IOBase):
        return FileIO(handle)
    if isinstance(handle, socket.socket):
        return NEtworkIO(handle)
    return None


# Section 5.6
class NetAPI:
    """Send and receive tagged file records over INOUT-style handles."""

    def __init__(self, iHandle, oHandle=None):
        """Store the input handle and the output handle.

        ``oHandle`` defaults to ``iHandle`` so a single bidirectional handle
        can be used for both directions.
        """
        self.iHandle = iHandle
        self.oHandle = iHandle if oHandle is None else oHandle

    def recv_file(self):
        """Receive ``tag -> data`` records until an end/abort tag or EOF."""
        result = {}
        while True:
            tag = self.recv_tag()
            if not tag or tag in (FILE_END_TAG, FILE_ABORT_TAG):
                break
            data = self.recv_data()
            # Only a missing value ends the transfer; 0 and b'' are valid
            # payloads (e.g. the size and content of an empty file).
            if data is None:
                break
            print(tag, data)
            result[tag] = data
        return result

    # -----------------------------------------------
    def send_file(self, path):
        """Send the name, size and content of the file at ``path``.

        Returns ``True`` on success.  If sending fails, the error is printed,
        an abort tag is sent and ``False`` is returned.  Errors while reading
        the file itself propagate to the caller.
        """
        filename = path
        filesize = os.path.getsize(path)
        with open(path, 'rb') as fh:
            filedata = fh.read()
        try:
            self.send_tag(FILE_NAME_TAG)
            self.send_data(filename)
            self.send_tag(FILE_SIZE_TAG)
            self.send_data(filesize)
            self.send_tag(FILE_CONTENT_TAG)
            self.send_data(filedata)
            self.send_tag(FILE_END_TAG)
            return True
        except Exception as e:
            print(str(e))
            self.send_tag(FILE_ABORT_TAG)
            return False

    def recv_tag(self):
        """Read one fixed-size record tag from the input handle."""
        return self.iHandle.read_raw(FILE_TAG_SIZE)

    def recv_data(self):
        """Read one tagged value from the input handle."""
        return self.iHandle.read()

    def send_tag(self, tag):
        """Write a record tag as raw bytes to the output handle."""
        self.oHandle.write_raw(tag)

    def send_data(self, data):
        """Write one tagged value to the output handle."""
        self.oHandle.write(data)

    def send_size(self, n):
        return self.send_data(n)

    def send_name(self, s):
        return self.send_data(s)

    def send_content(self, d):
        return self.send_data(d)

    def recv_size(self):
        """Placeholder: not implemented, returns ``None``."""
        pass

    def recv_name(self):
        """Placeholder: not implemented, returns ``None``."""
        pass

    def recv_content(self):
        """Placeholder: not implemented, returns ``None``."""
        pass
06-16
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值