1.json.loads():将JSON格式的str转换为Python对象(如dict、list)
通常在读取文件中的数据时会用到loads:读取出来的数据看上去是个字典,但实际上是str类型,这时就需要用loads把它转换成dict或list
2.json.dumps():将dict、list等Python对象转换为JSON格式的str
通常在保存数据时会用到dumps:要保存的数据是个字典,但文件的write方法只能写入str类型,所以要先用dumps把它转换成str
3.举例
(1)json.loads()
"""
[
{
"shop_id": "299244686",
"shop_name": "伊利旗舰店",
"shop_category": "进口纯牛奶"
},
{
"shop_id": "1790973264",
"shop_name": "蒙牛旗舰店",
"shop_category": "进口纯牛奶"
},
{
"shop_id": "2204191628299",
"shop_name": "pagoda百果园旗舰店",
"shop_category": "进口纯牛奶"
},
{
"shop_id": "1657488384",
"shop_name": "光明乳业官方旗舰店",
"shop_category": "进口纯牛奶"
},
{
"shop_id": "2212318149350",
"shop_name": "海河旗舰店",
"shop_category": "进口纯牛奶"
},
{
"shop_id": "2549841410",
"shop_name": "天猫国际自营进口超市",
"shop_category": "进口纯牛奶"
},
{
"shop_id": "2211913840869",
"shop_name": "伊利休闲家专卖店",
"shop_category": "进口纯牛奶"
}
]
"""
假设这些数据保存在shop_info.txt文件中,使用read读取出来以后,先用json.loads()把str转换成list,再对其遍历
def shop_read(self=None):
    """Return the raw text of the saved shop-info file.

    Reads ``data/shop_info/shop_info.txt`` below the current working
    directory as UTF-8 text. The content is returned unparsed; turning it
    into Python objects (e.g. with ``json.loads``) is left to the caller.

    :param self: unused. Accepted so the function works both as a plain
        call ``shop_read()`` and as the bound method ``self.shop_read()``
        that ``get_shop`` invokes.
    :return: the complete file content as a ``str``.
    :raises FileNotFoundError: if the file does not exist.
    """
    shop_info_path = Path.cwd() / "data" / "shop_info" / "shop_info.txt"
    with open(shop_info_path, 'r', encoding='utf-8') as f:
        return f.read()
def get_shop(self, num1=0, num2=None):
    """Crawl a window of the shops stored in the shop-info file.

    The text returned by ``self.shop_read()`` is parsed with ``json.loads``
    and every selected record is passed to ``self.carwl``.

    Assumes the file holds a JSON array of objects, each with the keys
    ``shop_category``, ``shop_url`` and ``shop_name`` -- TODO confirm
    against the code that writes the file.

    :param self: object providing ``shop_read()`` and
        ``carwl(search_text, shop_url, shop_name)``.
    :param num1: index of the 100-record batch to start from; the first
        record processed is ``num1 * 100``.
    :param num2: how many records to process from that offset; ``None``
        processes every remaining record. A window that runs past the end
        of the data is truncated instead of raising ``IndexError``.
    :raises json.JSONDecodeError: if the file content is not valid JSON.
    :raises KeyError: if a record lacks one of the three required keys.
    """
    # json.loads already yields Python objects, so the fields are read by
    # key. Running re.findall over the parsed result raises TypeError,
    # because the regex functions only accept str/bytes.
    shops = json.loads(self.shop_read())
    start = num1 * 100
    stop = None if num2 is None else start + num2
    for shop in shops[start:stop]:
        self.carwl(shop["shop_category"], shop["shop_url"], shop["shop_name"])
(2)json.dumps()
def save(self, product_json_data):
    """Append serialized product data to ``product_info.txt``, then close the driver.

    The text is appended as-is (no separator or newline is added) to
    ``product_info.txt`` in the current working directory. Afterwards the
    call pauses for two seconds and closes ``self.driver``.

    :param self: object exposing a ``driver`` attribute with a ``close()`` method.
    :param product_json_data: the already-serialized text to write; it must
        be a ``str`` because the file is opened in text mode.
    :return: None
    """
    with open("product_info.txt", mode="a", encoding="utf-8") as out_file:
        out_file.write(product_json_data)

    # Pause briefly before the browser window is closed.
    time.sleep(2)
    self.driver.close()
def parse():
    """Collect the shop records of one result page and save them as JSON.

    Builds one dict per list item (20 items per page), serializes the list
    with ``json.dumps`` and passes the resulting string to ``save``.

    NOTE(review): ``shop_data``, ``shop_id``, ``shop_name``, ``shop_url``,
    ``shop_comt``, ``shop_img``, ``name_text``, ``num_c``, ``page``,
    ``logger`` and ``save`` are not defined in this snippet; they are
    presumably supplied by the enclosing scope -- confirm against the
    full crawler.

    Failures while building the records are logged and do not propagate;
    whatever was collected before the failure is still saved.

    :return: None
    """
    shop_message_list = []
    try:
        for i in range(20):
            try:
                address = shop_data.xpath(
                    f'//li[@class="list-item"][{i + 1}]/ul/li[2]/p[@class="shop-info"]/span[@class="shop-address"]/text()')[0]
            except Exception:
                # Best effort: an item without an address (empty xpath
                # result) is stored with the placeholder "null".
                address = "null"
            shop_message_list.append({
                "shop_id": shop_id[i],
                "shop_name": shop_name[i],
                "shop_url": "https:" + shop_url[i],
                "shop_category": name_text,
                "shop_address": address,
                "good_comt": shop_comt[i],
                "shop_img": shop_img[i],
            })
        logger.info(f"第{num_c + 1}个分类的第{page + 1}页数据抓取完成!")
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit can still stop the crawler.
        logger.info(f"第{num_c + 1}个分类的第{page + 1}页的数据未能抓取!")
    # Random 3-4 second pause before the results are written out.
    time.sleep(random.randint(3, 4))
    # ensure_ascii=False keeps the Chinese text readable in the output.
    shop_json_list = json.dumps(shop_message_list, ensure_ascii=False, indent=2)
    save(shop_json_list)