import csv
from bs4 import BeautifulSoup
import requests
# Browser-like User-Agent sent with every search-page request. It never changes
# between pages, so it is built once instead of once per iteration.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# Detail-page URLs harvested from search-result pages 1..100; consumed later
# in the script when the individual listings are downloaded.
href_list = []
for i in range(1, 101):
    # NOTE(review): the "" prefix looks like a
    # proxy-rewritten form of https://sz.lianjia.com/... -- confirm the
    # intended host before running.
    url = f'https://sz.lianjia.com/ershoufang/pg{i}rs%E6%B7%B1%E5%9C%B3/'
    try:
        # Without a timeout a single stalled connection blocks the crawl forever.
        data = requests.get(url=url, headers=headers, timeout=10).text
    except requests.RequestException as exc:
        # One unreachable page should not throw away the links already collected.
        print(f'failed to fetch {url}: {exc!r}')
        continue
    soup = BeautifulSoup(data, 'lxml')
    for a_tag in soup.find_all('a', class_='noresultRecommend'):
        href = a_tag.get('href')
        # Anchors without an href cannot be fetched later, so drop them here.
        if href:
            href_list.append(href)
def _last_child(tag):
    """Return the last direct child of *tag*, or None when it has no children.

    Raises AttributeError when *tag* is None, i.e. when the element that was
    searched for is not present on the page.
    """
    children = tag.contents
    return children[-1] if children else None


def _parse_listing(soup):
    """Build one CSV row (a dict keyed by the CSV column names) from a detail page.

    Every field is computed from *soup* alone, so a value can never leak over
    from a previously parsed listing. The two price fields fall back to None
    when their element is missing; a missing element for any other field
    raises (AttributeError / IndexError / TypeError) so the caller can skip
    the listing.
    """
    # title: last child node of <h1 class="main">
    title = _last_child(soup.find('h1', class_='main'))

    # area / position: first and last <a> inside the first <span class="info">
    area_links = soup.find('span', class_='info').find_all('a')
    area = area_links[0].text
    position = _last_child(area_links[-1])

    # community
    community = soup.find('a', class_='info').text

    # total_price: optional, None when the price block is absent
    try:
        total_price = _last_child(
            soup.find('div', class_='price').find('span', class_='total')
        )
    except AttributeError:
        total_price = None

    # unit_price: optional; the last four characters are cut off
    # (presumably the unit suffix -- verify against a live page)
    try:
        unit_price_span = soup.find('span', class_='unitPriceValue')
        unit_price = unit_price_span.get_text(strip=True, separator=" ")[:-4]
    except AttributeError:
        unit_price = None

    # hourseType: text of the first <div class="mainInfo"> on the page
    hourse_type = soup.find('div', class_='mainInfo').text

    # hourseSize
    hourse_size = _last_child(
        soup.find('div', class_='area').find('div', class_='mainInfo')
    )

    # direction / fitment both live inside the first <div class="type">
    type_div = soup.find('div', class_='type')
    direction = _last_child(type_div.find('div', class_='mainInfo'))
    fitment_text = _last_child(type_div.find('div', class_='subInfo'))
    # Only the last two characters of the sub-info text are kept.
    fitment = fitment_text[-2:] if fitment_text is not None else None

    return {
        'title': title,
        'area': area,
        'community': community,
        'position': position,
        'total_price': total_price,
        'unit_price': unit_price,
        'hourseType': hourse_type,
        'hourseSize': hourse_size,
        'direction': direction,
        'fitment': fitment,
    }


# Browser-like User-Agent sent with every detail-page request; built once
# because it is identical for all requests.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# Download every collected detail page and write one CSV row per listing.
with open('深圳2024年链家二手房信息_1.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    fieldnames = [
        'title', 'area', 'community', 'position', 'total_price',
        'unit_price', 'hourseType', 'hourseSize', 'direction', 'fitment'
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    for listing_url in href_list:
        print(listing_url)
        try:
            # The timeout keeps one stalled connection from hanging the run.
            data = requests.get(url=listing_url, headers=headers, timeout=10).text
            row_data = _parse_listing(BeautifulSoup(data, 'lxml'))
            writer.writerow(row_data)
        except Exception as exc:
            # Best-effort scrape: skip a listing that cannot be fetched or
            # parsed, but report it. Catching Exception (not a bare except)
            # lets Ctrl-C / SystemExit still stop the script.
            print(f'skipped {listing_url}: {exc!r}')

i阿极
- 粉丝: 4w+
最新资源
- BP模型训练算法源代码
- C#运动控制系统:基于雷赛及其他品牌控制卡的多厂商兼容框架
- BP模型预测算法源代码
- 连接器插拔力的ABAQUS CAE仿真分析与实践教程 CAE仿真
- STM32F3平台基于SOGI-PLL锁相环的并网逆变技术详解及应用 详细版
- 安卓远程控制工具,开启开发者模式可以远程控制安卓设备
- 图像处理领域中二维经验模式分解(BEMD)算法的Matlab实现及其应用 - 图像处理
- 基于SpringBoot的智慧药店药品信息管理系统-h640ol82.zip
- Redis持久化机制详解:保障数据安全的双保险策略
- Redis配置文件详解:解锁高性能与安全的终极指南
- Java 匹配文件流特定数据块的方法
- harbor-v2.9.5 arm 版本镜像
- 光伏混合储能VSG讲解:光储一次调频、功率平抑、直流母线电压控制
- Redis Stream 作为消息队列的深度取舍:高性能与有限保障的平衡术
- Java 故障定位 - 高 CPU 占用问题排查指南
- JNI 中 NewStringUTF 字符串的释放问题
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈


