import requests
from bs4 import BeautifulSoup
def get_baidu_hot(limit=31, timeout=10):
    """Fetch the Baidu real-time hot-search board and return its entries.

    Downloads the board page, parses it with BeautifulSoup's ``html.parser``
    and builds one record per ``.category-wrap_iQLoo`` element.

    Args:
        limit: Maximum number of entries to return. Defaults to 31, the
            count that used to be hard-coded. ``None`` returns every entry
            found on the page.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of dicts, in page order, with the keys:

        * ``'title'`` - text of ``.c-single-text-ellipsis``
        * ``'num'`` - text of ``.hot-index_1Bl1a``
        * ``'image_url'`` - ``href`` of the first ``<a>`` directly under
          ``.hot-desc_1m_jR`` (despite the key name this is a link href),
          or ``None`` when there is no such link
        * ``'content'`` - text of ``.hot-desc_1m_jR``

        Text fields are ``''`` when the corresponding element is missing.
        Fewer than ``limit`` records are returned when the page holds fewer.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # NOTE(review): the host below is a proxy-style URL wrapping
    # top.baidu.com; it is kept exactly as found - confirm it is intended.
    url = 'https://top.baidu.com/board?tab=realtime&sa=fyb_realtime_31065'
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    res.raise_for_status()
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')

    # Slice the actual result set rather than indexing a fixed range, so a
    # page with fewer items than ``limit`` no longer raises IndexError.
    hot_data = []
    for item in soup.select('.category-wrap_iQLoo')[:limit]:
        link = item.select_one('.hot-desc_1m_jR > a')
        hot_data.append({
            'title': _first_text(item, '.c-single-text-ellipsis'),
            'num': _first_text(item, '.hot-index_1Bl1a'),
            'image_url': link.get('href') if link is not None else None,
            'content': _first_text(item, '.hot-desc_1m_jR'),
        })
    return hot_data


def _first_text(node, selector):
    """Return the text of the first match of *selector* under *node*, or ''."""
    match = node.select_one(selector)
    return match.text if match is not None else ''
def main():
    """Print each hot-search entry as ``title - num - content - image_url``."""
    for entry in get_baidu_hot():
        # The '-' items are printed as separate arguments, so print()'s
        # default single-space separator surrounds each dash.
        fields = (
            entry['title'], '-',
            entry['num'], '-',
            entry['content'], '-',
            entry['image_url'],
        )
        print(*fields)
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
一、使用 pip 安装 requests 和 BeautifulSoup 模块,如:pip install requests 和 pip install beautifulsoup4;
二、拿到上述代码,并执行
本次是对 Python 爬虫的初步认知和学习。这段抓取百度热搜的代码是在咨询 ChatGPT 并结合自己搜索的资料后,纯自己手写完成的,不涉及版权问题,仅用于学习。