# -*- coding: utf-8 -*-
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
import json
#import cx_Oracle
from sqlalchemy import create_engine
import sys
# Scrape the listing pages under the section URL below and print, for each
# page: the page URL, a slice of its <title>, a slice of the base URL, and the
# <a> tags found in the listing table cells.
#
# NOTE(review): the URL literals look like a mirror/proxy rewrite of
# http://www.sac.net.cn/hyfw/hydt/ -- confirm which host is intended. They are
# left untouched here.
url = 'http://www.sac.net.cn/hyfw/hydt/'

# The request headers are identical for every page, so build them once.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
directory = {'User-Agent': user_agent}

for i in range(1):
    # The first page is index.html; subsequent pages are index_<i>.html.
    if i == 0:
        url1 = url + 'index.html'
    else:
        url1 = url + 'index_' + str(i) + '.html'
    print(url1)

    # A timeout keeps the script from hanging forever on a stalled server,
    # and raise_for_status() stops us from parsing an HTTP error page.
    response = requests.get(url1, headers=directory, timeout=30)
    response.raise_for_status()
    # Declare the body as UTF-8 so response.text is decoded as UTF-8.
    response.encoding = 'utf-8'
    bs = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when there is no <title>, and .string is None when
    # the tag does not hold exactly one string; fall back to '' in both cases
    # instead of raising.
    title_tag = bs.find('title')
    title_text = title_tag.string if title_tag is not None else None
    title = title_text[5:12] if title_text else ''
    print(title)
    # NOTE(review): prints the first 21 characters of the base URL; 21 is the
    # length of 'http://www.sac.net.cn', so this was presumably meant to be
    # scheme + host -- verify against the intended URL.
    print(url[0:21])

    table = bs.find_all('table')
    for a in table:
        # Only the cells carrying the listing-entry CSS classes are of interest.
        b = a.find_all('td', attrs={'class': 'pad_le30 hei_000'})
        for c in b:
            # find_all() returns a list-like ResultSet of <a> tags, not a
            # single tag.
            print(c.find_all('a'))
# 使用 get 函数无法获取相对应的标签
# 最新推荐文章于 2025-05-07 21:36:38 发布