import urllib.request,os,re,random,time
from urllib.error import URLError, HTTPError
from openpyxl import load_workbook
from openpyxl import Workbook
def mk_dir():
#创建文件夹
if not os.path.exists(newspath):
os.mkdir(newspath)
if not os.path.exists(newspath+'config\\'):
os.mkdir(newspath+'config\\')
if not os.path.exists(newspath + 'config\\adress_dict.xlsx'):
#预置新闻
header = ['网站名称','网站地址','新闻地址规则','新闻头',
'新闻尾','标题头','标题尾','正文头','段落头',
'段落尾']
url_dict = {'zaobao':'https://www.zaobao.com/realtime/china',
'sina':'http://hunan.sina.com.cn',
'fenghuang':'http://news.ifeng.com/',
'baidu':'http://news.baidu.com/guonei',
'163':'http://news.163.com/',
'pengbai':'http://www.thepaper.cn/channel_25950'}
news_addr = {'zaobao':'realtime/china/story'+time.strftime('%Y%m%d',time.localtime())+u'-\d+',
'sina':'http://hunan.sina.com.cn/news/'+u'./'+time.strftime('%Y-%m-%d',time.localtime())+u'/detail-\D+\d{7}\.shtml',
'fenghuang':'http://news.ifeng.com'+u'/./'+time.strftime('%Y%m%d',time.localtime())+'/\d{8}_\d\.shtml',
'baidu':'http://www.thepaper.cn/newsDetail_forward_'+u'\d{7}',
'163':'http://news.163.com/'+time.strftime('%y/%m%d',time.localtime())+u'/\d\d/\.+\.html',
'pengbai':'newsDetail_forward_'+u'\d+'}
news_dict = {'zaobao_begin':'<div class="body-content">','zaobao_end':'<div id="dfp-ad-midarticlespecial-wrapper" class="dfp-tag-wrapper">',
'sina_begin':'<div class="article-header clearfix">','sina_end':'<strong class=\'article_erweima_title\'>',
'fenghuang_begin':'<div class="yc_main wrap">','fenghuang_end':'<span class="ifengLogo">',
'163_begin':'<div class="post_content_main" id="epContentLeft">','163_end':'<p></p>',
'baidu_begin':'<div class="bdwd main clearfix">','baidu_end':'<div class="bottom_word_relation">',
&nb