开篇说明
阅读本篇文章前建议先将百度AI接口应用创建好,调试清楚,该流程在另一篇文章中介绍
项目介绍
本项目接入两个百度AI接口(情感倾向性分析与机器翻译)对商品评论进行分析,得到结果后抽出置信度与积极消极概率再根据公式判断情感极性
项目架构
架构抽象为接口封装,只需要在Analysis.py中修改相关参数即可实现分析
规定Excel文档名称格式为:
sheet_name = "AmazonInfo_" + country + "_" + kind + ".xlsx"
项目代码
包括数据集一同打包在百度网盘,在pycharm中创建好新项目导入即可
链接:https://pan.baidu.com/s/1j4fvWpNUx1rse0VyDwAxNg?pwd=q7k7
提取码:q7k7
文字介绍如下
Analysis.py
import json
import ApiConnect
import GetData
import Judge_confidence
# Select which Excel data set to analyse.
# Source language of the reviews: "en" is translated from English to Chinese,
# "fr" from French to Chinese, and so on.
lan_code = "en"
# Amazon marketplace (country) to analyse.
country = "us"
# Product category to analyse.
kind = "furniture"
# Total number of rows: GetData.text returns (payload, sheet.max_row), and only
# the row count is used here.
row_length = GetData.text(1, country, lan_code, kind)[1]
# Counters for each sentiment polarity.
mid_num = 0
neg_num = 0
pos_num = 0
# Reviews start at row 2. range() excludes its stop value, so the stop must be
# row_length + 1 or the last populated row is never analysed.
for i in range(2, row_length + 1):
    # Parse the response and pull out "items", which carries the sentiment
    # probabilities and the confidence.
    response_text = ApiConnect.method(i, country, lan_code, kind)
    items = json.loads(response_text).get("items")
    if not items:
        # An error payload has no "items"; report the row and keep going
        # instead of failing on items[0] with an opaque TypeError.
        print("skipped row", i, "- no items in response:", response_text)
        continue
    # Confidence and negative/positive probabilities of the first result.
    conf = items[0].get("confidence")
    neg = items[0].get("negative_prob")
    pos = items[0].get("positive_prob")
    # Decide the polarity from those three values.
    sentiment = Judge_confidence.determine_sentiment(conf, neg, pos)
    if sentiment == "积极":
        pos_num += 1
    elif sentiment == "消极":
        neg_num += 1
    else:
        mid_num += 1
print("pos_num:", pos_num, "neg_num:", neg_num, "mid_num:", mid_num)
ApiConnect.py
这个代码应从百度AI接口的API文档中直接复制后再修改逻辑,由于我更改了它的方法调用参数,所以放出来以供参考
import requests
import json
import GetData
# Credentials sent as client_id / client_secret when requesting an access
# token. Left empty here; fill in the values of your own application.
API_KEY = ""
SECRET_KEY = ""
def method(num_method, country, lan_code, kind):
    """Send one translated review to the sentiment_classify endpoint.

    Args:
        num_method: Row number of the review; forwarded to GetData.text.
        country: Forwarded to GetData.text (part of the workbook file name).
        lan_code: Forwarded to GetData.text (source language for translation).
        kind: Forwarded to GetData.text (part of the workbook file name).

    Returns:
        The raw response body of the POST request, as text.
    """
    # The request goes straight to the aip.baidubce.com host, and charset is
    # set to UTF-8 rather than an arbitrary token.
    url = (
        "https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify"
        "?charset=UTF-8&access_token="
    ) + get_access_token()
    # GetData.text returns (payload_dict, row_count); only the payload is sent.
    text = GetData.text(num_method, country, lan_code, kind)[0]
    payload = json.dumps(text)
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload)
    return response.text
def get_access_token():
    """Request an access token using API_KEY and SECRET_KEY.

    Returns:
        The "access_token" field of the JSON response, converted with str().
        If the field is absent the result is therefore the string "None",
        not the None object.
    """
    # Credentials are posted directly to the aip.baidubce.com OAuth endpoint.
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    return str(requests.post(url, params=params).json().get("access_token"))
GetData.py
import json
import openpyxl
import LanTrans
# num_text is chosen by Analysis, which starts iterating from the second row.
def text(num_text, country, lan_code, kind):
    """Load one cell from the workbook, translate it, and wrap it as a payload.

    Args:
        num_text: Row number of the cell to read.
        country: Used to build the workbook file name.
        lan_code: Source language code handed to LanTrans.trans.
        kind: Used to build the workbook file name.

    Returns:
        A tuple ``({"text": translated_string}, max_row)`` where ``max_row`` is
        the sheet's max_row as read before the cell was accessed.
    """
    workbook_path = "AmazonInfo_" + country + "_" + kind + ".xlsx"
    # Mind the capitalisation of the sheet name.
    worksheet = openpyxl.load_workbook(workbook_path)["Sheet1"]
    # Read the row count before touching any cell, as the original did.
    last_row = worksheet.max_row
    # Column 6 matches the author's workbook; change it to fit your own file.
    raw_value = worksheet.cell(row=num_text, column=6).value
    translation = json.loads(LanTrans.trans(raw_value, lan_code))
    translated = translation["result"]["trans_result"][0]["dst"]
    return {"text": translated}, last_row
Judge_confidence.py
def determine_sentiment(confidence, negative_prob, positive_prob):
    """Map a confidence and two probabilities to a polarity label.

    Args:
        confidence: Confidence of the classification.
        negative_prob: Probability that the text is negative.
        positive_prob: Probability that the text is positive.

    Returns:
        "积极" (positive), "消极" (negative) or "中立/不明显" (neutral / unclear).
    """
    positive_label = "积极"
    negative_label = "消极"
    neutral_label = "中立/不明显"

    # High confidence: the larger probability wins; a tie counts as negative.
    if confidence > 0.7:
        return positive_label if positive_prob > negative_prob else negative_label

    # Anything outside the (0.4, 0.7] band is too uncertain to label.
    if not (0.4 < confidence <= 0.7):
        return neutral_label

    # Medium confidence: require a clear majority above 0.6.
    if positive_prob > 0.6:
        return positive_label
    if negative_prob > 0.6:
        return negative_label
    return neutral_label
LanTrans.py
该接口代码修改逻辑与前面相同
import requests
import json
# Credentials sent as client_id / client_secret when requesting an access
# token. Left empty here; fill in the values of your own application.
API_KEY = ""
SECRET_KEY = ""
def trans(value, lan_code):
    """Translate ``value`` from ``lan_code`` into Chinese via the texttrans API.

    Args:
        value: The text to translate; sent as the "q" field.
        lan_code: Source language code; sent as the "from" field.

    Returns:
        The raw response body of the POST request, as text.
    """
    # The request goes straight to the aip.baidubce.com host so the access
    # token is not routed through any intermediary.
    url = "https://aip.baidubce.com/rpc/2.0/mt/texttrans/v1?access_token=" + get_access_token()
    q = {"from": lan_code,
         "to": "zh",
         "q": value
         }
    payload = json.dumps(q)
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload)
    return response.text
def get_access_token():
    """Request an access token using API_KEY and SECRET_KEY.

    Returns:
        The "access_token" field of the JSON response, converted with str().
        If the field is absent the result is therefore the string "None",
        not the None object.
    """
    # Credentials are posted directly to the aip.baidubce.com OAuth endpoint.
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    return str(requests.post(url, params=params).json().get("access_token"))
项目维护
预计更新:
输出情感极性判断结果到Excel中,含confidence,positive_prob,negative_prob,emo_result