# 自动调用Python Api进行情感测试 (Automatically call a Python API for sentiment testing)
#
#   • 时间: (date not given)
#   • 来源: 互联网 (source: internet)

# -*- coding: utf-8 -*-

"""
Created on Mon Oct 22 17:47:24 2018

@author: Python_test
"""
import requests
import pandas as pd
import jieba
#import pkuseg
#seg = pkuseg.pkuseg()
#读取需要处理的excel档
f2 = pd.read_excel(‘test_20191210.xlsx’,sheet_name = “test”)
#mat = “”"
#我都唔明呢間學校嘅校方高層個腦諗乜,你點樣投誠都唔夠濠江中學咁根正苗紅㗎啦粵華之秘相信咁多位粵華仔都知道,由今個學期開始,每週一嘅週會都要升「中華人民共和國」嘅國旗。跟住係週會結束之際,係偉大嘅祖国嘅国旗之下,公然宣佈高二級要去天主教嘅教堂念玫瑰經。咁站係偉大祖国嘅立場,係咪犯左「煽動巔覆國家政權罪」呢?一邊係度愛国愛黨,一邊又係度信祖国唔鍾意嘅宗教😢,學校會唔會
#"""

query = “”"
query(
KaTeX parse error: Expected '}', got 'EOF' at end of input: … sentences:sentences
)
}
“”"
#因api需要登陆,所以需要拿最新token的url
url = “http://python-api.access_token”#自设

for i in range(len(f2)):
sen = f2.iloc[i,0]
variables= {
“sentences”:sen
}
data = {
“query”:query,
“variables”: variables
}
try:
result = requests.post(url,json=data)
#print(result)
output = result.json()
# print(result.json())

    a = output['data']['sentiment']['data']['result']
#    b = output['data']['sentiment']['data']['score'] 
#    c = output['data']['sentiment']['data']['proportion']
    d = output['data']['sentiment']['data']['words']
    f2.loc[i,'result'] = str(a)
#    f2.loc[i,'score'] = str(b)
#    f2.loc[i,'proportion'] = str(c)
    f2.loc[i,'words'] = str(d)

except Exception as e:
    print(str(e))
#    fx.loc[i,'runtime'] = float(t)#直接生成浮点型
#    a=[t]
#    a+=a
print(i)

print(output)

#将结果保存为excel

#f2.to_excel(“ers_时事sentiment(0103~0109)情感素材收集_James.xlsx”,index = False)
#seg結合自定義詞庫切詞
#f4 = pd.read_csv(“ers_sentiment_dict.txt”)[‘ciyu’].tolist()
f3 = pd.read_csv(“ers_sentiment_dict.txt”)

l =[]
for i in range(len(f3)):

 test = f3.iloc[i][0].split(" ")[0]
 l.append(test)

f4 = l
#f3 = pd.read_csv(“universal_20191021.txt”)

# 獲取詞庫列表

##seg = pkuseg.pkuseg(user_dict=f3)
#f5 =pd.read_csv("") #
def addDict(words: list):
“”"
jieba.add_word(word, freq=None, tag=None)
freq and tag can be omitted, freq defaults to be a calculated value
that ensures the word can be cut out.
“”"
if words:
for i in keys:
if i:
jieba.add_word(i[0])

keys = pd.read_csv(“universal_20191204.txt”,header = None)
for i in range(len(keys)):
words = keys.loc[i][0].split(" ")[0]
addDict(words)
print(“开始添加:”,i,words)
jieba.load_userdict(“universal_20191204.txt”) ##词频要超过10000000才会优先切词
keys.to_excel(“test.xlsx”)
f1 = pd.read_excel(“test.xlsx”)
f1= f1.rename(columns = {0:“rawdata”})
f2 = pd.DataFrame(f1,columns =[“rawdata”,“Allcutword”,“Lucutword”,“compare”])
for i in range(len(keys)):
try:
sentence = keys.loc[i][0]
Allcutword = jieba.lcut(sentence,cut_all = True)
Lcutword = jieba.lcut(sentence)
f2.loc[i,“Allcutword”] = str(Allcutword)
f2.loc[i,“Lcutword”] = str(Lcutword)
print(“成功切词:{}”.format(i),sentence)
except:
print(“报错:{t}”.format(t =i),sentence)

f2.to_excel(“切词测试.xlsx”)

#def addDict(dict_list):

“”"

jieba.add_word(word, freq=None, tag=None)

freq and tag can be omitted, freq defaults to be a calculated value

that ensures the word can be cut out.

“”"

if dict_list:

for i in dict_list:

if i:

jieba.add_word(i)#jieba.add_word,和suggest_freq都是强制调高詞頻

#addDict(“universal_20191030.txt”)
#f5 = pd.read_csv(“universal_20191030.txt”)
#for i in range(len(f5)):

words= f5.iloc[i][0].split(" ")[0]

jieba.add_word(words)

#jieba.add_word(“universal_20191030.txt”)
#s = “衷心感谢”
#test1 = jieba.cut(s)
#for i in test1:

print(i)

#test2 = jieba.lcut(s)
#f2 = pd.read_excel(“sentiment(924~1023)result训练集James.xlsx”)
#f2[‘cutword’] = f2[“content”].apply(lambda x:seg.cut(x)) # 切詞
f2[‘cutword’] = f2[“contents”].apply(lambda x:jieba.lcut(x)) # 切詞

#xx = jieba.lcut(“​共築中國夢奮進新時代,返工頂你個肺”)
f2[‘newword’] = f2[‘cutword’].apply(lambda x: [i for i in x if i not in f4])

#e = [‘cutword’]
#f2.loc[‘cutword’] = str(e)
f2.to_excel(“testsentiment-result3.xlsx”,index = False)

# --- blog-scrape footer (not code) ---
# M.D
# 发布了0 篇原创文章 · 获赞 0 · 访问量 4 (0 original articles · 0 likes · 4 views)
# 私信 关注 (private message / follow)
#
# 本文链接 http://element-ui.cn/news/show-1036.aspx