#!/usr/bin/env python
# Author: Mini
"""Score Chinese TripAdvisor reviews for one topic and store the result in MySQL.

Pipeline (everything runs at import time, as in the original script):

1. Connect to the ``galaxy_macau_ad`` database and load the jieba user
   dictionaries.
2. Load the negation ("deny"), positive and negative word lists and the
   synonym table.
3. For every review in ``combine_chinese.txt``, compute a 1-5 style score from
   the sentiment words found in clauses that mention ``TOPIC_WORD`` and write
   it to ``tripadvisor_chinese.demond_show``.
"""
import os

import jieba
import numpy as n
import pymysql

BASE_DIR = "C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/"
SENTIMENT_DIR = BASE_DIR + "chinese_sentiment_score/"

# Token that marks a clause as talking about the topic being scored.
TOPIC_WORD = "demond_show"

# NOTE(review): credentials were hard-coded. The environment variable takes
# precedence; the literal fallback only keeps existing setups working and
# should be removed once deployments provide the variable.
conn = pymysql.connect(
    host="127.0.0.1",
    user="root",
    passwd=os.environ.get("GALAXY_MACAU_DB_PASSWORD", "wangmianny111"),
    db="galaxy_macau_ad",
    charset='utf8',
)

jieba.load_userdict(BASE_DIR + "galaxy_macau_dict.txt")
jieba.load_userdict(SENTIMENT_DIR + "positive_dic.txt")
jieba.load_userdict(SENTIMENT_DIR + "negative_dic.txt")


def open_dict(Dict='mini', path=r'/Users/apple888/PycharmProjects/Textming/Sent_Dict/Hownet/'):
    """Load ``<path><Dict>.txt`` into a word -> word mapping.

    Each line is split on ASCII commas. Every field except the first becomes
    a key (mapped to itself). After loading, each non-empty word is passed to
    ``jieba.suggest_freq(..., tune=True)`` so the tokenizer keeps it whole.

    Args:
        Dict: File name without the ``.txt`` extension.
        path: Directory prefix; must end with a path separator.

    Returns:
        dict mapping each loaded word to itself (used for membership tests).
    """
    words = {}
    with open(path + '%s.txt' % Dict, 'r', encoding='utf-8') as dictionary:
        for line in dictionary:
            fields = line.strip().split(",")
            # NOTE(review): the first field of every line is skipped, exactly
            # as the original did -- confirm the files really carry a leading
            # non-word column, otherwise one-word lines load nothing.
            for field in fields[1:]:
                words[field] = field

    # The original re-iterated the already exhausted file handle here, so the
    # frequency tuning never ran. Tune the words that were actually loaded.
    try:
        for word in words:
            if word:  # an empty field must not be registered as a word
                jieba.suggest_freq(word, tune=True)
    except MemoryError:
        print("memery run out!")
    return words


def judgeodd(num):
    """Return ``'even'`` when *num* is divisible by two, otherwise ``'odd'``."""
    if (num % 2) == 0:
        return 'even'
    return 'odd'


deny_word = open_dict(Dict='deny', path=SENTIMENT_DIR)
posdict = open_dict(Dict='positive', path=SENTIMENT_DIR)
negdict = open_dict(Dict='negative', path=SENTIMENT_DIR)

# Disabled in the original (degree adverbs were never wired into the score):
# degree_word = open_dict(Dict = '程度级别词语', path= r'C:/Users/Administrator/Desktop/Textming/')
# mostdict = degree_word[degree_word.index('extreme')+1 : degree_word.index('very')]  # weight 4: multiply the sentiment word by 4
# verydict = degree_word[degree_word.index('very')+1 : degree_word.index('more')]     # weight 3
# moredict = degree_word[degree_word.index('more')+1 : degree_word.index('ish')]      # weight 2
# ishdict = degree_word[degree_word.index('ish')+1 : degree_word.index('last')]       # weight 0.5

# Synonym table: every word after the first on a line maps to the first word.
combine_dict = {}
with open(SENTIMENT_DIR + "synonyms.txt", "r", encoding='utf-8') as synonym_file:
    for line in synonym_file:
        seperate_word = line.strip().split(",")
        # NOTE(review): the whole list is handed to suggest_freq, which jieba
        # interprets as "split the concatenation into these segments".
        # Kept as in the original -- confirm this is the intended tuning.
        jieba.suggest_freq(seperate_word, tune=True)
        for synonym in seperate_word[1:]:
            combine_dict[synonym] = seperate_word[0]
print("loading dic and changing freq finished!")


def _split_clauses(dataset):
    """Split *dataset* on the Chinese full stop, then on ASCII commas."""
    clauses = []
    for sentence in dataset.split('。'):
        clauses += sentence.split(',')
    return clauses


def _is_negated(tokens):
    """Return True when *tokens* contains an odd number of negation words."""
    denials = sum(1 for token in tokens if token in deny_word)
    return judgeodd(denials) == 'odd'


def sentiment_score_list(dataset):
    """Compute the topic sentiment score of one review.

    The review is split into clauses and each clause is tokenized with jieba.
    Whenever a token equals ``TOPIC_WORD`` the clause is scanned: a positive
    word is worth 5 (1 if negated), a negative word is worth 1 (5 if
    negated). "Negated" means an odd number of negation words between the
    previous sentiment word and the current one.

    Args:
        dataset: Raw review line (str).

    Returns:
        Mean value per sentiment word rounded to one decimal, or ``0`` when
        no sentiment word was found. Despite the name, this is a single
        number, not a list.
    """
    print(dataset)
    seg_sentence = _split_clauses(dataset)
    print(seg_sentence)

    positive_total = 0  # accumulated value of positive words
    negative_total = 0  # accumulated value of negative words
    s = 0               # number of sentiment words counted

    for sen in seg_sentence:  # traverse each clause of the comment
        segtmp = jieba.lcut(sen, cut_all=False)
        for candidate in segtmp:
            if candidate != TOPIC_WORD:
                print("not talking about this certain topic!")
                continue
            print("the customer is talking about " + candidate)

            # Start of the window searched for negation words. It is reset
            # for every scan; the original kept stale positions when the
            # topic word occurred more than once in a clause.
            window_start = 0
            for position, word in enumerate(segtmp):
                print(word)
                if word in posdict:
                    print("this customer's attitude is positive!")
                    negated = _is_negated(segtmp[window_start:position])
                    positive_total += 1 if negated else 5
                    s += 1
                    window_start = position + 1  # move past this sentiment word
                    print(positive_total)
                elif word in negdict:
                    # Mirror image of the positive branch.
                    negated = _is_negated(segtmp[window_start:position])
                    negative_total += 5 if negated else 1
                    s += 1
                    window_start = position + 1

    print("s" + str(s))
    score_service = 0
    if s != 0:
        score_service = (positive_total + negative_total) / s
        score_service = float('%.1f' % score_service)
    print([score_service])
    return score_service


def sentiment_score(senti_score_list, customer_num=None, customer_id=None):
    """Persist one review's score to ``tripadvisor_chinese.demond_show``.

    Nothing is written when the score equals ``0`` (no sentiment word found).

    Args:
        senti_score_list: Score returned by ``sentiment_score_list``.
        customer_num: Value matched against ``customer_num``; defaults to the
            module-level ``index`` for backward compatibility.
        customer_id: Identifier used in the log line only; defaults to the
            module-level ``ID``.

    Returns:
        None.
    """
    if customer_num is None:
        customer_num = index
    if customer_id is None:
        customer_id = ID

    print(customer_id + ":senti_score_list:" + str(senti_score_list))
    if senti_score_list == 0:
        return

    # Values are bound as strings so the statement sent to MySQL matches the
    # quoted literals the original concatenated by hand.
    with conn.cursor() as cursor:
        cursor.execute(
            "UPDATE tripadvisor_chinese SET demond_show = %s WHERE customer_num = %s",
            (str(senti_score_list), str(customer_num)),
        )
    conn.commit()
    print("sucess!")


# Sample inputs kept from the original (disabled):
# test1='兔子一号 我中意澳门银河,尤其喜欢银河酒店的房间还有服务,服务特别周到,服务特别好。'
# test2='兔子二号 澳门银河的服务一点也不好,很差劲。'
# test3='兔子三号 服务不能说不好,也不是很差。'

# One-off preprocessing that produced combine_chinese.txt (disabled in the
# original): every token found in combine_dict is replaced by its canonical
# synonym and the normalised review is appended to the output file.
# data_combine = ""
# for chinese_data in open(SENTIMENT_DIR + "tripadvisor_chinese.txt", "r", encoding='utf-8'):
#     chinese_comment = chinese_data.strip().split("\n")
#     print(chinese_comment)
#     for comment in chinese_comment:
#         print(comment)
#         combine_sentence = ""
#         for word in jieba.cut(comment):
#             combine_sentence += combine_dict.get(word, word)
#         print(combine_sentence)
#         data_combine += combine_sentence + "\n"
# print(data_combine)
# f_combine = open(SENTIMENT_DIR + "combine_chinese.txt", "a", encoding="utf_8")
# f_combine.write(data_combine)
# print(data_combine)

index = 1  # running customer_num of the review being processed
try:
    with open(SENTIMENT_DIR + "combine_chinese.txt", "r", encoding='utf-8') as corpus:
        for combine_data in corpus:
            seperate_sentice = combine_data.split("\n")
            print(seperate_sentice)
            for item in seperate_sentice:
                if item == "":
                    continue
                # The first tab-separated field is the (optionally quoted) ID.
                ID = item.strip().split('\t')[0].replace('"', '')
                print("ID" + ID)
                # Score and store once; the original evaluated the whole
                # pipeline a second time inside print(), doubling every
                # UPDATE.
                service_score = sentiment_score(
                    sentiment_score_list(item),
                    customer_num=index,
                    customer_id=ID,
                )
                print(service_score)
                print("index:" + str(index))
                index += 1
finally:
    conn.close()