python - 对 '数码大冒险tri 泡泡评论' 进行简单的情感分析
发布日期:2021-06-30 19:50:49 浏览次数:2 分类:技术文章

本文共 2693 字,大约阅读时间需要 8 分钟。

爬虫

NLP

"""Lexicon-based sentiment scoring for comments stored in MongoDB.

Each comment is segmented with jieba.  Every positive/negative word yields a
base score, which is then multiplied by the weights of any degree adverbs or
negation words found between the previous sentiment word and the current one.
The per-comment result is the sum, mean and standard deviation of those scores.
"""
import jieba
import numpy as np
import pymongo
from NLP.Config import *

# --- database -------------------------------------------------------------
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]


def get_comments_from_db(count=100):
    """Fetch up to ``count`` comment strings from the configured collection.

    Args:
        count: Maximum number of documents to read.

    Returns:
        A list with the ``'comment'`` field of each document, or ``None`` if
        anything goes wrong while querying (the error is printed).
    """
    try:
        return [msg['comment'] for msg in db[MONGO_TABLE].find().limit(count)]
    except Exception as e:
        # Best-effort boundary: report the problem and let the caller decide.
        print(e.args)
        return None


def get_list_from_file(name=''):
    """Read ``./<name>.txt`` and return its lines, stripped, as a list.

    Args:
        name: File name without the ``.txt`` extension, relative to the
            current working directory.

    Returns:
        One stripped string per line of the file, in file order.
    """
    path = './{}.txt'.format(name)
    with open(path, mode='r', encoding='utf-8') as f:
        return [line.strip() for line in f]


def _split_degree_sections(lines, markers):
    """Slice ``lines`` into the sections delimited by consecutive ``markers``.

    Each marker must appear as its own line; the section named after a marker
    is every line strictly between it and the next marker.  Raises
    ``ValueError`` (from ``list.index``) if a marker is missing.
    """
    sections = {}
    for start_marker, end_marker in zip(markers, markers[1:]):
        begin = lines.index(start_marker) + 1
        end = lines.index(end_marker)
        sections[start_marker] = lines[begin:end]
    return sections


# --- dictionaries ---------------------------------------------------------
stop_words = get_list_from_file(name='stopwords')
refute_words = get_list_from_file(name='refute')
nega_words = get_list_from_file(name='negative')
posi_words = get_list_from_file(name='positive')
degree_words = get_list_from_file(name='degree')

# The degree file is one list split by marker lines; 'last' only terminates
# the final section and has no section of its own.
degree_index_list = ['extreme', 'very', 'more', 'ish', 'last']
degree_dict = _split_degree_sections(degree_words, degree_index_list)


def _build_modifier_weights():
    """Map every modifier word to the multiplier it applies.

    Categories are inserted from lowest to highest priority so that a word
    listed in several categories ends up with the weight of the first match
    in the order extreme > very > more > ish > refute.
    """
    weights = {}
    for section, weight in (
        (refute_words, REFUTE_VALUE),
        (degree_dict['ish'], ISH_VALUE),
        (degree_dict['more'], MORE_VALUE),
        (degree_dict['very'], VERY_VALUE),
        (degree_dict['extreme'], EXTREME_VALUE),
    ):
        weights.update(dict.fromkeys(section, weight))
    return weights


# Lookup structures built once at import time; the public lists above stay
# available unchanged for any other code that uses them.
_STOP_WORD_SET = frozenset(stop_words)
_POSITIVE_WORD_SET = frozenset(posi_words)
_NEGATIVE_WORD_SET = frozenset(nega_words)
_MODIFIER_WEIGHTS = _build_modifier_weights()


def sentiment_value(**kwargs):
    """Score one comment.

    Steps:
        1. Segment the comment (sentiment words, negations, degree words).
        2. Compute the sum, mean and standard deviation of the per-word
           sentiment scores.

    Keyword Args:
        comment: The text to analyse.

    Returns:
        ``{'sum': ..., 'avg': ..., 'std': ...}`` (NumPy scalars), or ``None``
        when the comment is missing/empty or contains no sentiment word.
    """
    comment = kwargs.get('comment')
    if not comment:
        return None

    # Drop stop words and tokens that are empty after stripping; an empty
    # token could otherwise match a blank line loaded from a dictionary file.
    words = []
    for raw in jieba.cut(comment, cut_all=False):
        if raw in _STOP_WORD_SET:
            continue
        token = raw.strip()
        if token:
            words.append(token)
    print(words)

    sent_value_list = []
    window_start = 0  # first token after the previous sentiment word
    # enumerate() gives the position of *this* occurrence; list.index() would
    # return the first occurrence and break on repeated sentiment words.
    for position, word in enumerate(words):
        if word in _POSITIVE_WORD_SET:
            seg_sent_value = POSI_VALUE
        elif word in _NEGATIVE_WORD_SET:
            seg_sent_value = NEGA_VALUE
        else:
            continue
        if seg_sent_value == 0:
            # A zero base score carries no sentiment; leave the window open.
            continue

        # Apply every modifier seen since the previous sentiment word.
        for modifier in words[window_start:position]:
            weight = _MODIFIER_WEIGHTS.get(modifier)
            if weight is not None:
                seg_sent_value *= weight

        window_start = position + 1
        sent_value_list.append(seg_sent_value)

    if not sent_value_list:
        return None

    arr = np.array(sent_value_list)
    print(arr)
    return {
        'sum': arr.sum(),
        'avg': arr.mean(),
        'std': arr.std(),
    }


def run():
    """Print the sentiment statistics of every comment fetched from the DB."""
    # get_comments_from_db() returns None on failure; treat that as "nothing".
    for comment in get_comments_from_db() or []:
        print(sentiment_value(comment=comment))


if __name__ == '__main__':
    run()
链接: 密码: y4xa

转载地址:https://lipenglin.blog.csdn.net/article/details/74380276 如侵犯您的版权,请留言回复原文章的地址,我们会给您删除此文章,给您带来不便请您谅解!

上一篇:python - 制作简单 ‘词云图’
下一篇:python - selenium 抓取‘楚乔传’ 评论

发表评论

最新留言

关注你微信了!
[***.104.42.241]2024年04月29日 01时43分57秒