在 Python 中进行基于方面的情感分析时，寻求关于否定处理的建议

# Aspect-based sentiment extraction: POS-tag the text with NLTK, merge adjacent
# noun pairs into compound aspects, parse the result with StanfordNLP, and
# cluster every noun feature with the words it is related to in the parse.
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem.wordnet import WordNetLemmatizer
import stanfordnlp

# Resources needed by the tokenizer, the POS tagger and the dependency parser.
stanfordnlp.download('en')
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

txt = "The Sound Quality is not great but the battery life is not bad."
txt = txt.lower()
sentList = nltk.sent_tokenize(txt)

# Collect the (word, tag) pairs of every sentence in one flat list.
taggedList = []
for line in sentList:
    txt_list = nltk.word_tokenize(line)  # tokenize sentence
    taggedList.extend(nltk.pos_tag(txt_list))  # perform POS-Tagging
print(taggedList)

# Merge each pair of adjacent 'NN' tokens into a single word
# (e.g. "sound" + "quality" -> "soundquality").
# flag == 1 means the current token was already consumed as the second half
# of a merged pair and must be skipped.  Iterating over *every* index (rather
# than stopping one short and special-casing the tail) guarantees that the
# final token is kept even when it directly follows a merged pair, and that a
# token is never emitted in two overlapping pairs.
newwordList = []
flag = 0
last_index = len(taggedList) - 1
for i in range(len(taggedList)):
    if flag == 1:
        flag = 0
        continue
    if (i < last_index
            and taggedList[i][1] == 'NN'
            and taggedList[i + 1][1] == 'NN'):
        newwordList.append(taggedList[i][0] + taggedList[i + 1][0])
        flag = 1
    else:
        newwordList.append(taggedList[i][0])
finaltxt = ' '.join(newwordList)
print(finaltxt)

# Re-tag the merged text with stop words removed; these tags drive the
# feature selection further down.
stop_words = set(stopwords.words('english'))
new_txt_list = nltk.word_tokenize(finaltxt)
wordsList = [w for w in new_txt_list if w not in stop_words]
taggedList = nltk.pos_tag(wordsList)

nlp = stanfordnlp.Pipeline()
doc = nlp(finaltxt)

# One entry per dependency edge of the first parsed sentence:
# [text of edge[2], index of edge[0], edge[1]].
# NOTE(review): presumably edges are (governor, relation, dependent) triples;
# confirm against the stanfordnlp documentation.
dep_node = []
for dep_edge in doc.sentences[0].dependencies:
    dep_node.append([dep_edge[2].text, dep_edge[0].index, dep_edge[1]])

# Replace every non-zero 1-based index with the corresponding word; index 0
# is left untouched.
# NOTE(review): this assumes the parser tokenizes finaltxt exactly like
# newwordList -- verify for inputs with unusual punctuation.
for node in dep_node:
    head_index = int(node[1])
    if head_index != 0:
        node[1] = newwordList[head_index - 1]
print(dep_node)

# Keep adjectives, nouns and adverbs as candidate features.
featureList = []
categories = []
totalfeatureList = []
for i in taggedList:
    if i[1] in ('JJ', 'NN', 'JJR', 'NNS', 'RB'):
        featureList.append(list(i))
        totalfeatureList.append(list(i))  # stores all the features for every sentence
        categories.append(i[0])
print(featureList)
print(categories)

# For each feature, gather the word at the other end of every dependency edge
# that touches it through one of the listed relations.
fcluster = []
for i in featureList:
    filist = []
    for j in dep_node:
        if ((j[0] == i[0] or j[1] == i[0])
                and j[2] in ["nsubj", "acl:relcl", "obj", "dobj", "agent",
                             "advmod", "amod", "neg", "prep_of", "acomp",
                             "xcomp", "compound"]):
            if j[0] == i[0]:
                filist.append(j[1])
            else:
                filist.append(j[0])
    fcluster.append([i[0], filist])
print(fcluster)

# Keep only the clusters whose head feature is tagged 'NN'.
finalcluster = []
dic = {}
for i in featureList:
    dic[i[0]] = i[1]
for i in fcluster:
    if dic[i[0]] == 'NN':
        finalcluster.append(i)
print(finalcluster)

1条回答

网友

1楼 · 发布于 2024-10-02 02:34:11

您可以尝试使用 spaCy。下面的匹配模式适用于这种情况：

名词短语
后跟is或are
可选地后跟not
后跟形容词

# Extract (noun phrase, adjective phrase) pairs such as
# ('The product', 'very good') with a token-based spaCy Matcher.
import spacy
from spacy.matcher import Matcher
nlp = spacy.load('en_core_web_sm')

output = []
doc = nlp('The product is very good')
matcher = Matcher(nlp.vocab)
# "is"/"are", optional negation, optional "very", then the adjective.
pattern = [
    {"LOWER": {"IN": ["is", "are"]}},
    {"LOWER": {"IN": ["no", "not"]}, "OP": "?"},
    {"LOWER": "very", "OP": "?"},
    {"POS": "ADJ"},
]
# Pass the patterns as a list: the legacy add(key, None, pattern) call is
# rejected by spaCy v3, while this form is accepted by both v2.2.2+ and v3.
matcher.add("mood", [pattern])
for nc in doc.noun_chunks:
    # Look only at the tokens right after the noun chunk.  The window must be
    # as long as the longest possible match (every optional token present,
    # e.g. "is not very good"), otherwise such phrases can never match.
    window_start = nc.root.right_edge.i + 1
    d = doc[window_start:window_start + len(pattern)]
    matches = matcher(d)
    if matches:
        _, start, end = matches[0]
        # Skip the verb itself; keep the negation/intensifier and adjective.
        output.append((nc.text, d[start+1:end].text))

print(output)
[('The product', 'very good')]

或者，您可以利用依存句法分析器提供的信息来扩展匹配模式，从而加入对形容词短语的定义：

# Variant that uses dependency labels instead of surface forms for the
# modifier and the adjective.  Relies on `nlp`, `doc` and `Matcher` already
# being defined earlier in the session.
output = []
matcher = Matcher(nlp.vocab, validate=True)
# "is"/"are", optional negation, optional adverbial modifier, then the
# adjectival complement.
pattern = [
    {"LOWER": {"IN": ["is", "are"]}},
    {"LOWER": {"IN": ["no", "not"]}, "OP": "?"},
    {"DEP": "advmod", "OP": "?"},
    {"DEP": "acomp"},
]
# Pass the patterns as a list: the legacy add(key, None, pattern) call is
# rejected by spaCy v3, while this form is accepted by both v2.2.2+ and v3.
matcher.add("mood", [pattern])
for nc in doc.noun_chunks:
    # The window after the noun chunk must cover the longest possible match
    # (all optional tokens present), otherwise e.g. "is not very good" is cut
    # off before the adjective and never matches.
    window_start = nc.root.right_edge.i + 1
    d = doc[window_start:window_start + len(pattern)]
    matches = matcher(d)
    if matches:
        _, start, end = matches[0]
        # Skip the verb itself; keep the negation/modifier and the adjective.
        output.append((nc.text, d[start+1:end].text))

print(output)
[('The product', 'very good')]

相关问题更多 >

编程相关推荐

热门问题

热门文章