基于Python的Stanford-NLP实体识别

import json
import logging
from collections import defaultdict

from stanfordcorenlp import StanfordCoreNLP


class StanfordNLP:
    """Thin convenience wrapper around a ``StanfordCoreNLP`` client."""

    def __init__(self, host='http://localhost', port=9000):
        """Create the underlying client and the default annotate properties.

        Args:
            host: Host passed to ``StanfordCoreNLP``.
            port: Port passed to ``StanfordCoreNLP``.
        """
        self.nlp = StanfordCoreNLP(
            host,
            port=port,
            timeout=30000,
            quiet=True,
            logging_level=logging.DEBUG,
        )
        # Properties sent with every annotate() request.
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation,sentiment',
            'pipelineLanguage': 'en',
            'outputFormat': 'json',
        }

    def word_tokenize(self, sentence):
        """Return the client's ``word_tokenize`` result for *sentence*."""
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        """Return the client's ``pos_tag`` result for *sentence*."""
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        """Return the client's ``ner`` result for *sentence*."""
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        """Return the client's ``parse`` result for *sentence*."""
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        """Return the client's ``dependency_parse`` result for *sentence*."""
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        """Annotate *sentence* with ``self.props`` and decode the JSON reply."""
        return json.loads(self.nlp.annotate(sentence, properties=self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        """Index token records by their integer ``index`` field.

        Args:
            _tokens: Iterable of mappings that each provide the keys
                ``index``, ``word``, ``lemma``, ``pos`` and ``ner``.

        Returns:
            A ``defaultdict(dict)`` mapping ``int(token['index'])`` to a dict
            holding that token's ``word``, ``lemma``, ``pos`` and ``ner``.
        """
        # defaultdict is imported at the top of the file; it was previously
        # used without an import, so this method raised NameError.
        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner'],
            }
        return tokens


if __name__ == '__main__':
    sNLP = StanfordNLP()
    # Plain (non-raw) string: the previous r'...\'...' literal left a stray
    # backslash before the apostrophe in the text sent to the client.
    text = (
        "China on Wednesday issued a $50-billion list of U.S. goods "
        "including soybeans and small aircraft for possible tariff hikes "
        "in an escalating technology dispute with Washington that "
        "companies worry could set back the global economic recovery."
        "The country's tax agency gave no date for the 25 percent "
        "increase..."
    )
    ANNOTATE = sNLP.annotate(text)
    POS = sNLP.pos(text)
    TOKENS = sNLP.word_tokenize(text)
    NER = sNLP.ner(text)
    PARSE = sNLP.parse(text)
    DEP_PARSE = sNLP.dependency_parse(text)

1条回答

网友

1楼 · 发布于 2024-10-01 22:25:29

这里有个方法可以解决这个问题

请务必下载Stanford CoreNLP 3.9.1和必要的模型JAR

在文件“ner-server.properties”中设置服务器属性：

annotators = tokenize,ssplit,pos,lemma,ner
ner.applyFineGrained = false

使用以下命令启动服务器：

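java -Xmx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000 -serverProperties ner-server.properties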

请确保已安装此Python包：

https://github.com/stanfordnlp/python-stanford-corenlp

运行以下Python代码：

import corenlp

# Connect to an already-running server (start_server=False) and request
# the annotators needed for named-entity recognition.
client = corenlp.CoreNLPClient(
    start_server=False,
    annotators=["tokenize", "ssplit", "pos", "lemma", "ner"],
)
sample_text = "Joe Smith was born in Hawaii."
ann = client.annotate(sample_text)

# Print the words of every entity mention found in the first sentence.
first_sentence = ann.sentence[0]
for mention in first_sentence.mentions:
    start = mention.tokenStartInSentenceInclusive
    end = mention.tokenEndInSentenceExclusive
    print([token.word for token in first_sentence.token[start:end]])

以下是实体提及（EntityMention）中可用的所有字段：

sentenceIndex: 0
tokenStartInSentenceInclusive: 5
tokenEndInSentenceExclusive: 7
ner: "MONEY"
normalizedNER: "$5.0E10"
entityType: "MONEY"

相关问题更多 >

编程相关推荐

热门问题

热门文章