使用 spaCy 将列表中的文本与 JSON 属性进行匹配

[ { "id": "1", "task_id": "1", "team": "Top", "message": "Failure indicated something else [gdfgdfgg]" }, { "id": "2", "task_id": "2", "team": "Ten", "message": "Internal server error 500 something else [dasdasdasdasdas]" } ]

import json
from spacy.lang.en import English
from spacy.matcher import PhraseMatcher

# Phrases to search for inside each record's "message" text.
message_list = ['Failure indicated', 'Internal server error 500']


def matching_data(data):
    """Print every phrase from ``message_list`` found in the records' messages.

    The phrases in ``message_list`` are registered as PhraseMatcher patterns
    and each record's ``"message"`` text is searched for them. Matching uses
    the ``LOWER`` token attribute, i.e. lowercase token text.

    Args:
        data: Iterable of mappings, each with a ``"message"`` string.

    Returns:
        None. Results are printed; "No matches found" is printed when no
        message contains any of the phrases.

    Raises:
        KeyError: If a record has no ``"message"`` key.
    """
    nlp = English()
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

    # The patterns must be the phrases we are looking FOR. Building them from
    # the JSON messages (as before) inverts the search: a long message can
    # never be found inside a short phrase.
    patterns = [nlp.make_doc(phrase) for phrase in message_list]
    # NOTE(review): legacy ``add(key, on_match, *docs)`` call form kept from
    # the original; newer spaCy documents ``matcher.add(key, patterns)`` --
    # confirm against the installed version.
    matcher.add("Messages", None, *patterns)

    found = False
    for record in data:
        doc = nlp(record["message"])
        for match_id, start, end in matcher(doc):
            found = True
            print("Message matched based on lowercase token text:", doc[start:end])

    # Decide "no matches" from the matcher's actual results, not from an
    # exact string-equality test between phrases and whole messages.
    if not found:
        print("No matches found")


# Context manager so the file handle is closed deterministically.
with open("messages.json") as f:
    matching_data(json.load(f))

1条回答

网友

1楼 · 发布于 2024-06-23 19:00:59

请查看 spaCy 文档中关于如何向短语匹配器（PhraseMatcher）添加模式的说明。首先，将 message_list 中的短语作为模式添加到短语匹配器中，然后在从 JSON 文件提取的消息列表中查找这些模式。

import json
from spacy.lang.en import English
from spacy.matcher import PhraseMatcher

# Load the JSON records; only their "message" text is needed afterwards.
with open('messages.json') as f:
    data = json.load(f)
    extract_data = [record["message"] for record in data]

# Language object used for tokenizing, and a matcher that compares on the
# LOWER token attribute (lowercase token text).
nlp = English()
matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

# The phrases to search for are the patterns; the JSON messages are the
# texts being searched.
message_list = ['Failure indicated', 'Internal server error 500']
patterns = list(map(nlp.make_doc, message_list))
matcher.add("MessageList", None, *patterns)

# Run the matcher over each message and report every matched span.
for doc in map(nlp, extract_data):
    for match_id, start, end in matcher(doc):
        print("Matched based on lowercase token text:", doc[start:end])

相关问题更多 >

编程相关推荐

热门问题

热门文章