将父标签内的 XML 节点值与列表中作为元素的元组序列进行比较

<?xml version="1.0"?> <root> <things count="720"> <tokens> <token> <fruit>mango</fruit> </token> <token> <fruit>apple</fruit> </token> </tokens> <indices> ... </indices> </things> <things count="484"> <tokens> <token> <fruit>mango</fruit> </token> <token> <plant>coconut</plant> </token> </tokens> <indices> ... </indices> </things> <things count="455"> <tokens> <token> <fruit>mango</fruit> </token> <token> <fruit>apple</fruit> </token> <token> <livingthing> coconut <subtoken> <fruit>cocunut</fruit> <fruit>drycocunut</fruit> </subtoken> </livingthing> </token> </tokens> <indices> ... </indices> </things> ... </root>

[(('mango', 'FRUIT'), ('coconut', 'PLANT')), (('mango', 'PLANT'), ('coconut', 'PLANT')), ... (('apple', 'PLANT'), ('orange', 'FRUIT'), ('coconut', 'PLANT')), ... (('mango', 'FRUIT'), ('apple', 'FRUIT'), ('coconut', 'LIVING')), (('apple', 'PLANT'), ('orange', 'LIVING'), ('coconut', 'PLANT')), ... ]

from lxml import etree doc = etree.parse(<path_to_xml_file>) root = doc.getroot() numThings= len(root.getchildren()) for i in range(numThings): toks = root[i] numTokens = len(toks.getchildren()) for j in range(numTokens): tok = toks[j] numToks = len(tok.getchildren()) for k in range(numToks): t = tok[k] numVals = len(t.getchildren()) if t.tag != 'indices': flagMatch = False for tupseq in lstTupSeq: for l in range(len(tupseq)): te = tupseq[l] v = t[l] if te[0] == v.text and te[1].lower() in v.tag: flagMatch = True else: flagMatch = False break; if flagMatch: print(tupseq, i, j, k) break;

1条回答

网友

1楼 · 发布于 2024-09-26 18:03:57

这里有一个解决方案，让我知道它是否有帮助

from lxml import etree

# Parse the XML document and keep a handle on its root element.
doc = etree.parse('scratch.xml')
root = doc.getroot()
# NOTE(review): `things` is not referenced by the code visible here — confirm
# whether it is still needed.
things = {}
# Candidate sequences to search for. Each entry is a tuple of (text, TAG)
# pairs, the same shape func() builds from element text and tag names.
compare_list = [
    (('mango', 'FRUIT'), ('coconut', 'PLANT')),
    (('mango', 'PLANT'), ('coconut', 'PLANT')),
    (('apple', 'PLANT'), ('orange', 'FRUIT'), ('coconut', 'PLANT')),
    (('mango', 'FRUIT'), ('apple', 'FRUIT'), ('coconut', 'LIVING')),
    (('apple', 'PLANT'), ('orange', 'LIVING'), ('coconut', 'PLANT')),
]

def func(tree_root=None, candidates=None):
    """Return the ``count`` of the first child element matching a candidate.

    For every child of the root, the elements found inside each ``token`` of
    its ``tokens`` nodes are collected, in document order, as
    ``(text, TAG)`` pairs. After each pair is added, the sequence collected
    so far is looked up in the candidates, so a candidate also matches when
    it equals a leading part of the child's tokens. Only the direct children
    of a ``token`` are considered; deeper elements are not visited.

    Args:
        tree_root: Root element to search. Defaults to the module-level
            ``root``.
        candidates: Collection of tuples of ``(text, TAG)`` pairs. Defaults
            to the module-level ``compare_list``.

    Returns:
        The ``count`` attribute of the first matching child, or ``None``
        when no child matches.

    Raises:
        KeyError: If the matching child has no ``count`` attribute.
    """
    if tree_root is None:
        tree_root = root
    if candidates is None:
        candidates = compare_list

    # Iterating an element yields its children; getchildren() is deprecated.
    for child in tree_root:
        collected = []
        for node in child:
            if node.tag != 'tokens':
                continue

            for token in node:
                for item in token:
                    # lxml comments / processing instructions expose a
                    # non-string tag; they carry no token data.
                    if not isinstance(item.tag, str):
                        continue

                    # Text may be missing or padded with whitespace when the
                    # element also has children (e.g. "<x> coconut <y/></x>").
                    text = (item.text or '').strip()
                    if item.tag == 'livingthing':
                        label = 'LIVING'
                    else:
                        label = item.tag.upper()
                    collected.append((text, label))

                    if tuple(collected) in candidates:
                        return child.attrib['count']

    return None

# Run the search and print its result (None when func() finds no match).
print(func())

使用您提供的 XML，输出是：484

它打印出找到的第一个匹配项

相关问题更多 >

编程相关推荐

热门问题

热门文章