Python将XML同级放入字典

# Parse the XML file and keep both the tree and its root on the instance.
f = 'path to file'
tree = ET.parse(f)
root = tree.getroot()
self.tree = tree
self.root = root

# ElementTree.getiterator() was removed in Python 3.9; iter() is its
# replacement. The matching elements are collected once instead of being
# re-collected on every loop iteration.
g_nodes = list(self.tree.iter('G'))
gs = len(g_nodes)

# g maps the 1-based position of every <G> element to a dict of its children.
g = {}
for i, g_node in enumerate(g_nodes):
    d = {}
    for elem in g_node:
        # NOTE(review): an element whose text equals "\n " is treated as a
        # container; the literal's whitespace may have been collapsed when the
        # snippet was pasted - confirm against the real file's indentation.
        if elem.text == "\n " and elem.tag not in ['GP']:
            dd = {}
            for parent in elem:
                if parent.text == "\n ":
                    ddd = {}
                    for child in parent:
                        ddd[child.tag] = child.text
                    dd[parent.tag] = ddd
                else:
                    dd[parent.tag] = parent.text
            d[elem.tag] = dd
        else:
            d[elem.tag] = elem.text
    g[i + 1] = d

# Build GP: every <GP> element found anywhere in the tree, numbered from 1
# and stored under the single top-level key "GP".
# NOTE(review): the original layout was lost; this section does not depend on
# the loop above, so it is placed after it - confirm this matches the intent.
count = 0
gp = {}
for elem in self.tree.iter('GP'):
    d = {}
    for parent in elem:
        if parent.text == "\n ":
            dd = {}
            for child in parent:
                dd[child.tag] = child.text
            d[parent.tag] = dd
        else:
            d[parent.tag] = parent.text
    count += 1
    gp[count] = d
g["GP"] = gp

1条回答

网友

1楼 · 发布于 2024-10-03 11:15:54

code.py：

#!/usr/bin/env python3

import sys
import xml.etree.ElementTree as ET
from pprint import pprint as pp


# Name of the XML input file that main() parses.
FILE_NAME = "data.xml"


def convert_node(node, depth_level=0):
    """Recursively convert an XML element into nested dicts and strings.

    A node without children is converted to its stripped text (an empty
    string when it has no text). Otherwise a dict keyed by child tag is
    returned:

    * a tag used by exactly one child maps directly to that child's
      converted value;
    * a tag shared by several children maps to a sub-dict whose keys are the
      1-based positions ("1", "2", ...) of those children among the siblings
      with the same tag, in document order.

    Text and attributes of a node that has children are ignored.

    :param node: element to convert (anything iterable over its children and
        exposing ``tag`` and ``text``, e.g. ``xml.etree.ElementTree.Element``).
    :param depth_level: depth of ``node`` in the tree (0 for the root). It
        does not influence the result and is only passed along on recursion.
    :return: ``str`` for a leaf node, ``dict`` otherwise.
    """
    child_nodes = list(node)
    if not child_nodes:
        return (node.text or "").strip()

    # Count every tag once up front instead of calling list.count() for each
    # child, which is quadratic in the number of children.
    tag_totals = {}
    for child_node in child_nodes:
        tag_totals[child_node.tag] = tag_totals.get(child_node.tag, 0) + 1

    ret_dict = {}
    for child_node in child_nodes:
        tag = child_node.tag
        converted = convert_node(child_node, depth_level=depth_level + 1)
        if tag_totals[tag] > 1:
            # Number repeated children per tag, so every repeated tag starts
            # at "1" even when several different tags repeat under one node.
            siblings = ret_dict.setdefault(tag, {})
            siblings[str(len(siblings) + 1)] = converted
        else:
            ret_dict[tag] = converted
    return ret_dict


def main():
    """Parse FILE_NAME, convert its root element and pretty-print the result.

    The converted root is expected to be a dict with a single key ("G" for
    the sample input); only the values are printed. A root element without
    children converts to a plain string, which is printed as is instead of
    being indexed character by character.
    """
    tree = ET.parse(FILE_NAME)
    converted_xml = convert_node(tree.getroot())
    print("\nResulting dict(s):\n")
    if not isinstance(converted_xml, dict):
        # Leaf root: convert_node returned its stripped text, not a dict.
        pp(converted_xml)
        return
    for value in converted_xml.values():
        pp(value)


# Script entry point: report the interpreter in use, then run the conversion.
if __name__ == "__main__":
    banner = "Python {:s} on {:s}\n".format(sys.version, sys.platform)
    print(banner)
    main()

注意事项：

FILE_NAME保存输入xml文件的文件名。请根据您的实际文件名自行修改
转换在convert_node函数中完成。它是对每个xml节点调用的递归函数，并返回Python字典（或字符串）。算法：
- 对于每个节点，获取其（直接）子节点的列表。如果节点没有任何子节点（它是一个叶子节点，例如G1或GP1这类节点），则返回其文本
- 如果节点有多个子节点使用同一个标签（如G或GP节点），则这些子节点的内容会各自放在以其序号为键的条目下，并存入当前字典中以该子标签为键的子字典
- 所有具有唯一标记的子级都将把它们的内容放在一个与当前字典下的标记相等的键下
- depth_level没有被使用（您可以删除它），我只是用它以树的形式打印xml节点标签；它表示节点在xml树中的深度（root为0，G为1，G1等和GP为2，GP1等为3，…）
这段代码的设计目标是：
- 通用：注意代码中没有硬编码的键名
- 可伸缩的：如果某个时候xml变得更加复杂（例如，在一个GP节点下会有一个GPD节点，并且该节点也会有子节点-基本上，xml将获得一个更高的深度），代码将不做任何更改地处理它
- python3和python2兼容

输出：

(py_064_03.05.04_test0) e:\Work\Dev\StackOverflow\q045799991>"e:\Work\Dev\VEnvs\py_064_03.05.04_test0\Scripts\python.exe" code.py
Python 3.5.4 (v3.5.4:3f56838, Aug  8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32

Resulting dict(s):

{'1': {'G1': '1',
       'G2': 'some text',
       'G3': 'some text',
       'GP': {'1': {'GP1': '1', 'GP2': 'a', 'GP3': 'a'},
              '2': {'GP1': '2', 'GP2': 'b', 'GP3': 'b'},
              '3': {'GP1': '3', 'GP2': 'c', 'GP3': 'c'}}},
 '2': {'G1': '2',
       'G2': 'some text',
       'G3': 'some text',
       'GP': {'1': {'GP1': '1', 'GP2': 'aa', 'GP3': 'aa'},
              '2': {'GP1': '2', 'GP2': 'bb', 'GP3': 'bb'},
              '3': {'GP1': '3', 'GP2': 'cc', 'GP3': 'cc'}}},
 '3': {'G1': '3',
       'G2': 'some text',
       'G3': 'some text',
       'GP': {'1': {'GP1': '1', 'GP2': 'aaa', 'GP3': 'aaa'},
              '2': {'GP1': '2', 'GP2': 'bbb', 'GP3': 'bbb'},
              '3': {'GP1': '3', 'GP2': 'ccc', 'GP3': 'ccc'}}}}

相关问题更多 >

编程相关推荐

热门问题

热门文章