<p>下面是一个使用<a href="http://docs.python.org/library/xml.etree.elementtree.html?highlight=iterparse#module-xml.etree.ElementTree" rel="nofollow">ElementTree</a>的解决方案:</p>
<pre><code>from xml.etree import ElementTree as ET
from io import StringIO
from collections import defaultdict
data = '''\
<keywords>
<layer id="wheat">
<layer id="indian">
<keyword>chapati</keyword>
<layer id="mumbai">
<keyword>puri</keyword>
</layer>
</layer>
<keyword>bread</keyword>
<keyword>pita</keyword>
<keyword>narn</keyword>
<keyword>loaf</keyword>
</layer>
<layer id="fruit">
<keyword>apple</keyword>
<keyword>orange</keyword>
<keyword>pear</keyword>
<keyword>lemon</keyword>
</layer>
</keywords>
'''

# Stack of the ids of the layers currently open; 'ROOT' is a sentinel so
# that path[-1] is always valid, even for top-level layers.
path = ['ROOT']
# Maps a layer id to the keywords that are direct children of that layer.
items = defaultdict(list)
f = StringIO(data)

for evt, e in ET.iterparse(f, events=('start', 'end')):
    if evt == 'start':
        # Attributes are already available on 'start', so the layer id can
        # be pushed as soon as the opening tag is seen.
        if e.tag == 'layer':
            path.append(e.attrib['id'])
    elif evt == 'end':
        if e.tag == 'keyword':
            # Element text is only guaranteed to be populated on 'end'.
            # A keyword never contains a layer, so path[-1] is still the
            # layer that directly encloses this keyword.
            items[path[-1]].append(e.text)
        elif e.tag == 'layer':
            # The layer is complete: report its name, its depth, its parent
            # and the keywords collected directly inside it.
            layer = path.pop()
            parent = path[-1]
            print(layer, len(path), parent, items[layer])
</code></pre>
<h3>输出</h3>
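<pre><code>mumbai 3 indian ['puri']
indian 2 wheat ['chapati']
wheat 1 ROOT ['bread', 'pita', 'narn', 'loaf']
fruit 1 ROOT ['apple', 'orange', 'pear', 'lemon']
</code></pre>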