<p>您可以尝试使用以下代码从嵌套的 XML 文件中获取所有数据，不过我认为应该还有更优雅的方法可以实现同样的结果：</p>
<pre><code>import pandas as pd, numpy as np
import xml.etree.ElementTree as ET
def _iter_items(parent):
    """Yield every ``item`` element below *parent*, parents before children.

    An ``item`` may contain ``items`` containers that hold further ``item``
    elements; recursing here handles any nesting depth rather than a fixed
    number of hand-written loop levels.
    """
    for item in parent.findall('item'):
        yield item
        for container in item.findall('items'):
            yield from _iter_items(container)


def _pad(values, length):
    """Return *values* extended with NaN so that it has *length* entries."""
    return values + [np.nan] * (length - len(values))


def xml_to_dataframe(xml_data):
    """Flatten a nested XML document into a forward-filled DataFrame.

    The root element is expected to carry the header fields ``date``,
    ``ticket``, ``value`` and ``notenders``, any number of ``tenderdetail``
    children (each with ``tendertype`` and ``tenderamt``) and any number of
    ``item`` children (each with ``receipeno``, ``price`` and ``qty``, plus
    optional nested ``items`` containers holding more ``item`` elements).

    Parameters
    ----------
    xml_data : str
        The XML document as a string.

    Returns
    -------
    pandas.DataFrame
        Columns ``date, ticket, value, notenders, tendertype, tenderamt,
        receipeno, price, qty``. The row count equals the longest column;
        shorter columns are padded with NaN and then forward-filled.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If *xml_data* is not well-formed XML.
    AttributeError
        If one of the expected child elements is missing.
    ValueError
        If a numeric field does not contain an integer.
    """
    root = ET.fromstring(xml_data)
    tenders = root.findall('tenderdetail')
    items = list(_iter_items(root))

    # Insertion order of this dict defines the column order of the result.
    columns = {
        'date': [root.find('date').text],
        'ticket': [root.find('ticket').text],
        'value': [root.find('value').text],
        'notenders': [int(root.find('notenders').text)],
        'tendertype': [int(t.find('tendertype').text) for t in tenders],
        'tenderamt': [int(t.find('tenderamt').text) for t in tenders],
        'receipeno': [int(i.find('receipeno').text) for i in items],
        'price': [int(i.find('price').text) for i in items],
        'qty': [int(i.find('qty').text) for i in items],
    }

    # Pad to the longest column, whichever it is: there may be more tender
    # details than items, or no items at all.
    n_rows = max(len(values) for values in columns.values())
    frame = pd.DataFrame(
        {name: _pad(values, n_rows) for name, values in columns.items()}
    )

    # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
    return frame.ffill()


# Placeholder: replace with the actual XML document before running.
xml_data = 'your xml data'

# Guarded so that importing this module does not try to parse the placeholder.
if __name__ == '__main__':
    df = xml_to_dataframe(xml_data)
    print(df)
</code></pre>
<p>输出:</p>
<p><a href="https://i.stack.imgur.com/xp9jz.png" rel="nofollow noreferrer"><img src="https://i.stack.imgur.com/xp9jz.png" alt=""/></a></p>
<p>希望这能对你有所帮助。</p>