需要使用python(Pandas)或任何其他库将XML文件展平为CSV。我有多个子标签下面和一个单一的标题要跨子元素复制。这是我用Python编写的第一个程序,无法成功执行。请告诉我这里的任何方向
-我的输入数据的预期输出。
chName,envText,rptCod,rptNam,membId,membLglNam,rptPrntEffDat,rptPrntRunDat,membClgIdCod,membClgIdNam,clgCurrTypCod,poolId,currTypCod,membChgIdCod,membChgIdNam,acctTypFlexible,prtMgnUnadj,prtMgnReqt,ChgRat,totMgnClgCurr,sumChgMbrPrtgbReq,sumChgMbrClgCurr,sumPoolIdTotal,sumPoolIdClgCurr,sumClgMbrTotMgnClgCurr
DEBSIL,S,PTR001,DD01 Report,POP,ABC LT,08/04/2021,08/04/2021,UVBV,YEZZE,USD,POPXCLIENTECM,USD,POP123,ABC LT123,A1,5656.6,5634.6,1,34543.6,5656.6,5634.6,5656.6,5634.6,700090
DEBSIL,S,PTR001,DD01 Report,POP,ABC LT,08/04/2021,08/04/2021,UVBV,YEZZE,USD,POPXCLIENTNET,NZD,POP676,SEEN TD,A9,8989.9,45656.9,1,1780688.9,8989.9,45656.9,8989.9,45656.9,700090
DEBSIL,S,PTR001,DD01 Report,POP,ABC LT,08/04/2021,08/04/2021,UVBV,YEZZE,USD,POPXSTANDARD,MXM,POP343,ACMI,P1,345.5,4545.5,1,4545.5,345.5,4545.5,345.5,4545.5,700090
输入数据
<PTR001>
<rptHdr>
<chName>DEBSIL</chName>
<envText>S</envText>
<rptCod>PTR001</rptCod>
<rptNam>DD01 Report</rptNam>
<membId>POP</membId>
<membLglNam>ABC LT</membLglNam>
<rptPrntEffDat>2021-04-08</rptPrntEffDat>
<rptPrntRunDat>2021-04-08</rptPrntRunDat>
</rptHdr>
<ptr001Grp>
<ptr001KeyGrp>
<membClgIdCod>UVBV</membClgIdCod>
<membClgIdNam>YEZZE</membClgIdNam>
</ptr001KeyGrp>
<clgCurrTypCod>USD</clgCurrTypCod>
<ptr001Grp1>
<ptr001KeyGrp1>
<poolId>CSBXCLIENTECM</poolId>
<currTypCod>USD</currTypCod>
</ptr001KeyGrp1>
<ptr001Grp3>
<ptr001KeyGrp3>
<membChgIdCod>POP123</membChgIdCod>
<membChgIdNam>ABC LT123</membChgIdNam>
</ptr001KeyGrp3>
<ptr001Rec>
<acctTypFlexible>A1</acctTypFlexible>
<prtMgnUnadj>5656.60</prtMgnUnadj>
<prtMgnReqt>5634.60</prtMgnReqt>
<ChgRat>1.000000</ChgRat>
<totMgnClgCurr>34543.60</totMgnClgCurr>
</ptr001Rec>
<sumChgMbrPrtMgbReq>5656.60</sumChgMbrPrtMgbReq>
<sumChgMbrClgCurr>5634.60</sumChgMbrClgCurr>
</ptr001Grp3>
<sumPoolIdTotal>5656.60</sumPoolIdTotal>
<sumPoolIdClgCurr>5634.60</sumPoolIdClgCurr>
</ptr001Grp1>
<ptr001Grp1>
<ptr001KeyGrp1>
<poolId>POPXCLIENTNET</poolId>
<currTypCod>NZD</currTypCod>
</ptr001KeyGrp1>
<ptr001Grp3>
<ptr001KeyGrp3>
<membChgIdCod>POP676</membChgIdCod>
<membChgIdNam>SEEN TD</membChgIdNam>
</ptr001KeyGrp3>
<ptr001Rec>
<acctTypFlexible>A9</acctTypFlexible>
<prtMgnUnadj>8989.90</prtMgnUnadj>
<prtMgnReqt>45656.90</prtMgnReqt>
<ChgRat>1.000000</ChgRat>
<totMgnClgCurr>1780688.90</totMgnClgCurr>
</ptr001Rec>
<sumChgMbrPrtMgbReq>8989.90</sumChgMbrPrtMgbReq>
<sumChgMbrClgCurr>45656.90</sumChgMbrClgCurr>
</ptr001Grp3>
<sumPoolIdTotal>8989.90</sumPoolIdTotal>
<sumPoolIdClgCurr>45656.90</sumPoolIdClgCurr>
</ptr001Grp1>
<ptr001Grp1>
<ptr001KeyGrp1>
<poolId>POPXSTANDARD</poolId>
<currTypCod>MXM</currTypCod>
</ptr001KeyGrp1>
<ptr001Grp3>
<ptr001KeyGrp3>
<membChgIdCod>POP343</membChgIdCod>
<membChgIdNam>ACMI</membChgIdNam>
</ptr001KeyGrp3>
<ptr001Rec>
<acctTypFlexible>P1</acctTypFlexible>
<prtMgnUnadj>345.50</prtMgnUnadj>
<prtMgnReqt>4545.50</prtMgnReqt>
<ChgRat>1.000000</ChgRat>
<totMgnClgCurr>4545.50</totMgnClgCurr>
</ptr001Rec>
<sumChgMbrPrtMgbReq>345.50</sumChgMbrPrtMgbReq>
<sumChgMbrClgCurr>4545.50</sumChgMbrClgCurr>
</ptr001Grp3>
<sumPoolIdTotal>345.50</sumPoolIdTotal>
<sumPoolIdClgCurr>4545.50</sumPoolIdClgCurr>
</ptr001Grp1>
<sumClgMbrTotMgnClgCurr>700090.00</sumClgMbrTotMgnClgCurr>
</ptr001Grp>
</PTR001>
# Importing the required libraries
import xml.etree.ElementTree as Xet
import pandas as pd
cols = ["poolId", "currTypCod", "membChgIdCod", "membChgIdNam", "acctTypFlexible", "prtMgnReqt", "prtMgnReqt", "ChgRat", "totMgnClgCurr", "sumChgMbrPrtgbReq", "sumChgMbrClgCurr", "sumPoolIdTotal", "sumPoolIdClgCurr"]
rows = []
# Parsing the XML file
xmlparse = Xet.parse('myReport.xml')
root = xmlparse.getroot()
for i in root:
poolId = i.find("poolId").text
currTypCod = i.find("currTypCod").text
membChgIdCod = i.find("membChgIdCod").text
membChgIdNam = i.find("membChgIdNam").text
acctTypFlexible = i.find("acctTypFlexible").text
prtMgnReqt = i.find("prtMgnReqt").text
ChgRat = i.find("ChgRat").text
totMgnClgCurr = i.find("totMgnClgCurr").text
sumChgMbrPrtgbReq = i.find("sumChgMbrPrtgbReq").text
sumChgMbrClgCurr = i.find("sumChgMbrClgCurr").text
sumPoolIdTotal = i.find("sumPoolIdTotal").text
sumPoolIdClgCurr = i.find("sumPoolIdClgCurr").text
rows.append({"poolId": poolId,
"currTypCod": currTypCod,
"membChgIdCod": membChgIdCod,
"membChgIdNam": membChgIdNam,
"acctTypFlexible": acctTypFlexible,
"prtMgnReqt": prtMgnReqt,
"ChgRat": ChgRat,
"totMgnClgCurr": totMgnClgCurr,
"sumChgMbrPrtgbReq": sumChgMbrPrtgbReq,
"sumChgMbrClgCurr": sumChgMbrClgCurr,
"sumPoolIdTotal": sumPoolIdTotal,
"sumPoolIdClgCurr": sumPoolIdClgCurr})
df = pd.DataFrame(rows, columns=cols)
# Writing dataframe to csv
df.to_csv('myReport.csv')
我将把CSV列的排序留给您来解决。在这里搜索SO应该会得到很多帖子
相关问题 更多 >
编程相关推荐