我正在尝试将一些csv
文件中的值分组到XML
文件(groups.xml
)中的容器中。我有以下代码,在一定程度上是有效的,但没有给出我期望的结果:
import os, sys
import glob
import pandas as pd
import xml.etree.cElementTree as ET
def xml_parse():
try:
os.chdir("path/to/files")
filename = [file1 for file1 in glob.glob("*.csv")]
filename = [i.split('.', 1)[0] for i in filename]
#filename = '\n'.join(filename)
os.chdir('..')
output = []
doc = ET.parse("groups.xml").getroot()
for root_ele in doc.findall('Groups'):
tag_ele = root_ele.find('GroupID').text
for name in filename:
if name == tag_ele.lower():
for root_ele1 in root_ele.findall('groupname'):
displayname = root_ele1.find('Name').text
minval = root_ele1.find('min').text
mininc = root_ele1.find('minInc').text
maxvalue = root_ele1.find('max')
maxinclusive = root_ele1.find('maxInc')
lists = []
frame = pd.DataFrame()
fname = "path/to/files" + name + ".csv"
df = pd.read_csv(fname, index_col=None, header=None)
lists.append(df)
frame = pd.concat(lists)
if maxvalue is not None:
maxval = maxvalue.text
if maxinclusive is not None:
maxinc = maxinclusive.text
df['bin'] = pd.cut(frame[1], [float(minval),float(maxval)], right= maxinc, include_lowest= mininc)
out = str(pd.concat([df['bin'], frame[1]], axis=1))
out = out.split("\n")[2:]
for a in out:
print a
else:
df['bin'] = pd.cut(frame[1], [float(minval)], include_lowest= mininc)
out = str(pd.concat([df['bin'], frame[1]], axis=1))
out = out.split("\n")[2:]
for a in out:
print a
break
except AttributeError:
pass
电流输出:
^{pr2}$有一个错误:
Traceback (most recent call last):
File "groups.py", line 69, in <module>
xml_parse()
File "groups.py", line 44, in xml_parse
df['bin'] = pd.cut(frame[1], [float(minval)], include_lowest= mininc)
File "C:\Python27\lib\site-packages\pandas\tools\tile.py", line 113, in cut
include_lowest=include_lowest)
File "C:\Python27\lib\site-packages\pandas\tools\tile.py", line 203, in _bins_to_cuts
include_lowest=include_lowest)
File "C:\Python27\lib\site-packages\pandas\tools\tile.py", line 252, in _format_levels
levels[0] = '[' + levels[0][1:]
IndexError: list index out of range
预期产量:
1 [10, 18] 10.18
2 [18, 35] 25.16
3 [35, 50] 44.48
4 [>= 75] 85.24 #however >=75 can be represented
5 [35, 50] 36.71
6 [>= 75] 77.09
7 [>= 75] 81.88
8 [18, 35] 22.92
9 [35, 50] 44.31
10 [10, 18] 15.79
开始于:
以及
^{pr2}$您可以使用
BeautifulSoup
来提取bin
参数,构造标签并应用pd.cut()
:在这一点上
接下来,我们将展平
list
的list
,去掉重复项并添加一个上限:结果是:
构建标签:
并使用
pd.cut()
:产生:
相关问题 更多 >
编程相关推荐