开始学习python,我正在搜索如何提高已注释的循环START/END的执行速度。您认为多处理是最好的解决方案,还是有其他方法可以提高性能,因为只使用了10%的CPU
def merge_csv(names):
n = 0
while n < len(names[0]):
_, filename["%s" %n] = os.path.split(names[0][n])
temp_data["%s" %n] = pandas.read_csv(open(names[0][n]), sep=',|;')
data["%s" %n] = pandas.DataFrame(temp_data["%s" %n])
data["%s" %n].insert(1, "oround", 1)
data["%s" %n].insert(3, "pround", 1)
for i in range(len(data["%s" %n].mz)):
data["%s" %n].iloc[i,1]=round(data["%s" %n].iloc[i,0],2)
data["%s" %n].iloc[i,3]=round(data["%s" %n].iloc[i,2],1)
liste = list(zip(data["%s" %n]["oround"], data["%s" %n]["pround"]))
listeall.extend(liste)
n = n+1
liste2 = list(set([u for u in listeall]))
merged = pandas.DataFrame(liste2)
colonnes = ["o", "oround", "p", "pround", "td", "u", "counts"]
m = 0
j = 0
for j in range(len(names[0])):
name_data = os.path.basename(names[0][j])
for m in range(len(colonnes)):
colm = colonnes[m] + name_data
merged.insert(len(colonnes)*j+2+m, colm, 0)
"""
START
p = 0
n = 0
x=0
y=0
for n in range(len(names[0])):
for p in range(len(liste2)):
couple = liste2[p]
resultmz = data["%s" %n]["oround"].isin([couple[0]])
listemz = list(resultmz[resultmz == True].index)
resultrt = data["%s" %n]["pround"].isin([couple[1]])
listert = list(resultrt[resultrt == True].index)
if len(listemz)>0 and len(listert)>0:
for x in range(len(listemz)) :
for y in range(len(listert)) :
if listemz[x] == listert[y]:
for k in range(7):
merged.iloc[p,7*n+2+k] = data["%s" %n].iloc[int(listemz[x]),k]
END
"""
csvmerged = results.to_csv('merged.csv')
print(csvmerged)
return merged
我知道如何在Python中启动单线程,但不知道如何“收集”结果
并行化这个循环最简单的方法是什么
for n in range(len(names[0])):
建议后的代码:
def merge_file_CCS(names):
n = 0
while n < len(names[0]):
_, filename["%s" %n] = os.path.split(names[0][n])
temp_data["%s" %n] = pandas.read_csv(open(names[0][n]), sep=',|;')
data["%s" %n] = pandas.DataFrame(temp_data["%s" %n])
data["%s" %n].insert(1, "oround", 1)
data["%s" %n].insert(3, "pround", 1)
for i in range(len(data["%s" %n].mz)):
data["%s" %n].iloc[i,1]=round(data["%s" %n].iloc[i,0],2)
data["%s" %n].iloc[i,3]=round(data["%s" %n].iloc[i,2],1)
liste = list(zip(data["%s" %n]["oround"], data["%s" %n]["pround"]))
listeall.extend(liste)
print("CSV read in --- %s seconds ---" % (time.time() - start_time))
n = n+1
print("All CSV read in --- %s seconds ---" % (time.time() - start_time))
liste2 = list(set([u for u in listeall]))
merged = pandas.DataFrame(liste2)
colonnes = ["o", "oround", "p", "pround", "td", "u", "counts"]
m = 0
j = 0
for j in range(len(names[0])):
name_data = os.path.basename(names[0][j])
for m in range(len(colonnes)):
colm = colonnes[m] + name_data
merged.insert(len(colonnes)*j+2+m, colm, 0)
pools = Pool(num_pool)
results = []
for result in pools.imap(pool_merge, names):
results.append(result)
pools.close()
pools.join()
csvmerged = merged.to_csv('merged.csv')
print(csvmerged)
return merged
def pool_merge(n):
p = 0
x = 0
y = 0
for p in range(len(liste2)):
couple = liste2[p]
resultmz = data["%s" %n]["oround"].isin([couple[0]])
listemz = list(resultmz[resultmz == True].index)
resultrt = data["%s" %n]["pround"].isin([couple[1]])
listert = list(resultrt[resultrt == True].index)
if len(listemz)>0 and len(listert)>0:
for x in range(len(listemz)) :
for y in range(len(listert)) :
if listemz[x] == listert[y]:
for k in range(7):
merged.iloc[p,7*n+2+k] = data["%s" %n].iloc[int(listemz[x]),k]
但它给了我错误
AttributeError: Can't get attribute 'pool_merge' on <module '__main__' (built-in)>
您可以使用
Pool
names
是传递给函数的名称列表merge_csv
。您可以使用pd.concat(results)
来确定最终结果https://docs.python.org/3/library/multiprocessing.html
相关问题 更多 >
编程相关推荐