我正在运行以下代码:
def get_previous_next_returns(portfolio,total_returns):
assets=[]
i=0
for asset in portfolio:
i+=1
try:
for offset in [1,5,15,30,45,60,75,90,120,150,
200,250,500,750,1000,1250,1500]:
print(i,asset.name,offset)
asset['return_stock'] = (asset.Close - asset.Close.shift(1)) / (asset.Close.shift(1))
merged_data = pd.merge(asset, sp_500, on='Date')
total_positive_days=0
total_beating_sp_days=0
total_days = offset
for index in range(0,len(merged_data)):
if index-offset>0:
#for index, row in merged_data.iterrows():
#print(offset, index)
sliced = merged_data.iloc[index - offset : index]
total_positive_days = (sliced.Close_x > sliced.Close_x.shift(1)).sum()
total_beating_sp_days = (sliced.return_stock > sliced.return_sp).sum()
percentage_of_positive_days = float(total_positive_days/total_days)
percentage_of_beating_days = float(total_beating_sp_days/total_days)
asset.loc[index,'Pct_positive_'+str(offset)] = percentage_of_positive_days
asset.loc[index,'Pct_beating_'+str(offset)] = percentage_of_beating_days
# previous period returns
asset['Pct_change_'+str(offset)] = asset['Close'].pct_change(periods = offset)
# next period returns
asset['Pct_change_plus_'+str(offset)] = asset['Close'].pct_change(periods = -offset)
assets.append(asset)
total_returns=total_returns.append(asset)
except IndexError:
print("Index error")
return assets, total_returns
问题是我运行它的数据帧(合并的数据)非常大(超过一百万行),因此代码需要很多小时才能完成。。。有没有办法加快它的速度(即用更有效的代码块重新协调for循环)?你知道吗
将数据帧划分为多个部分,并使用python的多处理模块并行执行它们。 你可以用多处理池或者多处理过程为此目的
相关问题 更多 >
编程相关推荐