擅长:Python、MySQL、Java
<pre><code>
<pre><code>import pandas as pd
pd.__version__ # u'0.24.2'
from pandas import concat
def handler(grouped):
    """Build running, date-ordered sale statistics for one group of sales.

    Args:
        grouped: DataFrame with at least a 'Date' and a 'Sale' column.

    Returns:
        DataFrame indexed by 'Date' in ascending order with the columns
        'MeanToDate', 'MaxToDate', 'SaleCount', 'Sale' and 'PrevSale'.
        'PrevSale' is NaN on the earliest row because no sale precedes it.
    """
    # Order the sales chronologically so the expanding windows and the
    # shift below walk from the earliest sale to the latest one.
    se = grouped.set_index('Date')['Sale'].sort_index()
    # A single expanding window feeds all three running statistics.
    to_date = se.expanding()
    return concat(
        {
            'MeanToDate': to_date.mean(),   # cumulative mean
            'MaxToDate': to_date.max(),     # cumulative max
            'SaleCount': to_date.count(),   # cumulative count
            'Sale': se,                     # simple copy
            'PrevSale': se.shift(1),        # previous sale
        },
        axis=1,
    )
###########################
from datetime import datetime
# Sample sales: each row is one sale with its basket id, amount and date.
df = pd.DataFrame(
    {
        'Basket': [88, 88, 88, 123, 477, 477, 566],
        'Sale': [15, 30, 16, 90, 77, 57, 90],
        # The date strings are written day/month/year.
        'Date': [
            datetime.strptime(text, '%d/%m/%Y')
            for text in [
                '3/01/2012', '11/02/2012', '16/08/2012', '18/06/2012',
                '19/08/2012', '11/12/2012', '6/07/2012',
            ]
        ],
    }
)

# Run handler on each basket's rows, then move the resulting index levels
# back into ordinary columns.
per_basket = df.groupby('Basket')
new_df = per_basket.apply(handler).reset_index()
</code></pre>
</code></pre>