def func(s):
m = s['order'].eq('yes')
f = s['month'].where(m).ffill()
return f.mask(m)
df['last_order'] = df.groupby('customer', group_keys=False).apply(func)
month order customer last_order
0 Jan yes 020 NaN
1 Feb yes 041 NaN
2 March no 020 Jan
说明
在{{CD7}}之后的每个组中发生的是下面的,例如考虑组,其中{{CD8>}是^ {< CD9>}
month order
0 jan yes
1 apr no
2 may no
3 jun yes
4 jul no
m = df['order'].eq('yes') # True where `order` is 'yes'
f = df['month'].where(m)#.ffill()
f
0 jan # -> \
1 NaN \ #`jan` and `jun` are visible as
2 NaN / # they were the months with `order` 'yes'
3 jun # -> /
4 NaN
Name: month, dtype: object
# If you chain the above with with `ffill` it would fill the NaN values.
f = df['month'].where(m).ffill()
f
0 jan
1 jan # filled with valid above value i.e Jan
2 jan # filled with valid above value i.e Jan
3 jun
4 jun # filled with valid above value i.e Jun
Name: month, dtype: object
f.mask(m) # works opposite of `pd.Series.where`
0 NaN # ->\
1 jan \ # Marked values `NaN` where order was `yes`.
2 jan /
3 NaN # ->/
4 jun
Name: month, dtype: object
df = pd.DataFrame([{'month': 'Jan', 'order': 'yes', 'customer': '020', 'month_2': 1, 'last_order': None},
{'month': 'Feb', 'order': 'yes', 'customer': '041', 'month_2': 2, 'last_order': None},
{'month': 'April', 'order': 'no', 'customer': '020', 'month_2': 4, 'last_order': 'Jan'},
{'month': 'May', 'order': 'no', 'customer': '020', 'month_2': 5, 'last_order': 'Jan'}])
#Lets convert months to numeric value
dict_months = dict(Jan=1, Feb=2, March=3, April = 4,May=5, June = 6,Jul = 7, Aug = 8, Sep = 9, Oct = 10, Nov =11, Dec = 12)
df['month_2'] = df.month.map(dict_months)
#Insert a blank column for last_order
df['last_order'] = None
#Let's iter throught rows
for idx, row in df.iterrows():
if row['order'] == "yes": continue
#For each row, grab the customer and the current month and searchfor orders in previous months
df_temp = df[(df.customer == row['customer']) & (df.month_2 < row['month_2'] )& (df.order == "yes")]
#If any result found, let pick the last know order and update accordingly the DataFrame
if df_temp.shape[0]>0: df.loc[[idx],'last_order'] = df_temp['month'].iloc[-1]
#remove unecessary column
del df['month_2']
输出
| month | order | customer | last_order |
|: |: | -:|: -|
| Jan | yes | 020 | |
| Feb | yes | 041 | |
| April | no | 020 | Jan |
| May | no | 020 | Jan |
您可以^{} 和^{} 检查值是否为} 和^{} ,然后使用^{} 屏蔽
yes
,然后使用^{说明
在{{CD7}}之后的每个组中发生的是下面的,例如考虑组,其中{{CD8>}是^ {< CD9>}
您可以使用
df.iterrows
执行此操作:输出
相关问题 更多 >
编程相关推荐