# Flag the rows whose 'Ctgr' value is missing; these rows separate the groups.
m = df['Ctgr'].isna()
# To treat empty strings as the separator instead, use:
#m = df['Ctgr'].eq('')
# A row starts a new run when its flag differs from the previous row's flag.
run_start = m.ne(m.shift())
# Keep 'Ctgr' only at run starts, then carry that value forward over the run.
first_of_run = df['Ctgr'].where(run_start).ffill()
# Separator rows get NaN; every other row gets the first value of its run.
df['subctgr'] = np.where(m, np.nan, first_of_run)
print(df)
Ctgr subctgr
0 A A
1 B A
2 B A
3 C A
4 NaN NaN
5 D D
6 E D
7 F D
详细信息:
# Show every intermediate step of the computation side by side.
changed = m.ne(m.shift())                 # True where the missing-flag flips
first_vals = df['Ctgr'].where(changed)    # 'Ctgr' kept only on those rows
filled = first_vals.ffill()               # carried forward over each run
print(df.assign(
    m=df['Ctgr'].isna(),
    mask=changed,
    first=first_vals,
    ffill=filled,
    subctgr=np.where(m, np.nan, filled),
))
Ctgr m mask first ffill subctgr
0 A False True A A A
1 B False False NaN A A
2 B False False NaN A A
3 C False False NaN A A
4 NaN True True NaN A NaN
5 D False True D D D
6 E False False NaN D D
7 F False False NaN D D
# Number the groups: the running count of missing 'Ctgr' rows goes up by one
# at every NaN separator, so rows between separators share a counter value.
missing = df['Ctgr'].isna()
df['Counter'] = missing.cumsum()
# Discard the separator rows and renumber the index from 0.
df2 = df.loc[~missing].reset_index(drop=True)
# Join each value with the first value of its group.
def solve(f):
    """Return a copy of *f* with a 'col' column of "<value>|<first value>".

    Each entry of 'col' is the row's 'Ctgr' string, a '|' separator, and the
    'Ctgr' string of the first row of *f*.

    Args:
        f: DataFrame with a 'Ctgr' column of strings (one group when used
            with ``groupby(...).apply``).

    Returns:
        A new DataFrame; *f* itself is left unmodified, because pandas does
        not support functions that mutate the object handed to
        ``GroupBy.apply``.
    """
    ctgr = f['Ctgr']
    if ctgr.empty:
        # No first row to read; produce an empty 'col' instead of raising.
        return f.assign(col=ctgr)
    # Vectorised string concatenation instead of a per-element lambda.
    return f.assign(col=ctgr + '|' + ctgr.iloc[0])
# group_keys=False keeps the original flat row index in the result; without
# it, recent pandas versions (2.0+) prepend the 'Counter' key as an extra
# index level.
df2 = df2.groupby('Counter', group_keys=False).apply(solve)
Ctgr Counter col
0 A 0 A|A
1 B 0 B|A
2 B 0 B|A
3 C 0 C|A
4 D 1 D|D
5 E 1 E|D
6 F 1 F|D
首先通过 `Series.isna` 测试缺失的行,然后通过与 `Series.shift` 移位后的值比较(配合 `Series.where` 作为 mask)获得各个 groups 的第一个值,并通过 `ffill` 创建由先前值填充的新列。最后通过 `numpy.where` 设置新列:
详细信息:
这可以使用 pandas 的 `apply` 方法来完成。我假设如果字符 <= 'C',那么您要放入 'A',否则放入 'D'。
假设数据帧的名称是 `df`,输出如下:
这里有一个方法:
相关问题 更多 >
编程相关推荐