# Sample data: one whitespace-separated row per (student, month) with a grade.
d = StringIO("""Name Month Grade
Sue Jan D
Sue Feb D
Jason Mar B
Sue Dec D
Jason Jan B
Sue Apr A
Jason Feb C""")
# Raw string: "\s" is a regex escape, not a valid Python string escape.
df = pd.read_csv(d, sep=r'\s+')
# Parsing with only '%b' produces first-of-month dates in the default year
# (1900), which is enough to order the months.
df['date'] = pd.to_datetime(df['Month'], format='%b').dt.normalize()
# set any values greater than June to the previous year.
df['date'] = np.where(
    df['date'].dt.month > 6,
    df['date'] - pd.DateOffset(years=1),
    df['date'],
)
# Order each student's rows chronologically so neighbouring rows can be diffed.
df = df.sort_values(['Name', 'date'])
def month_diff(date):
    """Return a running count of one-month steps in a sorted date Series.

    The first element gets 1. Each following element increments the count
    when it lies (rounded) exactly one month after its predecessor; any other
    gap leaves the count unchanged.

    Args:
        date: pandas Series of datetimes, already sorted ascending.

    Returns:
        Integer Series with the same index as ``date``.
    """
    # Newer pandas releases reject the ambiguous "M" timedelta unit, so divide
    # by an explicit average month length instead (365.2425 / 12 days, i.e.
    # 2629746 seconds -- the value the "M" unit used to stand for).
    one_month = np.timedelta64(2629746, "s")
    months_since_previous = np.round(date.sub(date.shift(1)) / one_month)
    # The first row has no predecessor (NaN), which compares unequal to 1.
    cumlative_months = months_since_previous.eq(1).cumsum() + 1
    return cumlative_months
# Running count of one-month steps per (student, grade) group. transform()
# always returns a result aligned with df's own index, whereas apply() may
# prepend the group keys to the index and break the column assignment.
df['count'] = df.groupby(["Name", "Grade"])["date"].transform(month_diff)
# The helper date column is only needed for the calculation; hide it on display.
print(df.drop(columns='date'))
Name Month Grade count
4 Jason Jan B 1
6 Jason Feb C 1
2 Jason Mar B 1
3 Sue Dec D 1
0 Sue Jan D 2
1 Sue Feb D 3
5 Sue Apr A 1
# Display only the rows whose Name is 'Sue'.
sue_rows = df['Name'].eq('Sue')
print(df.loc[sue_rows])
Name Month Grade date count
3 Sue Dec D 1899-12-01 1
0 Sue Jan D 1900-01-01 2
1 Sue Feb D 1900-02-01 3
5 Sue Apr A 1900-04-01 1
# Numeric month parsed from the abbreviated month name.
df['Month_Nr'] = pd.to_datetime(df['Month'], format='%b').dt.month
names = df['Name'].unique()
# Collect matches in a list and convert once at the end; np.append inside the
# loop would copy the whole array on every hit.
flagged = []
for name in names:
    # Months in which this student received a 'D', ascending. (Named d_months
    # rather than `filter` so the builtin is not shadowed.)
    is_d_row = (df['Name'] == name) & (df['Grade'] == 'D')
    d_months = df.loc[is_d_row, 'Month_Nr'].sort_values()
    # The cumulative sum of the diffs is the distance from the earliest 'D'
    # month; with fewer than two rows the max is NaN and the test is False.
    # NOTE(review): this checks the overall span of 'D' months, not that the
    # months are consecutive -- confirm this matches the intended rule.
    if d_months.diff().cumsum().max() >= 2:
        flagged.append(name)
students = np.array(flagged)
print(students)
你有几种方法可以解决这个问题。第一种是使用我以前的解决方案,但这需要把月份映射为学年中的序号(即9月=1,8月=12),这样就可以用算术运算找出连续的值
下面是将月份转换为日期时间并计算出月份的差值,然后我们可以应用累积和并过滤任何大于3的值
我试图解决你的问题。我确实为您提供了一个解决方案,但就效率/代码执行而言,它可能不是最快的。请参阅下文:
输出如下所示:
如果只需要名称,请使用以下命令
我想到了这个
输出:
相关问题 更多 >
编程相关推荐