擅长:Python、MySQL、Java
<pre class="lang-py prettyprint-override"><code>
from io import StringIO

import numpy as np
import pandas as pd
# Inline CSV fixture: one row per (uid, date) observation with a 0/1 target.
data = StringIO("""
uid, date, target
a1, 2019-11-01, 0
a1, 2019-12-01, 0
a1, 2020-01-01, 1
a1, 2020-02-01, 1
a1, 2020-03-01, 0
a2, 2019-11-01, 0
a2, 2019-12-01, 1
a2, 2020-03-01, 0
a2, 2020-04-01, 1
"""
)
# The fixture pads every comma with a space. skipinitialspace strips that
# padding from the header *and* from the values, so `date` holds '2019-11-01'
# rather than ' 2019-11-01'. The rename still trims any trailing blanks that
# might be left on a column name.
df = pd.read_csv(data, skipinitialspace=True).rename(columns=str.strip)
def filter_in_group(df: pd.DataFrame) -> pd.DataFrame:
    """Return the leading rows of one group up to its first peak ``target``.

    Only the ``date`` and ``target`` columns are kept, and the frame is cut
    right after the first row where ``target`` reaches its maximum (for a
    0/1 flag, the first ``1``).

    Args:
        df: Rows of a single group; must contain ``date`` and ``target``
            columns.

    Returns:
        A DataFrame with columns ``date`` and ``target`` holding the rows at
        positions ``0..k`` inclusive, where ``k`` is the position of the
        first maximum of ``target``. An empty input yields an empty frame.

    Note:
        When all ``target`` values are equal (e.g. all zeros) the first row
        is the first maximum, so only that row is returned.
    """
    subset = df.loc[:, ['date', 'target']]
    if subset.empty:
        # argmax raises ValueError on an empty array; nothing to truncate.
        return subset
    # Run argmax on the raw ndarray so the result is always a position (which
    # is what iloc needs), independent of the frame's index labels.
    ind = np.argmax(subset['target'].to_numpy())
    return subset.iloc[:ind + 1]
# Truncate each uid's rows with filter_in_group, then flatten the result.
# With this data the apply output is indexed by (uid, original row label).
# The inner level is dropped by position instead of by the auto-generated
# 'level_1' column name (which only exists when the original index is
# unnamed), and uid is then lifted back into a regular column.
df_filtered = (
    df
    .groupby('uid')
    .apply(filter_in_group)
    .droplevel(1)
    .reset_index()
)
</code></pre>