重塑dataframe，使计算链接到2列

data = [['20-10-2020', 'PSG', 'Man U', 1, 2], ['20-10-2020', 'Leipzig','Istanbul',2,0], ['27-10-2020', 'Istanbul','PSG',0,2], ['27-10-2020', 'Man U','Leipzig',5,0]] df = pd.DataFrame(data, columns = ['Date', 'Home', 'Away', 'HG', 'AG']) print(df) Date Home Away HG AG 0 20-10-2020 PSG Man U 1 2 1 20-10-2020 Leipzig Istanbul 2 0 2 27-10-2020 Istanbul PSG 0 2 3 27-10-2020 Man U Leipzig 5 0

df_home = df.reset_index(level=0) columns = { "Date": 'date', "Home": "team", "Away": "opponent", 'HG': 'team_goals', 'AG': 'opponent_goals', } df_home = df_home.rename(columns=columns) df_home['site'] = 'H' df_away = df.reset_index(level=0) columns = { "Date": 'date', "Home": "opponent", "Away": "team", 'HG': 'opponent_goals', 'AG': 'team_goals', } df_away = df_away.rename(columns=columns) df_away['site'] = 'A' df_team = pd.concat([df_home, df_away], ignore_index=True).sort_values(['date']) df_team['team'] = df_team['team'].astype('category') df_team['opponent'] = df_team['opponent'].astype('category') print(df_team) index date team opponent team_goals opponent_goals site 0 0 20-10-2020 PSG Man U 1 2 H 1 1 20-10-2020 Leipzig Istanbul 2 0 H 4 0 20-10-2020 Man U PSG 2 1 A 5 1 20-10-2020 Istanbul Leipzig 0 2 A 2 2 27-10-2020 Istanbul PSG 0 2 H 3 3 27-10-2020 Man U Leipzig 5 0 H 6 2 27-10-2020 PSG Istanbul 2 0 A 7 3 27-10-2020 Leipzig Man U 0 5 A

conditions = [df_team['team_goals'] > df_team['opponent_goals'], df_team['team_goals'] == df_team['opponent_goals']] choices = [3, 1] df_team['pts'] = np.select(conditions, choices, default=0) f = lambda x: x.shift(1).rolling(1).sum() df_team['form_l1_before'] = df_team.groupby(['team'])['pts'].apply(f) df_team['goal_l1_before'] = df_team.groupby(['team'])['team_goals'].apply(f) print(df_team) index date team opponent team_goals opponent_goals site \ 0 0 20-10-2020 PSG Man U 1 2 H 1 1 20-10-2020 Leipzig Istanbul 2 0 H 4 0 20-10-2020 Man U PSG 2 1 A 5 1 20-10-2020 Istanbul Leipzig 0 2 A 2 2 27-10-2020 Istanbul PSG 0 2 H 3 3 27-10-2020 Man U Leipzig 5 0 H 6 2 27-10-2020 PSG Istanbul 2 0 A 7 3 27-10-2020 Leipzig Man U 0 5 A pts form_l1_before goal_l1_before 0 0 NaN NaN 1 3 NaN NaN 4 3 NaN NaN 5 0 NaN NaN 2 0 0.0 0.0 3 3 3.0 2.0 6 3 0.0 1.0 7 0 3.0 2.0

# Ex second game for Istanbul and PSG with stats from the previous game expected_data = [['27-10-2020', 'Istanbul','PSG',0,2,0,0,0,1]] df_target = pd.DataFrame(expected_data, columns = ['date', 'Home', 'Away', 'HG', 'AG', 'Home_form_l1_before', 'Home_goal_l1_before', 'Away_form_l1_before', 'Away_goal_l1_before']) print(df_target) date Home Away HG AG Home_form_l1_before \ 0 27-10-2020 Istanbul PSG 0 2 0 Home_goal_l1_before Away_form_l1_before Away_goal_l1_before 0 0 0 1

1条回答

网友

1楼 · 发布于 2024-09-28 21:10:20

这里有一个方法。我们可以使用site标志重新塑造df_team，然后从H（home）的角度查看所有信息，除了您需要的主客场信息（ha_fields）。后者为两个站点保留，并连接到家庭数据

ha_fields = ["form_l1_before", "goal_l1_before"]

unstacked_team = df_team.set_index(["index", "site", "date"]).unstack("site")

ha_df = unstacked_team[ha_fields]
ha_df.columns = ha_df.columns.to_flat_index().map(lambda t: "_".join([t[1], t[0]]))

df_final = (
    unstacked_team.swaplevel(axis=1)["H"]
    .drop(ha_fields, axis=1)
    .join(ha_df)
    .reset_index("date")
)

print(df_final)

             date      team  opponent  team_goals  opponent_goals  pts  \
index                                                                    
0      20-10-2020       PSG     Man U           1               2    0   
1      20-10-2020   Leipzig  Istanbul           2               0    3   
2      27-10-2020  Istanbul       PSG           0               2    0   
3      27-10-2020     Man U   Leipzig           5               0    3   

       A_form_l1_before  H_form_l1_before  A_goal_l1_before  H_goal_l1_before  
index                                                                          
0                   NaN               NaN               NaN               NaN  
1                   NaN               NaN               NaN               NaN  
2                   0.0               0.0               1.0               0.0  
3                   3.0               3.0               2.0               2.0

相关问题更多 >

编程相关推荐

热门问题

热门文章