快速复杂字典操作

# Sparse input: only some (MyColumn1, MyColumn2) combinations are present.
df = pd.DataFrame({
    'MyColumn1': ['A', 'A', 'B', 'B'],
    'MyColumn2': ['M', 'N', 'M', 'P'],
    'Value': [1, 1, 1, 1],
})

# dropna=False keeps every (MyColumn1, MyColumn2) combination in the index;
# fill_value=0 turns the combinations absent from `df` into explicit zeros.
# The string 'sum' is used instead of np.sum because newer pandas versions
# emit a FutureWarning when a NumPy callable is passed as aggfunc.
table = pd.pivot_table(
    df,
    values='Value',
    index=['MyColumn1', 'MyColumn2'],
    aggfunc='sum',
    fill_value=0,
    dropna=False,
)

# Dense input: one value for every (MyColumn1, MyColumn2) combination.
df2 = pd.DataFrame({
    'MyColumn1': ['A', 'A', 'A', 'B', 'B', 'B'],
    'MyColumn2': ['M', 'N', 'P', 'M', 'N', 'P'],
    'Value': [5, 10, 15, 20, 25, 30],
})

table2 = pd.pivot_table(
    df2,
    values='Value',
    index=['MyColumn1', 'MyColumn2'],
    aggfunc='sum',
)

# Per-level factors keyed by (label of the zero row, label of the non-zero row).
myDictionary = {
    'MyColumn1': {
        ('A', 'A'): 10, ('A', 'B'): 20,
        ('B', 'A'): 30, ('B', 'B'): 40,
    },
    'MyColumn2': {
        ('M', 'M'): 1, ('M', 'N'): 2, ('M', 'P'): 3,
        ('N', 'M'): 4, ('N', 'N'): 5, ('N', 'P'): 6,
        ('P', 'M'): 7, ('P', 'N'): 8, ('P', 'P'): 9,
    },
}

TABLE
                     Value
MyColumn1 MyColumn2       
A         M              1
          N              1
          P              0
B         M              1
          N              0
          P              1

TABLE2
                     Value
MyColumn1 MyColumn2       
A         M              5
          N             10
          P             15
B         M             20
          N             25
          P             30

(A, P) -> (A, M) = 10 * 7 = 70, 70 * 15 = 1050
(A, P) -> (A, N) = 10 * 8 = 80, 80 * 15 = 1200
(A, P) -> (B, M) = 20 * 7 = 140, 140 * 15 = 2100
(A, P) -> (B, P) = 20 * 9 = 180, 180 * 15 = 2700
(B, N) -> (A, M) = 30 * 4 = 120, 120 * 25 = 3000
(B, N) -> (A, N) = 30 * 5 = 150, 150 * 25 = 3750
(B, N) -> (B, M) = 40 * 4 = 160, 160 * 25 = 4000
(B, N) -> (B, P) = 40 * 6 = 240, 240 * 25 = 6000

                     Value
MyColumn1 MyColumn2       
A         M           4050
          N           4950
          P              0
B         M           6100
          N              0
          P           8700

1条回答

网友

1楼 · 发布于 2024-05-18 09:40:35

不确定这对您的实际数据有多快，但我会这样做：

# Turn each {(zero_row_label, non_zero_row_label): factor} mapping into a 2-D
# table: rows are the first tuple element, columns are the second.
col1_df = pd.Series(myDictionary['MyColumn1']).unstack()
col2_df = pd.Series(myDictionary['MyColumn2']).unstack()

out_df = pd.DataFrame()

# Process every value column of `table`; `table2` must expose the same column
# names and contain every zero row of `table`.
for col in table.columns:
    # Rows holding 0 are the sources whose `table2` value is redistributed
    # onto the rows holding a non-zero value.
    zeros = table[col].eq(0)
    zero_idx = table.index[zeros]
    non_zero_idx = table.index[~zeros]

    # Label-based outer lookup (DataFrame.lookup was removed in pandas 2.0):
    # element [i, j] is the factor from zero row i to non-zero row j for the
    # given index level.  `.loc` raises KeyError if a label has no factor.
    col1 = col1_df.loc[zero_idx.get_level_values(0),
                       non_zero_idx.get_level_values(0)].to_numpy()
    col2 = col2_df.loc[zero_idx.get_level_values(1),
                       non_zero_idx.get_level_values(1)].to_numpy()

    # Combined factor per (zero row, non-zero row) pair.
    prods = col1 * col2

    # Column vector of the `table2` values of the zero rows; it broadcasts
    # across the non-zero rows in the multiplication below.
    values = table2.loc[zero_idx, [col]].to_numpy()

    # Sum the contributions of all zero rows for each non-zero row, then put
    # the zero rows back with a value of 0.
    out_df[col] = (pd.Series((prods * values).sum(0), index=non_zero_idx)
                     .reindex(table.index, fill_value=0)
                  )

输出：

                     Value
MyColumn1 MyColumn2       
A         M           4050
          N           4950
          P              0
B         M           6100
          N              0
          P           8700

相关问题更多 >

编程相关推荐

热门问题

热门文章