索引到新列时出现TypeError

import pandas as pd
import numpy as np
import random

k = 5   # lower bound passed to every np.random.uniform call below
N = 10  # number of rows (frames) in the sample data

# Sample data: a frame counter, one "<name>_X" / "<name>_Y" column pair per
# player, and a "Player" column naming whose coordinates are wanted per row.
df = pd.DataFrame({
    'Frame': range(1, N + 1, 1),
    'John Doe_X': np.random.uniform(k, k + 100, size=N),
    'John Doe_Y': np.random.uniform(k, k + 100, size=N),
    'Kevin Lee_X': np.random.uniform(k, k + 100, size=N),
    'Kevin Lee_Y': np.random.uniform(k, k + 100, size=N),
    # NOTE(review): low (k) is greater than high (k - 100) here; NumPy
    # documents uniform() as undefined when high < low -- confirm intent.
    'Liam Smith_X': np.random.uniform(k, k + -100, size=N),
    'Liam Smith_Y': np.random.uniform(k, k + 100, size=N),
    'Henry Conr_X': np.random.uniform(k, k + 100, size=N),
    'Henry Conr_Y': np.random.uniform(k, k + 100, size=N),
    'Dan On_X': np.random.uniform(k, k + 100, size=N),
    'Dan On_Y': np.random.uniform(k, k + 100, size=N),
    # NOTE(review): 'nan' entries are plain strings, not np.nan, and
    # 'Henry john-Conr' has no matching "<name>_X"/"<name>_Y" column above.
    'Player': ['nan', 'John Doe', 'nan', 'Liam Smith', 'Henry john-Conr',
               'John Doe', 'nan', 'nan', 'Kevin Lee', 'Kevin Lee'],
})

# For each row, look up the column named "<Player>_X" / "<Player>_Y".
# row.get returns None when no such column exists, so those rows end up null.
df['X'] = df.apply(lambda row: row.get(row['Player'] + '_X'), axis=1)
df['Y'] = df.apply(lambda row: row.get(row['Player'] + '_Y'), axis=1)

Dan On_X Dan On_Y Henry john-Conr_X Henry john-Conr_Y John Doe_X \ 0 44.768998 84.473350 73.295603 9.432975 45.098229 1 22.078255 83.308328 68.302211 96.222105 45.511418 2 8.958403 32.589984 36.570954 11.388089 76.967684 3 88.710772 39.136471 46.045362 11.080126 73.837371 4 51.959727 95.460223 8.558665 57.413331 43.943096 5 32.703784 50.699055 54.201836 81.972841 35.237445 6 10.533631 53.555805 81.192208 63.985641 88.664571 7 31.100324 76.484070 62.607465 59.920721 36.455336 8 58.747071 16.464910 104.331561 102.078546 28.662882 9 28.593463 15.175946 87.434676 101.343336 63.139973 John Doe_Y Kevin Lee_X Kevin Lee_Y Liam Smith_X Liam Smith_Y \ 0 32.444921 75.818689 56.684665 1.367917 67.193555 1 48.496147 17.040589 13.027425 -16.125411 75.731070 2 103.802791 24.569848 89.321086 -32.929826 28.512003 3 103.249107 64.077139 81.212017 -11.560951 18.367170 4 21.053600 65.848794 42.016315 -17.002813 67.181597 5 38.703567 43.069560 74.596956 -34.210479 91.396546 6 35.942260 22.064457 24.871318 -4.745078 81.645923 7 78.317289 5.980761 74.924581 -72.922074 97.187079 8 53.819660 102.053136 77.254865 -57.583882 24.007903 9 20.381099 31.395646 27.660293 -20.299473 19.465666 Player X Y 0 nan NaN NaN 1 John Doe 45.511418 48.496147 2 nan NaN NaN 3 Liam Smith -11.560951 18.367170 4 Henry john-Conr 8.558665 57.413331 5 John Doe 35.237445 38.703567 6 nan NaN NaN 7 nan NaN NaN 8 Kevin Lee 102.053136 77.254865 9 Kevin Lee 31.395646 27.660293

1条回答

网友

1楼 · 发布于 2024-06-14 12:02:44

`Player` 列中有些值是 NaN，因此建议直接把字符串加到整列上（NaN 的行会保持为 NaN）：

# Sample data: a frame counter, one "<name>_X" / "<name>_Y" column pair per
# player, and a "Player" column whose first entry is a real NaN.
df = pd.DataFrame(data={
    'Frame': range(1, N + 1),
    'John Doe_X': np.random.uniform(k, k + 100, size=N),
    'John Doe_Y': np.random.uniform(k, k + 100, size=N),
    'Kevin Lee_X': np.random.uniform(k, k + 100, size=N),
    'Kevin Lee_Y': np.random.uniform(k, k + 100, size=N),
    'Liam Smith_X': np.random.uniform(k, k - 100, size=N),
    'Liam Smith_Y': np.random.uniform(k, k + 100, size=N),
    'Henry Conr_X': np.random.uniform(k, k + 100, size=N),
    'Henry Conr_Y': np.random.uniform(k, k + 100, size=N),
    'Dan On_X': np.random.uniform(k, k + 100, size=N),
    'Dan On_Y': np.random.uniform(k, k + 100, size=N),
    'Player': [
        np.nan, 'John Doe', 'nan', 'Liam Smith', 'Henry Conr',
        'John Doe', 'nan', 'nan', 'Kevin Lee', 'Kevin Lee',
    ],
})

# Concatenate the suffix onto the whole column at once. In the printed
# result the real NaN in row 0 stays NaN, while the literal 'nan' strings
# become 'nan_X' / 'nan_Y'.
df = df.assign(X=df['Player'] + '_X', Y=df['Player'] + '_Y')
print(df)
   Frame  John Doe_X      ...                  X             Y
0      1   10.240240      ...                NaN           NaN
1      2   91.760964      ...         John Doe_X    John Doe_Y
2      3   76.286604      ...              nan_X         nan_Y
3      4   24.907703      ...       Liam Smith_X  Liam Smith_Y
4      5   57.319837      ...       Henry Conr_X  Henry Conr_Y
5      6   18.380069      ...         John Doe_X    John Doe_Y
6      7    6.361868      ...              nan_X         nan_Y
7      8   24.305204      ...              nan_X         nan_Y
8      9   22.669279      ...        Kevin Lee_X   Kevin Lee_Y
9     10   97.934949      ...        Kevin Lee_X   Kevin Lee_Y

如果一定要用 apply，需要加上 if-else 判断，但它比第一种方案慢：

def _player_value(row, suffix):
    """Return the row's value from the column named ``<Player><suffix>``.

    Rows whose ``Player`` is null yield NaN, so the string concatenation is
    never attempted on a missing value.
    """
    player = row['Player']
    if pd.isnull(player):
        return np.nan
    return row.get(player + suffix)


# Row-wise lookup; `args` forwards the column suffix to the helper.
df['X'] = df.apply(_player_value, axis=1, args=('_X',))
df['Y'] = df.apply(_player_value, axis=1, args=('_Y',))

相关问题更多 >

编程相关推荐

热门问题

热门文章