按组用另一个数据帧的增长率填充数据帧的缺失值

import pandas as pd
import numpy as np

# Sample data: X has leading gaps in each group, Y holds growth rates in
# percent, G is the group label.
df = pd.DataFrame({'X': [np.nan, np.nan, 6, 6.7, np.nan, 5, 9, 10],
                   'Y': [5.4, 5.7, 5.5, 6.1, 2.1, 1.5, 5.1, 2.1],
                   'G': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B']})


def Fillbackwards(DB, Sname, Growthrate):
    """Back-fill the leading NaNs of column ``Sname`` using growth rates.

    Starting at the first non-missing value of ``DB[Sname]``, each earlier
    row is set to ``next_value / (next_growth_rate / 100 + 1)``, where the
    growth rate is read from ``DB[Growthrate]`` (in percent). NaNs located
    after the first observed value are left untouched.

    The walk is done by row position, so the frame's index labels do not
    matter (a group cut out of a larger frame does not start at 0).

    Args:
        DB: Frame holding both columns; modified in place.
        Sname: Name of the column whose leading NaNs are filled.
        Growthrate: Name of the column with percentage growth rates.

    Returns:
        The same ``DB`` object, so the function can be used with
        ``groupby(...).apply``.
    """
    observed = DB[Sname].notna().to_numpy()
    # Empty frame or nothing observed: there is no anchor to grow back from.
    if not observed.any():
        return DB
    pos = int(observed.argmax())  # position of the first observed value
    if pos == 0:
        return DB  # no leading gap, leave the column (and its dtype) alone

    values = DB[Sname].to_numpy(dtype=float, copy=True)
    rates = DB[Growthrate].to_numpy(dtype=float)
    while pos > 0:
        values[pos - 1] = values[pos] / (rates[pos] / 100 + 1)
        pos -= 1
    DB[Sname] = values
    return DB


# Select the working columns explicitly and disable group keys so the result
# keeps the original row index regardless of pandas version; each group is
# copied because mutating the object handed to ``apply`` is not supported.
df = df.assign(
    X=df.groupby('G', group_keys=False)[['X', 'Y']]
    .apply(lambda grp: Fillbackwards(grp.copy(), 'X', 'Y'))['X']
)

2条回答

网友

1楼 · 编辑于 2024-10-01 00:19:41

您只是想用 Y 中的值来填充 X 中的 NaN 值吗？

import pandas as pd
import numpy as np


# Sample frame: X contains gaps, Y is fully populated, G labels the group.
df = pd.DataFrame(
    dict(
        X=[np.nan, np.nan, 6, 6.7, np.nan, 5, 9, 10],
        Y=[5.4, 5.7, 5.5, 6.1, 2.1, 1.5, 5.1, 2.1],
        G=list('AAAABBBB'),
    )
)

# Keep X where it is present; otherwise take the value from the same row of Y.
df['X'] = df['X'].where(df['X'].notna(), df['Y'])

pandas 本身就提供了处理这种情况的原生方法（`fillna`）。

网友

2楼 · 编辑于 2024-10-01 00:19:41

我已经意识到我的错误：我的函数末尾没有“return”，所以它对每个组都返回 None。

另外，我在索引方面对上面的一些代码进行了调整，使其现在可以正常工作

无论如何，下面这段代码可以按“G”中的每个分组，利用序列“Y”中的增长率，通过“向后推算”来填充序列“X”开头缺失的值：

import pandas as pd
import numpy as np


# Sample frame: X has leading gaps in each group, Y holds growth rates in
# percent, G is the group label.
df = pd.DataFrame(
    dict(
        X=[np.nan, np.nan, 6, 6.7, np.nan, 5, 9, 10],
        Y=[5.4, 5.7, 5.5, 6.1, 2.1, 1.5, 5.1, 2.1],
        G=list('AAAABBBB'),
    )
)


def Fillbackwards(DB, Sname, Growthrate):
    """Back-fill the leading NaNs of column ``Sname`` using growth rates.

    Starting at the first non-missing value of ``DB[Sname]``, each earlier
    row is set to ``next_value / (next_growth_rate / 100 + 1)``, where the
    growth rate is read from ``DB[Growthrate]`` (in percent). NaNs located
    after the first observed value are left untouched.

    The walk is done by row position rather than by index label, so it works
    for any index (a group cut out of a larger frame does not start at 0,
    and labels need not be contiguous integers).

    Args:
        DB: Frame holding both columns; modified in place.
        Sname: Name of the column whose leading NaNs are filled.
        Growthrate: Name of the column with percentage growth rates.

    Returns:
        The same ``DB`` object, so the function can be used with
        ``groupby(...).apply``.
    """
    observed = DB[Sname].notna().to_numpy()
    # Empty frame or nothing observed: there is no anchor to grow back from.
    if not observed.any():
        return DB
    pos = int(observed.argmax())  # position of the first observed value
    if pos == 0:
        return DB  # no leading gap, leave the column (and its dtype) alone

    values = DB[Sname].to_numpy(dtype=float, copy=True)
    rates = DB[Growthrate].to_numpy(dtype=float)
    while pos > 0:
        values[pos - 1] = values[pos] / (rates[pos] / 100 + 1)
        pos -= 1
    DB[Sname] = values
    return DB

# Back-fill X separately inside every group of G.
# The working columns are selected explicitly and group keys are disabled so
# the result keeps the original row index regardless of pandas version; each
# group is copied because mutating the object handed to ``apply`` is not
# supported by pandas. Only the filled X column is written back.
df = df.assign(
    X=df.groupby('G', group_keys=False)[['X', 'Y']]
    .apply(lambda grp: Fillbackwards(grp.copy(), 'X', 'Y'))['X']
)

check = df

相关问题更多 >

编程相关推荐

热门问题

热门文章