索引语句和索引

def tabulateHireSeps(df):
    """Flag hires and separations for one person's monthly records.

    Intended to be applied per person via ``groupby('PID').apply(...)``.
    The earliest month in ``df`` is treated as the "previous" month and the
    latest month as the "current" month (a December/January pair is treated
    as December -> January).

    Args:
        df: DataFrame holding the rows of a single person. Must contain the
            columns ``Month``, ``EmpStat``, ``newEmp`` and ``newOcc`` plus
            every column listed in the final selection below.

    Returns:
        A new DataFrame restricted to the reporting columns, with the added
        integer columns ``estJobs``, ``tot_sep`` and ``tot_hir``.
    """
    # Work on a copy: mutating the frame handed over by groupby.apply can
    # produce unexpected results.
    df = df.copy()

    # We need to create the job, hire and separation columns.
    df['estJobs'] = 0
    df['tot_sep'] = 0
    df['tot_hir'] = 0

    # Named masks keep the logical statements below readable.
    employee = df['EmpStat'] == 1
    non_employee = df['EmpStat'] != 1
    new_emp = df['newEmp'] == 2
    new_occ = df['newOcc'] == 2

    max_month = max(df['Month'])
    min_month = min(df['Month'])
    # A (January, December) pair spans a year boundary: December is the
    # previous month and January the current one.
    if max_month == 12 and min_month == 1:
        max_month, min_month = 1, 12

    current = df['Month'] == max_month
    previous = df['Month'] == min_month

    if len(df) > 1:
        if (previous & non_employee).any():
            # Not employed in the previous month, employed in the current
            # month: count a hire.
            df.loc[current & employee, 'tot_hir'] = 1
        elif (previous & employee).any():
            if (current & employee & new_emp).any():
                # Employed in both months, with a new employer now.
                if new_occ.any():
                    # New occupation as well: hire in the current month,
                    # separation in the previous month.
                    df.loc[current, 'tot_hir'] = 1
                    df.loc[previous, 'tot_sep'] = 1
                else:
                    # Same occupation: hire and separation both land on the
                    # current month.
                    df.loc[current, ['tot_hir', 'tot_sep']] = 1
            else:
                # Employed before, not employed now: count a separation.
                df.loc[current & non_employee, 'tot_sep'] = 1
    else:
        # Only one month observed: a new occupation or employer is a hire.
        df.loc[current & employee & (new_occ | new_emp), 'tot_hir'] = 1

    # We keep only those columns we need to move through the process.
    return df[['Year', 'Month', 'OccID', 'ageGrp', 'Sex', 'Race', 'Hisp',
               'educGrp', 'OutWgt', 'LongWgt', 'FinWgt', 'Sample', 'PID',
               'COW', 'estJobs', 'tot_sep', 'tot_hir']]


test_tabData = test_data.groupby('PID').apply(tabulateHireSeps)

# Input fixture: two monthly observations (May and April 2015) for each of
# the persons 1001, 1002 and 1003. Month values are written without leading
# zeros because literals such as 05 are a SyntaxError in Python 3.
data_set = [
    [2015, 5, 9130, 4, 1, 1, 2, 1, 0, 51727030, 36527035, 3, 1, 1001, 1, 1, 1],
    [2015, 4, 9130, 4, 1, 1, 2, 1, 0, 51897025, 36164620, 2, 1, 1001, 2, 1, 7],
    [2015, 5, 9130, 4, 1, 1, 2, 1, 0, 51727030, 36527035, 3, 1, 1002, 2, 1, 7],
    [2015, 4, 9130, 4, 1, 1, 2, 1, 0, 51897025, 36164620, 2, 1, 1002, 1, 1, 1],
    [2015, 5, 9130, 4, 1, 1, 2, 1, 0, 51727030, 36527035, 3, 2, 1003, 1, 2, 1],
    [2015, 4, 9130, 4, 1, 1, 2, 1, 0, 51897025, 36164620, 2, 1, 1003, 1, 1, 1],
]
test_data = pandas.DataFrame(
    data=data_set,
    columns=['Year', 'Month', 'OccID', 'ageGrp', 'Sex', 'Race', 'Hisp',
             'educGrp', 'OutWgt', 'LongWgt', 'FinWgt', 'Sample', 'newEmp',
             'PID', 'EmpStat', 'newOcc', 'COW'],
)

# Expected fixture: one row per input observation, restricted to the
# reporting columns plus estJobs / tot_sep / tot_hir. Month values are
# written without leading zeros because literals such as 05 are a
# SyntaxError in Python 3.
# NOTE(review): the COW values for PID 1002 (both months) and PID 1003
# (April) differ from the COW values in test_data -- confirm which is right.
expected_set = [
    [2015, 5, 9130, 4, 1, 1, 2, 1, 0, 51727030, 36527035, 3, 1001, 1, 0, 0, 1],
    [2015, 4, 9130, 4, 1, 1, 2, 1, 0, 51897025, 36164620, 2, 1001, 7, 0, 0, 0],
    [2015, 5, 9130, 4, 1, 1, 2, 1, 0, 51727030, 36527035, 3, 1002, 1, 0, 1, 0],
    [2015, 4, 9130, 4, 1, 1, 2, 1, 0, 51897025, 36164620, 2, 1002, 7, 0, 0, 0],
    [2015, 5, 9130, 4, 1, 1, 2, 1, 0, 51727030, 36527035, 3, 1003, 1, 0, 0, 1],
    [2015, 4, 9130, 4, 1, 1, 2, 1, 0, 51897025, 36164620, 2, 1003, 7, 0, 1, 0],
]
expected_data = pandas.DataFrame(
    data=expected_set,
    columns=['Year', 'Month', 'OccID', 'ageGrp', 'Sex', 'Race', 'Hisp',
             'educGrp', 'OutWgt', 'LongWgt', 'FinWgt', 'Sample', 'PID',
             'COW', 'estJobs', 'tot_sep', 'tot_hir'],
)

Year Month OccID ageGrp Sex Race Hisp educGrp OutWgt LongWgt \ 2015 5 9130 4 1 1 2 1 0 51727030 2015 4 9130 4 1 1 2 1 0 51897025 2015 5 9130 4 1 1 2 1 0 51727030 2015 4 9130 4 1 1 2 1 0 51897025 2015 5 9130 4 1 1 2 1 0 51727030 2015 4 9130 4 1 1 2 1 0 51897025 FinWgt Sample PID COW estJobs tot_sep tot_hir 36527035 3 1001 1 0 0 1 36164620 2 1001 7 0 0 0 36527035 3 1001 1 0 0 1 36164620 2 1001 7 0 0 0 36527035 3 1001 1 0 0 1 36164620 2 1001 7 0 0 0

1条回答

网友

1楼 · 发布于 2024-09-28 21:54:48

您使用的是 groupby(...).apply，它会对每个分组（这里是每个 PID）调用一次 tabulateHireSeps，而不是对每一行调用。在函数中，您正在重置数据帧。

试试这个：

def tabulateHireSeps(df):
    """Flag hires and separations for one person's monthly records.

    Intended to be applied per person via ``groupby('PID').apply(...)``.
    The earliest month in ``df`` is treated as the "previous" month and the
    latest month as the "current" month (a December/January pair is treated
    as December -> January).

    Args:
        df: DataFrame holding the rows of a single person. Must contain the
            columns ``Month``, ``EmpStat``, ``newEmp`` and ``newOcc``.

    Returns:
        A copy of ``df`` with every input column kept and the integer
        columns ``estJobs``, ``tot_sep`` and ``tot_hir`` added. The input
        frame itself is not modified.
    """
    # Work on a copy: mutating the frame handed over by groupby.apply can
    # produce unexpected results.
    df = df.copy()

    # We need to create the job, hire and separation columns.
    df['estJobs'] = 0
    df['tot_sep'] = 0
    df['tot_hir'] = 0

    # Named masks keep the logical statements below readable.
    employee = df['EmpStat'] == 1
    non_employee = df['EmpStat'] != 1
    new_emp = df['newEmp'] == 2
    new_occ = df['newOcc'] == 2

    max_month = df['Month'].max()
    min_month = df['Month'].min()
    # A (January, December) pair spans a year boundary: December is the
    # previous month and January the current one.
    if max_month == 12 and min_month == 1:
        max_month, min_month = 1, 12

    is_current = df['Month'] == max_month
    is_previous = df['Month'] == min_month

    if len(df) <= 1:
        # Only one month observed: a new occupation or employer is a hire.
        # (A scalar comparison is used here; Series.isin rejects scalars.)
        df.loc[is_current & employee & (new_occ | new_emp), 'tot_hir'] = 1
        return df

    if (is_previous & non_employee).any():
        # Not employed in the previous month, employed in the current month:
        # count a hire.
        df.loc[is_current & employee, 'tot_hir'] = 1
    elif (is_previous & employee).any():
        if (is_current & employee & new_emp).any():
            # Employed in both months, with a new employer now.
            if new_occ.any():
                # New occupation as well: hire in the current month,
                # separation in the previous month.
                df.loc[is_current, 'tot_hir'] = 1
                df.loc[is_previous, 'tot_sep'] = 1
            else:
                # Same occupation: hire and separation both land on the
                # current month.
                df.loc[is_current, ['tot_hir', 'tot_sep']] = 1
        else:
            # Employed before, not employed now: count a separation.
            df.loc[is_current & non_employee, 'tot_sep'] = 1

    # All input columns are returned; callers needing only a subset should
    # select it from the result.
    return df

相关问题更多 >

编程相关推荐

热门问题

热门文章