循环函数的优雅方法

2024-10-03 04:28:18 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个分箱(binning)函数和一个映射(mapping)函数,想对一个列表中的每个元素依次应用它们。为此我写了一段简单的代码,现在想把它改写成一个通用函数。

import pandas as pd
import numpy as np

# Load every worksheet the scoring steps below rely on.
# The context manager closes the workbook's file handle as soon as parsing is
# finished; the parsed DataFrames stay usable after the block exits.
with pd.ExcelFile('/python_sample_data.xlsx') as xls:
    df_config = xls.parse('Config')  # scoring configuration, looked up by subjectid
    df_input = xls.parse('Data')  # input data that receives the score columns
    # One scoring-scale sheet per categorical subject.
    df_ss_arts = xls.parse('arts_scale')
    df_ss_music = xls.parse('music_scale')
    df_ss_vocal = xls.parse('vocal_scale')

# Scoring categorical variables


def score_categorical(subjects, df_config, df_input, scales):
    """Add a ``<subject>_score`` column to ``df_input`` for each subject.

    For every subject the matching ``df_config`` row (``subjectid == subject``)
    supplies:

    * ``refscale`` / ``refscore`` -- dotted strings; the part after the first
      dot names the key column and the score column of the subject's scale
      sheet.
    * ``imputation_nulls_miss`` -- score assigned where the input value is
      null.
    * ``imputation_invalid`` -- score assigned where the score is still null
      after mapping and null imputation (i.e. the value was not in the scale).

    Args:
        subjects: iterable of subject names; each must be a column of
            ``df_input`` and a key of ``scales``.
        df_config: configuration DataFrame with the columns listed above.
        df_input: DataFrame to score; modified in place.
        scales: dict mapping subject name to its scoring-scale DataFrame.

    Returns:
        ``df_input`` (the same object), for convenience.

    Raises:
        IndexError: if a subject has no row in ``df_config``.
        KeyError: if a subject is missing from ``scales`` or a referenced
            column does not exist.
    """
    for subject in subjects:
        subject_rows = df_config.loc[df_config.subjectid == subject]

        # Only the column name after the first dot is used for the lookup.
        scale_col = str(subject_rows['refscale'].tolist()[0].split('.')[1])
        score_col = str(subject_rows['refscore'].tolist()[0].split('.')[1])
        mapping = scales[subject].set_index(scale_col)[score_col].to_dict()

        target = subject + '_score'
        df_input[target] = df_input[subject].map(mapping)

        # Imputation - nulls: rows whose raw input value is missing.
        imp_null = subject_rows['imputation_nulls_miss'].tolist()[0]
        df_input.loc[df_input[subject].isnull(), target] = imp_null

        # Imputation - invalids: must run after the null step so that only
        # rows still lacking a score are treated as invalid.
        imp_invalid = subject_rows['imputation_invalid'].tolist()[0]
        df_input.loc[df_input[target].isnull(), target] = imp_invalid
    return df_input


score_categorical(
    ['arts', 'music', 'vocal'],
    df_config,
    df_input,
    {'arts': df_ss_arts, 'music': df_ss_music, 'vocal': df_ss_vocal},
)


# Scoring cont. variables


def score_continuous(subjects, df_config, df_input):
    """Bin each continuous subject column into a ``<subject>_score`` column.

    For every subject the matching ``df_config`` row (``subjectid == subject``)
    supplies ``refscale`` (a Python expression string evaluated to obtain the
    bins passed to ``pd.cut``) and ``refscore`` (an expression string
    evaluated to obtain the bin labels).

    Args:
        subjects: iterable of subject names; each must be a column of
            ``df_input``.
        df_config: configuration DataFrame with ``subjectid``, ``refscale``
            and ``refscore`` columns.
        df_input: DataFrame to score; modified in place.

    Returns:
        ``df_input`` (the same object), for convenience.

    Raises:
        IndexError: if a subject has no row in ``df_config``.
    """
    for subject in subjects:
        subject_rows = df_config.loc[df_config.subjectid == subject]
        bins_expr = subject_rows['refscale'].tolist()[0]
        labels_expr = subject_rows['refscore'].tolist()[0]
        # NOTE(security): eval() executes whatever expression the config sheet
        # contains, so the workbook must come from a trusted source. It is
        # kept because the expressions may reference module-level names such
        # as np; if they are always plain literals, ast.literal_eval is the
        # safer choice.
        df_input[subject + '_score'] = pd.cut(
            df_input[subject], eval(bins_expr), labels=eval(labels_expr)
        )
    return df_input


score_continuous(['maths', 'science', 'social'], df_config, df_input)

Tags: config format df read input parse xls ss