如何为 scikit-learn 的 DecisionTreeClassifier 准备数据

,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
0,"(16.927, 41.333]", State-gov,"(10806.885, 504990]", Bachelors,"(12, 16]", Never-married, Adm-clerical, Not-in-family, White, Male,"(0, 5000]",,"(30, 50]", United-States, <=50K
1,"(41.333, 65.667]", Self-emp-not-inc,"(10806.885, 504990]", Bachelors,"(12, 16]", Married-civ-spouse, Exec-managerial, Husband, White, Male,,,"(0, 30]", United-States, <=50K
2,"(16.927, 41.333]", Private,"(10806.885, 504990]", HS-grad,"(8, 12]", Divorced, Handlers-cleaners, Not-in-family, White, Male,,,"(30, 50]", United-States, <=50K
3,"(41.333, 65.667]", Private,"(10806.885, 504990]", 11th,"(-1, 8]", Married-civ-spouse, Handlers-cleaners, Husband, Black, Male,,,"(30, 50]", United-States, <=50K
4,"(16.927, 41.333]", Private,"(10806.885, 504990]", Bachelors,"(12, 16]", Married-civ-spouse, Prof-specialty, Wife, Black, Female,,,"(30, 50]", Cuba, <=50K

def catToInt(df):
    """Label-encode every column of ``df``.

    Each column is passed through its own ``LabelEncoder`` so that the
    categorical values are replaced by integer codes.

    Args:
        df: pandas DataFrame whose columns are all treated as categorical.

    Returns:
        A new DataFrame with the same column names as ``df`` where every
        column holds the integer codes produced by ``fit_transform``.
    """
    categorical_list = list(df.columns.values)
    newdf = pd.DataFrame(columns=categorical_list)

    # Converting Categorical Data: one independent encoder per column.
    for x in categorical_list:
        newdf[x] = preprocessing.LabelEncoder().fit_transform(df[x])
    return newdf

print cross_val_score(model, newdf, newdf[:,14], cv=10)
  File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 1787, in __getitem__
    return self._getitem_column(key)
  File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 1794, in _getitem_column
    return self._get_item_cache(key)
  File "C:\Python27\lib\site-packages\pandas\core\generic.py", line 1077, in _get_item_cache
    res = cache.get(item)
TypeError: unhashable type

1条回答

网友

1楼 · 发布于 2024-10-04 15:28:51

这就是我根据上面的评论和更多的搜索得到的解决方案。我得到了预期的结果，但我知道会有更精细的方法来做到这一点。

from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import cross_val_score
import pandas as pd
from sklearn import preprocessing
def main():
    """Cross-validate a decision tree on the discretized CSV and print the scores.

    Reads ``../Data/discretized.csv``, label-encodes it with ``catToInt`` and
    prints the result of a 10-fold ``cross_val_score`` run.
    """
    df, _ = readCSVFile("../Data/discretized.csv")
    newdf, classl = catToInt(df)
    model = DecisionTreeClassifier()
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print(cross_val_score(model, newdf, classl, cv=10))


def readCSVFile(filepath):
    """Load a CSV file into a DataFrame and derive the file's base name.

    Args:
        filepath: path of the CSV file; its first column is used as the
            DataFrame index.

    Returns:
        A tuple ``(df, prefix)`` where ``df`` is the loaded DataFrame and
        ``prefix`` is the file name without its directory part and without
        its final extension.
    """
    # ntpath splits on both "/" and "\\" on every host OS, so Windows-style
    # and POSIX-style paths both yield the bare file name.
    import ntpath

    df = pd.read_csv(filepath, index_col=0)
    prefix, _ = ntpath.splitext(ntpath.basename(filepath))
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print("csv read and converted to dataframe !!")
    return df, prefix

def catToInt(df):
    """Label-encode the feature columns and the ``class`` column of ``df``.

    Missing values are first replaced by the string ``"NA"`` so that they
    are encoded as a category of their own.

    Args:
        df: pandas DataFrame containing a ``class`` column; every other
            column is treated as a categorical feature. The frame is not
            modified.

    Returns:
        A tuple ``(newdf, myclass)``. ``newdf`` is a DataFrame holding one
        integer-coded column per feature (every column except ``class``,
        in the original order); ``myclass`` is the encoded ``class`` column
        as returned by ``LabelEncoder.fit_transform``.

    Raises:
        ValueError: if ``df`` has no ``class`` column.
    """
    # fillna without inplace returns a new frame, so the caller's DataFrame
    # is left untouched.
    filled = df.fillna("NA")

    # Every column except the target is a feature to encode.
    feature_columns = list(filled.columns.values)
    feature_columns.remove('class')

    # Converting categorical data to integer labels, one encoder per column.
    newdf = pd.DataFrame(columns=feature_columns)
    for column in feature_columns:
        newdf[column] = preprocessing.LabelEncoder().fit_transform(filled[column])

    # Encode the target column separately.
    myclass = preprocessing.LabelEncoder().fit_transform(filled['class'])

    return newdf, myclass

# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

除了上面的评论之外，还有一些对我有帮助的链接：

输出：

^{pr2}$

它可能会帮助像我这样的sklearn新手用户。欢迎提供建议/编辑和更好的答案。

相关问题更多 >

编程相关推荐

热门问题

热门文章