当k值在GridSearchCV的param_grid中指定时，如何打印SelectKBest所选特征的名称

# Grid-search a MinMaxScaler -> SelectKBest -> PCA -> GaussianNB pipeline and
# print the mean F1 score (+/- two standard deviations) of every candidate.
# NOTE(review): `pd`, `enron`, `featureFormat`, `targetFeatureSplit`, `PCA`,
# `MinMaxScaler`, `SelectKBest`, `Pipeline`, `StratifiedShuffleSplit` and
# `GridSearchCV` are not defined in this snippet; they are assumed to be
# imported/defined earlier. The keyword-only StratifiedShuffleSplit call and
# the use of `cv_results_` imply the sklearn.model_selection versions.

# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# lives in sklearn.model_selection (and only needs to be imported once).
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# THE FIRST FEATURE HAS TO BE THE LABEL
featurelist = [
    'poi',
    'exercised_stock_options',
    'expenses',
    'from_messages',
    'from_poi_to_this_person',
    'from_this_person_to_poi',
    'other',
    'restricted_stock',
    'salary',
    'shared_receipt_with_poi',
    'to_messages',
    'total_payments',
    'total_stock_value',
    'ratio_from_poi',
    'ratio_to_poi',
]

# Reuse featurelist for the column selection instead of repeating the names,
# so the selected columns and the feature list cannot drift apart.
# Presumably `enron` is a pandas DataFrame holding these columns - confirm.
enronml = pd.DataFrame(enron[featurelist].copy())
enronml = enronml.to_dict(orient="index")
dataset = enronml

# featureFormat takes the dictionary as the dataset and converts the first
# feature in featurelist into the label.
data = featureFormat(dataset, featurelist, sort_keys=True)
labels, features = targetFeatureSplit(data)

X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.20, random_state=0)

pca = PCA()
gnba = GaussianNB()

# Every step is named so the grid below can address its parameters as
# '<step name>__<parameter>'.
steps = [
    ('scaler', MinMaxScaler()),
    ('best', SelectKBest()),
    ('pca', pca),
    ('gnba', gnba),
]
pipeline = Pipeline(steps)

# One grid per k: PCA runs on the k selected features, so each grid only
# lists n_components values below its own k.
parameters = [
    {'best__k': [3], 'pca__n_components': [1, 2]},
    {'best__k': [4], 'pca__n_components': [1, 2, 3]},
    {'best__k': [5], 'pca__n_components': [1, 2, 3, 4]},
]

cv = StratifiedShuffleSplit(test_size=0.2, random_state=42)
gnbawithpca = GridSearchCV(pipeline, param_grid=parameters, cv=cv, scoring="f1")
gnbawithpca.fit(X_train, y_train)

# Report every parameter combination that was tried.
means = gnbawithpca.cv_results_['mean_test_score']
stds = gnbawithpca.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, gnbawithpca.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

1条回答

网友

1楼 · 发布于 2024-09-29 18:52:19

已解决

定义要在GridSearchCV中使用的管道时，请为每个步骤命名：

# Give each pipeline stage a name; the names are what the parameter grid and
# `named_steps` use to refer to the individual stages.
# `pca` and `gnba` are the estimator instances created earlier in the script.
steps = [
    ('scaler', MinMaxScaler()),
    ('best', SelectKBest()),
    ('pca', pca),
    ('gnba', gnba),
]

pipeline = Pipeline(steps=steps)

这样做有两个原因：

第一，这样您就可以在参数网格中定义参数（需要用步骤名称来标识参数属于哪个步骤）。

第二，这样您就可以通过GridSearchCV对象访问各个步骤的属性（这正好回答了您的问题）。

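# 通过步骤名称 'best' 从最佳估计器中取出已拟合的SelectKBest
best_step = gnbawithpca.best_estimator_.named_steps['best']

# featurelist[0] 是标签 'poi'，因此从 featurelist[1:] 开始与布尔掩码对应
selected_features = [name for name, keep in zip(featurelist[1:], best_step.get_support()) if keep]
print(selected_features)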

相关问题更多 >

编程相关推荐

热门问题

热门文章