GridSearchCV 中的自定义评分函数：访问未缩放的特征，而不是模型中使用的（已缩放）特征

# building pipelines
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

num_pipeline = Pipeline([
    ('std_scaler', StandardScaler()),
], verbose=True)

# NOTE(review): newer scikit-learn releases renamed `sparse=` to
# `sparse_output=`; kept as-is to match the version this script targets.
cat_pipeline = Pipeline([
    ('one_hot_enc', OneHotEncoder(sparse=False, handle_unknown='ignore')),
], verbose=True)

full_pipeline = ColumnTransformer([
    ("num", num_pipeline, df_num_reg_attributes),
    ("cat", cat_pipeline, df_cat_attributes),
])

# fitting pipelines
X_train_prepared_reg = full_pipeline.fit_transform(X_res_df)

# Column names of the prepared matrix: numeric columns first (same order the
# scaler saw them), then the one-hot columns.
# NOTE(review): newer scikit-learn releases replace `get_feature_names()` with
# `get_feature_names_out()`; kept as-is to match the targeted version.
listColPrepared = np.concatenate((
    df_num_reg_attributes,
    full_pipeline.named_transformers_['cat'].named_steps['one_hot_enc'].get_feature_names(),
))
scalerX_train = full_pipeline.named_transformers_['num'].named_steps['std_scaler']
X_test_prepared_reg = full_pipeline.transform(X_test)
y_train = y_balanced


# scorer
def my_scorer(clf, X, y_true):
    """Score a fitted classifier on one CV fold using the unscaled ``Margin``.

    Uses the ``scorer(estimator, X, y)`` signature, so it can be passed
    directly as ``scoring=`` to ``GridSearchCV``.

    Args:
        clf: fitted estimator exposing ``predict``.
        X: prepared (scaled / one-hot encoded) feature matrix of the fold,
            with columns ordered like ``listColPrepared``.
        y_true: true labels of the fold.

    Returns:
        The ``revenue`` value computed by the elided part of the scorer.
    """
    DCWorkCost = 5.00
    OPWorkCost = 2.50
    mergedDataset = pd.DataFrame(
        data=np.asarray(X),
        index=np.arange(0, len(X)),
        columns=listColPrepared,
    )

    # The scaler was fitted on ALL numeric columns, so calling
    # `inverse_transform` on the single 'Margin' column cannot work. Undo the
    # standardisation for that one column with its own statistics instead.
    # Assumes 'Margin' is one of `df_num_reg_attributes`, so its position in
    # `listColPrepared` equals its position inside the scaler -- TODO confirm.
    margin_pos = list(listColPrepared).index('Margin')
    mergedDataset['Margin'] = (
        mergedDataset['Margin'] * scalerX_train.scale_[margin_pos]
        + scalerX_train.mean_[margin_pos]
    )

    # Assign positionally: a pandas `y_true` keeps its original row labels,
    # which would otherwise be aligned against the fresh 0..n-1 index.
    mergedDataset['True'] = np.asarray(y_true)
    mergedDataset['Pred'] = clf.predict(X)
    # rest of the scorer.........
    return revenue


# grid search
# `tol=None` disables the early-stopping criterion (what `tol=-np.infty` was
# used for); `np.infty` no longer exists in NumPy 2.x.
sgd_clf_cv = SGDClassifier(max_iter=5, tol=None, random_state=42)
parameters = {
    'class_weight': (
        {0: .1, 1: .9},
        {0: .2, 1: .8},
        {0: .3, 1: .7},
        {0: .25, 1: .75},
        {0: .15, 1: .85},
        {0: .35, 1: .65},
        {0: .4, 1: .6},
    ),
}
grid = GridSearchCV(estimator=sgd_clf_cv, param_grid=parameters, scoring=my_scorer, verbose=10)
grid.fit(X_train_prepared_reg, y_train)
grid.best_estimator_

1条回答

网友

1楼 · 发布于 2024-09-30 16:27:43

要让自定义评分函数还能访问另一个常量对象，需要两个步骤。

您的自定义评分函数需要传递给 `make_scorer`。评分函数的签名需要是 def f(y_true, y_predicted)
您的评分函数需要第三个命名参数，可以通过它传入其他对象。

在您的情况下，代码应该类似于

def my_scorer(y_true, y_pred, scaler=None, features=None):
    """Revenue-style metric that needs the unscaled ``Margin`` column.

    A metric wrapped by ``make_scorer`` only receives ``y_true`` and
    ``y_pred``; the fold's feature rows are not passed in. The full prepared
    training frame is therefore supplied once through ``features`` and the
    rows of the current fold are selected by the index of ``y_true``.

    Args:
        y_true: true labels of the fold as a pandas Series whose index
            identifies the rows in ``features``.
        y_pred: predicted labels for the same rows, in the same order.
        scaler: fitted ``StandardScaler`` used on the numeric columns.
        features: DataFrame of the prepared training data, indexed like the
            ``y`` given to ``GridSearchCV.fit``.

    Returns:
        The ``revenue`` value computed by the elided part of the scorer.

    Raises:
        ValueError: if ``scaler`` or ``features`` was not supplied.
        TypeError: if ``y_true`` carries no index (e.g. a plain ndarray).
    """
    if scaler is None or features is None:
        raise ValueError(
            "my_scorer needs `scaler` and `features`; pass both via make_scorer(...)"
        )
    if not hasattr(y_true, 'index'):
        raise TypeError("y_true must be a pandas Series so fold rows can be looked up")

    DCWorkCost = 5.00
    OPWorkCost = 2.50

    # Rows of the current fold, in the same order as y_true / y_pred.
    mergedDataset = features.loc[y_true.index].copy()

    # The scaler was fitted on all numeric columns, so `inverse_transform`
    # cannot be applied to one column; undo the scaling for 'Margin' alone.
    # Assumes the numeric columns come first in `features`, in the order the
    # scaler saw them -- TODO confirm.
    margin_pos = features.columns.get_loc('Margin')
    mergedDataset['Margin'] = (
        mergedDataset['Margin'] * scaler.scale_[margin_pos] + scaler.mean_[margin_pos]
    )
    mergedDataset['True'] = y_true
    mergedDataset['Pred'] = np.asarray(y_pred)
    # rest of the scorer.........

    return revenue

...
scalerX_train = full_pipeline.named_transformers_['num'].named_steps['std_scaler']
# Keep the prepared data and the labels as pandas objects sharing one index,
# so each fold's y_true can be matched back to its feature rows.
X_train_df = pd.DataFrame(X_train_prepared_reg, columns=listColPrepared)
y_train = pd.Series(np.asarray(y_balanced), index=X_train_df.index)
...
# `tol=None` disables the stopping criterion (the intent of `tol=-np.infty`);
# `np.infty` no longer exists in NumPy 2.x.
sgd_clf_cv = SGDClassifier(max_iter=5, tol=None, random_state=42)
...
# Extra keyword arguments of make_scorer are forwarded to my_scorer.
custom_score = make_scorer(my_scorer, scaler=scalerX_train, features=X_train_df)
...
grid = GridSearchCV(estimator=sgd_clf_cv, param_grid=parameters, scoring=custom_score, verbose=10)
# Fit with the pandas objects: grid.fit(X_train_df, y_train)

相关问题更多 >

编程相关推荐

热门问题

热门文章