I am trying to select a few hundred features out of 60,000, and for that I want to use mutual_info_classif.
However, I found that calling mutual_info_classif directly gives different results than using it through SelectKBest.
To demonstrate, I defined a small df in which only one column is related to the target:
   A  B  C  D  E  target
0  1  1  1  1  1       1
1  2  3  2  2  2       0
2  3  3  3  3  3       0
3  4  3  4  4  4       0
4  5  1  5  5  5       1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif

df = pd.DataFrame({'A': [1, 2, 3, 4, 5],
                   'B': [1, 3, 3, 3, 1],
                   'C': [1, 2, 3, 4, 5],
                   'D': [1, 2, 3, 4, 5],
                   'E': [1, 2, 3, 4, 5],
                   'target': [1, 0, 0, 0, 1]})

X = df.drop(['target'], axis=1)
y = df.target
threshold = 3  # the number of most relevant features
Then I obtain the MI scores by calling mutual_info_classif directly:
high_score_features1 = []
feature_scores = mutual_info_classif(X, y, random_state=0, n_neighbors=3,
                                     discrete_features='auto')
for score, f_name in sorted(zip(feature_scores, X.columns), reverse=True)[:threshold]:
    print(f_name, score)
    high_score_features1.append(f_name)
feature_scores
Output:
B 0.48333333333333306
E 0.0
D 0.0
array([0. , 0.48333333, 0. , 0. , 0. ])
Then I use SelectKBest, and to make sure the same parameters are used I wrap the call in my own function:
def my_func(X, y):
    return mutual_info_classif(X, y, random_state=0, n_neighbors=3,
                               discrete_features='auto')

high_score_features1 = []
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
f_selector = SelectKBest(score_func=my_func, k=threshold)
f_selector.fit(X_train, y_train)
for score, f_name in sorted(zip(f_selector.scores_, X.columns), reverse=True)[:threshold]:
    print(f_name, score)
    high_score_features1.append(f_name)
f_selector.scores_
Output:
B 0.8333333333333331
E 0.0
D 0.0
array([0. , 0.83333333, 0. , 0. , 0. ])
I don't understand where this difference comes from, and I'm not sure which approach I can rely on for my real data.
It looks like the reason the direct mutual_info_classif call and the SelectKBest model give different results is that they are fitted on different data. Your SelectKBest model is fitted on the training set, while your direct mutual_info_classif call is fitted on the whole dataset. If you fit both on the training data, they give the same output.
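You can verify this by fitting both on the same training split; a minimal sketch reusing X, y, threshold, and my_func from the snippets above:

# Fit both on the same training split so the MI scores are computed on identical data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Direct call, restricted to the training data only
scores_direct = mutual_info_classif(X_train, y_train, random_state=0,
                                    n_neighbors=3, discrete_features='auto')

# SelectKBest fitted on the same training data via the my_func wrapper
f_selector = SelectKBest(score_func=my_func, k=threshold)
f_selector.fit(X_train, y_train)

# With a fixed random_state both estimates are deterministic, so this should print True
print(np.allclose(scores_direct, f_selector.scores_))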