具有自定义内核的GridSearchCV

import numpy as np
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


def tanimotoKernel(xs, ys):
    """Return the min/max (Tanimoto) similarity of two vectors.

    The result is ``sum(min(x, y)) / sum(max(x, y))`` taken over the paired
    elements of ``xs`` and ``ys``. Pairing uses ``zip``, so any extra trailing
    elements of the longer input are ignored.

    There is no guard for a zero denominator: if the sum of the pairwise
    maxima is zero the division is undefined.
    """
    min_total = 0
    max_total = 0
    for x, y in zip(xs, ys):
        min_total += min(x, y)
        max_total += max(x, y)
    return min_total / max_total


def tanimotoLambdaKernel(xs, ys, gamma=0.01):
    """Return ``exp(gamma * tanimotoKernel(xs, ys)) / (exp(gamma) - 1)``.

    ``gamma`` must be non-zero, because the denominator ``exp(gamma) - 1``
    vanishes at ``gamma == 0``.
    """
    return np.exp(gamma * tanimotoKernel(xs, ys)) / (np.exp(gamma) - 1)


def GramMatrix(X1, X2, K_function=tanimotoLambdaKernel):
    """Build the Gram matrix of ``K_function`` between the rows of X1 and X2.

    Args:
        X1: Array whose first axis indexes samples (``X1.shape[0]`` rows).
        X2: Array whose first axis indexes samples (``X2.shape[0]`` rows).
        K_function: Callable taking two rows and returning a scalar.

    Returns:
        Array of shape ``(X1.shape[0], X2.shape[0])`` whose entry ``[i, j]``
        is ``K_function(X1[i], X2[j])``.
    """
    gram_matrix = np.zeros((X1.shape[0], X2.shape[0]))
    for i, x1 in enumerate(X1):
        for j, x2 in enumerate(X2):
            gram_matrix[i, j] = K_function(x1, x2)
    return gram_matrix


X, y = datasets.load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(X, y)

# The estimator must be created before it is fitted; fitting first raised a
# NameError because ``clf`` did not exist yet.
clf = svm.SVC(kernel=GramMatrix)
clf.fit(x_train, y_train)
# accuracy_score takes (y_true, y_pred); accuracy itself is symmetric, so the
# value is the same as with the arguments swapped.
accuracy_score(y_test, clf.predict(x_test))

1条回答

网友

1楼 · 发布于 2024-10-02 22:29:44

这似乎无法直接实现；内置内核的参数是固定写在SVC内部的（即“baked in”）。

一种方法是把不同的内核本身作为待搜索的参数传入。由于您的内核是通过嵌套的函数定义的，这会稍微复杂一些，所以我使用了functools.partial：

from functools import partial

# Candidate gamma values to search over. These are example values only;
# replace them with your own list of gammas.
gammas = [0.01, 0.1, 1.0]

# Each grid entry is a kernel callable with one gamma already bound: the inner
# partial fixes gamma on the pairwise kernel, the outer partial hands that
# kernel to GramMatrix. The key must be the string "kernel" (the estimator's
# parameter name), not a bare identifier.
param_space = {
    "kernel": [
        partial(
            GramMatrix,
            K_function=partial(
                tanimotoLambdaKernel,
                gamma=g,
            ),
        )
        for g in gammas
    ]
}

另一种方法是自定义一个类。这样在超参数搜索中更清晰，因为被搜索的“超参数”可以直接是gamma，但维护这个类可能需要做更多的工作。在这个例子中，我通过复用已有的gamma参数来避免重写__init__，并且在调用fit时才设置内核，以便set_params对gamma能正常生效。

class SVC_tanimoto(svm.SVC):
    """SVC with a Tanimoto kernel.

    The inherited ``gamma`` parameter is reused as the gamma of
    ``tanimotoLambdaKernel``, so ``__init__`` does not need to be overridden.
    The kernel callable is rebuilt on every ``fit`` call from the current
    ``self.gamma``, so a gamma changed through ``set_params`` takes effect on
    the next fit.
    """

    def fit(self, X, y, sample_weight=None):
        """Fit the model with a Tanimoto kernel bound to the current gamma.

        Args:
            X: Training samples, forwarded to ``SVC.fit``.
            y: Target values, forwarded to ``SVC.fit``.
            sample_weight: Optional per-sample weights, forwarded to
                ``SVC.fit``.

        Returns:
            This estimator.

        Raises:
            TypeError: If ``gamma`` is a string. SVC's string settings
                (its default is ``'scale'``) cannot be used as the numeric
                gamma of the Tanimoto kernel.
        """
        # Fail early with a clear message instead of an obscure
        # "str * float" error raised from inside the kernel.
        if isinstance(self.gamma, str):
            raise TypeError(
                "SVC_tanimoto requires a numeric gamma, got %r" % (self.gamma,)
            )
        self.kernel = partial(
            GramMatrix,
            K_function=partial(
                tanimotoLambdaKernel,
                gamma=self.gamma,
            )
        )
        super().fit(X, y, sample_weight=sample_weight)
        return self

相关问题更多 >

编程相关推荐

热门问题

热门文章