为什么会出错？

import os
from typing import List

import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
# sklearn classifiers, and sklearn wrapper for neuraxle
from sklearn.linear_model import RidgeClassifier, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

from neuraxle.base import BaseTransformer, ForceHandleMixin
# neuraxle distributions
from neuraxle.hyperparams.distributions import Choice, RandInt, Boolean, LogUniform
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import AutoML
from neuraxle.metaopt.auto_ml import InMemoryHyperparamsRepository
from neuraxle.metaopt.auto_ml import ValidationSplitter
from neuraxle.metaopt.callbacks import MetricCallback
from neuraxle.metaopt.callbacks import ScoringCallback
from neuraxle.metaopt.tpe import TreeParzenEstimatorHyperparameterSelectionStrategy
from neuraxle.pipeline import Pipeline
from neuraxle.plotting import TrialMetricsPlottingObserver
from neuraxle.steps.flow import ChooseOneStepOf
from neuraxle.steps.numpy import NumpyRavel
from neuraxle.steps.output_handlers import OutputTransformerWrapper
from neuraxle.steps.sklearn import SKLearnWrapper
from neuraxle.union import FeatureUnion

# Candidate classifiers; ChooseOneStepOf picks one of them per trial.
classifiers: List[BaseTransformer] = [
    SKLearnWrapper(DecisionTreeClassifier(), HyperparameterSpace({
        'criterion': Choice(['gini', 'entropy']),
        'splitter': Choice(['best', 'random']),
        'min_samples_leaf': RandInt(2, 5),
        # scikit-learn rejects an integer min_samples_split below 2, so the
        # sampled range must not include 1.
        'min_samples_split': RandInt(2, 3),
    })).set_name('DecisionTreeClassifier'),
    Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(RidgeClassifier(), HyperparameterSpace({
            # RidgeClassifier takes a single scalar alpha; tuples of alphas
            # are only meaningful for the cross-validated variant.
            'alpha': Choice([0.0, 1.0, 10.0, 100.0]),
            'fit_intercept': Boolean(),
            'normalize': Boolean(),
        })),
    ]).set_name('RidgeClassifier'),
    Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        # NOTE(review): with the default solver, scikit-learn may reject
        # penalty='l1' and dual=True, so some sampled trials can fail --
        # confirm against the installed scikit-learn version.
        SKLearnWrapper(LogisticRegression(), HyperparameterSpace({
            'C': LogUniform(0.01, 10.0),
            'fit_intercept': Boolean(),
            'dual': Boolean(),
            'penalty': Choice(['l1', 'l2']),
            'max_iter': RandInt(20, 200),
        })),
    ]).set_name('LogisticRegression'),
]


class ColumnSelectTransformer(BaseTransformer):
    """Select a fixed list of columns from tabular data inputs.

    This step only implements the plain ``fit`` / ``transform`` methods, so it
    must not mix in ``ForceHandleMixin``: that mixin is meant for steps that
    implement the data-container level methods instead.
    """

    def __init__(self, required_columns):
        """Store the column labels that ``transform`` will keep."""
        BaseTransformer.__init__(self)
        self.required_columns = required_columns

    def inverse_transform(self, processed_outputs):
        """Column selection is not reversed; this returns ``None``."""
        pass

    def fit(self, X, y=None):
        """Nothing to learn; return the step unchanged."""
        return self

    def transform(self, X):
        """Return only ``required_columns`` of ``X``.

        ``X`` is converted to a ``pandas.DataFrame`` first when it is not one.
        """
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)
        return X[self.required_columns]


columns = [
    'BEDCERT', 'RESTOT', 'INHOSP', 'CCRC_FACIL', 'SFF', 'CHOW_LAST_12MOS',
    'SPRINKLER_STATUS', 'EXP_TOTAL', 'ADJ_TOTAL',
]

# Numeric columns: select, then mean-impute missing values.
simple_features = Pipeline([
    ColumnSelectTransformer(columns),
    SimpleImputer(missing_values=np.nan, strategy='mean'),
])

# Categorical columns: select, then one-hot encode.
categorical_features = Pipeline([
    ColumnSelectTransformer(['OWNERSHIP', 'CERTIFICATION']),
    OneHotEncoder(sparse=False),
])

business_features = FeatureUnion([simple_features, categorical_features])

p: Pipeline = Pipeline([
    business_features,
    ChooseOneStepOf(classifiers),
])

validation_splitter = ValidationSplitter(test_size=0.20)

# Accuracy is a score to maximize, so a higher value is better.
scoring_callback = ScoringCallback(
    metric_function=accuracy_score,
    name='accuracy',
    higher_score_is_better=True,
    print_metrics=False,
)

callbacks = [
    MetricCallback('f1', metric_function=f1_score, higher_score_is_better=True, print_metrics=False),
    MetricCallback('precision', metric_function=precision_score, higher_score_is_better=True, print_metrics=False),
    MetricCallback('recall', metric_function=recall_score, higher_score_is_better=True, print_metrics=False),
]

hyperparams_repository = InMemoryHyperparamsRepository(cache_folder='cache')
hyperparams_repository.subscribe(TrialMetricsPlottingObserver(
    plotting_folder_name='metric_results',
    save_plots=False,
    plot_trial_on_next=False,
    plot_all_trials_on_complete=True,
    plot_individual_trials_on_complete=False,
))

hyperparams_optimizer = TreeParzenEstimatorHyperparameterSelectionStrategy(
    number_of_initial_random_step=10,
    quantile_threshold=0.3,
    number_good_trials_max_cap=25,
    number_possible_hyperparams_candidates=100,
    prior_weight=0.,
    use_linear_forgetting_weights=False,
    number_recent_trial_at_full_weights=25,
)

tmpdir = 'cache'
os.makedirs(tmpdir, exist_ok=True)

n_trials = 10
n_epochs = 10

auto_ml = AutoML(
    pipeline=p,
    validation_splitter=validation_splitter,
    refit_trial=True,
    n_trials=n_trials,
    epochs=n_epochs,
    cache_folder_when_no_handle=str(tmpdir),
    scoring_callback=scoring_callback,
    callbacks=callbacks,
    hyperparams_repository=hyperparams_repository,
    # Pass the configured strategy; otherwise it is built but never used.
    hyperparams_optimizer=hyperparams_optimizer,
)


def generate_classification_data():
    """Load the providers CSV and split it into train and test sets.

    The target is whether ``FINE_CNT`` is greater than 1. ``FINE_TOT`` and
    ``CYCLE_2_TOTAL_SCORE`` are removed from the feature frame as well.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``.
    """
    # data_inputs, expected_outputs = make_classification(
    #     n_samples=10000,
    #     n_repeated=0,
    #     n_classes=3,
    #     n_features=4,
    #     n_clusters_per_class=1,
    #     class_sep=1.5,
    #     flip_y=0,
    #     weights=[0.5, 0.5, 0.5]
    # )
    data = pd.read_csv('./ml-data/providers-train.csv', encoding='latin1')

    fine_counts = data.pop('FINE_CNT')
    # Popped only to drop these columns from the features.
    data.pop('FINE_TOT')
    data.pop('CYCLE_2_TOTAL_SCORE')

    X_train, X_test, y_train, y_test = train_test_split(
        data, fine_counts > 1, test_size=0.20
    )
    return X_train, y_train, X_test, y_test


X_train, y_train, X_test, y_test = generate_classification_data()
auto_ml = auto_ml.fit(X_train, y_train)

# Output as follows:-

1条回答

网友

1楼 · 发布于 2024-10-03 02:45:39

以下几点可以帮助您解决当前问题：

“UnboundLocalError: local variable 'repo_trial_split' referenced before assignment”这个错误会在 AutoML 循环中的管道内部发生崩溃时出现。真正的错误信息应该已经打印在您这里贴出的错误的上方。另外，Neuraxle 0.5.7 版本（尚未发布，但可在 GitHub 上获取）通过新增一个名为 continue_loop_on_error 的参数修复了此问题，您应该将该参数设置为 False
您似乎正在为 ColumnSelectTransformer 实例使用 ForceHandleMixin。使用 ForceHandleMixin 意味着您应该定义以下函数：_fit_data_container、_transform_data_container 和 _fit_transform_data_container，而不是 fit/fit_transform/transform
您可能需要编写一个 Neuraxle 类来包装 scikit-learn 的 SimpleImputer

希望这对你有帮助。一旦你做了这些更改，请随时在这里发布更新，我很乐意帮助你。你也可以在 Neuraxle 的 Slack 上发帖，我可能会更快地回答

干杯

另一方面，我将在未来几天发布0.5.7版本

相关问题更多 >

编程相关推荐

热门问题

热门文章