我想建立一个sklearn管道(更大管道的一部分),它:
我使用了这个管道示例:
以及这个自定义 TransformerMixin 的示例:
我在第4步得到一个错误(如果我注释掉第4步,则没有错误):
AttributeError Traceback (most recent call last) in () ----> 1 X_train_transformed = pipe.fit_transform(X_train) .... AttributeError: 'numpy.ndarray' object has no attribute 'fit'
我的代码:
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
class nothing(BaseEstimator, TransformerMixin):
    """Identity transformer: hands its input back untouched.

    It performs no work of its own; it exists so that the numerical
    columns can be collected through a transformer entry.
    """

    def fit(self, X, y=None):
        """Learn nothing and return this transformer instance."""
        return self

    def transform(self, X):
        """Return ``X`` exactly as it was received."""
        return X
class Aggregator(BaseEstimator, TransformerMixin):
    """Collapse basket lines into one row per invoice.

    The input is expected to be positional (array-like): columns 0-4 are
    taken to be ``InvoiceNo``, ``amount``, ``Quantity``, ``UnitPrice`` and
    ``CustomerID`` in that order, and every remaining column (5 onwards)
    is treated as an encoded feature.
    """

    def fit(self, X, y=None):
        """Stateless transformer: nothing is learned, ``self`` is returned."""
        return self

    def transform(self, X):
        """Aggregate the rows of ``X`` by invoice number.

        Parameters
        ----------
        X : array-like
            Row-per-basket-line data laid out as described on the class.

        Returns
        -------
        pandas.DataFrame
            Indexed by ``InvoiceNo``. Encoded feature columns are reduced
            with ``max``, ``CustomerID`` with ``first``, ``amount`` with
            ``sum``, ``Quantity`` and ``UnitPrice`` with ``mean``; a
            ``lines_nb`` column holds the number of lines per invoice.
        """
        frame = pd.DataFrame(X)
        frame = frame.rename(columns={
            0: 'InvoiceNo',
            1: 'amount',
            2: 'Quantity',
            3: 'UnitPrice',
            4: 'CustomerID',
        })
        frame['InvoiceNo'] = frame['InvoiceNo'].astype('int')
        frame['Quantity'] = frame['Quantity'].astype('float64')
        frame['UnitPrice'] = frame['UnitPrice'].astype('float64')

        # Every column after the five named ones is an encoded feature.
        # The upper bound is the full column count so that the last
        # feature column is aggregated too (it used to be dropped).
        aggregations = {col: 'max' for col in range(5, frame.shape[1])}
        aggregations.update({
            'CustomerID': 'first',
            'amount': "sum",
            'Quantity': 'mean',
            'UnitPrice': 'mean',
        })

        # Group once and reuse the grouping for both the aggregation and
        # the per-invoice line count.
        baskets = frame.groupby('InvoiceNo')
        result = baskets.agg(aggregations)
        result['lines_nb'] = baskets.size()
        return result
# Columns that go through the do-nothing branch of the ColumnTransformer.
numeric_features = [
    'InvoiceNo',
    'amount',
    'Quantity',
    'UnitPrice',
    'CustomerID',
]
# This transformer does nothing; it is only here to collect the
# numerical columns.
numeric_transformer = Pipeline(steps=[('nothing', nothing())])

# Categorical columns are one-hot encoded, then reduced with TruncatedSVD.
categorical_features = ['StockCode', 'Country']
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ('best', TruncatedSVD(n_components=100)),
])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Each step is a (name, estimator) pair: the Aggregator instance is passed
# directly, not wrapped in a pre-built ('agg', Aggregator()) tuple.
pipe = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('aggregator', Aggregator()),
])
X_train_transformed = pipe.fit_transform(X_train)
管道步骤的形式是 ('name', Class),但原始代码本质上是:
这使得它
('aggregator', ('agg', Aggregator()))
相关问题 更多 >
编程相关推荐