TypeError:只有具有一个元素的整数数组才能转换为索引

Traceback (most recent call last): File "/Users/.../srl/main.py", line 32, in <module> argident_sys.train_classifier() File "/Users/.../srl/identification.py", line 194, in train_classifier feat_selector.fit(train_argcands_feats,train_argcands_target) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/feature_selection/rfe.py", line 298, in fit ranking_ = rfe.fit(X[train], y[train]).ranking_ TypeError: only integer arrays with one element can be converted to an index

def train_classifier(self): # Get the argument candidates argcands = self.get_argcands(self.reader) # Extract the necessary features from the argument candidates train_argcands_feats = [] train_argcands_target = [] for argcand in argcands: train_argcands_feats.append(self.extract_features(argcand)) if argcand["info"]["label"] == "NULL": train_argcands_target.append("NULL") else: train_argcands_target.append("ARG") # Transform the features to the format required by the classifier self.feat_vectorizer = DictVectorizer() train_argcands_feats = self.feat_vectorizer.fit_transform(train_argcands_feats) # Transform the target labels to the format required by the classifier self.target_names = list(set(train_argcands_target)) train_argcands_target = [self.target_names.index(target) for target in train_argcands_target] ## Train the appropriate supervised model # Recursive Feature Elimination self.classifier = LogisticRegression() feat_selector = RFECV(estimator=self.classifier, step=1, cv=StratifiedKFold(train_argcands_target, 10)) feat_selector.fit(train_argcands_feats,train_argcands_target) print feat_selector.n_features_ print feat_selector.support_ print feat_selector.ranking_ print feat_selector.cv_scores_ return

train_argcands_feats = [{'head_lemma': u'Bras\xedlia', 'head': u'Bras\xedlia', 'head_postag': u'PROP'}, {'head_lemma': u'Pesquisa_Datafolha', 'head': u'Pesquisa_Datafolha', 'head_postag': u'N'}, {'head_lemma': u'dado', 'head': u'dado', 'head_postag': u'N'}, {'head_lemma': u'postura', 'head': u'postura', 'head_postag': u'N'}, {'head_lemma': u'maioria', 'head': u'maioria', 'head_postag': u'N'}, {'head_lemma': u'querer', 'head': u'quer', 'head_postag': u'V-FIN'}, {'head_lemma': u'PT', 'head': u'PT', 'head_postag': u'PROP'}, {'head_lemma': u'participar', 'head': u'participando', 'head_postag': u'V-GER'}, {'head_lemma': u'surpreendente', 'head': u'supreendente', 'head_postag': u'ADJ'}, {'head_lemma': u'Bras\xedlia', 'head': u'Bras\xedlia', 'head_postag': u'PROP'}, {'head_lemma': u'Pesquisa_Datafolha', 'head': u'Pesquisa_Datafolha', 'head_postag': u'N'}, {'head_lemma': u'revelar', 'head': u'revela', 'head_postag': u'V-FIN'}, {'head_lemma': u'recusar', 'head': u'recusando', 'head_postag': u'V-GER'}, {'head_lemma': u'maioria', 'head': u'maioria', 'head_postag': u'N'}, {'head_lemma': u'PT', 'head': u'PT', 'head_postag': u'PROP'}, {'head_lemma': u'participar', 'head': u'participando', 'head_postag': u'V-GER'}, {'head_lemma': u'surpreendente', 'head': u'supreendente', 'head_postag': u'ADJ'}, {'head_lemma': u'Bras\xedlia', 'head': u'Bras\xedlia', 'head_postag': u'PROP'}, {'head_lemma': u'Pesquisa_Datafolha', 'head': u'Pesquisa_Datafolha', 'head_postag': u'N'}, {'head_lemma': u'revelar', 'head': u'revela', 'head_postag': u'V-FIN'}, {'head_lemma': u'governo', 'head': u'Governo', 'head_postag': u'N'}, {'head_lemma': u'de', 'head': u'de', 'head_postag': u'PRP'}, {'head_lemma': u'governo', 'head': u'Governo', 'head_postag': u'N'}, {'head_lemma': u'recusar', 'head': u'recusando', 'head_postag': u'V-GER'}, {'head_lemma': u'maioria', 'head': u'maioria', 'head_postag': u'N'}, {'head_lemma': u'querer', 'head': u'quer', 'head_postag': u'V-FIN'}, {'head_lemma': u'PT', 'head': u'PT', 'head_postag': u'PROP'}, {'head_lemma': u'surpreendente', 'head': u'supreendente', 'head_postag': u'ADJ'}, {'head_lemma': u'Bras\xedlia', 'head': u'Bras\xedlia', 'head_postag': u'PROP'}, {'head_lemma': u'Pesquisa_Datafolha', 'head': u'Pesquisa_Datafolha', 'head_postag': u'N'}, {'head_lemma': u'revelar', 'head': u'revela', 'head_postag': u'V-FIN'}, {'head_lemma': u'muito', 'head': u'Muitas', 'head_postag': u'PRON-DET'}, {'head_lemma': u'prioridade', 'head': u'prioridades', 'head_postag': u'N'}, {'head_lemma': u'com', 'head': u'com', 'head_postag': u'PRP'}, {'head_lemma': u'prioridade', 'head': u'prioridades', 'head_postag': u'N'}] train_argcands_target = ['NULL', 'ARG', 'ARG', 'ARG', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'ARG', 'ARG', 'ARG', 'ARG', 'NULL', 'NULL', 'NULL', 'NULL', 'ARG', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'ARG', 'NULL', 'NULL', 'NULL', 'NULL', 'ARG', 'ARG', 'NULL', 'NULL']

2条回答

网友

1楼 · 编辑于 2024-09-20 22:53:50

如果还有人感兴趣

我用CountVectorizer在一些非常相似的东西上，它给了我同样的错误。我意识到矢量器给了我一个COO稀疏矩阵，它基本上是一个坐标列表。不能通过行索引访问COO矩阵中的元素。最好将其转换为按行索引的CSR矩阵（压缩稀疏行）。转换可以很容易地完成。不需要其他改变，这对我有效。

网友

2楼 · 编辑于 2024-09-20 22:53:50

我终于解决了这个问题。必须做两件事：

train_argcands_target是一个列表，它必须是一个numpy数组。我很惊讶以前我直接使用估计器时效果很好。
出于某种原因（我还不知道为什么），如果使用DictVectorizer创建的稀疏矩阵，它也不会工作。我不得不“手动”地将每个功能字典转换为一个只包含代表每个功能值的整数的功能数组。转换过程与我在目标值的代码中给出的过程类似。

感谢所有想帮忙的人！

相关问题更多 >

编程相关推荐

热门问题

热门文章