Numpy标量数组

2024-09-28 23:31:13 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在处理这个数据集: https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)

我最初编写了以下代码片段:

 import bso as opt
 from sklearn import svm
 import numpy as np

 with open("breastcancer/train_data.txt") as f:
    tr_d=np.array([[float(d) for d  in data.split(',')] for data in 
    f.read().splitlines()])
 with open("breastcancer/test_data.txt") as f:
    te_d=np.array([[float(d) for d  in data.split(',')] for data in 
    f.read().splitlines()])

 with open("breastcancer/train_data_label.txt") as f:
    tr_l=np.array([int(data) for data in f.read().splitlines()])

 with open("breastcancer/test_data_label.txt") as f:
    te_l=np.array([int(data) for data in f.read().splitlines()])


  def check(gen,tr_d,tr_l,te_d,te_l):
    mask=np.array(gen) > 0
    al_data=np.array([al[mask] for al in tr_d])
    al_test_data=np.array([al[mask] for al in te_d])
    res=svm.LinearSVC().fit(al_data,tr_l).predict(al_test_data)
    score=np.count_nonzero(te_l==res)/len(te_l)
    return score

  gen1=[1]*9
  print("all_feature:\n\t{0}   {1}  
{2}".format("".join(map(str,gen1)),check(gen1,tr_d,tr_l,te_d,te_l),len(gen1)))

class Evaluate:
    def __init__(self):
    self.train_l=tr_l
    self.train_d=tr_d
    self.test_l=te_l
    self.test_d=te_d
    self.dim=len(tr_d[0])
def evaluate(self,gen):
    mask=np.array(gen) > 0
    print (mask)
    al_data=np.array([al[mask] for al in self.train_d])
    al_test_data=np.array([al[mask] for al in self.test_d])
    res=svm.LinearSVC().fit(al_data,self.train_l).predict(al_test_data)
    score=np.count_nonzero(self.test_l==res)/len(self.test_l)
    return score
def check_dimentions(self,dim):
    if dim==None:
        return len(self.train_d[0])
    else:
        return dim

这对我来说非常有效。你知道吗

为了方便起见,我开始使用熊猫,这是我修改后的代码:

import bso as opt
from sklearn import svm
import numpy as np
import sys
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

try:

    data_df = pd.read_csv("breast-cancer-wisconsin.csv")

except Exception as e:
    print(e)
   sys.exit(1)
data_df.columns = ['id', 'f0', 'f1','f2','f3','f4','f5','f6','f7','f8','label']
data_df.drop(['id'],axis=1,inplace=True)
data_df['label'] = [0 if x == 2 else 1 for x in data_df['label']]
X = data_df.drop(['label'],axis=1)
y = data_df.label

tr_d, te_d, tr_l, te_l = train_test_split(X,y,test_size=0.3,random_state=42)





def check(gen,tr_d,tr_l,te_d,te_l):
    mask=np.array(gen) > 0
    al_data=np.array([al[mask] for al in tr_d])
    al_test_data=np.array([al[mask] for al in te_d])
    rfc= RandomForestClassifier(n_estimators=10)
    rfc.fit(tr_d,tr_l)
    score = rfc.score(te_d,te_l)
    return score

gen1=[1]*9
print("all_feature:\n\t{0}   {1}  {2}".format("".join(map(str,gen1)),check(gen1,tr_d,tr_l,te_d,te_l),len(gen1)))


class Evaluate:
   def __init__(self):
       self.train_l=tr_l
       self.train_d=tr_d
       self.test_l=te_l
       self.test_d=te_d
       self.dim=9
   def evaluate(self,gen):
        mask=np.array(gen) > 0
        al_data=np.array([al[mask] for al in self.train_d])
        al_test_data=np.array([al[mask] for al in self.test_d])
        rfc= RandomForestClassifier(n_estimators=10)
        res=rfc.fit(tr_d,tr_l)
        score = rfc.score(te_d,te_l)
    return score 
    def check_dimentions(self,dim):
        if dim==None:
            return 9
        else:
            return dim

我现在得到这个错误:

 Traceback (most recent call last):
   File "script.py", line 42, in <module>
   print("all_feature:\n\t{0}   {1}  {2}".format("".join(map(str,gen1)),check(gen1,tr_d,tr_l,te_d,te_l),len(gen1)))
   File "script.py", line 31, in check
   al_data=np.array([al[mask] for al in tr_d])
   File "script.py", line 31, in <listcomp>
   al_data=np.array([al[mask] for al in tr_d])
   TypeError: only integer scalar arrays can be converted to a scalar index

我在第31行得到了这个,任何关于如何解决这个问题的帮助,这样我就可以在后面的代码中使用这个功能了。你知道吗


Tags: intestimportselffordataasnp