为什么在 scikit-learn 中使用 SVM 模型识别 MNIST 手写体数字时效果不理想？

# coding:utf-8
"""Train an SVM digit classifier from a CSV file and report its accuracy.

The CSV is expected to have one header line followed by rows of the form
``label,feature_1,...,feature_n``.  The first ``numTrain`` data rows are used
for training and the remaining rows for testing.

The code is written so that it runs under both Python 2 and Python 3.
"""
import numpy as np

TRAIN_CSV = 'C:/Users/sun/Desktop/train.csv'
MODEL_PATH = 'svmModel.pkl'
# NOTE(review): assumes the features are 8-bit grey levels (0-255), as in the
# MNIST CSV export -- confirm against the actual data file.
PIXEL_MAX = 255.0


def _report(name, rows, arr):
    """Print the container type of *rows* and the shape of the array built from it."""
    print("%s:type of element: %s shape of %s: %s"
          % (name, type(rows), name, arr.shape))


def loadData(fileName, numTrain=30000):
    """Load a labelled CSV file and split it into training and test arrays.

    Args:
        fileName: Path of the CSV file.  The first line is treated as a
            header and skipped; in every other line the first column is the
            label and all remaining columns are features.
        numTrain: Number of leading data rows that go into the training set.
            Every later row goes into the test set.

    Returns:
        A tuple ``(featMatTrain, labelVecTrain, featMatTest, labelVecTest)``
        of numpy arrays.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    featMatTrain, labelVecTrain = [], []
    featMatTest, labelVecTest = [], []

    # ``with`` guarantees the handle is closed even if parsing fails.
    with open(fileName) as fr:
        next(fr, None)  # skip the header line (no-op on an empty file)
        numRows = 0
        for line in fr:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # A real list, not ``map``: it must be sliceable on Python 3 too.
            curLine = [float(field) for field in line.split(',')]
            # Each row lands in exactly one of the two sets, and every
            # feature column after the label is kept.
            if numRows < numTrain:
                labelVecTrain.append(curLine[0])
                featMatTrain.append(curLine[1:])
            else:
                labelVecTest.append(curLine[0])
                featMatTest.append(curLine[1:])
            numRows += 1

    trainX, trainY = np.array(featMatTrain), np.array(labelVecTrain)
    testX, testY = np.array(featMatTest), np.array(labelVecTest)

    print('*************************** the training data we got: *****************************')
    _report('featMat', featMatTrain, trainX)
    _report('labelVec', labelVecTrain, trainY)
    _report('featMat', featMatTest, testX)
    _report('labelVec', labelVecTest, testY)
    return trainX, trainY, testX, testY


def _evaluate(clf, featMat, labelVec):
    """Return ``(number_correct, accuracy)`` of *clf* on the given samples.

    An empty sample set yields ``(0.0, nan)`` instead of dividing by zero.
    """
    if len(featMat) == 0:
        return 0.0, float('nan')
    numRight = float(np.sum(clf.predict(featMat) == labelVec))
    return numRight, numRight / len(featMat)


def main():
    """Train an ``SVC`` on the CSV data, save it, and print train/test accuracy."""
    # Imported here so that ``loadData`` stays usable without scikit-learn.
    from sklearn import svm
    try:
        import joblib
    except ImportError:
        # Older scikit-learn releases only ship the vendored copy.
        from sklearn.externals import joblib

    featMatTrain, labelVecTrain, featMatTest, labelVecTest = loadData(TRAIN_CSV)

    # An RBF-kernel SVM is sensitive to feature scale: with raw 0-255 values
    # the kernel is ~0 between almost any two distinct images, so the model
    # memorises the training set and fails on unseen data.  Rescale to [0, 1].
    featMatTrain = featMatTrain / PIXEL_MAX
    featMatTest = featMatTest / PIXEL_MAX

    clf = svm.SVC()
    clf.fit(featMatTrain, labelVecTrain)
    joblib.dump(clf, MODEL_PATH)
    print('***************** we finish training **********************')

    count1, accuracy1 = _evaluate(clf, featMatTrain, labelVecTrain)
    count2, accuracy2 = _evaluate(clf, featMatTest, labelVecTest)
    print('***************** we finish predicting **********************')

    print('************* the result of predicting training set ***************')
    print('the number of figures that predict right:  %s' % count1)
    print('the accuary is : %s' % accuracy1)

    print('************ the result to predicting testing set ************')
    print('the number of figures that predict right: %s' % count2)
    print('the accuary is: %s' % accuracy2)


if __name__ == '__main__':
    main()

2条回答

网友

1楼 · 编辑于 2024-06-28 11:35:05

有以下几个原因可能导致模型过拟合。

你在小数据集上使用了过于强大的模型。也许可以试试线性模型。
训练数据集太小，不足以训练模型，因此可以把测试数据集中的一部分数据划入训练集。

你是如何划分 MNIST 数据集的？你的数据集划分可能不平衡。

网友

2楼 · 编辑于 2024-06-28 11:35:05

过拟合对结果的影响确实很大。SVC 是“强学习器”，这意味着只要特征足够多，它就可以过拟合任何数据集（其他强学习器包括决策树和最近邻模型）。

要解决这个问题，可以使用更简单的模型或模型平均法。简单的模型包括 LinearSVC；模型平均包括 BaggingClassifier 和 RandomForestClassifier。

相关问题更多 >

编程相关推荐

热门问题

热门文章