“值错误:x和y必须具有相同的第一维度”,在pylab中绘制图形时出错

2024-05-17 17:24:20 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在使用this tutorial来构建一个Adaboost.SAMME对象识别的分类器,使用HoG特征。 这是我下面的代码,大多数情况下只有顶部部分是根据我的问题定制的,否则大部分与教程中的相同。这是一个非常小的测试,总共只有17张图片,10张用于培训,7张用于测试。一旦我启动并运行,我将添加更多的图像以进行适当的训练。在

import sys from scipy 
import misc, ndimage from skimage 
import data, io, filter, color, exposure 
from skimage.viewer import ImageViewer 
from skimage.feature import hog from skimage.transform 
import resize import matplotlib.pyplot as plt 
from sklearn.datasets import make_gaussian_quantiles 
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.externals.six.moves import xrange 
from sklearn.metrics import accuracy_score 
from sklearn.tree import DecisionTreeClassifier 
import pylab as pl from sklearn.externals.six.moves 
import zip

f = open("PATH_TO_LIST_OF_SAMPLES\\samples.txt",'r') 
out = f.read().splitlines() import numpy as np

imgs = [] tmp_hogs = []
#tmp_hogs = np.zeros((17,1728)) labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]

i=0 for file in out:
        filepath = "PATH_TO_IMAGES\\imgs\\"
        readfile = filepath + file
        curr_img = color.rgb2gray(io.imread(readfile))
        imgs.append(curr_img)
        fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(16, 16),
                 cells_per_block=(1, 1), visualise=True, normalise=True)
        tmp_hogs.append(fd) 
        i+=1
        img_hogs = np.array(tmp_hogs, dtype =float) 

n_split = 10 
X_train, X_test = np.array(img_hogs[:n_split]), np.array(img_hogs[n_split:]) 
y_train, y_test = np.array(labels[:n_split]), np.array(labels[n_split:])


#now all the code below is straight off the example on scikit-learn's website

bdt_real = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1)

bdt_discrete = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1.5,
    algorithm="SAMME")

bdt_real.fit(X_train, y_train)
bdt_discrete.fit(X_train, y_train)

real_test_errors = []
discrete_test_errors = []

for real_test_predict, discrete_train_predict in zip(
        bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test)):
    real_test_errors.append(
        1. - accuracy_score(real_test_predict, y_test))
    discrete_test_errors.append(
        1. - accuracy_score(discrete_train_predict, y_test))

n_trees = xrange(1, len(bdt_discrete) + 1)

pl.figure(figsize=(15, 5))

pl.subplot(131)
pl.plot(n_trees, discrete_test_errors, c='black', label='SAMME')
pl.plot(n_trees, real_test_errors, c='black',
        linestyle='dashed', label='SAMME.R')
pl.legend()
pl.ylim(0.18, 0.62)
pl.ylabel('Test Error')
pl.xlabel('Number of Trees')

pl.subplot(132)
pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
pl.plot(n_trees, bdt_real.estimator_errors_, "r", label='SAMME.R', alpha=.5)
pl.legend()
pl.ylabel('Error')
pl.xlabel('Number of Trees')
pl.ylim((.2,
        max(bdt_real.estimator_errors_.max(),
            bdt_discrete.estimator_errors_.max()) * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

pl.subplot(133)
pl.plot(n_trees, bdt_discrete.estimator_weights_, "b", label='SAMME')
pl.legend()
pl.ylabel('Weight')
pl.xlabel('Number of Trees')
pl.ylim((0, bdt_discrete.estimator_weights_.max() * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

# prevent overlapping y-axis labels
pl.subplots_adjust(wspace=0.25)
pl.show()

但我得到了以下错误:

^{pr2}$

因此,我在代码的教程部分之前添加了以下行,以便查看X和Y数组的尺寸:

print X_train.shape 
print y_train.shape
print X_test.shape 
print y_test.shape

结果是:

(10L, 48L)
(10L,)
(7L, 48L)
(7L,)

但我不确定错误中的x和y是否是指我的x和y。。。因为训练和测试数据集有不同的大小是正常的。 我做错什么了?在


Tags: fromtestimportbdtimgnptrainsklearn