我构建了一个CNN来识别224x224x3图像中的9类手势。我试着通过在16张图片上训练它来测试它的功能,看看它是否超过100的精确度。这是我的网络
import torch.nn as nn
class learn_gesture(nn.Module):
def __init__(self):
super(learn_gesture, self).__init__()
self.name = "gesture_learner"
self.conv1 = nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1, padding=2)
self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1, padding=2)
self.conv3 = nn.Conv2d(in_channels=50, out_channels=100, kernel_size=5, stride=1, padding=2)
self.conv4 = nn.Conv2d(in_channels=100, out_channels=200, kernel_size=5, stride=1, padding=2)
self.conv5 = nn.Conv2d(in_channels=200, out_channels=400, kernel_size=5, stride=1, padding=2)
self.pool1 = nn.MaxPool2d(2,2)
self.pool2 = nn.MaxPool2d(2,2)
self.pool3 = nn.MaxPool2d(2,2)
self.pool4 = nn.MaxPool2d(2,2)
self.pool5 = nn.MaxPool2d(2,2)
self.fc1 = nn.Linear(7*7*400, 10000)
self.fc2 = nn.Linear(10000, 3000)
self.fc3 = nn.Linear(3000, 9)
def forward(self, x):
x = self.pool1(F.relu(self.conv1(x))) # gives 112*20
x = self.pool2(F.relu(self.conv2(x))) # gives 56*50
x = self.pool3(F.relu(self.conv3(x))) # gives 28*100
x = self.pool4(F.relu(self.conv4(x))) # gives 14*200
x = self.pool5(F.relu(self.conv5(x))) # gives 7*400
x = x.view(-1, 7*7*400)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
return F.softmax(self.fc3(x), dim=1)
以下是培训代码:
overfit_model = learn_gesture()
num_epochs = 200 #set it high so that it will converge
## loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(over_model.parameters(), lr=0.001, momentum=0.9) #optimizer is SGD with momentum
## set up some empty np arrays to store our result for plotting later
train_err = np.zeros(num_epochs)
train_loss = np.zeros(num_epochs)
################################################ train the network
for epoch in range(num_epochs):
total_train_loss = 0
total_train_err = 0
total_epoch = 0
for i, data in enumerate(smallLoader, 0):
inputs, labels = data
outputs = over_model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
optimizer.zero_grad()
corr = (determine_corr(outputs, labels)) # get a list of bool representing right or wrong predictions in the batch
total_train_err += corr.count(False)
total_train_loss += loss.item()
total_epoch += len(labels)
train_err[epoch] = float(total_train_err) / total_epoch
train_loss[epoch] = float(total_train_loss) / (i+1)
print(("Epoch {}: Train err: {}, Train loss: {}").format(
enter code hereepoch + 1,
train_err[epoch],
train_loss[epoch]))
训练没有效果,准确性和损失也没有改善。我完全搞不清楚错误在哪里。非常感谢您的帮助
###############################
我去掉了前进功能中的softmax。令人惊讶的是,该模型的性能变化不大。我注意到现在输出中的一些元素是负数,所有类中的元素都不会加1。这应该发生吗? 输出:
tensor([[ 0.0165, -0.0041, 0.0043, 0.0017, 0.0238, 0.0329, -0.0265, -0.0224,
-0.0187],
[ 0.0163, -0.0044, 0.0036, 0.0028, 0.0248, 0.0334, -0.0268, -0.0218,
-0.0194],
[ 0.0161, -0.0046, 0.0041, 0.0019, 0.0240, 0.0333, -0.0266, -0.0223,
-0.0192],
[ 0.0190, -0.0044, 0.0035, 0.0015, 0.0244, 0.0322, -0.0267, -0.0223,
-0.0187],
[ 0.0174, -0.0048, 0.0033, 0.0021, 0.0251, 0.0328, -0.0257, -0.0225,
-0.0190],
[ 0.0175, -0.0041, 0.0033, 0.0031, 0.0241, 0.0329, -0.0264, -0.0222,
-0.0192],
[ 0.0168, -0.0042, 0.0033, 0.0022, 0.0251, 0.0335, -0.0269, -0.0225,
-0.0195],
[ 0.0163, -0.0047, 0.0037, 0.0030, 0.0243, 0.0336, -0.0265, -0.0227,
-0.0192],
[ 0.0165, -0.0043, 0.0038, 0.0026, 0.0242, 0.0337, -0.0264, -0.0222,
-0.0191],
[ 0.0163, -0.0051, 0.0038, 0.0016, 0.0236, 0.0338, -0.0258, -0.0223,
-0.0195],
[ 0.0173, -0.0037, 0.0038, 0.0018, 0.0236, 0.0322, -0.0269, -0.0225,
-0.0191],
[ 0.0174, -0.0044, 0.0031, 0.0019, 0.0241, 0.0334, -0.0266, -0.0224,
-0.0200],
[ 0.0164, -0.0038, 0.0034, 0.0029, 0.0245, 0.0342, -0.0269, -0.0225,
-0.0200],
[ 0.0173, -0.0046, 0.0036, 0.0021, 0.0245, 0.0328, -0.0264, -0.0221,
-0.0192],
[ 0.0168, -0.0046, 0.0034, 0.0025, 0.0248, 0.0336, -0.0262, -0.0222,
-0.0194],
[ 0.0166, -0.0051, 0.0033, 0.0015, 0.0234, 0.0331, -0.0270, -0.0218,
-0.0186]], grad_fn=<AddmmBackward>)
Epoch 199: Train err: 0.8125, Train loss: 2.1874701976776123
首先,您应该删除'nn.CrossEntropyLoss'之前的
softmax
,因为其他答案已经发现现在实际的问题是,你的神经CNN模型很深,但是你的数据集很小。在这种情况下,您可能需要数千个epoches,甚至模型也无法收敛。如果您可以增加数据集或使用深度较低的CNN模型,则可以克服您现在面临的收敛问题
但你不想改变这些,我的建议是:
引用自here
让我们谈谈批量大小。如果您的数据集非常小,模型太深,则中等批量大小(即32或16)可能不起作用。当您想要测试过拟合模型时,请尝试使用非常小的批量(4或8)。这将有助于快速找到少量样本的局部最大值
如果您已经尝试过,请尝试提高学习速度
它并没有减少,因为在softmax之后使用的是CrossEntropyLoss,所以基本上只是减少了梯度回流的量
只需卸下softmax,它就会工作
(稍后将对其进行编辑以添加原因)
是的,这是合理的
似乎您正在使用一个名为
overfit_model
的模型,在该模型中将over_model.parameters()
传递给优化器:应替换为
ovrefit_model.parameters()
您将在反向传播后将渐变设置为零右,这应该在之前完成!因此,以下几行:
应替换为:
不需要调用
F.softmax
in因为您正在使用^{} 调用^{} 其中natively bundles ^{} before calling ^{}
相关问题 更多 >
编程相关推荐