我实现了一个神经网络类,它总是只有一个隐藏层,没有使用库,甚至没有numpy。我已经按照我理解的方式做了每件事,但这根本不是学习,损失实际上在不断增加,即使在网上看了很多例子,我也找不到哪里出了问题。在
下面是我的MLP类,以及它尝试学习XOR函数的演示:
import random
from math import exp
class MLP:
def __init__(self, numInputs, numHidden, numOutputs):
# MLP architecture sizes
self.numInputs = numInputs
self.numHidden = numHidden
self.numOutputs = numOutputs
# MLP weights
self.IH_weights = [[random.random() for i in range(numHidden)] for j in range(numInputs)]
self.HO_weights = [[random.random() for i in range(numOutputs)] for j in range(numHidden)]
# Gradients corresponding to weight matrices computed during backprop
self.IH_gradients = [[0 for i in range(numHidden)] for j in range(numInputs)]
self.HO_gradients = [[0 for i in range(numOutputs)] for j in range(numHidden)]
# Input, hidden and output neuron values
self.I = None
self.H = [0 for i in range(numHidden)]
self.O = [0 for i in range(numOutputs)]
self.H_deltas = [0 for i in range(numHidden)]
self.O_deltas = [0 for i in range(numOutputs)]
# Sigmoid
def activation(self, x):
return 1 / (1 + exp(-x))
# Derivative of Sigmoid
def activationDerivative(self, x):
return x * (1 - x)
# Squared Error
def calculateError(self, prediction, label):
return (prediction - label) ** 2
def forward(self, input):
self.I = input
for i in range(self.numHidden):
for j in range(self.numInputs):
self.H[i] += self.I[j] * self.IH_weights[j][i]
self.H[i] = self.activation(self.H[i])
for i in range(self.numOutputs):
for j in range(self.numHidden):
self.O[i] += self.activation(self.H[j] * self.HO_weights[j][i])
self.O[i] = self.activation(self.O[i])
return self.O
def backwards(self, label):
if label != list:
label = [label]
error = 0
for i in range(self.numOutputs):
neuronError = self.calculateError(self.O[i], label[i])
error += neuronError
self.O_deltas[i] = neuronError * self.activationDerivative(self.O[i])
for j in range(self.numHidden):
self.HO_gradients[j][i] += self.O_deltas[i] * self.H[j]
for i in range(self.numHidden):
neuronError = 0
for j in range(self.numOutputs):
neuronError += self.HO_weights[i][j] * self.O_deltas[j]
self.H_deltas[i] = neuronError * self.activationDerivative(self.H[i])
for j in range(self.numInputs):
self.IH_gradients[j][i] += self.H_deltas[i] * self.I[j]
return error
def updateWeights(self, learningRate):
for i in range(self.numInputs):
for j in range(self.numHidden):
self.IH_weights[i][j] += learningRate * self.IH_gradients[i][j]
for i in range(self.numHidden):
for j in range(self.numOutputs):
self.HO_weights[i][j] += learningRate * self.HO_gradients[i][j]
self.IH_gradients = [[0 for i in range(self.numHidden)] for j in range(self.numInputs)]
self.HO_gradients = [[0 for i in range(self.numOutputs)] for j in range(self.numHidden)]
data = [
[[0, 0], 0],
[[0, 1], 1],
[[1, 0], 1],
[[1, 1], 0]
]
mlp = MLP(2, 5, 1)
for epoch in range(100):
epochError = 0
for i in range(len(data)):
mlp.forward(data[i][0])
epochError += mlp.backwards(data[i][1])
print(epochError / len(data))
mlp.updateWeights(0.001)
如果我正确地理解了你的实现,那么我认为你的问题是在计算向后函数中的权重更新,更新应该是误差(不是误差平方)乘以sigmoid导数,所以我会看看/重做计算。在
你怎么做到的?我把它给一个朋友看了——我们都发现你在没有太多抽象的情况下完成算法的目标是很有启发性的,尽管试图找出错误是很困难的。在
他发现的改进是updateWeights需要是一个负反馈循环,因此将“+=”改为“-=”分两行给出:
以及
^{pr2}$另一个因素是提高学习率。随着这些变化,错误下降到16%左右(对我来说,我可能已经做了另一个我没有看到的变化),然后开始逐渐上升到27%,可能是因为过度训练,学习率太高。在
我使学习率取决于时代
稳定下降,稳定在0.161490。。。在
但是如果你从'forward'得到预测值,它的预测值总是0.66,输入值已经被抹去了。所以。。。那太糟糕了。在
^{4}$相关问题 更多 >
编程相关推荐