我尝试实现以下用于近似异或门(XOR)的神经网络,代价函数使用二元交叉熵。成本始终在 0.69 左右达到饱和,并且对所有输入,网络都输出 0.5。我尝试了不同的迭代轮数(epochs)、学习率和层数,但结果没有任何变化。请问我哪里做错了?
import numpy as np
import matplotlib.pyplot as plt
epsilon = 1e-15
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def relu(x):
return np.maximum(0, x)
def sigmoid_backwards(A):
return A * (1 - A)
def relu_backwards(A):
return A >= 0
def init_parameters(layer_dims):
paramters = {}
L = len(layer_dims)
for i in range(1, L):
paramters['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i-1]) * 0.001
paramters['b' + str(i)] = np.zeros((layer_dims[i], 1))
return paramters
def forward_pass(X, paramters, g):
layer_vals = [X]
A = X
L = len(g)
for i in range(1, L):
A_prev = A
Z = np.dot(paramters['W' + str(i)], A_prev) + paramters['b' + str(i)]
A = activations[g[i]](Z)
layer_vals.append(A)
return layer_vals
def predict(X, paramters, g):
layer_vals = [X]
A = X
L = len(g)
for i in range(1, L):
A_prev = A
Z = np.dot(paramters['W' + str(i)], A_prev) + paramters['b' + str(i)]
A = activations[g[i]](Z)
layer_vals.append(A)
return layer_vals[-1][0]
def backward_pass(y_true, layer_vals, paramters, g, learning_rate=0.01):
m = y_true.shape[1]
dA = -y_true/(layer_vals[-1] + epsilon) + (1-y_true)/(1-layer_vals[-1] + epsilon)
for i in range(len(layer_vals)-1, 0, -1):
dZ = dA * activations_backwards[g[i]](layer_vals[i])
dA_prev = np.dot(paramters['W' + str(i)].T, dZ)
dW = 1/m * np.dot(dZ, layer_vals[i-1].T)
db = 1/m * np.sum(dZ, axis=1, keepdims=True)
dA = dA_prev
paramters['W' + str(i)] -= learning_rate * dW
paramters['b' + str(i)] -= learning_rate * db
return paramters
def compute_cost(y, output):
m = y.shape[1]
return -1/m * np.sum(y * np.log(output+epsilon) + (1-y) * np.log(1-output+epsilon))
activations = {
'sigmoid': sigmoid,
'relu': relu
}
activations_backwards = {
'sigmoid': sigmoid_backwards,
'relu': relu_backwards
}
X = np.array([[0.000000, 0.000000, 1.000000, 1.000000],
[0.000000, 1.000000, 0.000000, 1.000000]], dtype=float)
y = np.array([[0.0, 1.0, 1.0, 0.0]], dtype=float)
layer_dims = (2, 3, 3, 1)
#g = ['linear', 'sigmoid', 'sigmoid', 'sigmoid']
g = ['linear', 'relu', 'relu', 'sigmoid']
epochs = 1000
learning_rate = 0.01
paramters = init_parameters(layer_dims)
layer_vals = forward_pass(X, paramters, g)
costs = []
for i in range(epochs):
parameters = backward_pass(y, layer_vals, paramters, g, learning_rate=learning_rate)
layer_vals = forward_pass(X, paramters, g)
cost = compute_cost(y, layer_vals[-1])
costs.append(cost)
if (i+1) % 10 == 0:
print(f"After {i+1} epochs at learning rate {learning_rate:.4f}, cost: ", cost)
plt.plot(costs)
plt.show()
print(predict(X, paramters, g))
这是成本函数值随训练轮数变化的曲线(Cost Curve):
而且输出总是
[0.5 0.5 0.5 0.5]
这是一个不错的神经网络实现尝试,你已经很接近了!按如下方式修复代码:
`parameters = backward_pass(...)` 这一行存在拼写错误(`parameters` 与其余代码使用的 `paramters` 不一致),这可能导致参数没有按预期更新。以下是可运行的代码:
相关问题 更多 >
编程相关推荐