Neural network approximating XOR always outputs 0.5 for every input

Posted 2024-10-05 14:29:24


I tried to implement the following neural network to approximate an XOR gate, using binary cross-entropy as the cost function. The cost always saturates around 0.69, and the network outputs 0.5 for every input. I have tried different numbers of epochs, learning rates, and layer counts, but nothing changes. What am I doing wrong?

import numpy as np
import matplotlib.pyplot as plt

epsilon = 1e-15

def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def relu(x):
    return np.maximum(0, x)


def sigmoid_backwards(A):
    return A * (1 - A)

def relu_backwards(A):
    return A >= 0

def init_parameters(layer_dims):
    paramters = {}
    L = len(layer_dims)
    for i in range(1, L):
        paramters['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i-1]) * 0.001
        paramters['b' + str(i)] = np.zeros((layer_dims[i], 1))
    return paramters

def forward_pass(X, paramters, g):
    layer_vals = [X]
    A = X
    L = len(g)
    for i in range(1, L):
        A_prev = A
        Z = np.dot(paramters['W' + str(i)], A_prev) + paramters['b' + str(i)]
        A = activations[g[i]](Z)
        layer_vals.append(A)

    return layer_vals

def predict(X, paramters, g):
    layer_vals = [X]
    A = X
    L = len(g)
    for i in range(1, L):
        A_prev = A
        Z = np.dot(paramters['W' + str(i)], A_prev) + paramters['b' + str(i)]
        A = activations[g[i]](Z)
        layer_vals.append(A)

    return layer_vals[-1][0]


def backward_pass(y_true, layer_vals, paramters, g, learning_rate=0.01):
    m = y_true.shape[1]
    dA = -y_true/(layer_vals[-1] + epsilon) + (1-y_true)/(1-layer_vals[-1] + epsilon)
    for i in range(len(layer_vals)-1, 0, -1):
        dZ = dA * activations_backwards[g[i]](layer_vals[i])
        dA_prev = np.dot(paramters['W' + str(i)].T, dZ)
        dW = 1/m * np.dot(dZ, layer_vals[i-1].T)
        db = 1/m * np.sum(dZ, axis=1, keepdims=True)
        dA = dA_prev

        paramters['W' + str(i)] -= learning_rate * dW
        paramters['b' + str(i)] -= learning_rate * db

    return paramters

def compute_cost(y, output):
    m = y.shape[1]
    return -1/m * np.sum(y * np.log(output+epsilon) + (1-y) * np.log(1-output+epsilon))


activations = {
    'sigmoid': sigmoid,
    'relu': relu
}

activations_backwards = {
    'sigmoid': sigmoid_backwards,
    'relu': relu_backwards
}


X = np.array([[0.000000, 0.000000, 1.000000, 1.000000],
              [0.000000, 1.000000, 0.000000, 1.000000]], dtype=float)
y = np.array([[0.0, 1.0, 1.0, 0.0]], dtype=float)


layer_dims = (2, 3, 3, 1)
#g = ['linear', 'sigmoid', 'sigmoid', 'sigmoid']
g = ['linear', 'relu', 'relu', 'sigmoid']
epochs = 1000
learning_rate = 0.01
paramters = init_parameters(layer_dims)
layer_vals = forward_pass(X, paramters, g)
costs = []
for i in range(epochs):
    parameters = backward_pass(y, layer_vals, paramters, g, learning_rate=learning_rate)
    layer_vals = forward_pass(X, paramters, g)
    cost = compute_cost(y, layer_vals[-1])
    costs.append(cost)
    if (i+1) % 10 == 0:
        print(f"After {i+1} epochs at learning rate {learning_rate:.4f}, cost: ", cost)

plt.plot(costs)
plt.show()
print(predict(X, paramters, g))

Here is the plot of the cost values: Cost Curve (linked image). And the output is always [0.5 0.5 0.5 0.5].
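Notably, 0.69 is almost exactly ln 2 ≈ 0.693, which is what binary cross-entropy gives for a constant output of 0.5 regardless of the labels, so the network seems stuck at its initial state. A quick check of that number:

import numpy as np

y = np.array([[0., 1., 1., 0.]])
output = np.full_like(y, 0.5)   # what the stuck network produces
cost = -np.mean(y * np.log(output) + (1 - y) * np.log(1 - output))
print(cost)                     # 0.6931... == np.log(2)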


Tags: layer, return, rate, def, np, learning, relu, cost
1 Answer

User
#1 · Posted on 2024-10-05 14:29:24

Nice attempt at the neural network; you're close! The following changes fix the code:

  1. You have a typo in parameters = backward_pass(), which prevents your updated parameters from being used.
  2. Fixing that alone makes the network converge in only about 1 run in 1000. Initialize the weights with a standard deviation of 1 and increase the number of units per layer; with those changes the code works perfectly (see the sketch below for why the near-zero initialization stalls).
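For intuition on why the near-zero initialization stalls, here is a minimal sketch (it uses the standard dZ = A - y shortcut for a sigmoid output combined with binary cross-entropy):

import numpy as np

y = np.array([[0., 1., 1., 0.]])
A = np.full_like(y, 0.5)   # sigmoid(~0): the output when every weight is ~0.001
dZ = A - y                 # output-layer error for sigmoid + binary cross-entropy
print(dZ)                  # [[ 0.5 -0.5 -0.5  0.5]] -> the errors cancel pairwise
print(dZ.mean())           # 0.0 -> the output bias gradient vanishes exactly
# The hidden activations are also nearly identical across the four inputs, so
# dW = dZ @ A_prev.T / m is vanishingly small and the cost stays at ln(2) = 0.69.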

Here is the working code:

import numpy as np
import matplotlib.pyplot as plt

epsilon = 1e-15

def sigmoid(x):
    return 1. / (1. + np.exp(-x))


def relu(x):
    return np.maximum(0, x)


def sigmoid_backwards(A):
    return A * (1 - A)

def relu_backwards(A):
    return A > 0  # ReLU derivative: A = relu(Z) is positive exactly where Z > 0

def init_parameters(layer_dims):
    paramters = {}
    L = len(layer_dims)
    for i in range(1, L):
        paramters['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i-1])  # std-1 init instead of * 0.001
        paramters['b' + str(i)] = np.random.randn(layer_dims[i], 1)                # random instead of zeros
    return paramters

def forward_pass(X, paramters, g):
    layer_vals = [X]
    A = X
    L = len(g)
    for i in range(1, L):
        A_prev = A
        Z = np.dot(paramters['W' + str(i)], A_prev) + paramters['b' + str(i)]
        A = activations[g[i]](Z)
        layer_vals.append(A)

    return layer_vals

def predict(X, paramters, g):
    layer_vals = [X]
    A = X
    L = len(g)
    for i in range(1, L):
        A_prev = A
        Z = np.dot(paramters['W' + str(i)], A_prev) + paramters['b' + str(i)]
        A = activations[g[i]](Z)
        layer_vals.append(A)

    return layer_vals[-1][0]


def backward_pass(y_true, layer_vals, paramters, g, learning_rate=0.01):
    m = y_true.shape[1]
    dA = -y_true/(layer_vals[-1] + epsilon) + (1-y_true)/(1-layer_vals[-1] + epsilon)
    for i in range(len(layer_vals)-1, 0, -1):
        dZ = dA * activations_backwards[g[i]](layer_vals[i])
        dA_prev = np.dot(paramters['W' + str(i)].T, dZ)
        dW = 1/m * np.dot(dZ, layer_vals[i-1].T)
        db = 1/m * np.sum(dZ, axis=1, keepdims=True)
        dA = dA_prev
        paramters['W' + str(i)] -= learning_rate * dW
        paramters['b' + str(i)] -= learning_rate * db

    return paramters

def compute_cost(y, output):
    m = y.shape[1]
    return -1/m * np.sum(y * np.log(output+epsilon) + (1-y) * np.log(1-output+epsilon))


activations = {
    'sigmoid': sigmoid,
    'relu': relu
}

activations_backwards = {
    'sigmoid': sigmoid_backwards,
    'relu': relu_backwards
}


X = np.array([[0.000000, 0.000000, 1.000000, 1.000000],
              [0.000000, 1.000000, 0.000000, 1.000000]], dtype=float)
y = np.array([[0.0, 1.0, 1.0, 0.0]], dtype=float)


layer_dims = (2, 32, 32, 1)
#g = ['linear', 'sigmoid', 'sigmoid', 'sigmoid']
g = ['linear', 'relu', 'relu', 'sigmoid']
epochs = 10000
learning_rate = 0.001
paramters = init_parameters(layer_dims)
layer_vals = forward_pass(X, paramters, g)
costs = []
for i in range(epochs):
    paramters = backward_pass(y, layer_vals, paramters, g, learning_rate=learning_rate)
    layer_vals = forward_pass(X, paramters, g)
    cost = compute_cost(y, layer_vals[-1])
    costs.append(cost)
    if (i+1) % 10 == 0:
        print(f"After {i+1} epochs at learning rate {learning_rate:.4f}, cost: ", cost)

plt.plot(costs)
plt.savefig("delete.png")
print(predict(X, paramters, g))
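If you want to convince yourself the gradients are right, a quick finite-difference check helps (a minimal sketch assuming the functions above are in scope; params, vals, and h are just local names). Because backward_pass applies the update in place with step learning_rate * dW, running it once with learning_rate=1.0 on a deep copy recovers the analytic gradient as W_old - W_new:

import copy

np.random.seed(0)                  # arbitrary seed, for reproducibility only
params = init_parameters(layer_dims)
vals = forward_pass(X, params, g)

updated = backward_pass(y, vals, copy.deepcopy(params), g, learning_rate=1.0)
analytic = params['W1'][0, 0] - updated['W1'][0, 0]

# Centered finite difference of the cost with respect to the same weight.
h = 1e-6
p_plus, p_minus = copy.deepcopy(params), copy.deepcopy(params)
p_plus['W1'][0, 0] += h
p_minus['W1'][0, 0] -= h
numeric = (compute_cost(y, forward_pass(X, p_plus, g)[-1])
           - compute_cost(y, forward_pass(X, p_minus, g)[-1])) / (2 * h)
print(analytic, numeric)           # the two should agree to several decimals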
