简单的神经网络模型以tanh(x)为激活函数时收敛，但使用Leaky ReLU时不收敛

import numpy as np
import pandas as pd

# Training data: inputs x and desired outputs d.
x = pd.DataFrame([[1], [2], [3]], columns=['valores x'])
d = pd.DataFrame([[5], [4], [3]], columns=['valores desejados'])

# Convert the dataframes to arrays, scaling the desired values into (0, 1)
# so that they fall inside the output range of tanh.
x = x.to_numpy()
d = d / (1.05 * d.max())
d = d.to_numpy()


def df(x):
    """Return the derivative of tanh at x: sech^2(x) = 1 - tanh(x)^2."""
    return 1 - np.power(np.tanh(x), 2)


# Two-layer network without bias terms.
# net      = number of neurons in the first layer
# n        = learning rate
# precisao = threshold on the mean squared error that ends training
net = 3
n = 0.1
precisao = 0.00001

w1 = np.random.rand(net, len(x[0]))
w2 = np.random.rand(1, net)

# The loop runs until the error threshold is reached and has no epoch cap,
# so it is only started when the file is executed as a script.
if __name__ == "__main__":
    E_M = 1
    epocas = 0
    while E_M > precisao:
        errofinal = 0
        for entrada, alvo in zip(x, d):
            a0 = entrada.reshape(-1, 1)

            # FORWARD
            i1 = np.matmul(w1, a0)
            y1 = np.tanh(i1)
            i2 = np.matmul(w2, y1)
            y2 = np.tanh(i2)

            # Error against the desired value.
            erro = alvo.reshape(-1, 1) - y2

            # BACKPROPAGATION: both deltas are computed before either weight
            # matrix changes, so delta_1 uses the same w2 as the forward pass.
            delta_2 = erro * df(i2)
            delta_1 = np.matmul(w2.T, delta_2) * df(i1)
            w2 = w2 + n * np.matmul(delta_2, y1.reshape(1, net))
            w1 = w1 + n * np.matmul(delta_1, a0.T)

            errofinal = errofinal + 0.5 * erro**2

        # Mean of the per-sample squared errors over the epoch.
        E_M = errofinal / len(x)
        epocas += 1

    print(E_M)

import numpy as np
import pandas as pd

# Training data: inputs x and desired outputs d.
x = pd.DataFrame([[1], [2], [3]], columns=['valores x'])
d = pd.DataFrame([[5], [4], [3]], columns=['valores desejados'])

# Convert the dataframes to arrays. The desired values are left unscaled
# here (no normalisation step is applied in this version).
x = x.to_numpy()
d = d.to_numpy()


def df(x):
    """Return the leaky-ReLU derivative: 0.01 where x <= 0, 1 elsewhere.

    A single np.where is used on purpose. Assigning 0.01 to the non-positive
    entries first and then assigning 1 to every entry > 0 would overwrite
    the freshly written 0.01 values as well, making the result all ones.
    """
    return np.where(np.array(x) <= 0, 0.01, 1)


def f(x):
    """Leaky ReLU: x where x > 0, otherwise 0.01 * x."""
    return np.where(x > 0, x, x * 0.01)


# Two-layer network without bias terms.
# net      = number of neurons in the first layer
# n        = learning rate
# precisao = threshold on the summed squared error that ends training
net = 3
n = 1e-4
precisao = 0.0001

w1 = np.random.rand(net, len(x[0]))
w2 = np.random.rand(1, net)

# The loop runs until the error threshold is reached and has no epoch cap,
# so it is only started when the file is executed as a script.
if __name__ == "__main__":
    E_M = 20
    epocas = 0
    while E_M > precisao:
        errofinal = 0
        for entrada, alvo in zip(x, d):
            a0 = entrada.reshape(-1, 1)

            # FORWARD
            i1 = np.matmul(w1, a0)
            y1 = f(i1)
            i2 = np.matmul(w2, y1)
            y2 = f(i2)

            # Error against the desired value.
            erro = alvo.reshape(-1, 1) - y2

            # BACKPROPAGATION: both deltas are computed before either weight
            # matrix changes, so delta_1 uses the same w2 as the forward pass.
            delta_2 = erro * df(i2)
            delta_1 = np.matmul(w2.T, delta_2) * df(i1)
            w2 = w2 + n * np.matmul(delta_2, y1.reshape(1, net))
            w1 = w1 + n * np.matmul(delta_1, a0.T)

            errofinal = errofinal + 0.5 * erro**2

        # The stopping test uses the summed error, not the mean over samples.
        E_M = errofinal
        epocas += 1

    print(E_M)

import numpy as np
import pandas as pd

# Training data: inputs x and desired outputs d.
x = pd.DataFrame([[1], [2], [3]], columns=['valores x'])
d = pd.DataFrame([[5], [4], [3]], columns=['valores desejados'])

# Convert the dataframes to arrays. The desired values are left unscaled
# here (no normalisation step is applied in this version).
x = x.to_numpy()
d = d.to_numpy()


def df(x):
    """Return the leaky-ReLU derivative: 0.01 where x <= 0, 1 elsewhere."""
    return np.where(x <= 0, 0.01, 1)


def f(x):
    """Leaky ReLU: x where x > 0, otherwise 0.01 * x."""
    return np.where(x > 0, x, x * 0.01)


# Two-layer network without bias terms.
# net      = number of neurons in the first layer
# n        = learning rate
# precisao = threshold on the summed squared error that ends training
net = 3
n = 1e-3
precisao = 0.1

w1 = np.random.rand(net, len(x[0]))
w2 = np.random.rand(1, net)

# The loop runs until the error threshold is reached and has no epoch cap,
# so it is only started when the file is executed as a script.
if __name__ == "__main__":
    E_M = 20
    epocas = 0
    while E_M > precisao:
        errofinal = 0
        for entrada, alvo in zip(x, d):
            a0 = entrada.reshape(-1, 1)

            # FORWARD
            i1 = np.matmul(w1, a0)
            y1 = f(i1)
            i2 = np.matmul(w2, y1)
            y2 = f(i2)

            # Error against the desired value.
            erro = alvo.reshape(-1, 1) - y2

            # BACKPROPAGATION: deltas first, then the weight updates.
            delta_2 = erro * df(i2)
            delta_1 = np.matmul(w2.T, delta_2) * df(i1)
            w2 = w2 + n * np.matmul(delta_2, y1.reshape(1, net))
            w1 = w1 + n * np.matmul(delta_1, a0.T)

            errofinal = errofinal + 0.5 * erro**2

        # The stopping test uses the summed error, not the mean over samples.
        E_M = errofinal
        epocas += 1

    print(E_M)

1条回答

网友

1楼 · 发布于 2024-09-23 10:34:17

您需要为网络添加偏置（bias）项

您试图拟合的方程是 y = 6 - x。如果可以把6作为截距（偏置），这个问题就很简单；但我认为如果没有偏置，实际上是无法拟合的

加入偏置后，许多函数就更容易表示，这也是通常都会包含偏置的原因。这个关于偏置在神经网络中作用的问答（Q&A on the role of bias in NNs）解释得更透彻

我修改了您的代码以添加偏置，并采用了更常见的命名约定，修改后在我这里可以收敛

# Hyper-parameters: hidden-layer width, learning rate, and the
# summed-squared-error threshold that ends training.
net = 3
n = 1e-3
precisao = 0.0001

# Random initial weights plus one bias per layer. Each bias starts as a
# single scalar; after the first update it holds an array shaped like the
# corresponding delta.
w1 = np.random.rand(net, len(x[0]))
bias1 = np.random.rand()

w2 = np.random.rand(1, net)
bias2 = np.random.rand()

E_M = 20
epocas = 0

while E_M > precisao:
    errofinal = 0
    for amostra, desejado in zip(x, d):
        a0 = amostra.reshape(-1, 1)
        targ = desejado.reshape(-1, 1)

        # FORWARD
        z1 = w1 @ a0 + bias1
        a1 = f(z1)

        z2 = w2 @ a1 + bias2
        a2 = f(z2)

        erro = a2 - targ

        # BACKPROPAGATION: both deltas are computed before any parameter
        # is changed.
        delta_2 = erro * df(z2)
        delta_1 = (w2.T @ delta_2) * df(z1)
        bias2 = bias2 - n * delta_2
        bias1 = bias1 - n * delta_1
        w2 = w2 - n * (delta_2 @ a1.T)
        w1 = w1 - n * (delta_1 @ a0.T)

        errofinal = errofinal + 0.5 * erro**2

    # The stopping test uses the summed error, not the mean over samples.
    E_M = errofinal
    epocas += 1
    # Progress report every 1000 epochs.
    if not epocas % 1000:
        print(epocas, E_M)

我提高了学习率，以便更快地收敛

1000 [[0.14401507]]
2000 [[0.00028834]]

早期错误修复建议

导数始终被设置为1

def df(x):
    # Written as the leaky-ReLU derivative, but the two masked assignments
    # below interact so that (for float input) every entry ends up as 1.
    x = np.array(x)  # np.array copies, so the caller's data is untouched
    x[x<=0] = 0.01  # non-positive entries become 0.01, which is positive
    x[x>0] = 1  # this mask now also matches the 0.01 entries written above
    return x

`x[x<=0] = 0.01` 这一行把所有非正值设为 1/100，即一个正值。此后数组中的每个值都是正的：原本为正的值不受影响，而负值或零已经变成了正值。因此下一行 `x[x>0] = 1` 会把所有导数都设为1

试试这个：

def df(x):
    """Return the leaky-ReLU slope for each element of x.

    Entries that are <= 0 map to 0.01; every other entry maps to 1.
    """
    non_positive = np.array(x) <= 0
    return np.where(non_positive, 0.01, 1)

相关问题更多 >

编程相关推荐

热门问题

热门文章