NaN in numpy dot product

Posted 2024-07-01 08:20:36


Below is some code that implements a 2-layer neural network in numpy for a fitting problem. The activation function is ReLU, the training algorithm is Adam, and the loss function is half of the mean squared error. However, when the batch size is large (e.g. 10000), the loss becomes NaN after a number of iterations; with small batches the problem does not occur. Can anyone explain why this happens? (The data come from the final MATLAB workspace, saved as a .mat file.)
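To narrow down where the non-finite values first appear, a quick diagnostic (not part of the script below, only a sketch; the check_finite helper and its placement are hypothetical) is to make numpy raise on floating-point overflow and to test the intermediate arrays after each pass:

import numpy as np

# raise FloatingPointError instead of silently propagating inf/nan
np.seterr(over='raise', invalid='raise')

def check_finite(name, arr):
    # report arrays that already contain inf or nan
    if not np.all(np.isfinite(arr)):
        print("non-finite values detected in", name)

# hypothetical placement inside the training loop:
# NN.feedforward()
# check_finite("hidden_layer1", NN.hidden_layer1)
# check_finite("output_layer", NN.output_layer)
# NN.backpropagation()
# check_finite("dW1", NN.dW1)
# check_finite("dW2", NN.dW2)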

import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
data = sio.loadmat('6_final_mapping_pos.mat')
class NeuralNetwork():
    def __init__(self):
        self.batch_size = 256
        self.input_size = 5           # input dimension is 5
        self.hidden_layer1_size = 50
        self.output_size = 1          # output dimension is 1
        self.train_data = data['training_data_pos']
        self.df_traindata = pd.DataFrame(data=self.train_data)
        self.validation_data_num = 17142
        self.valid_data = data['validation_data_pos']
        self.df_validdata = pd.DataFrame(data=self.valid_data)

        # weight initialization for ReLU (He initialization)
        self.W1 = np.random.randn(self.input_size, self.hidden_layer1_size) / np.sqrt(self.input_size/2)
        self.W2 = np.random.randn(self.hidden_layer1_size, self.output_size) / np.sqrt(self.hidden_layer1_size/2)

        # bias initialization
        self.b1 = np.zeros((1, self.hidden_layer1_size))
        self.b2 = np.zeros((1, self.output_size))

        self.lr = 5e-3        # learning rate
        self.reg = 1e-3       # regularization strength
        self.p = 0.5          # keep probability; dropout probability = 1-p

        # Adam moment estimates (the W3/b3 moments are unused in this 2-layer network)
        self.first_moment_W3 = 0
        self.second_moment_W3 = 0
        self.first_moment_W2 = 0
        self.second_moment_W2 = 0
        self.first_moment_W1 = 0
        self.second_moment_W1 = 0
        self.first_moment_b3 = 0
        self.second_moment_b3 = 0
        self.first_moment_b2 = 0
        self.second_moment_b2 = 0
        self.first_moment_b1 = 0
        self.second_moment_b1 = 0

    def feedforward(self):
        # randomly selected mini-batch as inputs
        # (DataFrame.as_matrix() is gone in recent pandas; select columns and call to_numpy())
        self.df_sample_t = self.df_traindata.sample(n=self.batch_size)
        self.train_input = self.df_sample_t[[0, 1, 2, 3, 4]].to_numpy()
        self.train_output = self.df_sample_t[[5]].to_numpy()

        # hidden layer with dropout technique
        self.hidden_layer1 = np.maximum(0, np.dot(self.train_input, self.W1) + self.b1)
        U1 = np.random.rand(*self.hidden_layer1.shape) < self.p  # drop mask
        self.hidden_layer1 *= U1  # drop!

        self.output_layer = np.dot(self.hidden_layer1, self.W2) + self.b2
        self.data_loss = np.sum(0.5*(self.output_layer - self.train_output)**2) / self.batch_size
        self.reg_loss = 0.5*self.reg*np.sum(self.W1*self.W1) + 0.5*self.reg*np.sum(self.W2*self.W2)
        self.total_loss = self.data_loss + self.reg_loss

    def backpropagation(self):
        self.d_output = (self.output_layer - self.train_output) / self.batch_size

        # data part
        self.dW2 = np.dot(self.hidden_layer1.T, self.d_output)
        self.db2 = np.sum(self.d_output, axis=0, keepdims=True)
        self.dhidden1 = np.dot(self.d_output, self.W2.T)
        self.dhidden1[self.hidden_layer1 <= 0] = 0

        self.dW1 = np.dot(self.train_input.T, self.dhidden1)
        self.db1 = np.sum(self.dhidden1, axis=0, keepdims=True)

        # regularization part
        self.dW2 = self.dW2 + self.reg * self.W2
        self.dW1 = self.dW1 + self.reg * self.W1

    def Adam(self, epoch, dW2, dW1, db2, db1):
        beta1 = 0.9
        beta2 = 0.99

        self.first_moment_W2 = beta1*self.first_moment_W2 + (1-beta1)*dW2
        self.second_moment_W2 = beta2*self.second_moment_W2 + (1-beta2)*dW2*dW2
        first_unbias_W2 = self.first_moment_W2 / (1 - beta1 ** epoch)
        second_unbias_W2 = self.second_moment_W2 / (1 - beta2 ** epoch)
        self.W2 -= self.lr * first_unbias_W2 / (np.sqrt(second_unbias_W2) + 1e-7)

        self.first_moment_W1 = beta1*self.first_moment_W1 + (1-beta1)*dW1
        self.second_moment_W1 = beta2*self.second_moment_W1 + (1-beta2)*dW1*dW1
        first_unbias_W1 = self.first_moment_W1 / (1 - beta1 ** epoch)
        second_unbias_W1 = self.second_moment_W1 / (1 - beta2 ** epoch)
        self.W1 -= self.lr * first_unbias_W1 / (np.sqrt(second_unbias_W1) + 1e-7)

        self.first_moment_b2 = beta1*self.first_moment_b2 + (1-beta1)*db2
        self.second_moment_b2 = beta2*self.second_moment_b2 + (1-beta2)*db2*db2
        first_unbias_b2 = self.first_moment_b2 / (1 - beta1 ** epoch)
        second_unbias_b2 = self.second_moment_b2 / (1 - beta2 ** epoch)
        self.b2 -= self.lr * first_unbias_b2 / (np.sqrt(second_unbias_b2) + 1e-7)

        self.first_moment_b1 = beta1*self.first_moment_b1 + (1-beta1)*db1
        self.second_moment_b1 = beta2*self.second_moment_b1 + (1-beta2)*db1*db1
        first_unbias_b1 = self.first_moment_b1 / (1 - beta1 ** epoch)
        second_unbias_b1 = self.second_moment_b1 / (1 - beta2 ** epoch)
        self.b1 -= self.lr * first_unbias_b1 / (np.sqrt(second_unbias_b1) + 1e-7)

    def validation(self):
        self.df_sample_v = self.df_validdata.sample(n=self.validation_data_num)
        self.valid_input = self.df_sample_v[[0, 1, 2, 3, 4]].to_numpy()
        self.valid_output = self.df_sample_v[[5]].to_numpy()
        # scale activations by p at test time to compensate for dropout
        self.hidden_layer1 = np.maximum(0, np.dot(self.valid_input, self.W1) + self.b1) * self.p
        self.output_layer = np.dot(self.hidden_layer1, self.W2) + self.b2
        self.data_loss = np.sum(0.5*(self.output_layer - self.valid_output)**2) / self.validation_data_num
        self.reg_loss = 0.5*self.reg*np.sum(self.W1*self.W1) + 0.5*self.reg*np.sum(self.W2*self.W2)
        self.total_loss = self.data_loss + self.reg_loss


NN = NeuralNetwork()
num_iterations = 120

training_loss = np.array([])
validation_loss = np.array([])
validation_dataloss = np.array([])
t=1
T=np.array([range(1,num_iterations)]).T

# Training and validation
while(t < num_iterations):
    NN.feedforward()
    NN.backpropagation()
    NN.Adam(t, NN.dW2, NN.dW1, NN.db2, NN.db1)
    training_loss = np.append(training_loss, NN.total_loss)
    if t % 10 == 0:
        print ("training:" + "total loss = %f, data loss = %f, regularization loss = %f" % (NN.total_loss,NN.data_loss,NN.reg_loss))
    NN.validation()
    validation_loss = np.append(validation_loss, NN.total_loss)
    validation_dataloss = np.append(validation_dataloss, NN.data_loss)
    if t % 10 == 0:
        print ("validation:" + "total loss = %f, data loss = %f, regularization loss = %f" % (NN.total_loss,NN.data_loss,NN.reg_loss))
    t+=1

