以向量为中心的神经网络Python实现问题的回答

以向量为中心的神经网络Python实现

回答此问题可获得 20 贡献值，回答如果被采纳可获得 50 分。

你好，我尝试使用一个以向量为中心的神经网络（一个输入节点、一个输出节点和两个隐藏层，每个隐藏层有3个节点）来拟合一个非常简单的 x**2 函数——只是为了验证它能否正常工作。为此，我使用了下面的代码。结果我得到的是橙色的线，而蓝色的线是真实值： <a href="https://i.stack.imgur.com/6WYIW.jpg" rel="nofollow noreferrer"><img src="https://i.stack.imgur.com/6WYIW.jpg" alt="enter image description here"/></a> 正如你所看到的，有些地方不对。我尝试过改变迭代次数和学习率的值，但没有成功。如果把迭代过程中的损失画出来，100次迭代会得到下图： <a href="https://i.stack.imgur.com/1cPOv.jpg" rel="nofollow noreferrer"><img src="https://i.stack.imgur.com/1cPOv.jpg" alt="enter image description here"/></a> 我还没有添加偏置（bias），但我认为即使没有额外的偏置节点，也应该能够拟合这个简单的函数。此外，我猜测错误最有可能出现在代码中“计算相对于权重的梯度”（即注释 # Calculate the Gradients with respect to the weights 所在）的部分。所以原则上我有两个问题： <ol> <li>我的代码中是否存在导致其无法工作的根本性错误？</li> <li>如果没有，为什么我的模型无法拟合这么简单的数据？</li> </ol> 提前感谢你的帮助。下面是代码——可以直接运行： <pre><code>class Neural_Net: """ """ def __init__(self, activation_function, learning_rate, runs): self.activation_function = activation_function self.X_train = np.linspace(0,1,1000) self.y_train = self.X_train**2 plt.plot(self.X_train, self.y_train) self.y_pred = None self.W_input = np.random.randn(1, 3) self.Partials_W_input = np.random.randn(1, 3) self.W_hidden = np.random.randn(3,3) self.Partials_W_hidden = np.random.randn(3,3) self.W_output = np.random.randn(3,1) self.Partials_W_output = np.random.randn(3,1) self.Activations = np.ones((3,2)) self.Partials = np.ones((3,2)) self.Output_Gradient = None self.Loss = 0 self.learning_rate = learning_rate self.runs = runs self.Losses = [] self.i = 0 def apply_activation_function(self, activation_vector): return 1/(1+np.exp(-activation_vector)) def forward_pass(self, training_instance): for layer in range(len(self.Activations[0])): # For the first layer between X and the first hidden layer pre_activation_first = self.W_input.T @ training_instance.reshape(1,1) # print('pre activation: ', pre_activation) # Apply the activation function self.Activations[:,0] = self.apply_activation_function(pre_activation_first).ravel() else: pre_activation_hidden = self.W_hidden.T @ self.Activations[:, layer-1] self.Activations[:, layer] = 
self.apply_activation_function(pre_activation_hidden) # print('Activations: ', self.Activations) output = self.W_output.T @ self.Activations[:, -1] # print('output: ', output) return output def backpropagation(self, y_true, training_instance): if self.activation_function == 'linear': # Calculate the ouput gradient self.Output_Gradient = -(y_true-self.y_pred) # print('Output Gradient: ', self.Output_Gradient) # Calculate the partial gradients of the Error with respect to the pre acitvation values in the nodes self.Partials[:, 1] = self.Activations[:, 1]*(1-self.Activations[:, 1])*(self.W_output @ self.Output_Gradient) self.Partials[:, 0] = self.Activations[:, 0]*(1-self.Activations[:, 0])*(self.W_hidden @ self.Partials[:, 1]) # print('Partials: ', self.Partials) # Calculate the Gradients with respect to the weights self.Partials_W_output = self.Output_Gradient * self.Activations[:, -1] # print('Partials_W_output: ', self.Partials_W_output) self.Partials_W_hidden = self.Partials[:, -1].reshape(3,1) * self.Activations[:, 0].reshape(1,3) # print('Partials_W_hidden: ',self.Partials_W_hidden) self.Partials_W_input = (self.Partials[:, 0].reshape(3,1) * training_instance.T).T # print('Partials_W_input: ', self.Partials_W_input) def weight_update(self, training_instance, learning_rate): # Output Layer weights w_output_old = self.W_output.copy() self.W_output = w_output_old - learning_rate*self.Output_Gradient # Hidden Layer weights w_hidden_old = self.W_hidden.copy() self.W_hidden = w_hidden_old - learning_rate * self.W_hidden # print('W_hidden new: ', self.W_hidden) # Input Layer weights w_input_old = self.W_input.copy() self.W_input = w_input_old - learning_rate * self.W_input # print('W_input new: ', self.W_input) def train_model(self): for _ in range(self.runs): for instance in range(len(self.X_train)): # forward pass self.y_pred = self.forward_pass(self.X_train[instance]) # Calculate loss self.Loss = self.calc_loss(self.y_pred, self.y_train[instance]) # print('Loss: ', 
self.Loss) # Calculate backpropagation self.backpropagation(self.y_train[instance], self.X_train[instance]) # Update weights self.weight_update(self.X_train[instance], self.learning_rate) # print(self.Losses) # plt.plot(range(len(self.Losses)), self.Losses) # plt.show() # Make predictions on training data to check if the model is basically able to fit the training data predictions = [] for i in np.linspace(0,1,1000): predictions.append(self.make_prediction(i)) plt.plot(np.linspace(0,1,1000), predictions) def make_prediction(self, X_new): return self.forward_pass(X_new) def calc_loss(self, y_pred, y_true): loss = (1/2)*(y_true-y_pred)**2 self.Losses.append(loss[0]) return (1/2)*(y_true-y_pred)**2 def accuracy(self): pass Neural_Net('linear', 0.0001, 10).train_model() </code></pre>

0 条评论
分类：Python问答

默认排序 | 时间排序

1 个回答

匿名 1天前

　擅长：python、mysql、java

我已经解决了这个问题：首先，我弄混了一些维度，现在已经纠正。不过，真正的问题出在权重更新上，我当时是这样更新的，例如： <pre><code>self.W_hidden = w_hidden_old - learning_rate * self.W_hidden </code></pre> 但这是错误的，因为学习率必须乘以误差关于权重的偏导数（即梯度），而不是权重矩阵本身。因此，正确的写法是： <pre><code>self.W_hidden = w_hidden_old - learning_rate * self.Partials_W_hidden </code></pre> 修改之后，我得到了以下结果和损失曲线： <a href="https://i.stack.imgur.com/C6crj.jpg" rel="nofollow noreferrer"><img src="https://i.stack.imgur.com/C6crj.jpg" alt="enter image description here"/></a> <a href="https://i.stack.imgur.com/X7bsv.jpg" rel="nofollow noreferrer"><img src="https://i.stack.imgur.com/X7bsv.jpg" alt="enter image description here"/></a> 最终的代码是： <pre><code>class Neural_Net: """ """ def __init__(self, activation_function, learning_rate, runs): self.activation_function = activation_function self.Data = pd.read_csv(r"U:\19_035_Machine_Learning_Workshop\2_Workshopinhalt\Weitere\Neural Networks\AirQualityUCI\AirQualityUCI.csv", sep=';', decimal=b',').iloc[:, :-2].dropna() self.X_train = np.linspace(0,5,1000) self.y_train = np.sin(self.X_train) plt.plot(self.X_train, self.y_train) self.y_pred = None self.W_input = np.random.randn(1, 3) self.Partials_W_input = np.random.randn(1, 3) self.W_hidden = np.random.randn(3,3) self.Partials_W_hidden = np.random.randn(3,3) self.W_output = np.random.randn(3,1) self.Partials_W_output = np.random.randn(3,1) self.Activations = np.zeros((3,2)) self.Partials = np.zeros((3,2)) self.Output_Gradient = None self.Loss = 0 self.learning_rate = learning_rate self.runs = runs self.Losses = [] self.i = 0 def apply_activation_function(self, activation_vector): # print('activation: ', 1/(1+np.exp(-activation_vector))) return 1/(1+np.exp(-activation_vector)) def forward_pass(self, training_instance): for layer in range(len(self.Activations[0])): # For the first layer between X and the first hidden layer if layer == 0: pre_activation_first = self.W_input.T @ training_instance.reshape(1,1) # print('pre activation: ', pre_activation) # Apply the activation function 
self.Activations[:,0] = self.apply_activation_function(pre_activation_first).ravel() else: pre_activation_hidden = self.W_hidden.T @ self.Activations[:, layer-1] self.Activations[:, layer] = self.apply_activation_function(pre_activation_hidden) # print('Activations: ', self.Activations) output = self.W_output.T @ self.Activations[:, -1].reshape(-1,1) # print('output: ', output) return output def backpropagation(self, y_true, training_instance): if self.activation_function == 'sigmoid': pass if self.activation_function == 'linear': # Calculate the ouput gradient self.Output_Gradient = -(y_true-self.y_pred) # print('Output Gradient: ', self.Output_Gradient) # Calculate the partial gradients of the Error with respect to the pre acitvation values in the nodes self.Partials[:, 1] = ((self.Activations[:, 1]*(1-self.Activations[:, 1])).reshape(-1,1)*(self.W_output @ self.Output_Gradient)).ravel() self.Partials[:, 0] = self.Activations[:, 0]*(1-self.Activations[:, 0])*(self.W_hidden @ self.Partials[:, 1]) # print('Partials: ', self.Partials) # Calculate the Gradients with respect to the weights self.Partials_W_output = self.Output_Gradient * self.Activations[:, -1] # print('Partials_W_output: ', self.Partials_W_output) self.Partials_W_hidden = self.Partials[:, -1].reshape(3,1) * self.Activations[:, 0].reshape(1,3) # print('Partials_W_hidden: ',self.Partials_W_hidden) self.Partials_W_input = (self.Partials[:, 0].reshape(3,1) * training_instance.T).T # print('Partials_W_input: ', self.Partials_W_input) def weight_update(self, training_instance, learning_rate): # Output Layer weights w_output_old = self.W_output.copy() self.W_output = w_output_old - learning_rate*self.Partials_W_output.reshape(-1,1) # Hidden Layer weights w_hidden_old = self.W_hidden.copy() self.W_hidden = w_hidden_old - learning_rate * self.Partials_W_hidden # print('W_hidden new: ', self.W_hidden) # Input Layer weights w_input_old = self.W_input.copy() self.W_input = w_input_old - learning_rate * 
self.Partials_W_input # print('W_input new: ', self.W_input) def train_model(self): # print('Initially predicted Value: ', self.make_prediction(self.X_test[0])) # print('True value: ', self.y_test[0]) for _ in range(self.runs): for instance in range(len(self.X_train)): # forward pass self.y_pred = self.forward_pass(self.X_train[instance]) # Calculate loss self.Loss = self.calc_loss(self.y_pred, self.y_train[instance]) # print('Loss: ', self.Loss) # Calculate backpropagation self.backpropagation(self.y_train[instance], self.X_train[instance]) # Update weights self.weight_update(self.X_train[instance], self.learning_rate) # print(self.Losses) # plt.plot(range(len(self.Losses)), self.Losses) # plt.show() # Make predictions predictions = [] for i in np.linspace(0,5,1000): predictions.append(self.make_prediction(i)[0]) plt.plot(np.linspace(0,5,1000), predictions) def make_prediction(self, X_new): return self.forward_pass(X_new) def calc_loss(self, y_pred, y_true): loss = (1/2)*(y_true-y_pred)**2 self.Losses.append(loss[0]) return (1/2)*(y_true-y_pred)**2 def accuracy(self): pass Neural_Net('linear', 0.1, 1500).train_model() </code></pre> 为了进一步改进这段代码，还需要在隐藏层的输入中添加偏置（bias）。

以向量为中心的神经网络Python实现

1 个回答

相关Python问题