为什么这种反向传播实现不能正确训练权重？

def backprop(train_set, wts, bias, eta):
    """Run one pass of mini-batch gradient descent with backpropagation.

    Args:
        train_set: Sequence of mini-batches. Each mini-batch is a sized
            sequence of ``(test, sol)`` pairs that are passed to
            ``conv_to_col`` and ``create_tgt_vec`` respectively.
        wts: List of per-layer weight matrices, ordered input to output.
        bias: List of per-layer bias vectors, same order as ``wts``.
        eta: Learning rate. Each batch step uses ``eta / len(batch)`` so the
            summed gradient is averaged over the samples in that batch.

    Returns:
        A ``(wts, bias)`` tuple of new lists holding the updated parameters.
        The input lists are not mutated.

    Notes:
        Assumes ``conv_to_col`` / ``create_tgt_vec`` return column vectors
        whose shapes are compatible with ``wts`` / ``bias`` -- TODO confirm
        against those helpers, which are defined outside this block.
    """
    for next_set in train_set:
        batch_size = len(next_set)
        if batch_size == 0:
            # Nothing to average over; avoid dividing by zero.
            continue
        # Scale by this batch's own size so a short final batch is still
        # averaged correctly.
        learning_coef = eta / batch_size

        # These record the sum of the cost gradients in the batch
        sum_del_w = [np.zeros(w.shape) for w in wts]
        sum_del_b = [np.zeros(b.shape) for b in bias]

        for test, sol in next_set:
            del_w = [np.zeros(wt.shape) for wt in wts]
            del_b = [np.zeros(bt.shape) for bt in bias]

            # These two helper functions take training set data and make
            # them useful
            activation = conv_to_col(test)
            outp = create_tgt_vec(sol)

            # Feedforward step. post_sig[0] is the network input itself, so
            # post_sig has one more entry than pre_sig and post_sig[k] is
            # the activation feeding wts[k]. Without the input entry the
            # gradient of the first layer cannot be formed.
            pre_sig = []
            post_sig = [activation]
            for w, b in zip(wts, bias):
                weighted_input = np.dot(w, activation) + b
                activation = sigmoid(weighted_input)
                pre_sig.append(weighted_input)
                post_sig.append(activation)

            # Backpropagation gradient: output layer first.
            delta = cost_deriv(post_sig[-1], outp) * sigmoid_deriv(pre_sig[-1])
            del_b[-1] = delta
            del_w[-1] = np.dot(delta, post_sig[-2].transpose())

            # Walk back through every remaining layer. The upper bound is
            # len(wts) + 1 so that i == len(wts) reaches the first layer,
            # whose input activation is post_sig[0].
            for i in range(2, len(wts) + 1):
                sig_deriv = sigmoid_deriv(pre_sig[-i])
                delta = np.dot(wts[-i + 1].transpose(), delta) * sig_deriv
                del_b[-i] = delta
                del_w[-i] = np.dot(delta, post_sig[-i - 1].transpose())

            sum_del_w = [dw + sdw for dw, sdw in zip(del_w, sum_del_w)]
            sum_del_b = [db + sdb for db, sdb in zip(del_b, sum_del_b)]

        # Modify weights based on current batch
        wts = [wt - learning_coef * dw for wt, dw in zip(wts, sum_del_w)]
        bias = [bt - learning_coef * db for bt, db in zip(bias, sum_del_b)]

    return wts, bias

1条回答

网友

1楼 · 发布于 2024-06-25 23:12:37

我认为你的问题出在初始权重的选择以及权重初始化算法的选择上。Encog 的作者 Jeff Heaton 指出，这种初始化方法的表现通常比其他初始化方法差。这里还有一份关于各种权重初始化算法性能的对比结果。根据我自己的经验，建议你用符号不同（有正有负）的值来初始化权重。即使在所有输出都为正的情况下，符号不同的权重也比符号相同的权重表现得更好。

相关问题更多 >

编程相关推荐

热门问题

热门文章