TensorFlow: the error rate does not improve, even with more iterations or a different learning rate


I used TensorFlow to build my own simple CNN on the MNIST dataset. I finally got the convnet deployed, and the code runs without any errors or warnings. However, the value printed in the terminal is always 0.098, and it does not improve no matter how I change the number of iterations or the learning rate. Where did I go wrong? Can anyone help me?

My code (the main script):

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from MyNet import weight_variable,bias_variable,conv_layer,pooling_layer,relu_layer,fully_connecd,softmax_layer


def compute_accuracy(v_xs,v_ys):
    global prediction
    y_pre = sess.run(prediction,feed_dict={xs:v_xs})
    correct_prediction = tf.equal(tf.argmax(y_pre,1),tf.argmax(v_ys,1))
    acc = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    result = sess.run(acc,feed_dict={xs:v_xs,ys:v_ys})
    return result


xs = tf.placeholder(tf.float32,[None,784])
ys = tf.placeholder(tf.float32,[None,10])
x_img = tf.reshape(xs,[-1,28,28,1])

########## LAYER DEFINITION START ##########
# layer 1
conv1_w = weight_variable([5,5,1,6]) # [cols,rows,channels,n]
conv1_b = bias_variable([6])
# [28*28*1]->[24*24*6]
conv1 = conv_layer(x_img, conv1_w, name='conv1') + conv1_b
# [24*24*6]->[12*12*6]
pool1 = pooling_layer(conv1, name='pool1')
relu1 = relu_layer(pool1,name='relu1')

# layer 2
conv2_w = weight_variable([5,5,6,16]) # [cols,rows,channels,n]
conv2_b = bias_variable([16])
# [12*12*6]->[8*8*16]
conv2 = conv_layer(relu1, conv2_w, name='conv2') + conv2_b
# [8*8*16]->[4*4*16]
pool2 = pooling_layer(conv2, name='pool2')
relu2 = relu_layer(pool2, name='relu2')

# layer 3 (fc)
fc_in_size = (relu2.get_shape()[1]*relu2.get_shape()[2]*relu2.get_shape()[3]).value
fc3_w = weight_variable([fc_in_size,120])
fc3_b = bias_variable([120])
relu2_col = tf.reshape(relu2,[-1,fc_in_size])
fc3 = fully_connecd(relu2_col,fc3_w, name='fc3')+fc3_b
relu3 = relu_layer(fc3, name='relu3')

# layer 4 (fc)
fc4_w = weight_variable([120,10])
fc4_b = bias_variable([10])
fc4 = fully_connecd(relu3,fc4_w, name='fc4')+fc4_b
relu4 = relu_layer(fc4, name='relu4')

# layer 5 (prediction)
prediction = softmax_layer(relu4)

# training solver
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys*tf.log(prediction),
                                              reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(cross_entropy)
########## LAYER DEFINITION END ##########



# start training
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for step in range(500):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step,feed_dict={xs:batch_xs, ys:batch_ys})
    if step % 50 == 0:
        print( compute_accuracy(mnist.test.images, mnist.test.labels) )

sess.close()

Here is MyNet.py:

(the MyNet.py code block did not survive the page extraction)
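The actual contents of MyNet.py cannot be recovered from this page, but the import list and the call sites above pin down the function signatures, and the shape comments pin down the padding (28x28 -> 24x24 with a 5x5 kernel implies 'VALID'). The following is a plausible reconstruction for readers who want to run the question's code; every definition here is an inference, not the asker's actual file:

import tensorflow as tf

def weight_variable(shape):
    # NOTE: per the first answer, the original presumably called
    # tf.truncated_normal without an explicit stddev (default 1.0)
    init = tf.truncated_normal(shape)
    return tf.Variable(init)

def bias_variable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv_layer(x, w, name=None):
    # 'VALID' padding matches the shape comments: 28x28 -> 24x24 with 5x5
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='VALID', name=name)

def pooling_layer(x, name=None):
    # 2x2 max pooling with stride 2: 24x24 -> 12x12
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='VALID', name=name)

def relu_layer(x, name=None):
    return tf.nn.relu(x, name=name)

def fully_connecd(x, w, name=None):
    return tf.matmul(x, w, name=name)

def softmax_layer(x):
    return tf.nn.softmax(x)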

Here is the terminal output:

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
>>> 

Tags: name, layer, data, tf, variable, sess, mnist, prediction
2 Answers

In your weight initialization, replace tf.truncated_normal(shape, std) with:

def weight_variable(shape,stddev=0.1):
    init = tf.truncated_normal(shape,stddev = stddev)
    return tf.Variable(init)
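With this change, existing calls such as weight_variable([5,5,1,6]) keep working unchanged, but the initial weights are now drawn with stddev 0.1 instead of tf.truncated_normal's default of 1.0.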

See my explanation in the comments. To illustrate:

(the example code block did not survive the page extraction)

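The original illustration is lost, but the point it presumably made can be reproduced with a few lines of NumPy (this sketch is my own, not the answerer's code): the spread of the pre-activations grows with the square root of the fan-in, so with 784 inputs and weight stddev 1.0 the logits end up tens of units apart, the softmax output saturates to (nearly) one-hot, tf.log(prediction) underflows for the other classes, and the gradients vanish. With stddev 0.1 the logits stay in a moderate range:

import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(784)                    # a fake input vector with values in [0, 1)

for stddev in (1.0, 0.1):
    w = rng.randn(784, 10) * stddev  # (truncation barely matters for this demo)
    logits = x @ w
    p = np.exp(logits - logits.max())   # numerically stable softmax
    p /= p.sum()
    print('stddev %.1f: logits std %.2f, max softmax prob %.6f'
          % (stddev, logits.std(), p.max()))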

Your code looks fine, but you are training a 5-layer network with a plain gradient descent optimizer for only 500 iterations. That is not enough. I suggest you:

  • Print the cross entropy during training (if it is not decreasing, there is probably a bug in your code)
  • Increase the number of iterations (e.g. to 10,000)
  • Switch to an optimizer that speeds up learning (momentum-based or Adam-style) here; a sketch combining these changes follows this list
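Applied to the question's own script, the three suggestions might look like the snippet below. AdamOptimizer is one concrete choice for the "Adam-style" suggestion, and the learning rate is an assumption; xs, ys, cross_entropy, mnist and sess are the variables already defined in the question's code:

# replaces the original train_step line; define it before
# tf.initialize_all_variables(), since Adam adds its own slot variables
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

for step in range(10000):            # more iterations, as suggested
    batch_xs, batch_ys = mnist.train.next_batch(100)
    _, loss = sess.run([train_step, cross_entropy],
                       feed_dict={xs: batch_xs, ys: batch_ys})
    if step % 500 == 0:
        # the cross entropy should be decreasing; if not, suspect a bug
        print('step %d, cross entropy %.4f' % (step, loss))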
