我该如何修复这个错误?我不知道为什么会发生,它已经让我耽搁了两天了,唉。
错误信息如下:
ValueError: The shape for my_cost_value/scan/while/Merge_1:0 is not an invariant for the loop. It enters the loop with shape (), but has shape <unknown> after one iteration. Provide shape invariants using either the `shape_invariants` argument of tf.while_loop or set_shape() on the loop variables.
下面是我的代码
################# constant
bach_size = 100
layers = 1
directions = 1
hiddensize = 100
self.hiddensize = hiddensize
self.def_output_length = 2
self.enc = encoder
input_length = 484
teacher_forcing = False
with tf.variable_scope('self_outputinit') as scope:
self.output_ini = tf.Variable(tf.zeros([1, self.hiddensize]),dtype='float32') #my output segment
################# training val
with tf.variable_scope('decoder_var') as scope:
self.W1 = tf.Variable(tf.random_normal([self.hiddensize*2, input_length]), dtype='float32')
self.W2 = tf.Variable(tf.random_normal([self.hiddensize*2, self.hiddensize]), dtype='float32')
self.b1 = tf.Variable(tf.random_normal([1, input_length]), dtype='float32')
self.b2 = tf.Variable(tf.random_normal([1, self.hiddensize]), dtype='float32')
with tf.variable_scope("mygru") as scope:
self.gru = tf.nn.rnn_cell.GRUCell(num_units = 100)
################### inputs
with tf.variable_scope('encoder_output') as scope:# shape = (100,484,100)
self.input_v = encoder.getoutput()
with tf.variable_scope('realsent') as scope: # shape = (100,seq,100)
self.realsent = tf.placeholder(dtype='float32')
with tf.variable_scope('decoder_res_seq') as scope: # shape = (100,seq)
self.output_length = tf.placeholder(dtype='float32')
with tf.variable_scope('encoder_state') as scope: #shape = (100,100)
self.grustate = encoder.getstate()
self.grustate = tf.reshape(self.grustate,shape=(100,1,100))
################## input concat ====>> (100,4) .. 4 = (484,100) + (seq,100) + seq + 100
#with tf.variable_scope('concat_cost_input') as scope:
# self.concat_input = tf.stack([self.input_v,self.realsent,self.output_length,self.grustate],axis=1)
################## cost cal
with tf.variable_scope('my_cost_value') as scope:
#self.cost = tf.Variable(0.0,dtype="float32")
newar = np.array([],dtype="float32")
for i in range(100):
newar = np.append(newar,i)
self.lastcost = tf.scan(self._bach_calcost, newar.astype("float32"))
################## training
with tf.variable_scope('adamtrain') as scope:
self.adamtrain = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(self.lastcost)
with tf.variable_scope('gradtrain') as scope:
self.gradtrain = tf.train.GradientDescentOptimizer(learning_rate=10).minimize(self.lastcost)
with tf.variable_scope('gradtrain') as scope:
self.gradtrain2 = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(self.lastcost)
self.input_seg_v = tf.Variable(tf.zeros(shape=(484,100)),dtype="float32")
################## for scan func in _bach_calcost
def _nt_atten(self, grustate_output ,ignore): #not teacher forcing
with tf.variable_scope("split") as scope:
pre_new_grustate, pre_new_output = tf.split(grustate_output, num_or_size_splits=2, axis=1)
_ = ignore
with tf.variable_scope("grucall") as scope:
new_grustate, new_output =self.gru.call(pre_new_grustate, pre_new_output )
#word = myprepdata.findwordsfromvec(output.eval())
#print(word)
#np_output = myprepdata.findvecfromwords(word)
#output = tf.Variable(np_output, dtype="float32")
with tf.variable_scope("atten") as scope:
attn = tf.concat([new_grustate, new_output],1)#concat and linear
attn = tf.add(tf.matmul(attn , self.W1),self.b1) # needs W initialize
attn = tf.nn.softmax(tf.nn.relu(attn))
with tf.variable_scope("bmm") as scope: #make 1 wordlike 1 484 * 484 100 == 1 100
bmm = tf.matmul( attn,tf.squeeze(self.input_seg_v))
with tf.variable_scope("comb") as scope:
attn_com = tf.concat([bmm, self.output],1 ) # 1 100 + 1 100 = 1 200
attn_com = tf.add(tf.matmul(attn_com,self.W2),self.b2) # 1 200 * 200 100 = 1 100
self.output = tf.nn.relu(attn_com)
with tf.variable_scope("concat") as scope:
grustate_output = tf.concat([new_grustate, new_output],1) # 1 100 + 1 100 = 1 200
grustate_output = tf.reshape(grustate_output, [1,200], name='grustate_output')
return grustate_output
################# for scan func in __init__
def _bach_calcost(self,coste, conc_input ):
with tf.variable_scope('decoder_output_segment') as scope:
self.output = self.output_ini
newarv = tf.cast(conc_input,tf.int32)
with tf.variable_scope('split_input') as scope:
grustate = tf.gather(self.grustate, newarv)
self.input_seg_v = tf.gather(self.input_v, newarv)
output_length = tf.gather(self.output_length,newarv)
realsent = tf.gather(self.realsent, newarv)
# (input_v, realsent, output_length, grustate) = conc_input
with tf.variable_scope('encoder_concat') as scope:
encoder_output_state = tf.concat([self.output, grustate],1) # 1*100 + 1*100 = 1*200
with tf.variable_scope('makesent') as scope:
self.last_status = tf.scan(self._nt_atten, output_length , initializer = encoder_output_state) #my full output
with tf.variable_scope('sent_postprocess') as scope:
pre_sentence = tf.squeeze(self.last_status) # 1 3 1 200 -> 3 200
_ , self.sentence = tf.split(pre_sentence, num_or_size_splits=2, axis = 1) # 3 200 -> ignore(3,100) acc(3,100)
with tf.variable_scope('calcost') as scope:
self.precost = (realsent - self.sentence) * (realsent - self.sentence)
newcost = tf.reduce_mean(self.precost)
return newcost
################# print bachcost
def calcost(self,sess,realsente,output_length,line): #reduce demention
return sess.run([self.lastcost],feed_dict = {self.realsent: realsente, self.output_length:output_length,
self.enc.input:line})
################# for training
def adamtraining(self,sess,realsente,output_length,line):
#summary = tf.summary.merge_all()
writer = tf.summary.FileWriter("./testgru")
writer.add_graph(sess.graph)
#s = sess.run(summary,feed_dict = {self.realsent: realsente, self.output_length:output_length,
# self.enc.input:line})
#writer.add_summary(s,0)
return sess.run([self.adamtrain],feed_dict = {self.realsent: realsente, self.output_length:output_length,
self.enc.input:line})
def gradtraining(self,sess,realsente,output_length,line):
#summary = tf.summary.merge_all()
writer = tf.summary.FileWriter("./testgru")
writer.add_graph(sess.graph)
#s = sess.run(summary,feed_dict = {self.realsent: realsente, self.output_length:output_length,
# self.enc.input:line})
#writer.add_summary(s,0)
return sess.run([self.gradtrain],feed_dict = {self.realsent: realsente, self.output_length:output_length,
self.enc.input:line})
def gradtraining2(self,sess,realsente,output_length,line):
#summary = tf.summary.merge_all()
writer = tf.summary.FileWriter("./testgru")
writer.add_graph(sess.graph)
#s = sess.run(summary,feed_dict = {self.realsent: realsente, self.output_length:output_length,
# self.enc.input:line})
#writer.add_summary(s,0)
return sess.run([self.gradtrain2],feed_dict = {self.realsent: realsente, self.output_length:output_length,
self.enc.input:line})
def hidden_init(self,sess):
return sess.run(tf.global_variables_initializer())
虽然在你的代码里看不到显式的 while 循环(tf.scan 在内部就是用 tf.while_loop 实现的),但核心问题是这样的:使用 tf.while_loop 时,你需要提供一组张量作为 loop_vars;在循环执行期间,这些张量的形状不能发生变化——通常情况下形状根本不会改变。如果形状变了,往往说明存在错误,例如从 body 返回张量时意外地打乱了它们的顺序。高级用户也可以通过 shape_invariants 参数显式指定形状不变量。关于形状不变量,https://www.tensorflow.org/api_docs/python/tf/while_loop 中有相当详细的讨论。相关问题 更多 >
编程相关推荐