在自然语言处理中,填充一批序列是很常见的。 这是填充函数。在
def pad_sequences(sequences, pad_tok=0):
"""
Args:
sequences: a generator of list or tuple
pad_tok: the char to pad with
Returns:
a list of list where each sublist has same length
a list record original length of sequences
"""
sequence_padded, sequence_length = [], []
sequence_padded = tf.keras.preprocessing.sequence.pad_sequences(sequences,
padding='post', value=pad_tok)
max_sen_len = 0
for seq in sequences:
seq = list(seq)
if len(seq) > max_sen_len:
max_sen_len = len(seq)
return sequence_padded, max_sen_len
输入是一批长度可变的句子。 每个句子都是一个id列表,每个id代表词汇表中的一个单词。在
这是feeddict
^{pr2}$我更喜欢在每个批中构建动态最大句子长度的嵌入。在
这是我的嵌入函数
def add_sentence_embeddings_op(self, word_ids, pos1_ids, pos2_ids, maxlen):
"""Defines sentence_embeddings
If self.config.embeddings is not None and is a np array initialized
with pre-trained word vectors, the word embeddings is just a look-up
and we don't train the vectors. Otherwise, a random matrix with
the correct shape is initialized.
"""
with tf.variable_scope("words", reuse=tf.AUTO_REUSE):
if self.config.embeddings is None:
self.logger.info("WARNING: randomly initializing word vectors")
_word_embeddings = tf.get_variable(
name="_word_embeddings",
dtype=tf.float32,
shape=[self.config.nwords, self.config.dim_word])
else:
_word_embeddings = tf.Variable(
self.config.embeddings,
name="_word_embeddings",
dtype=tf.float32,
trainable=self.config.train_word_embeddings)
word_embeddings = tf.nn.embedding_lookup(_word_embeddings, \
word_ids, name="word_embeddings")
with tf.variable_scope("pos1", reuse=tf.AUTO_REUSE):
self.logger.info("randomly initializing pos1 vectors")
_pos1_embeddings = tf.get_variable(
name="_pos1_embeddings",
dtype=tf.float32,
shape=[self.config.nposition, self.config.dim_pos])
pos1_embeddings = tf.nn.embedding_lookup(_pos1_embeddings, \
pos1_ids, name="pos1_embeddings")
with tf.variable_scope("pos2", reuse=tf.AUTO_REUSE):
self.logger.info("randomly initializing pos2 vectors")
_pos2_embeddings = tf.get_variable(
name="_pos2_embeddings",
dtype=tf.float32,
shape=[self.config.nposition, self.config.dim_pos])
pos2_embeddings = tf.nn.embedding_lookup(_pos2_embeddings, \
pos2_ids, name="pos2_embeddings")
word_emb_shape = word_embeddings.get_shape().as_list()
pos1_emb_shape = pos1_embeddings.get_shape().as_list()
pos2_emb_shape = pos2_embeddings.get_shape().as_list()
assert word_emb_shape[0] == pos1_emb_shape[0] == pos2_emb_shape[0]
assert word_emb_shape[1] == pos1_emb_shape[1] == pos2_emb_shape[1]
assert word_emb_shape[2] == self.config.dim_word
assert pos1_emb_shape[2] == self.config.dim_pos
assert pos2_emb_shape[2] == self.config.dim_pos
sentence_embeddings = tf.concat([word_embeddings, \
pos1_embeddings, pos2_embeddings], 2)
sen_emb_shape = sentence_embeddings.get_shape().as_list()
assert sen_emb_shape[2] == self.config.dim
# (batch_size, max length of sentences in batch, vector representation dimension, 1)
sentence_embeddings = tf.reshape(sentence_embeddings, [-1, maxlen, self.config.dim, 1])
return sentence_embeddings
不幸的是变形步骤不起作用。在
sentence_embeddings = tf.reshape(sentence_embeddings, [-1, maxlen, self.config.dim, 1])
编译时的错误。在
Traceback (most recent call last):
File "train.py", line 26, in <module>
main()
File "train.py", line 12, in main
model.build()
File "/Users/randypen/Code/test_pcnn/model/pcnn_model.py", line 295, in build
self.add_concat_op()
File "/Users/randypen/Code/test_pcnn/model/pcnn_model.py", line 243, in add_concat_op
self.pos1_ids_left, self.pos2_ids_left, self.maxlen_left)
File "/Users/randypen/Code/test_pcnn/model/pcnn_model.py", line 202, in add_sentence_embeddings_op
sentence_embeddings = tf.reshape(sentence_embeddings, [-1, maxlen, self.config.dim, 1])
File "/Users/randypen/.virtualenvs/DataEnv/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 6113, in reshape
"Reshape", tensor=tensor, shape=shape, name=name)
File "/Users/randypen/.virtualenvs/DataEnv/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 528, in _apply_op_helper
(input_name, err))
ValueError: Tried to convert 'shape' to a tensor and failed. Error: Shapes must be equal rank, but are 1 and 0
From merging shape 1 with other shapes. for 'Reshape/packed' (op: 'Pack') with input shapes: [], [1], [], [].
我已经看到了其他一些相关的回购,其中一些将max length定义为常量。 有没有可能在张量流中用动态最大长度重塑一批张量?在
目前没有回答
相关问题 更多 >
编程相关推荐