使用 TensorFlow 示例中的 LSTM PTB 模型预测下一个词

# Tutorial-style pseudo-code for training an LSTM language model.
# `rnn_cell`, `tf`, `lstm_size`, `batch_size`, `words_in_dataset`, `softmax_w`,
# `softmax_b`, `loss_function` and `target_words` are not defined in this
# snippet; they are expected to come from the surrounding tutorial context.
lstm = rnn_cell.BasicLSTMCell(lstm_size)
# Initial state of the LSTM memory.
state = tf.zeros([batch_size, lstm.state_size])

loss = 0.0
for current_batch_of_words in words_in_dataset:
    # The value of state is updated after processing each batch of words.
    output, state = lstm(current_batch_of_words, state)

    # The LSTM output can be used to make next word predictions
    logits = tf.matmul(output, softmax_w) + softmax_b
    probabilities = tf.nn.softmax(logits)
    loss += loss_function(probabilities, target_words)

class PTBModel(object):
    """The PTB model.

    Excerpt only: the author elided the LSTM graph construction. The names
    `output`, `size`, `vocab_size`, `batch_size`, `num_steps` and `states`
    used below are expected to be defined by that omitted part.
    """

    def __init__(self, is_training, config):
        # General definition of LSTM (unrolled)
        # identical to tensorflow example ...
        # omitted for brevity ...

        # computing the logits (also from example code)
        logits = tf.nn.xw_plus_b(
            output,
            tf.get_variable("softmax_w", [size, vocab_size]),
            tf.get_variable("softmax_b", [vocab_size]))
        loss = seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])],
            vocab_size)
        # Summed per-example loss divided by the batch size.
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = states[-1]

        # my addition: storing the probabilities and logits
        # so that they can be fetched through session.run alongside the cost.
        self.probabilities = tf.nn.softmax(logits)
        self.logits = logits

        # more model definition ...

def run_epoch(session, m, data, eval_op, verbose=True):
    """Runs the model on the given data.

    Args:
        session: session object whose `run` method evaluates the fetches.
        m: model exposing `cost`, `final_state`, `probabilities`, `logits`,
            `input_data`, `targets`, `initial_state`, `batch_size` and
            `num_steps`.
        data: dataset handed to `reader.ptb_iterator`.
        eval_op: extra op evaluated on every step; its result is discarded.
        verbose: when true, periodically print progress and diagnostics.

    Returns:
        The perplexity `np.exp(costs / iters)` accumulated over the epoch.
    """
    # first part of function unchanged from example
    # NOTE(review): the elided part is expected to initialise `costs`,
    # `iters`, `state`, `epoch_size` and `start_time`; none of them is
    # assigned in this excerpt before being read.

    for step, (x, y) in enumerate(reader.ptb_iterator(data, m.batch_size,
                                                      m.num_steps)):
        # evaluate probability and logit tensors too:
        cost, state, probs, logits, _ = session.run(
            [m.cost, m.final_state, m.probabilities, m.logits, eval_op],
            {m.input_data: x,
             m.targets: y,
             m.initial_state: state})
        costs += cost
        iters += m.num_steps

        # NOTE(review): `epoch_size // 10` is 0 when epoch_size < 10, which
        # makes the modulo below raise ZeroDivisionError.
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps, n_iters: %s" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time), iters))
            # NOTE(review): the source lost its line breaks; placing these
            # diagnostics inside the verbose branch is a reconstruction.
            # Index of the highest-probability entry along axis 1 of `probs`.
            chosen_word = np.argmax(probs, 1)
            print("Probabilities shape: %s, Logits shape: %s" %
                  (probs.shape, logits.shape) )
            print(chosen_word)
            print("Batch size: %s, Num steps: %s" % (m.batch_size, m.num_steps))

    return np.exp(costs / iters)

2条回答

网友

1楼 · 编辑于 2024-05-19 09:48:51

我也在实现seq2seq模型。

所以让我试着用我的理解来解释：

LSTM 模型的输出 outputs 是一个长度为 num_steps 的列表，其中每个元素都是大小为 [batch_size, size] 的二维张量。

代码行：

output = tf.reshape(tf.concat(1, outputs), [-1, size])

将产生一个新的 output，这是一个大小为 [batch_size x num_steps, size] 的二维张量。

对于您的情况，batch_size=1，num_steps=20 --> output 的形状为 [20, size]。

代码行：

logits = tf.nn.xw_plus_b(output, tf.get_variable("softmax_w", [size, vocab_size]), tf.get_variable("softmax_b", [vocab_size]))

<=> output [batch_size x num_steps, size] x softmax_w [size, vocab_size] 将输出大小为 [batch_size x num_steps, vocab_size] 的 logits。
对于您的情况，logits 的大小为 [20, vocab_size] --> probs 张量的大小与 logits 相同，即 [20, vocab_size]。

代码行：

chosen_word = np.argmax(probs, 1)

将输出 chosen_word，它是一个包含 20 个元素的数组（形状为 [20]），每个值是当前单词对应的下一个预测单词的索引。

代码行：

loss = seq2seq.sequence_loss_by_example([logits], [tf.reshape(self._targets, [-1])], [tf.ones([batch_size * num_steps])])

用于计算 batch_size 个序列的 softmax 交叉熵损失。

网友

2楼 · 编辑于 2024-05-19 09:48:51

output 张量包含每个时间步的 LSTM 单元输出的拼接（请参见其定义）。因此，您可以通过 chosen_word[-1]（或者 chosen_word[sequence_length - 1]，如果序列经过填充以匹配展开的 LSTM）来获得下一个单词的预测。

该操作（op，原文中的名称在转载时丢失）在公共 API 中是以另一个名称记录的。出于技术原因，它调用了一个自动生成的包装函数，该函数不会出现在 GitHub 存储库中。该操作的实现位于 C++ 代码中（原文链接已丢失）。

相关问题更多 >

编程相关推荐

热门问题

热门文章