TensorFlow LSTM 门的权重

class BasicLSTMCell(RNNCell):
  """Basic LSTM recurrent network cell.

  The implementation is based on: http://arxiv.org/abs/1409.2329.

  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training.

  It does not allow cell clipping, a projection layer, and does not
  use peep-hole connections: it is the basic baseline.

  For advanced models, please use the full LSTMCell that follows.
  """

  def __init__(self, num_units, forget_bias=1.0, input_size=None,
               state_is_tuple=True, activation=tanh):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
    # Both deprecated options only trigger a warning; neither raises.
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated. Use state_is_tuple=True.", self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation

  @property
  def state_size(self):
    """Size of the state: an LSTMStateTuple of two `num_units` entries when
    `state_is_tuple` is set, otherwise the single integer `2 * num_units`."""
    return (LSTMStateTuple(self._num_units, self._num_units)
            if self._state_is_tuple else 2 * self._num_units)

  @property
  def output_size(self):
    """Size of the cell output, equal to `num_units`."""
    return self._num_units

  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM).

    Args:
      inputs: Input for the current step; passed to `_linear` together with
        the previous `h`.
      state: Previous state. A `(c, h)` pair when `state_is_tuple` is set,
        otherwise a single tensor that is split in two.
      scope: Optional variable scope; defaults to the class name.

    Returns:
      A pair `(new_h, new_state)`, where `new_state` has the same form
      (tuple or concatenated tensor) as the incoming `state`.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      if self._state_is_tuple:
        c, h = state
      else:
        # Legacy argument order, presumably (axis, num_split, value) --
        # confirm against the array_ops version in use.
        c, h = array_ops.split(1, 2, state)
      # One projection of width 4 * num_units covers all four gates.
      # NOTE(review): the trailing `True` presumably enables a bias term in
      # `_linear` -- confirm against its definition.
      concat = _linear([inputs, h], 4 * self._num_units, True)

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      # These are per-step activations computed from the inputs, not
      # trainable weights.
      i, j, f, o = array_ops.split(1, 4, concat)

      # forget_bias is a plain constant added to the forget-gate
      # pre-activation before the sigmoid.
      new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
               self._activation(j))
      new_h = self._activation(new_c) * sigmoid(o)

      if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
      else:
        new_state = array_ops.concat(1, [new_c, new_h])
      return new_h, new_state


def _get_concat_variable(name, shape, dtype, num_shards):
  """Get a sharded variable concatenated into one tensor.

  Args:
    name: Base name of the sharded variable.
    shape: Full shape of the variable; its first dimension is sharded.
    dtype: Dtype passed through to the shard variables.
    num_shards: Number of shards to create.

  Returns:
    The single shard itself when there is only one; otherwise a tensor
    concatenating all shards along dimension 0. The concatenated tensor is
    registered in the CONCATENATED_VARIABLES collection and reused on later
    calls that resolve to the same full name.
  """
  sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
  if len(sharded_variable) == 1:
    return sharded_variable[0]

  concat_name = name + "/concat"
  concat_full_name = vs.get_variable_scope().name + "/" + concat_name + ":0"
  # Reuse a previously built concat tensor with the same fully scoped name
  # instead of adding another concat op to the graph.
  for value in ops.get_collection(ops.GraphKeys.CONCATENATED_VARIABLES):
    if value.name == concat_full_name:
      return value

  concat_variable = array_ops.concat(0, sharded_variable, name=concat_name)
  ops.add_to_collection(ops.GraphKeys.CONCATENATED_VARIABLES,
                        concat_variable)
  return concat_variable


def _get_sharded_variable(name, shape, dtype, num_shards):
  """Get a list of sharded variables with the given dtype.

  The first dimension of `shape` is divided across `num_shards` variables
  named `<name>_0`, `<name>_1`, ...; the first `shape[0] % num_shards`
  shards receive one extra row so that the sizes add up to `shape[0]`.

  Args:
    name: Base name; shard `i` is created as `name + "_%d" % i`.
    shape: Full variable shape. It is sliced and concatenated with a list
      below, so it must be a list (a tuple would raise TypeError).
    dtype: Dtype of each shard variable.
    num_shards: Number of shards to create.

  Returns:
    A list of `num_shards` variables.

  Raises:
    ValueError: If `num_shards` is greater than `shape[0]`.
  """
  if num_shards > shape[0]:
    raise ValueError("Too many shards: shape=%s, num_shards=%d" %
                     (shape, num_shards))
  unit_shard_size = int(math.floor(shape[0] / num_shards))
  remaining_rows = shape[0] - unit_shard_size * num_shards

  shards = []
  for i in range(num_shards):
    current_size = unit_shard_size
    # Spread the leftover rows over the leading shards, one row each.
    if i < remaining_rows:
      current_size += 1
    shards.append(vs.get_variable(name + "_%d" % i, [current_size] + shape[1:],
                                  dtype=dtype))
  return shards

1条回答

网友
1楼 · 发布于 2024-10-02 10:22:50

首先，澄清一些混淆：i、j、f 和 o 张量并不是权重矩阵；它们是依赖于具体输入的 LSTM 单元中间计算结果。LSTM 单元的所有权重都存储在变量 self._kernel 和 self._bias 中，另外还有一个常量 self._forget_bias。
所以，针对你的问题的两种可能的理解，我将分别展示如何打印 self._kernel 和 self._bias 的值，以及每个时间步的 i、j、f 和 o 张量的值。
假设我们有下图：
import numpy as np
import tensorflow as tf

# Toy dimensions for the demo: sequence length, input features, LSTM units.
timesteps = 7
num_input = 4
num_units = 3
# A single random input sequence of shape (1, timesteps, num_input).
x_val = np.random.normal(size=(1, timesteps, num_input))

lstm = tf.nn.rnn_cell.BasicLSTMCell(num_units = num_units)
X = tf.placeholder("float", [1, timesteps, num_input])
# Unstack the placeholder along axis 1 into `timesteps` per-step tensors,
# which is the list form that static_rnn consumes.
inputs = tf.unstack(X, timesteps, 1)
# static_rnn unrolls the cell once per element of `inputs`, so the cell's
# ops are created `timesteps` times in the graph.
outputs, state = tf.contrib.rnn.static_rnn(lstm, inputs, dtype=tf.float32)
只要知道某个张量的名称，我们就可以取得它的值。查找张量名称的一种方法是查看 TensorBoard。
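（此处的代码片段在页面抓取时丢失。根据后文用到的 init、graph 以及 ./graph 日志目录推断，它大致如下：）
init = tf.global_variables_initializer()
graph = tf.get_default_graph()
with tf.Session(graph=graph) as sess:
    train_writer = tf.summary.FileWriter('./graph', sess.graph)
    sess.run(init)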
现在我们可以通过终端命令启动TensorBoard
tensorboard --logdir=graph --host=localhost
并发现产生i，j，f，o张量的运算名为“rnn/basic_lstm_cell/split”，而kernel和bias分别称为“rnn/basic_lstm_cell/kernel”和“rnn/basic_lstm_cell/bias”：
tf.contrib.rnn.static_rnn 函数会调用我们的 BasicLSTMCell 7 次，每个时间步调用一次。当 TensorFlow 被要求用同一个名称创建多个操作时，它会给这些名称加上后缀，如下所示：rnn/basic_lstm_cell/split、rnn/basic_lstm_cell/split_1、……、rnn/basic_lstm_cell/split_6。这些就是我们要找的操作的名称。
tensorflow中张量的名称由产生张量的操作的名称组成，后跟冒号，后跟产生此张量的操作输出的索引。Kernel和bias ops只有一个输出，因此张量名称为
# NOTE: `graph` is not defined in this snippet; it presumably refers to the
# graph the LSTM was built in (e.g. the default graph) -- confirm.
# The ":0" suffix selects output 0 of the op; the kernel and bias ops each
# have a single output.
kernel = graph.get_tensor_by_name("rnn/basic_lstm_cell/kernel:0")
bias = graph.get_tensor_by_name("rnn/basic_lstm_cell/bias:0")
拆分操作产生四个输出：i、j、f和o，因此这些张量的名称为：
# One "split" op exists per unrolled timestep: the first is unsuffixed and
# the following six carry the suffixes "_1" ... "_6". Each split op has four
# outputs, indexed 0..3, holding the i, j, f and o tensors respectively.
suffixes = ["", "_1", "_2", "_3", "_4", "_5", "_6"]

i_list = [
    graph.get_tensor_by_name("rnn/basic_lstm_cell/split{}:0".format(suffix))
    for suffix in suffixes
]
j_list = [
    graph.get_tensor_by_name("rnn/basic_lstm_cell/split{}:1".format(suffix))
    for suffix in suffixes
]
f_list = [
    graph.get_tensor_by_name("rnn/basic_lstm_cell/split{}:2".format(suffix))
    for suffix in suffixes
]
o_list = [
    graph.get_tensor_by_name("rnn/basic_lstm_cell/split{}:3".format(suffix))
    for suffix in suffixes
]
现在我们可以找到所有张量的值：
# NOTE: `init` and `graph` are not defined in any snippet visible here; they
# presumably come from an earlier setup step (a variables initializer and the
# graph containing the LSTM) -- confirm before running.
with tf.Session(graph=graph) as sess:
    # Writes the graph definition under ./graph so TensorBoard can display it.
    train_writer = tf.summary.FileWriter('./graph', sess.graph)
    sess.run(init)
    # The weights do not depend on the input, so no feed_dict is needed.
    weights = sess.run([kernel, bias])
    print("Weights:\n", weights)
    # The gate tensors depend on the input, so the placeholder must be fed.
    i_values, j_values, f_values, o_values = sess.run(
        [i_list, j_list, f_list, o_list], feed_dict = {X:x_val})
    print("i values:\n", i_values)
    print("j values:\n", j_values)
    print("f_values:\n", f_values)
    print("o_values:\n", o_values)
或者，我们可以通过查看图中所有张量的列表来找到张量名称，可以通过以下方式生成：
# Collect the output tensors of every operation in the graph, then flatten
# them into a single list of tensor names.
tensors_per_node = [node.values() for node in graph.get_operations()]
tensor_names = []
for tensors in tensors_per_node:
    for tensor in tensors:
        tensor_names.append(tensor.name)
print(tensor_names)
或者，对于所有操作的简短列表：
# Shorter listing: operation names only, without their output tensors.
operation_names = [op.name for op in graph.get_operations()]
print(operation_names)
第三种方法是阅读源代码（source code），找出哪些名称被分配给了哪些张量。

相关问题更多 >

编程相关推荐

热门问题

热门文章