我创建了这个LSTM类:
import tensorflow as tf
import Constants
class LSTM():
    """Stacked LSTM regressor built on the TensorFlow 1.x graph API.

    The constructor creates the placeholders, the output projection
    variables, the multi-layer cell, the optimiser and a session, and then
    builds the graph and initialises all variables. A batch is supplied with
    ``setBatch`` and the ``batch*`` / ``processBatch`` methods evaluate
    graph nodes against that batch.
    """

    def __init__(self,
                 inputShape,
                 outputShape,
                 numLayers=Constants.numLayers,
                 numHidden=Constants.numHidden,
                 learningRate=Constants.learningRate,
                 forgetBias=Constants.forgetBias):
        """Create the graph inputs, variables, cell, optimiser and session.

        Args:
            inputShape: List with the per-example input shape. It is appended
                to ``[None]`` (unspecified batch size); axis 1 of the
                resulting placeholder is unstacked into the per-step inputs.
            outputShape: List with the per-example label shape. It is also
                used for the bias and as the trailing dimensions of the
                projection weights (presumably a single-element list, since
                the weights are used in a ``tf.matmul`` -- TODO confirm).
            numLayers: Number of stacked LSTM layers.
            numHidden: Number of units in each LSTM layer.
            learningRate: Learning rate of the gradient-descent optimiser.
            forgetBias: Forget-gate bias passed to every LSTM cell.
        """
        self.inputs = tf.placeholder(tf.float32, [None] + inputShape)
        self.labels = tf.placeholder(tf.float32, [None] + outputShape)
        # One tensor per index along axis 1 of the input placeholder.
        self.inputTensors = tf.unstack(self.inputs, axis=1)
        # Projection applied to the last RNN output to obtain predictions.
        self.weights = tf.Variable(tf.random_normal([numHidden] + outputShape))
        self.bias = tf.Variable(tf.random_normal(outputShape))
        # Build a distinct cell object for every layer. `[cell] * numLayers`
        # would place the *same* instance in every slot, which TF 1.x either
        # rejects or treats as one set of weights shared by all layers.
        layers = [
            tf.contrib.rnn.LSTMCell(numHidden,
                                    forget_bias=forgetBias,
                                    state_is_tuple=True)
            for _ in range(numLayers)
        ]
        self.cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)
        self.optimiser = tf.train.GradientDescentOptimizer(learningRate)
        self.forgetBias = forgetBias
        # Feed dict for the current batch; populated by setBatch().
        self.batchDict = None
        self.outputs = None
        self.finalStates = None
        # Graph nodes below are assigned in __buildGraph().
        self.predictions = None
        self.loss = None
        self.accuracy = None
        self.optimise = None
        self.session = tf.Session()
        self.__buildGraph()

    def __buildGraph(self):
        """Wire up predictions, loss, accuracy and the training op, then
        initialise all global variables in the session."""
        outputs, finalStates = tf.nn.static_rnn(self.cell, self.inputTensors, dtype=tf.float32)
        # Only the output of the final step is projected to a prediction.
        predictions = tf.add(tf.matmul(outputs[-1], self.weights), self.bias)
        # Clip predictions into the closed interval [0, 1].
        self.predictions = tf.minimum(tf.maximum(predictions, 0), 1)
        self.loss = tf.losses.mean_squared_error(predictions=self.predictions, labels=self.labels)
        # Mean of (1 - absolute error); the division by 1.0 is kept from the
        # original (presumably the width of the label range -- TODO confirm).
        self.accuracy = tf.reduce_mean(1 - tf.abs(self.labels - self.predictions) / 1.0)
        self.optimise = self.optimiser.minimize(self.loss)
        self.session.run(tf.global_variables_initializer())

    def __execute(self, operation):
        """Run ``operation`` in the session, feeding the current batch."""
        return self.session.run(operation, self.batchDict)

    def setBatch(self, inputs, labels):
        """Store ``inputs`` and ``labels`` as the feed dict for later runs."""
        self.batchDict = {self.inputs: inputs, self.labels: labels}

    def batchLabels(self):
        """Return the labels of the current batch as evaluated by the graph."""
        return self.__execute(self.labels)

    def batchPredictions(self):
        """Return the clipped predictions for the current batch."""
        return self.__execute(self.predictions)

    def batchLoss(self):
        """Return the mean squared error for the current batch."""
        return self.__execute(self.loss)

    def batchAccuracy(self):
        """Return the accuracy metric for the current batch."""
        return self.__execute(self.accuracy)

    def processBatch(self):
        """Run one optimiser step on the current batch."""
        self.__execute(self.optimise)

    def kill(self):
        """Close the underlying TensorFlow session."""
        self.session.close()
我是这样运行的:
(原文此处的运行代码已缺失。)这一切都很正常。然而,我使用的是时间序列数据:每只金融股票跨越的时间戳范围远远大于我的LSTM展开的时间步数(Constants.sequenceLength)。
正因为如此,处理一只股票需要许多连续的批次,所以LSTM的状态/记忆需要在批次之间传递。同样,当某个批次处理完一个ID的整个生命周期之后,下一个批次会从数据集的初始时间戳开始传入一个新的ID,因此我希望在这时重置记忆。
然而,虽然已经有很多人问过类似的问题,而且那些回答本身都没有问题,但它们似乎都没有解决使用可变批大小的情况——批大小初始化为None,然后在传入批次时再进行推断。
我的批次通常大小固定,但在某些情况下会发生变化,而这一点我无法改变。在没有指定批大小的情况下,如何控制批次之间的状态传递以及状态的重置?
目前没有回答
相关问题 更多 >
编程相关推荐