对于多代理RL问题,我同时训练多个模型。目前我的结构如下所示:
class testmodel(tf.keras.Model):
    """Small feed-forward policy network: two hidden Dense(20) layers
    followed by a 2-way softmax output (action probabilities)."""

    def __init__(self):
        super().__init__()
        # Toy MARL actor: 20 -> 20 -> 2 (softmax over 2 actions).
        self.l1 = tf.keras.layers.Dense(20)
        self.l2 = tf.keras.layers.Dense(20)
        self.l3 = tf.keras.layers.Dense(2, activation="softmax")

    def call(self, x):
        """Forward pass: map an input batch to action probabilities."""
        hidden = self.l2(self.l1(x))
        return self.l3(hidden)
class MARL():
    """Container for nAgents independent actors, each paired with its own
    Adam optimizer, plus two variants of a toy learning step."""

    def __init__(self, nAgents, input_shape):
        """Create nAgents actor/optimizer pairs and build each actor.

        nAgents:     number of agents (models) trained in parallel.
        input_shape: shape passed to Model.build, e.g. (1, 20).
        """
        self.nAgents = nAgents
        self.list_of_actors = list()
        self.list_of_optimizers = list()
        for agent in range(nAgents):
            self.list_of_actors.append(testmodel())
            self.list_of_optimizers.append(
                tf.keras.optimizers.Adam(learning_rate=0.001))
            # Build eagerly so each model's variables exist before the
            # first @tf.function trace (avoids variable creation on trace).
            self.list_of_actors[agent].build(input_shape=input_shape)

    @tf.function
    def learn_for_loop(self):
        """One toy gradient step per agent via a plain Python loop.

        The Python loop is unrolled at trace time, so each model is indexed
        with a Python int — this works under @tf.function.
        """
        x = np.random.random_sample((20,)).tolist()
        x = tf.expand_dims(x, 0)
        for agent in range(self.nAgents):
            with tf.GradientTape() as g:
                y_hat = self.list_of_actors[agent](x)
                # Toy loss: offset of the softmax output from [0, 0].
                loss = y_hat - tf.constant([0., 0])
            grads = g.gradient(loss, self.list_of_actors[agent].trainable_variables)
            self.list_of_optimizers[agent].apply_gradients(
                zip(grads, self.list_of_actors[agent].trainable_variables))

    @tf.function
    def learn_tf_loop(self):
        """Same toy update expressed as a tf.while_loop.

        NOTE(review): inside tf.while_loop the counter i is a Tensor, so
        `self.list_of_actors[i]` still fails under @tf.function with
        "list indices must be integers or slices, not Tensor" (error a);
        a tensor-to-branch dispatch such as tf.switch_case would be needed
        to index the model list by a traced counter.
        """
        def body(i, x):
            with tf.GradientTape() as g:
                y_hat = self.list_of_actors[i](x)  ### throws error a)
                loss = y_hat - tf.constant([0., 0])
            grads = g.gradient(loss, self.list_of_actors[i].trainable_variables)
            # Fixed: the original referenced the undefined name `agent`
            # here (leftover from learn_for_loop); the loop variable is `i`.
            self.list_of_optimizers[i].apply_gradients(
                zip(grads, self.list_of_actors[i].trainable_variables))  ### throws error b)
            return (tf.add(i, 1), x)

        def condition(i, x):
            return tf.less(i, self.nAgents)

        i = tf.constant(0)
        x = np.random.random_sample((20,)).tolist()
        x = tf.expand_dims(x, 0)
        r = tf.while_loop(condition, body, (i, x))
如果现在比较CPU上的运行时,会得到以下结果:
def _time_100_calls(fn):
    # Time 100 repeated invocations of fn and print the elapsed seconds.
    t0 = time.time()
    for _ in range(100):
        fn()
    print(time.time() - t0)

# CPU benchmark: 100 learn calls per variant on a 10-agent instance.
test_instance = MARL(10, (1, 20))
_time_100_calls(test_instance.learn_for_loop)
# without @tf.function: ~ 7s
# with @tf.function: ~ 3.5s # cut runtime by half, GREAT
_time_100_calls(test_instance.learn_tf_loop)
# without @tf.function: ~ 7s
# with @tf.function: super problematic
在我的理解中,更紧凑的“learn_tf_loop”应该比“learn_for_loop”更快,尤其是对于更大的模型和使用GPU时。我希望这些效果会变得明显,尤其是在使用tf.function装饰器时。不幸的是,这会导致错误,例如a)“TypeError: list indices must be integers or slices, not Tensor”和b)“ValueError: tf.function-decorated function tried to create variables on non-first call.”。这两个错误分别在a)“Select an item from a list of object of any type when using tensorflow 2.x”和b) https://github.com/tensorflow/tensorflow/issues/27120 中有所讨论,但不幸的是,我无法让这些解决方案奏效,因为我a)需要在调用模型时为模型提供输入,b)不想为我的n个代理创建n个单独的函数。如何使“learn_tf_loop”与tf.function装饰器一起工作?我认为这归结为以下两个问题:
目前没有回答
相关问题 更多 >
编程相关推荐