同时训练多个tensorflow模型

如何同时训练多个 TensorFlow 模型

对于多智能体 RL 问题，我同时训练多个模型。目前我的结构如下：

class testmodel(tf.keras.Model):
    """Small feed-forward network: two 20-unit dense layers and a 2-unit softmax head."""

    def __init__(self):
        super().__init__()
        # Two hidden layers with no explicit activation argument.
        self.l1 = tf.keras.layers.Dense(units=20)
        self.l2 = tf.keras.layers.Dense(units=20)
        # Output head: softmax over two units.
        self.l3 = tf.keras.layers.Dense(units=2, activation="softmax")

    def call(self, x):
        """Pass ``x`` through ``l1`` -> ``l2`` -> ``l3`` and return the softmax output."""
        hidden = self.l2(self.l1(x))
        return self.l3(hidden)

class MARL():
    """Hold several independent actor models, each with its own optimizer.

    Creates ``nAgents`` ``testmodel`` instances plus one Adam optimizer per
    model, and offers two ways of running one gradient step on every actor:
    a plain Python ``for`` loop and a ``tf.while_loop``.
    """

    def __init__(self, nAgents, input_shape):
        """Create and build ``nAgents`` actors together with their optimizers.

        Args:
            nAgents: Number of actor/optimizer pairs to create.
            input_shape: Shape forwarded to each model's ``build``.
        """
        self.nAgents = nAgents
        self.list_of_actors = []
        self.list_of_optimizers = []
        for _ in range(nAgents):
            actor = testmodel()
            actor.build(input_shape=input_shape)
            self.list_of_actors.append(actor)
            self.list_of_optimizers.append(
                tf.keras.optimizers.Adam(learning_rate=0.001)
            )

    @tf.function
    def learn_for_loop(self):
        """Run one gradient step per actor using a Python ``for`` loop.

        Every actor receives the same random input of shape (1, 20) and is
        pushed towards the target ``[0., 0.]``.
        """
        # NOTE: inside @tf.function this NumPy call is executed while the
        # function is traced, not on every call of the compiled graph.
        x = np.random.random_sample((20,)).tolist()
        x = tf.expand_dims(x, 0)

        # nAgents is a Python int, so this loop is unrolled with integer
        # indices and plain list indexing works.
        for agent in range(self.nAgents):
            actor = self.list_of_actors[agent]
            with tf.GradientTape() as g:
                y_hat = actor(x)
                loss = y_hat - tf.constant([0., 0])

            grads = g.gradient(loss, actor.trainable_variables)
            self.list_of_optimizers[agent].apply_gradients(
                zip(grads, actor.trainable_variables)
            )

    @tf.function
    def learn_tf_loop(self):
        """Attempt the same per-actor update with ``tf.while_loop``.

        Known not to work under ``@tf.function`` as written: the loop index
        ``i`` is a tensor, so indexing the Python lists with it raises a
        TypeError (error a), and the optimizer update is reported to fail
        with a ValueError about creating variables on a non-first call
        (error b).
        """

        def body(i, x):
            with tf.GradientTape() as g:

                y_hat = self.list_of_actors[i](x) ### throws error a)
                loss = y_hat - tf.constant([0., 0])

            grads = g.gradient(loss, self.list_of_actors[i].trainable_variables)
            self.list_of_optimizers[i].apply_gradients(zip(grads, self.list_of_actors[i].trainable_variables)) ### throws error b)

            return (tf.add(i, 1), x)

        def condition(i, x):
            return tf.less(i, self.nAgents)

        i = tf.constant(0)
        x = np.random.random_sample((20,)).tolist()
        x = tf.expand_dims(x, 0)

        tf.while_loop(condition, body, (i, x))

如果我现在比较 cpu 上的运行时间，我会得到以下结果：

test_instance = MARL(10, (1, 20))

# Time 100 update rounds with the Python-loop variant.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval is not affected by system clock adjustments.
tic = time.perf_counter()
for _ in range(100):
    test_instance.learn_for_loop()
print(time.perf_counter() - tic)
# without @tf.function: ~ 7s
# with @tf.function: ~ 3.5s # cut runtime by half,GREAT

# Time 100 update rounds with the tf.while_loop variant.
tic = time.perf_counter()
for _ in range(100):
    test_instance.learn_tf_loop()
print(time.perf_counter() - tic)
# without @tf.function: ~ 7s
# with @tf.function: super problematic

根据我的理解，更“TensorFlow 化”的“learn_tf_loop”应该比“learn_for_loop”更快，尤其是在模型较大或使用 GPU 时。我原本期望在加上 tf.function 装饰器之后，这种差异会更加明显。不幸的是，这样做会导致错误，例如 a)“TypeError: list indices must be integers or slices, not Tensor”（列表索引必须是整数或切片，而不是张量）和 b)“ValueError: tf.function-decorated function tried to create variables on non-first call.”（tf.function 装饰的函数试图在非第一次调用时创建变量）。这两个错误已分别在 a)“Select an item from a list of object of any type when using tensorflow 2.x”和 b) https://github.com/tensorflow/tensorflow/issues/27120 中讨论过，但不幸的是，我无法让这些解决方案奏效，因为我 a) 在调用模型时需要向它们提供输入，并且 b) 不想为我的 n 个智能体创建 n 个单独的函数。如何让“learn_tf_loop”与 tf.function 装饰器一起工作？我认为这可以归结为以下两个问题：

tl;dr：

如何存储这些模型，使得在图模式（graph mode）下仍然可以用张量作为索引来访问它们？
如何在图模式（graph mode）下同时训练多个模型？