TensorFlow 2:在图形模式下无限期地进行模型训练

如何解决TensorFlow 2:在图形模式下无限期地进行模型训练

我分别在 Eager Execution 模式和图(graph)模式下训练以下模型。该模型在 Eager Execution 模式下训练正常,但在图模式下训练会一直运行下去,始终无法结束。我尝试了多种调试方法,但都没有成功。

class CustomModelV2(tf.keras.Model):
    """Keras model that wraps an ``Encoder`` and supplies its own ``train_step``.

    The training loss is the mean squared error between the targets and the
    encoder output. Its running mean is kept in ``loss_tracker`` and reported
    under the key ``"loss"``.
    """

    def __init__(self):
        super(CustomModelV2, self).__init__()
        self.encoder = Encoder(32)
        self.encoder.build(input_shape=(None, 32))
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")

    def call(self, inputs, training=None):
        """Run the encoder on ``inputs``, forwarding the ``training`` flag."""
        return self.encoder(inputs, training=training)

    @property
    def metrics(self):
        """Metrics owned by this model.

        Listing the tracker here lets Keras call `reset_states()` on it
        automatically at the start of each epoch and of `evaluate()`.
        Without this property, `reset_states()` has to be called manually.
        """
        return [self.loss_tracker]

    @tf.function
    def train_step(self, data):
        """Run one optimisation step on a batch.

        Args:
            data: A ``(x, y)`` pair as produced by the dataset passed to
                ``fit()``.

        Returns:
            A dict mapping ``"loss"`` to the current value of the loss tracker.
        """
        x, y = data

        with tf.GradientTape() as tape:
            # Invoke the model through `__call__` (not `call`) so that Keras
            # runs its own bookkeeping around the forward pass.
            y_pred = self(x, training=True)

            # The loss is hard-coded here; nothing passed to `compile()` is
            # consulted.
            loss = tf.keras.losses.mean_squared_error(y, y_pred)
            # NOTE(review): the layers are created with L1 regularizers, but
            # `self.losses` is not added to `loss` -- confirm this is intended.

        # Differentiate with respect to every trainable weight and apply.
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Fold this batch's loss into the running mean.
        self.loss_tracker.update_state(loss)

        return {"loss": self.loss_tracker.result()}

class Encoder(tf.keras.Model):
    """Three hidden ELU layers with dropout, followed by a linear output layer.

    Layer widths are ``input_size -> 128 -> 128 -> 64 -> 64 -> 64``.
    """

    def __init__(self, input_size):
        super(Encoder, self).__init__(name='Encoder')
        # Layer constructor arguments, in order:
        #   (units, input_dim, weights_regularizer, bias_regularizer, d_type)
        # Each `input_dim` equals the `units` of the layer feeding it.
        # NOTE(review): several calls had lost arguments in this listing. The
        # regularizer strengths marked "assumed" below are reconstructed, not
        # read from the original -- confirm before relying on them.
        self.input_layer = DenseLayer(128, input_size, 0.0, 0.0, 'float32')  # bias reg assumed
        self.hidden_layer1 = DenseLayer(128, 128, 0.001, 0.0, 'float32')     # bias reg assumed
        self.dropout_laye1 = tf.keras.layers.Dropout(0.2)
        self.hidden_layer2 = DenseLayer(64, 128, 0.001, 0.0, 'float32')      # both regs assumed
        self.dropout_laye2 = tf.keras.layers.Dropout(0.2)
        self.hidden_layer3 = DenseLayer(64, 64, 0.001, 0.0, 'float32')       # both regs assumed
        self.dropout_laye3 = tf.keras.layers.Dropout(0.2)
        self.output_layer = LinearLayer(64, 64, 0.001, 0.0, 'float32')       # both regs assumed

    def call(self, input_data, training=None):
        """Apply the layer stack to ``input_data``.

        ``training`` is handed to every dropout layer, which is a pass-through
        unless it is truthy. Forwarding the flag instead of branching on it in
        Python also works when the flag is not a plain Python bool.
        """
        fx = self.input_layer(input_data)
        fx = self.hidden_layer1(fx)
        fx = self.dropout_laye1(fx, training=training)
        fx = self.hidden_layer2(fx)
        fx = self.dropout_laye2(fx, training=training)
        fx = self.hidden_layer3(fx)
        fx = self.dropout_laye3(fx, training=training)
        return self.output_layer(fx)

class LinearLayer(tf.keras.layers.Layer):
    """Affine layer computing ``inputs @ w + b`` with no activation.

    Args:
        units: Number of output features.
        input_dim: Number of input features.
        weights_regularizer: L1 factor applied to the kernel ``w``.
        bias_regularizer: L1 factor applied to the bias ``b``.
        d_type: dtype used for the initializer bounds.
    """

    def __init__(self, units, input_dim, weights_regularizer, bias_regularizer, d_type):
        super(LinearLayer, self).__init__()
        # Symmetric uniform bound sqrt(6 / (fan_in + fan_out)); computed once
        # and reused for both ends of the range.
        limit = tf.cast(tf.math.sqrt(6 / (input_dim + units)), dtype=d_type)
        self.w = self.add_weight(
            name='w_linear',
            shape=(input_dim, units),
            initializer=tf.keras.initializers.RandomUniform(
                minval=-limit, maxval=limit, seed=16751),
            regularizer=tf.keras.regularizers.l1(weights_regularizer),
            trainable=True)
        self.b = self.add_weight(
            name='b_linear',
            shape=(units,),
            initializer=tf.zeros_initializer(),
            regularizer=tf.keras.regularizers.l1(bias_regularizer),
            trainable=True)

    def call(self, inputs):
        """Return ``inputs @ w + b``."""
        return tf.matmul(inputs, self.w) + self.b

class DenseLayer(tf.keras.layers.Layer):
    """Affine layer followed by an ELU activation: ``elu(inputs @ w + b)``.

    NOTE(review): this block was truncated in the listing. The signature and
    the `shape` / `seed` / `regularizer` arguments are reconstructed to
    parallel `LinearLayer` -- confirm against the original code.

    Args:
        units: Number of output features.
        input_dim: Number of input features.
        weights_regularizer: L1 factor applied to the kernel ``w``.
        bias_regularizer: L1 factor applied to the bias ``b``.
        d_type: dtype used for the initializer bounds.
    """

    def __init__(self, units, input_dim, weights_regularizer, bias_regularizer, d_type):
        super(DenseLayer, self).__init__()
        # Symmetric uniform bound sqrt(6 / (fan_in + fan_out)).
        limit = tf.cast(tf.math.sqrt(6.0 / (input_dim + units)), dtype=d_type)
        self.w = self.add_weight(
            name='w_dense',
            shape=(input_dim, units),
            initializer=tf.keras.initializers.RandomUniform(
                minval=-limit, maxval=limit, seed=16751),
            regularizer=tf.keras.regularizers.l1(weights_regularizer),
            trainable=True)
        self.b = self.add_weight(
            name='b_dense',
            shape=(units,),
            initializer=tf.zeros_initializer(),
            regularizer=tf.keras.regularizers.l1(bias_regularizer),
            trainable=True)

    def call(self, inputs):
        """Return ``elu(inputs @ w + b)``."""
        x = tf.matmul(inputs, self.w) + self.b
        return tf.nn.elu(x)

以下是训练模型的脚本:

# Build a synthetic dataset and train with the regular `fit` loop.
x = tf.data.Dataset.from_tensor_slices(np.random.random((5000, 32)))

y_numpy = np.random.random((5000, 1))
# NOTE(review): with a single target column the slice `[:, 3:]` is empty, so
# this assignment changes nothing -- confirm the intended target width.
y_numpy[:, 3:] = None
y = tf.data.Dataset.from_tensor_slices(y_numpy)

# Overlapping windows of 30 rows, advancing 10 rows at a time, flattened back
# into a stream of single rows and scaled by 2.
x_window = x.window(30, shift=10, stride=1)
flat_x = x_window.flat_map(lambda t: t)
flat_x_scaled = flat_x.map(lambda t: t * 2)

# The targets must be windowed exactly like the features (same `shift`),
# otherwise `zip` pairs rows taken from different positions and truncates the
# longer stream.
y_window = y.window(30, shift=10, stride=1)
flat_y = y_window.flat_map(lambda t: t)
flat_y_scaled = flat_y.map(lambda t: t * 2)

z = (
    tf.data.Dataset.zip((flat_x_scaled, flat_y_scaled))
    .batch(32)
    .cache()
    .shuffle(buffer_size=32)  # applied after `batch`, so whole batches are shuffled
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

# Stop when the training loss has not improved by at least 1e-3 for 3 epochs.
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    min_delta=1e-3,
    verbose=1,
    mode='min',
    patience=3,
    baseline=None,
    restore_best_weights=True)

# Construct and compile an instance of the custom model.
model = CustomModelV2()

model.compile(optimizer=tf.optimizers.Adagrad(0.01))

history = model.fit(z, epochs=3, callbacks=[early_stop_cb])

以下是图形模式下的输出:

WARNING:tensorflow:Output output_1 missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to output_1.
WARNING:tensorflow:From C:\Users\jain432\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\optimizer_v2\adagrad.py:87: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Train on None steps
Epoch 1/3
 479916/Unknown - 667s 1ms/step - batch: 239957.5000 - size: 1.0000 - loss: 2.1716e-04

正如我所说,在 Eager Execution 模式下训练正常:

Epoch 1/3
468/468 [==============================] - 2s 3ms/step - loss: 0.4173
Epoch 2/3
468/468 [==============================] - 1s 3ms/step - loss: 0.3695
Epoch 3/3
468/468 [==============================] - 1s 3ms/step - loss: 0.3608

有人能帮我了解这里发生了什么以及我哪里做错了吗?

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐


使用本地python环境可以成功执行 import pandas as pd import matplotlib.pyplot as plt # 设置字体 plt.rcParams['font.sans-serif'] = ['SimHei'] # 能正确显示负号 p
错误1:Request method ‘DELETE‘ not supported 错误还原:controller层有一个接口,访问该接口时报错:Request method ‘DELETE‘ not supported 错误原因:没有接收到前端传入的参数,修改为如下 参考 错误2:cannot r
错误1:启动docker镜像时报错:Error response from daemon: driver failed programming external connectivity on endpoint quirky_allen 解决方法:重启docker -> systemctl r
错误1:private field ‘xxx‘ is never assigned 按Alt+Enter快捷键,选择第2项 参考:https://blog.csdn.net/shi_hong_fei_hei/article/details/88814070 错误2:启动时报错,不能找到主启动类 #
报错如下,通过源不能下载,最后警告pip需升级版本 Requirement already satisfied: pip in c:\users\ychen\appdata\local\programs\python\python310\lib\site-packages (22.0.4) Coll
错误1:maven打包报错 错误还原:使用maven打包项目时报错如下 [ERROR] Failed to execute goal org.apache.maven.plugins:maven-resources-plugin:3.2.0:resources (default-resources)
错误1:服务调用时报错 服务消费者模块assess通过openFeign调用服务提供者模块hires 如下为服务提供者模块hires的控制层接口 @RestController @RequestMapping("/hires") public class FeignControl
错误1:运行项目后报如下错误 解决方案 报错2:Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.8.1:compile (default-compile) on project sb 解决方案:在pom.
参考 错误原因 过滤器或拦截器在生效时,redisTemplate还没有注入 解决方案:在注入容器时就生效 @Component //项目运行时就注入Spring容器 public class RedisBean { @Resource private RedisTemplate<String
使用vite构建项目报错 C:\Users\ychen\work>npm init @vitejs/app @vitejs/create-app is deprecated, use npm init vite instead C:\Users\ychen\AppData\Local\npm-
参考1 参考2 解决方案 # 点击安装源 协议选择 http:// 路径填写 mirrors.aliyun.com/centos/8.3.2011/BaseOS/x86_64/os URL类型 软件库URL 其他路径 # 版本 7 mirrors.aliyun.com/centos/7/os/x86
报错1 [root@slave1 data_mocker]# kafka-console-consumer.sh --bootstrap-server slave1:9092 --topic topic_db [2023-12-19 18:31:12,770] WARN [Consumer clie
错误1 # 重写数据 hive (edu)> insert overwrite table dwd_trade_cart_add_inc > select data.id, > data.user_id, > data.course_id, > date_format(
错误1 hive (edu)> insert into huanhuan values(1,'haoge'); Query ID = root_20240110071417_fe1517ad-3607-41f4-bdcf-d00b98ac443e Total jobs = 1
报错1:执行到如下就不执行了,没有显示Successfully registered new MBean. [root@slave1 bin]# /usr/local/software/flume-1.9.0/bin/flume-ng agent -n a1 -c /usr/local/softwa
虚拟及没有启动任何服务器查看jps会显示jps,如果没有显示任何东西 [root@slave2 ~]# jps 9647 Jps 解决方案 # 进入/tmp查看 [root@slave1 dfs]# cd /tmp [root@slave1 tmp]# ll 总用量 48 drwxr-xr-x. 2
报错1 hive> show databases; OK Failed with exception java.io.IOException:java.lang.RuntimeException: Error in configuring object Time taken: 0.474 se
报错1 [root@localhost ~]# vim -bash: vim: 未找到命令 安装vim yum -y install vim* # 查看是否安装成功 [root@hadoop01 hadoop]# rpm -qa |grep vim vim-X11-7.4.629-8.el7_9.x
修改hadoop配置 vi /usr/local/software/hadoop-2.9.2/etc/hadoop/yarn-site.xml # 添加如下 <configuration> <property> <name>yarn.nodemanager.res