如何解决 TF2.5 警告“tensorflow:Gradients do not exist for variables when minimizing the loss”(GradientTape,Keras)
我正在尝试构建一个深度生存模型,先从一个简单的比例风险模型入手,混合使用 Keras 和非 Keras 的 TensorFlow 代码。我的问题是:GradientTape 似乎没有跟踪我用 tf.Variable 定义的参数,只跟踪了 Keras 层中的参数。
这是我的模型:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,Conv1D
class phmodel(tf.Module):
    """Proportional-hazards survival model: a Keras graph plus a raw tf.Variable.

    The Conv1D kernel (the covariate weight) lives inside a Keras functional
    model, while the log baseline-hazard rate ``beta`` is a plain
    ``tf.Variable`` owned by this ``tf.Module``.

    NOTE: in this version ``beta`` is only read while the Keras graph is being
    *built* (see ``makemodel``), so it never receives a gradient. This is the
    behaviour the surrounding question is about.

    Args:
        lr: learning rate handed to the SGD optimizer.
        timesteps: number of time steps per sample (length of axis 1).
        **kwargs: forwarded to ``tf.Module.__init__``.
    """

    def __init__(self, lr=.0001, timesteps=100, **kwargs):
        super().__init__(**kwargs)
        self.beta = tf.Variable(0.0)  # initialize to 0 = log(1)
        self.timesteps = timesteps
        self.lr = lr
        self.makemodel()
        self.opt = tf.keras.optimizers.SGD(learning_rate=lr)

    def makemodel(self):
        """Build the Keras functional model and store it in ``self.model``."""
        inp = Input((self.timesteps, 1,))
        # Equivalent to XB: the same single weight is applied to each timestep.
        lay = Conv1D(filters=1, kernel_size=1, use_bias=False)(inp)
        elay = tf.math.exp(lay)
        # Re-exponentiate: beta is trained in logs.
        # NOTE: self.beta is a concrete variable, not a symbolic Keras tensor,
        # so this exp (and baseline_hazard below) is evaluated once, right
        # here. The result enters the functional graph as a fixed constant;
        # the finished model no longer depends on self.beta.
        beta = tf.math.exp(self.beta)
        baseline_hazard = tf.expand_dims(
            1 - tf.math.exp(
                -beta * tf.range(0, self.timesteps, delta=1, dtype='float32')
            ),
            -1,
        )
        cumulative_hazard = tf.math.cumsum(baseline_hazard * elay, axis=1)
        out = 1 - tf.math.exp(-cumulative_hazard)
        self.model = Model(inp, out)

    def __call__(self, x):
        """Return the wrapped Keras model's output for ``x``."""
        return self.model(x)

    def train_step(self, x, y, w=None, verbose=False):
        """Run one SGD step on the binary cross-entropy loss.

        Args:
            x: model input batch.
            y: binary targets, same shape as the model output.
            w: optional sample weights passed to the loss.
            verbose: print the loss tensor when true.
        """
        with tf.GradientTape() as g:
            yhat = self.model(x)
            loss = tf.keras.losses.BinaryCrossentropy()(y, yhat, sample_weight=w)
        # Only the Keras model's variables are differentiated and updated;
        # self.beta is not among self.model.trainable_variables.
        grad = g.gradient(loss, self.model.trainable_variables)
        self.opt.apply_gradients(zip(grad, self.model.trainable_variables))
        if verbose:
            print(loss)
模型结构如下:
# NOTE(review): the first positional parameter of phmodel.__init__ is `lr`, so
# this builds the model with lr=100 (timesteps keeps its default of 100).
# A step size that large is a likely contributor to the NaNs reported further
# down; phmodel(timesteps=100) was presumably intended -- confirm.
model = phmodel(100)
model.model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None,100,1)] 0
_________________________________________________________________
conv1d_3 (Conv1D) (None,1) 1
_________________________________________________________________
tf.math.exp_5 (TFOpLambda) (None,1) 0
_________________________________________________________________
tf.math.multiply_2 (TFOpLamb (None,1) 0
_________________________________________________________________
tf.math.cumsum_2 (TFOpLambda (None,1) 0
_________________________________________________________________
tf.math.negative_2 (TFOpLamb (None,1) 0
_________________________________________________________________
tf.math.exp_6 (TFOpLambda) (None,1) 0
_________________________________________________________________
tf.math.subtract_2 (TFOpLamb (None,1) 0
=================================================================
Total params: 1
Trainable params: 1
Non-trainable params: 0
_________________________________________________________________
定义一些虚拟数据:
# One sample of 100 timesteps with a single feature, matching the model's
# Input((timesteps, 1)): shape (batch=1, timesteps=100, channels=1).
xtr = np.random.normal(size=100).reshape(1, 100, 1)
# Labels in the same layout: 0 for the first 50 timesteps, 1 for the last 50.
ytr = np.concatenate([np.zeros(50), np.ones(50)]).reshape(1, 100, 1)
前向传播:
# Forward pass: phmodel.__call__ simply delegates to the wrapped Keras model.
yhat = model(xtr)
看看可训练的变量:
# Queried on the tf.Module wrapper (not on model.model): the result lists both
# beta ('Variable:0') and the Conv1D kernel.
model.trainable_variables
(<tf.Variable 'Variable:0' shape=() dtype=float32,numpy=0.0>,<tf.Variable 'conv1d_4/kernel:0' shape=(1,1,1) dtype=float32,numpy=array([[[1.2811402]]],dtype=float32)>)
损失看起来正常:
# Binary cross-entropy between the labels and the forward-pass predictions.
loss = tf.keras.losses.BinaryCrossentropy()(ytr, yhat, sample_weight=None)
loss
<tf.Tensor: shape=(),dtype=float32,numpy=6.711515>
计算梯度:
# Record the forward pass and the loss on the tape, then differentiate the
# loss with respect to every variable the tf.Module reports as trainable.
with tf.GradientTape() as g:
    yhat = model(xtr)
    loss = tf.keras.losses.BinaryCrossentropy()(ytr, yhat, sample_weight=None)
grad = g.gradient(loss, model.trainable_variables)
grad
(None,<tf.Tensor: shape=(1,1,1),dtype=float32,numpy=array([[[0.51390326]]],dtype=float32)>)
问题来了:Variable:0 没有梯度。这是怎么回事?当我应用梯度时,会收到警告,接着出现各种问题和 NaN:
# The gradient paired with beta is None (first entry of the tuple printed
# above), which is what triggers the warning that follows.
model.opt.apply_gradients(zip(grad,model.trainable_variables))
WARNING:tensorflow:Gradients do not exist for variables ['Variable:0'] when minimizing the loss.
如果训练循环运行超过几步,卷积层上的梯度就开始变成 NaN:
(<tf.Variable 'Variable:0' shape=() dtype=float32,numpy=-6.2>,<tf.Variable 'conv1d_12/kernel:0' shape=(1,1,1) dtype=float32,numpy=array([[[nan]]],dtype=float32)>)
这是怎么回事?
编辑:我把 makemodel 方法里的一部分计算移到 __call__ 中,问题就解决了。我不知道这为什么有效,也许能帮到以后通过搜索找到这里的人。如果有人能解释其中的原因,我很乐意奉上 15 点声望,因为我自己毫无头绪。
class phmodel(tf.Module):
    """Proportional-hazards survival model with a trainable baseline rate.

    The Keras functional model holds only the Conv1D covariate weight. The
    baseline hazard, which depends on the raw ``tf.Variable`` ``beta``, is
    computed in ``__call__``. It is therefore re-evaluated on every forward
    pass, and a ``tf.GradientTape`` that wraps the call records the dependence
    of the loss on ``beta``.

    Args:
        lr: learning rate handed to the SGD optimizer.
        timesteps: number of time steps per sample (length of axis 1).
        **kwargs: forwarded to ``tf.Module.__init__``.
    """

    def __init__(self, lr=.0001, timesteps=100, **kwargs):
        super().__init__(**kwargs)
        # beta is trained on the log scale; exp(-10.2) is roughly 3.7e-5.
        self.beta = tf.Variable(-10.2)
        self.timesteps = timesteps
        # Time index 0 .. timesteps-1, built once and reused in every call.
        self.timerange = tf.range(0, self.timesteps, delta=1, dtype='float32')
        self.lr = lr
        self.makemodel()
        self.opt = tf.keras.optimizers.SGD(learning_rate=lr)

    def makemodel(self):
        """Build the Keras part of the model (the linear predictor only)."""
        inp = Input((self.timesteps, 1,))
        # Equivalent to XB: the same single weight is applied to each timestep.
        lay = Conv1D(filters=1, kernel_size=1, use_bias=False)(inp)
        self.model = Model(inp, lay)

    def __call__(self, x):
        """Return 1 - exp(-cumulative hazard) for each timestep of ``x``."""
        mout = self.model(x)
        elay = tf.math.exp(mout)
        # Re-exponentiate: beta is trained in logs. Because this runs on every
        # call, the operation is visible to an enclosing GradientTape.
        beta = tf.math.exp(self.beta)
        baseline_hazard = tf.expand_dims(
            1 - tf.math.exp(tf.multiply(-beta, self.timerange)), -1
        )
        cumulative_hazard = tf.math.cumsum(
            tf.multiply(baseline_hazard, elay), axis=1
        )
        out = 1 - tf.math.exp(-cumulative_hazard)
        return out

    def train_step(self, x, y, w=None, verbose=False):
        """Run one SGD step on the binary cross-entropy loss.

        Args:
            x: model input batch.
            y: binary targets, same shape as the model output.
            w: optional sample weights passed to the loss.
            verbose: print the loss tensor when true.
        """
        # Use the module's own variable list so that beta is differentiated
        # and updated together with the Conv1D kernel.
        variables = self.trainable_variables
        with tf.GradientTape() as g:
            # Go through __call__, not self.model, so the hazard computation
            # involving beta is recorded on the tape.
            yhat = self(x)
            loss = tf.keras.losses.BinaryCrossentropy()(y, yhat, sample_weight=w)
        grad = g.gradient(loss, variables)
        self.opt.apply_gradients(zip(grad, variables))
        if verbose:
            print(loss)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。