如何解决训练CNN:ValueError:没有为任何变量提供渐变
我想用以下几层训练模型:
# Build a CNN encoder mapping (50, 120, 3) images to an
# `embedding_dim`-dimensional vector of raw logits (no final activation).
embedding_dim = 80

model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu',
                                 input_shape=(50, 120, 3)))
model.add(tf.keras.layers.MaxPool2D(padding='same'))
# NOTE(review): `kernel_size` was missing from the pasted code (it is a
# required Conv2D argument); 3 is assumed to match the first layer —
# confirm with the original author.
model.add(tf.keras.layers.Conv2D(128, 3, activation='relu'))
model.add(tf.keras.layers.MaxPool2D(padding='same'))
model.add(tf.keras.layers.Conv2D(256, 3, activation='relu'))
model.add(tf.keras.layers.Conv2D(256, 3, activation='relu'))
model.add(tf.keras.layers.Conv2D(512, 2, strides=(2, 4), activation='relu'))
# Inject fixed sinusoidal position information before flattening.
model.add(tf.keras.layers.Lambda(add_timing_signal_nd))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(embedding_dim))
之后,我运行以下代码:
# Sparse categorical cross-entropy on raw logits (the final Dense layer has
# no softmax, hence from_logits=True).
# NOTE(review): reduction='none' yields one loss value per example instead of
# a scalar; model.fit() normally expects a reduced loss — confirm this is
# intentional.
model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,reduction='none'),metrics=['accuracy'])
# NOTE(review): image_dataset must yield (image, label) pairs; fitting a
# dataset of images only is a classic cause of the "No gradients provided
# for any variable" error reported below.
model.fit(image_dataset,epochs=10,validation_data=val_dataset)
我收到以下错误
ValueError: No gradients provided for any variable: ['conv2d/kernel:0','conv2d/bias:0','conv2d_1/kernel:0','conv2d_1/bias:0','conv2d_2/kernel:0','conv2d_2/bias:0','conv2d_3/kernel:0','conv2d_3/bias:0','conv2d_4/kernel:0','conv2d_4/bias:0','conv2d_5/kernel:0','conv2d_5/bias:0','conv2d_6/kernel:0','conv2d_6/bias:0','dense/kernel:0','dense/bias:0'].
为进一步说明,add_timing_signal_nd
定义如下:
def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4):
    """Add n-dimensional sinusoidal timing (position) signals to a tensor.

    For each of the n spatial dimensions, a sin/cos signal over a geometric
    sequence of timescales is computed and added into a disjoint slice of the
    channel dimension, so positions along every axis are encoded.

    Args:
        x: a Tensor with shape [batch, d1 ... dn, channels].
        min_timescale: a float, the smallest wavelength scale.
        max_timescale: a float, the largest wavelength scale.

    Returns:
        A Tensor of the same shape as x with the timing signal added.
    """
    static_shape = x.get_shape().as_list()
    # Number of spatial dimensions (exclude batch and channels).
    num_dims = len(static_shape) - 2
    channels = tf.shape(x)[-1]
    # Each dimension gets 2 * num_timescales channels (sin + cos).
    num_timescales = channels // (num_dims * 2)
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        (tf.cast(num_timescales, dtype=tf.float32) - 1))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), dtype=tf.float32) *
        -log_timescale_increment)
    # BUGFIX: the original used Python-2 `xrange`, which does not exist on
    # Python 3 (the TF2 Keras API used here implies Python 3).
    for dim in range(num_dims):
        length = tf.shape(x)[dim + 1]
        position = tf.cast(tf.range(length), dtype=tf.float32)
        scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(
            inv_timescales, 0)
        signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
        # Pad the signal into this dimension's slice of the channel axis.
        prepad = dim * 2 * num_timescales
        postpad = channels - (dim + 1) * 2 * num_timescales
        signal = tf.pad(signal, [[0, 0], [prepad, postpad]])
        # Expand so the signal broadcasts over batch and the other dims.
        for _ in range(1 + dim):
            signal = tf.expand_dims(signal, 0)
        for _ in range(num_dims - 1 - dim):
            signal = tf.expand_dims(signal, -2)
        x += signal
    return x
如果对输入大小有帮助,请参见以下
(3,50,3)
(3,3)
(1,3)
谢谢!
解决方法
我猜想image_dataset或val_dataset不正确。按照您的代码,我模拟了一些数据(包括标签)进行训练,并且可以正常运行。
# Simulated training data. The integer labels are REQUIRED: calling
# model.fit() with images but no labels is what produces
# "No gradients provided for any variable".
# NOTE(review): the pasted lines were garbled by extraction; the shapes below
# are reconstructed to match the model's (50, 120, 3) input and the fit()
# call — confirm against the original answer.
image_dataset = np.random.uniform(0, 1, (3000, 50, 120, 3))
# Class labels in [0, embedding_dim). `np.int` was removed in NumPy 1.24;
# use an explicit integer dtype instead.
image_dataset_y = np.random.uniform(0, embedding_dim, (3000,)).astype(np.int64)
val_dataset = np.random.uniform(0, 1, (300, 50, 120, 3))
val_dataset_y = np.random.uniform(0, embedding_dim, (300,)).astype(np.int64)
model.fit(image_dataset, image_dataset_y, batch_size=30, epochs=10,
          validation_data=(val_dataset, val_dataset_y))
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。