如何解决TensorFlow 自定义损失 ValueError:“没有为任何变量提供梯度:[....'Layers'....]”
我正在尝试把 TensorFlow 的内置函数用作损失函数来编译我的模型。这个错误是因为数据从 numpy 数组变成了张量导致的,还是与损失函数本身有关?我的代码:
import math

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from pandas import read_csv

from keras.models import Sequential
from keras.utils import to_categorical
# NOTE(review): 'Timedistributed' and 'Batchnormalization' do not exist in
# keras (the import itself raises ImportError); the correct CamelCase names
# are TimeDistributed and BatchNormalization, imported from keras.layers.
from keras.layers import (Dense, Dropout, Embedding, Flatten, LSTM,
                          TimeDistributed, Bidirectional, RepeatVector,
                          SimpleRNN, GRU)
from keras.layers import BatchNormalization
def edit_dist(y_true, y_pred):
    """Edit (Levenshtein) distance between dense labels and predictions.

    NOTE(review): ``tf.edit_distance`` is not differentiable, so this
    function cannot be used as a *training loss* — every gradient comes back
    ``None``, which is exactly the "No gradients provided for any variable"
    ValueError reported below.  Use it as a metric and train with a
    differentiable loss (e.g. categorical cross-entropy) instead.

    Bug fixed: Keras invokes loss/metric callables positionally as
    ``fn(y_true, y_pred)``; the original signature ``(y_pred, y_true)``
    silently swapped the two tensors.

    Args:
        y_true: dense ground-truth tensor.
        y_pred: dense prediction tensor.

    Returns:
        Un-normalized edit distance tensor (``normalize=False``).
    """
    truth = tf.sparse.from_dense([y_true])
    hypothesis = tf.sparse.from_dense([y_pred])
    return tf.edit_distance(hypothesis, truth, normalize=False)
def baseline_model():
    """Build and compile the bidirectional-LSTM sequence-to-sequence model.

    Reads ``x_train`` / ``y_train`` from module scope to size the layers
    (assumes x_train is (samples, 1, features) and y_train is
    (samples, steps, classes) — TODO confirm against the data prep below).

    Returns:
        A compiled ``Sequential`` model.
    """
    model = Sequential()
    # input_shape must go on the Bidirectional wrapper (the outermost
    # layer); placing it on the inner LSTM is ignored by Keras.
    model.add(Bidirectional(LSTM(200), input_shape=(1, x_train.shape[2])))
    model.add(RepeatVector(y_train.shape[1]))
    model.add(Bidirectional(LSTM(100, return_sequences=True)))
    model.add(Dropout(0.2))
    model.add(Bidirectional(LSTM(100, return_sequences=True)))
    model.add(Dropout(0.2))
    # Fixed: TimeDistributed (capital D) — 'Timedistributed' is a NameError.
    model.add(TimeDistributed(Dense(y_train.shape[2], activation='softmax')))
    # Edit distance is not differentiable, so using it as the loss yields
    # "No gradients provided for any variable".  Train with categorical
    # cross-entropy; report edit distance separately (e.g. as a metric) if
    # you need it for evaluation.
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  metrics=['accuracy'])
    return model
# ---- Data loading, preparation, training (script entry) -------------------
ips = read_csv('Data (1).csv', dtype=float)
fullset = np.array(ips)[2:, :]              # drop the first two rows
# Keep only rows whose first column equals 518 — presumably a station/series
# id; verify against the data source.
xy = fullset[np.where(fullset[:, 0] == 518)]
np.random.shuffle(xy)

x = xy[:, 4:76]                             # feature columns
y_1 = xy[:, 76:]                            # label columns
x = x / x.max(axis=0)                       # per-feature max normalisation

# 85/15 train/validation split (the duplicate, unused 'k' was removed).
l = int(np.around(0.85 * x.shape[0]))
x_train, y_train = x[:l, :], y_1[:l, :]
# BUG FIX: x[l+1:] silently skipped one sample; x[l:] keeps every row.
x_val, y_val = x[l:, :], y_1[l:, :]

y_train = to_categorical(y_train)
y_val = to_categorical(y_val)

# The model expects 3-D input: (samples, timesteps=1, features).
x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
# BUG FIX: the validation set was reshaped to 2-D, which does not match the
# model's 3-D input and makes validation_data fail; add the timestep axis.
x_val = np.reshape(x_val, (x_val.shape[0], 1, x_val.shape[1]))

model = baseline_model()
hist = model.fit(x_train, y_train, epochs=300, batch_size=128,
                 verbose=1, validation_data=(x_val, y_val))
plt.plot(hist.history['val_accuracy'])
plt.show()
错误:
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self,iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step,args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn,args=args,kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn,args,kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args,**kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:757 train_step
self.optimizer.minimize(loss,self.trainable_variables,tape=tape)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:498 minimize
return self.apply_gradients(grads_and_vars,name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:598 apply_gradients
grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/utils.py:79 filter_empty_gradients
([v.name for _,v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['bidirectional_27/forward_lstm_27/lstm_cell_82/kernel:0','bidirectional_27/forward_lstm_27/lstm_cell_82/recurrent_kernel:0','bidirectional_27/forward_lstm_27/lstm_cell_82/bias:0','bidirectional_27/backward_lstm_27/lstm_cell_83/kernel:0','bidirectional_27/backward_lstm_27/lstm_cell_83/recurrent_kernel:0','bidirectional_27/backward_lstm_27/lstm_cell_83/bias:0','bidirectional_28/forward_lstm_28/lstm_cell_85/kernel:0','bidirectional_28/forward_lstm_28/lstm_cell_85/recurrent_kernel:0','bidirectional_28/forward_lstm_28/lstm_cell_85/bias:0','bidirectional_28/backward_lstm_28/lstm_cell_86/kernel:0','bidirectional_28/backward_lstm_28/lstm_cell_86/recurrent_kernel:0','bidirectional_28/backward_lstm_28/lstm_cell_86/bias:0','bidirectional_29/forward_lstm_29/lstm_cell_88/kernel:0','bidirectional_29/forward_lstm_29/lstm_cell_88/recurrent_kernel:0','bidirectional_29/forward_lstm_29/lstm_cell_88/bias:0','bidirectional_29/backward_lstm_29/lstm_cell_89/kernel:0','bidirectional_29/backward_lstm_29/lstm_cell_89/recurrent_kernel:0','bidirectional_29/backward_lstm_29/lstm_cell_89/bias:0','time_distributed_9/kernel:0','time_distributed_9/bias:0'].
另外,我应该制作自己的编辑距离函数来用作损失函数,而不是使用内置的函数来实现更快的操作吗?
解决方法
编辑距离不可微,因此您不能将其用作损失函数——反向传播无法对它求梯度,于是优化器收到的全是 None 梯度,这正是报错信息的含义。您只能使用可微函数(例如 categorical_crossentropy)作为训练损失;如果仍想监控编辑距离,可以把它作为评估指标(metric)使用,而不是损失。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。