如何解决logits 和标签必须具有相同的形状 ((None, None, 8922) vs (None, 8922))
我正在尝试在我的模型中实现 keras SeqSelfAttention 层。我得到了我的模型的摘要,但是当我把它放在火车上时,它给了我错误的数据。 以下是我的模型的架构。
import keras
from keras_self_attention import SeqSelfAttention
model = keras.models.Sequential()
model.add(keras.layers.Embedding(input_dim=150854,output_dim=100,input_length=1500,embeddings_initializer=keras.initializers.Constant(emb_matrix),trainable=False))
model.add(keras.layers.Bidirectional(keras.layers.LSTM(units=128,return_sequences=True)))
model.add(SeqSelfAttention(attention_activation='sigmoid'))
model.add(keras.layers.Dense(units=8922,activation='sigmoid'))
model.summary(line_length=100)
Model: "sequential_5"
____________________________________________________________________________________________________
Layer (type) Output Shape Param #
====================================================================================================
embedding_11 (Embedding) (None,1500,100) 15085400
____________________________________________________________________________________________________
bidirectional_11 (Bidirectional) (None,256) 234496
____________________________________________________________________________________________________
seq_self_attention_5 (SeqSelfAttention) (None,None,256) 16449
____________________________________________________________________________________________________
dense_2 (Dense) (None,8922) 2292954
====================================================================================================
Total params: 17,629,299
Trainable params: 2,543,899
Non-trainable params: 15,085,400
____________________________________________________________________________________________________
当我拟合模型时
history=model.fit(x_train,y_train,validation_data=(x_valid,y_valid),epochs=30,batch_size=16,callbacks=[callback,chk])
我收到以下错误:
Epoch 1/30
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-61-f96237c73ff2> in <module>()
----> 1 history=model.fit(x_train,chk])
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py in fit(self,x,y,batch_size,epochs,verbose,callbacks,validation_split,validation_data,shuffle,class_weight,sample_weight,initial_epoch,steps_per_epoch,validation_steps,validation_batch_size,validation_freq,max_queue_size,workers,use_multiprocessing)
1098 _r=1):
1099 callbacks.on_train_batch_begin(step)
-> 1100 tmp_logs = self.train_function(iterator)
1101 if data_handler.should_sync:
1102 context.async_wait()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in __call__(self,*args,**kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args,**kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in _call(self,**kwds)
869 # This is the first call of __call__,so we have to initialize.
870 initializers = []
--> 871 self._initialize(args,kwds,add_initializers_to=initializers)
872 finally:
873 # At this point we kNow that the initialization is complete (or less
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self,args,add_initializers_to)
724 self._concrete_stateful_fn = (
725 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 726 *args,**kwds))
727
728 def invalid_creator_scope(*unused_args,**unused_kwds):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self,**kwargs)
2967 args,kwargs = None,None
2968 with self._lock:
-> 2969 graph_function,_ = self._maybe_define_function(args,kwargs)
2970 return graph_function
2971
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self,kwargs)
3359
3360 self._function_cache.missed.add(call_context_key)
-> 3361 graph_function = self._create_graph_function(args,kwargs)
3362 self._function_cache.primary[cache_key] = graph_function
3363
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self,kwargs,override_flat_arg_shapes)
3204 arg_names=arg_names,3205 override_flat_arg_shapes=override_flat_arg_shapes,-> 3206 capture_by_value=self._capture_by_value),3207 self._function_attributes,3208 function_spec=self.function_spec,/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name,python_func,signature,func_graph,autograph,autograph_options,add_control_dependencies,arg_names,op_return_value,collections,capture_by_value,override_flat_arg_shapes)
988 _,original_func = tf_decorator.unwrap(python_func)
989
--> 990 func_outputs = python_func(*func_args,**func_kwargs)
991
992 # invariant: `func_outputs` contains only Tensors,CompositeTensors,/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args,**kwds)
632 xla_context.Exit()
633 else:
--> 634 out = weak_wrapped_fn().__wrapped__(*args,**kwds)
635 return out
636
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args,**kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e,"ag_error_Metadata"):
--> 977 raise e.ag_error_Metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self,iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step,args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn,args=args,kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn,kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args,**kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:756 train_step
y,y_pred,regularization_losses=self.losses)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
loss_value = loss_obj(y_t,y_p,sample_weight=sw)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:152 __call__
losses = call_fn(y_true,y_pred)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:256 call **
return ag_fn(y_true,**self._fn_kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args,**kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:1608 binary_crossentropy
K.binary_crossentropy(y_true,from_logits=from_logits),axis=-1)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args,**kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/backend.py:4979 binary_crossentropy
return nn.sigmoid_cross_entropy_with_logits(labels=target,logits=output)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args,**kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/nn_impl.py:174 sigmoid_cross_entropy_with_logits
(logits.get_shape(),labels.get_shape()))
ValueError: logits and labels must have the same shape ((None,8922) vs (None,8922))
我试图用 Reshape 层解决它:
model = keras.models.Sequential()
model.add(keras.layers.Embedding(input_dim=150854,return_sequences=True)))
model.add(SeqSelfAttention(attention_activation='sigmoid'))
model.add(keras.layers.Reshape((8922,)))
model.add(keras.layers.Dense(units=8922,activation='sigmoid'))
model.summary(line_length=100)
Model: "sequential_6"
____________________________________________________________________________________________________
Layer (type) Output Shape Param #
====================================================================================================
embedding_12 (Embedding) (None,100) 15085400
____________________________________________________________________________________________________
bidirectional_12 (Bidirectional) (None,256) 234496
____________________________________________________________________________________________________
seq_self_attention_6 (SeqSelfAttention) (None,256) 16449
____________________________________________________________________________________________________
reshape_4 (Reshape) (None,8922) 0
____________________________________________________________________________________________________
dense_3 (Dense) (None,8922) 79611006
====================================================================================================
Total params: 94,947,351
Trainable params: 79,861,951
Non-trainable params: 15,400
____________________________________________________________________________________________________
仍然出现以下错误:
Epoch 1/30
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-64-f96237c73ff2> in <module>()
----> 1 history=model.fit(x_train,chk])
6 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py in fit(self,**kwds)
886 # Lifting succeeded,so variables are initialized and we can run the
887 # stateless function.
--> 888 return self._stateless_fn(*args,**kwds)
889 else:
890 _,_,filtered_flat_args = \
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in __call__(self,**kwargs)
2941 filtered_flat_args) = self._maybe_define_function(args,kwargs)
2942 return graph_function._call_flat(
-> 2943 filtered_flat_args,captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
2944
2945 @property
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _call_flat(self,captured_inputs,cancellation_manager)
1917 # No tape is watching; skip to running the function.
1918 return self._build_call_outputs(self._inference_function.call(
-> 1919 ctx,cancellation_manager=cancellation_manager))
1920 forward_backward = self._select_forward_and_backward_functions(
1921 args,/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in call(self,ctx,cancellation_manager)
558 inputs=args,559 attrs=attrs,--> 560 ctx=ctx)
561 else:
562 outputs = execute.execute_with_cancellation(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name,num_outputs,inputs,attrs,name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle,device_name,op_name,---> 60 inputs,num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: Input to reshape is a tensor with 6144000 values,but the requested shape has 142752
[[node sequential_6/reshape_4/Reshape (defined at <ipython-input-64-f96237c73ff2>:1) ]] [Op:__inference_train_function_52586]
Errors may have originated from an input operation.
Input Source operations connected to node sequential_6/reshape_4/Reshape:
sequential_6/seq_self_attention_6/MatMul_3 (defined at /usr/local/lib/python3.7/dist-packages/keras_self_attention/seq_self_attention.py:182)
Function call stack:
train_function
感谢帮助
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。