训练时使用 BERT 模型进行文本情感分析时出现错误

如何解决训练时使用 BERT 模型进行文本情感分析时出现错误

我正在使用 BERT 对推文进行情感分析。在训练模型时出现错误。我无法理解此错误的来源，更不用说修复它了。

完整代码如下：

import os
import shutil

import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization  # to create AdamW optimizer

import matplotlib.pyplot as plt

tf.get_logger().setLevel('ERROR')

main_file = 'C:\\Users\\meena\\Desktop\\Meenal\\Studies\\College\\Third Year\\Capstone Project\\Execution\\FYP-II-master\\data\\Annotated4.csv'
test_file = "C:\\Users\\meena\\Desktop\\Meenal\\Studies\\College\\Third Year\\Capstone Project\\Execution\\FYP-II-master\\data\\AnnotatedData2.csv"

df = pd.read_csv(main_file)

#BERT cannot work with 'no' or 'yes' so using 1 = yes,0 = undefined,-1 = no
for i in range(len(df)):
    if(df.loc[i,'class'] == "yes" or df.loc[i,'class'] == "PI" or df.loc[i,'class'] == "Yes"):
        df.loc[i,'class'] = '1'
    elif(df.loc[i,'class'] == "no" or df.loc[i,'class'] == "no PI" or df.loc[i,'class'] == "No"):
        df.loc[i,'class'] = '-1'
    elif(df.loc[i,'class'] == "Undefined" or df.loc[i,'class'] == "undefined"):
        df.loc[i,'class'] = '0'
#print(df.head(5))

from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(df['text'],df['class'])
x_train,x_val,y_val = train_test_split(x_train,y_train)

bert_model_name = 'albert_en_base' 

map_name_to_handle = {
    'bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3','bert_en_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/3','bert_multi_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/3','small_bert/bert_en_uncased_L-2_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1','small_bert/bert_en_uncased_L-2_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-256_A-4/1','small_bert/bert_en_uncased_L-2_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-512_A-8/1','small_bert/bert_en_uncased_L-2_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-768_A-12/1','small_bert/bert_en_uncased_L-4_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-128_A-2/1','small_bert/bert_en_uncased_L-4_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-256_A-4/1','small_bert/bert_en_uncased_L-4_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1','small_bert/bert_en_uncased_L-4_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-768_A-12/1','small_bert/bert_en_uncased_L-6_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-128_A-2/1','small_bert/bert_en_uncased_L-6_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-256_A-4/1','small_bert/bert_en_uncased_L-6_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-512_A-8/1','small_bert/bert_en_uncased_L-6_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-768_A-12/1','small_bert/bert_en_uncased_L-8_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-128_A-2/1','small_bert/bert_en_uncased_L-8_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-256_A-4/1','small_bert/bert_en_uncased_L-8_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1','small_bert/bert_en_uncased_L-8_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-768_A-12/1','small_bert/bert_en_uncased_L-10_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-128_A-2/1','small_bert/bert_en_uncased_L-10_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-256_A-4/1','small_bert/bert_en_uncased_L-10_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-512_A-8/1','small_bert/bert_en_uncased_L-10_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-768_A-12/1','small_bert/bert_en_uncased_L-12_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-128_A-2/1','small_bert/bert_en_uncased_L-12_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-256_A-4/1','small_bert/bert_en_uncased_L-12_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-512_A-8/1','small_bert/bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-768_A-12/1','albert_en_base':
        'https://tfhub.dev/tensorflow/albert_en_base/2','electra_small':
        'https://tfhub.dev/google/electra_small/2','electra_base':
        'https://tfhub.dev/google/electra_base/2','experts_pubmed':
        'https://tfhub.dev/google/experts/bert/pubmed/2','experts_wiki_books':
        'https://tfhub.dev/google/experts/bert/wiki_books/2','talking-heads_base':
        'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1',}

map_model_to_preprocess = {
    'bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','bert_en_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3','small_bert/bert_en_uncased_L-2_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-2_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-2_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-2_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-4_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-4_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-4_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-4_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-6_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-6_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-6_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-6_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-8_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-8_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-8_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-8_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-10_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-10_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-10_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-10_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-12_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-12_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-12_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','small_bert/bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','bert_multi_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3','albert_en_base':
        'https://tfhub.dev/tensorflow/albert_en_preprocess/3','electra_small':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','electra_base':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','experts_pubmed':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','experts_wiki_books':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3','talking-heads_base':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',}

tfhub_handle_encoder = map_name_to_handle[bert_model_name]
tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]

print(f'BERT model selected           : {tfhub_handle_encoder}')
print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

def build_classifier_model():
  text_input = tf.keras.layers.Input(shape=(),dtype=tf.string,name='text')
  preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess,name='preprocessing')
  encoder_inputs = preprocessing_layer(text_input)
  encoder = hub.KerasLayer(tfhub_handle_encoder,trainable=True,name='BERT_encoder')
  outputs = encoder(encoder_inputs)
  net = outputs['pooled_output']
  net = tf.keras.layers.Dropout(0.1)(net)
  net = tf.keras.layers.Dense(1,activation=None,name='classifier')(net)
  return tf.keras.Model(text_input,net)

classifier_model = build_classifier_model()

loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
metrics = tf.metrics.CategoricalAccuracy()

import numpy as np

y_train = np.asarray(y_train).astype('float32').reshape((-1,1))
y_val = np.asarray(y_val).astype('float32').reshape((-1,1))
y_test = np.asarray(y_test).astype('float32').reshape((-1,1))

train_ds = tf.data.Dataset.from_tensor_slices((x_train,y_train))
val_ds = tf.data.Dataset.from_tensor_slices((x_val,y_val))
test_ds = tf.data.Dataset.from_tensor_slices((x_test,y_test))

epochs = 5
steps_per_epoch = tf.data.experimental.cardinality(train_ds).numpy()
num_train_steps = steps_per_epoch * epochs
num_warmup_steps = int(0.1*num_train_steps)

init_lr = 3e-5
optimizer = optimization.create_optimizer(init_lr=init_lr,num_train_steps=num_train_steps,num_warmup_steps=num_warmup_steps,optimizer_type='adamw')

classifier_model.compile(optimizer=optimizer,loss=loss,metrics=metrics)

print(f'Training model with {tfhub_handle_encoder}')
history = classifier_model.fit(x=train_ds,validation_data=val_ds,epochs=epochs)

我使用的数据库有两列类和文本：

我得到的完整错误是：

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-108-71a9fbf2d907> in <module>
      2 history = classifier_model.fit(x=train_ds,3                                validation_data=val_ds,----> 4                                epochs=epochs)

~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\keras\engine\training.py in fit(self,x,y,batch_size,epochs,verbose,callbacks,validation_split,validation_data,shuffle,class_weight,sample_weight,initial_epoch,steps_per_epoch,validation_steps,validation_batch_size,validation_freq,max_queue_size,workers,use_multiprocessing)
   1181                 _r=1):
   1182               callbacks.on_train_batch_begin(step)
-> 1183               tmp_logs = self.train_function(iterator)
   1184               if data_handler.should_sync:
   1185                 context.async_wait()

~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\def_function.py in __call__(self,*args,**kwds)
    887 
    888       with OptionalXlaContext(self._jit_compile):
--> 889         result = self._call(*args,**kwds)
    890 
    891       new_tracing_count = self.experimental_get_tracing_count()

~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\def_function.py in _call(self,**kwds)
    931       # This is the first call of __call__,so we have to initialize.
    932       initializers = []
--> 933       self._initialize(args,kwds,add_initializers_to=initializers)
    934     finally:
    935       # At this point we kNow that the initialization is complete (or less

~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\def_function.py in _initialize(self,args,add_initializers_to)
    762     self._concrete_stateful_fn = (
    763         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 764             *args,**kwds))
    765 
    766     def invalid_creator_scope(*unused_args,**unused_kwds):

~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self,**kwargs)
   3048       args,kwargs = None,None
   3049     with self._lock:
-> 3050       graph_function,_ = self._maybe_define_function(args,kwargs)
   3051     return graph_function
   3052 

~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self,kwargs)
   3442 
   3443           self._function_cache.missed.add(call_context_key)
-> 3444           graph_function = self._create_graph_function(args,kwargs)
   3445           self._function_cache.primary[cache_key] = graph_function
   3446 

~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self,kwargs,override_flat_arg_shapes)
   3287             arg_names=arg_names,3288             override_flat_arg_shapes=override_flat_arg_shapes,-> 3289             capture_by_value=self._capture_by_value),3290         self._function_attributes,3291         function_spec=self.function_spec,~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name,python_func,signature,func_graph,autograph,autograph_options,add_control_dependencies,arg_names,op_return_value,collections,capture_by_value,override_flat_arg_shapes)
    997         _,original_func = tf_decorator.unwrap(python_func)
    998 
--> 999       func_outputs = python_func(*func_args,**func_kwargs)
   1000 
   1001       # invariant: `func_outputs` contains only Tensors,CompositeTensors,~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args,**kwds)
    670         # the function a weak reference to itself to avoid a reference cycle.
    671         with OptionalXlaContext(compile_with_xla):
--> 672           out = weak_wrapped_fn().__wrapped__(*args,**kwds)
    673         return out
    674 

~\AppData\Roaming\Python\python36\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args,**kwargs)
    984           except Exception as e:  # pylint:disable=broad-except
    985             if hasattr(e,"ag_error_Metadata"):
--> 986               raise e.ag_error_Metadata.to_exception(e)
    987             else:
    988               raise

ValueError: in user code:

    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\keras\engine\training.py:855 train_function  *
        return step_function(self,iterator)
    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow_hub\keras_layer.py:237 call  *
        result = smart_cond.smart_cond(training,C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\saved_model\load.py:670 _call_attribute  **
        return instance.__call__(*args,**kwargs)
    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\def_function.py:889 __call__
        result = self._call(*args,**kwds)
    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\def_function.py:924 _call
        results = self._stateful_fn(*args,**kwds)
    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\function.py:3022 __call__
        filtered_flat_args) = self._maybe_define_function(args,kwargs)
    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\function.py:3444 _maybe_define_function
        graph_function = self._create_graph_function(args,kwargs)
    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\function.py:3289 _create_graph_function
        capture_by_value=self._capture_by_value),C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\framework\func_graph.py:999 func_graph_from_py_func
        func_outputs = python_func(*func_args,**func_kwargs)
    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\eager\def_function.py:672 wrapped_fn
        out = weak_wrapped_fn().__wrapped__(*args,**kwds)
    C:\Users\meena\AppData\Roaming\Python\python36\site-packages\tensorflow\python\saved_model\function_deserialization.py:291 restored_function_body
        "\n\n".join(signature_descriptions)))

    ValueError: Could not find matching function to call loaded from the SavedModel. Got:
      Positional arguments (3 total):
        * Tensor("inputs:0",shape=(),dtype=string)
        * False
        * None
      Keyword arguments: {}
    
    Expected these arguments to match one of the following 4 option(s):
    
    Option 1:
      Positional arguments (3 total):
        * TensorSpec(shape=(None,),name='sentences')
        * True
        * None
      Keyword arguments: {}
    
    Option 2:
      Positional arguments (3 total):
        * TensorSpec(shape=(None,name='sentences')
        * False
        * None
      Keyword arguments: {}
    
    Option 3:
      Positional arguments (3 total):
        * TensorSpec(shape=(None,name='inputs')
        * True
        * None
      Keyword arguments: {}
    
    Option 4:
      Positional arguments (3 total):
        * TensorSpec(shape=(None,name='inputs')
        * False
        * None
      Keyword arguments: {}

我在 Jupyter Notebook 上运行它，并且我使用 GPU 来加快速度。我正在尝试训练模型，但它不断出现此错误。我使用的是 Python 3.6。