如何在Keras API自定义层tanh_layer中输入4个张量head，arch，pos，embedding

如何解决如何在Keras API自定义层tanh_layer中输入4个张量head，arch，pos，embedding

在将以下hidden_state *（word_head，word_arch，word_pos，嵌入隐藏状态，所有shape（？，256）张量的张量）输入到自定义层**（tanh_layer下面）时，我遇到了几天的问题。它弹出一条错误消息，提示输入声明与图层不匹配。在类方法init，build或tanh_layer的调用中，如何定义4个张量作为输入，以便模型识别4个输入张量？我怀疑会在调用方法中传递参数“ inputs”，并使head，arch，pos，text = input传递4个张量，但该层无法识别。
这是用于神经机器翻译的biLSTM +注意解码器模型，我通过tanh层而不是串联来添加依赖项解析上下文（头部，拱门，pos）（我做到了没有问题）。请帮助和赞赏。（下面的1.我的模型的源代码2. tanh_layer的源代码）

ValueError：层tanh_layer期望有1个输入，但它收到了4个输入张量。收到的输入：[，，，]

def define_model(src_vocab,tar_vocab,src_timesteps,tar_timesteps,word_pos_vocab_size,n_units):
    word_head_input = Input(shape=(src_timesteps,),name = 'word_head')
    word_head_reshape = Reshape((src_timesteps,1),name='word_head_reshape') (word_head_input)
    word_head_hidden_state = Bidirectional(LSTM(n_units,dropout =0.3,input_shape=(src_timesteps,1)))(word_head_reshape)

    word_arch_input = Input(shape=(src_timesteps,name = 'word_arch')
    word_arch_reshape = Reshape((src_timesteps,name='word_arch_reshape') (word_arch_input)
    word_arch_hidden_state = Bidirectional(LSTM(n_units,dropout = 0.3,1)))(word_arch_reshape)

    word_pos_input = Input(shape=(src_timesteps,name="word_pos")
    word_pos_one_hot = Embedding(word_pos_vocab_size,n_units,input_length=src_timesteps,mask_zero=True)(word_pos_input)
    word_pos_hidden_state = Bidirectional(LSTM(n_units,dropout = 0.3))(word_pos_one_hot)

    encoder_input = Input(shape=(src_timesteps,name="word_text")
    one_hot = Embedding(src_vocab,mask_zero=True)(encoder_input)
    embedding_hidden_state = Bidirectional(LSTM(n_units,dropout=0.3))(one_hot)

*  hidden_state = [word_head_hidden_state,word_arch_hidden_state,word_pos_hidden_state,embedding_hidden_state]

**  tanh_hidden_state = tanh_layer(n_units,tar_timesteps)(hidden_state)

    decoder_output = AttentionDecoder(n_units,src_timesteps)(tanh_hidden_state)
    model = Model(inputs = [encoder_input,word_head_input,word_arch_input,word_pos_input],outputs = decoder_output,name="Autoencoder")
return model

import tensorflow as tf
from keras import backend as K
from keras.layers import Layer
from keras import regularizers,constraints,initializers,activations
from keras.engine import InputSpec


class tanh_layer(Layer):
    def __init__(self,units,activation ='tanh',name ='tanh_layer',kernel_initializer='glorot_uniform',bias_initializer='zeros',kernel_regularizer=None,bias_regularizer=None,kernel_constraint=None,bias_constraint=None,**kwargs):
        """
        Implements a tanh layer to integrate the head word,word arch,POS tags and text in a FFN
        """
        self.units = units
        self.tar_timesteps = tar_timesteps
        self.activation = activations.get(activation)
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        print('start _init_')
        print('self.units',self.units)

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        super(tanh_layer,self).__init__(**kwargs)
        self.name = name
        print('self.name',self.name)
        print('end_initi_')


    def build(self,input_shape,*arg,**kwargs):
        """
        Initialize Um,Vm,Wm,Xm and bm for the merging layer
        Matrices for creating the context vector
        """
        
        print('start build')
        print('input_shape_dim',input_shape)
        print(input_shape[0],input_shape[1],input_shape[2],input_shape[3])
        self.batch_size,self.units = input_shape[0]
        self.batch_size,self.units = input_shape[1]
        self.batch_size,self.units = input_shape[2]
        self.batch_size,self.units = input_shape[3]
        print('input_shape',self.units)

        self.U_m = self.add_weight(shape=(self.units,self.units),name='U_m',initializer=self.kernel_initializer,regularizer=self.kernel_regularizer,constraint=self.kernel_constraint,trainable = True)
        self.V_m = self.add_weight(shape=(self.units,name='V_m',trainable = True)
        self.W_m = self.add_weight(shape=(self.units,name='W_m',trainable = True)
        self.X_m = self.add_weight(shape=(self.units,name='X_m',trainable = True)
        self.b_m = self.add_weight(shape=(self.units,name='b_m',initializer=self.bias_initializer,regularizer=self.bias_regularizer,constraint=self.bias_constraint,trainable = True)

        self.input_spec = InputSpec(shape=input_shape)
        print(self.input_spec)
        super(tanh_layer,self).build(input_shape)
        
        print('U_m',self.U_m)
        print('V_m',self.V_m)
        print('W_m',self.W_m)
        print('X_m',self.X_m)
        print('b_m',self.b_m)
        print('U_m',type(self.U_m))
        print('V_m',type(self.V_m))
        print('W_m',type(self.W_m))
        print('X_m',type(self.X_m))
        print('b_m',type(self.b_m))
        
        print('end build')
        self.built = True

    def call(self,inputs,**kwargs):
        """
        Call the merging all four components in here
        Merging all four components in here
        """
        print('start call')
        self.inputs = inputs
        text,head,arch,pos = self.inputs

        print('self.head',type(self.head))
        print('self.arch',type(self.arch))
        print('self.pos',type(self.pos))
        print('self.text',type(self.text))
        print('self.units',self.units)
        print('self.head',self.head)
        print('self.arch',self.arch)
        print('self.pos',self.pos)
        print('self.text',self.text)
        print('hidden_state',self.hidden_state)
        print('hidden_state',type(self.hidden_state))
#        h_state = head + arch + pos + text
        # calculate the sum of the hidden state:
        self.tanh_hidden_state = activations.tanh(
            K.dot(self.head,self.U_m)
            + K.dot(self.arch,self.V_m)
            + K.dot(self.pos,self.W_m)
            + K.dot(self.text,self.X_m)
            + self.b_m)
        
        self.tanh_hidden_state = tf.reshape(self.tanh_hidden_state,[-1,self.tar_timesteps,self.units])
        
        print('tanh_hidden_state',K.int_shape(self.tanh_hidden_state))
        print('tanh_hidden_state',self.tanh_hidden_state)
        print('type tanh_hidden_state',type(self.tanh_hidden_state))
        print('endcall')
        return self.tanh_hidden_state
    
    def compute_output_shape(self,input_shape):
        print('compute_output_shape',K.int_shape(self.tanh_hidden_state))
        return K.int_shape(self.tanh_hidden_state)

如何在Keras API自定义层tanh_layer中输入4个张量head，arch，pos，embedding

如何解决如何在Keras API自定义层tanh_layer中输入4个张量head，arch，pos，embedding

相关推荐