How to do multi-step forecasting with a Seq2Seq encoder-decoder model with attention


I am trying to forecast factory production data with a Seq2Seq model, using an encoder-decoder architecture augmented with attention. I am somewhat stuck, because the model's output appears to be a constant and has the same sequence length as the input, whereas I actually want to be able to specify a forecast 3/5/9 months into the future. Could you look at my code and perhaps suggest where I am going wrong?

I have spent a lot of time on the PyTorch Seq2Seq tutorial and on the GitHub file below, both of which are geared towards NLP and machine translation, but I have not managed to adapt them to economic data.

GitHub: https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/more_advanced/Seq2Seq_attention/seq2seq_attention.py

Goal: As far as I understand it, I am forecasting this factory's future output of a given material. The target is therefore 1-dimensional, and of course an integer.

Encoder: The encoder takes a sequence of length 168 as input, where each element consists of the previous 20 days of data together with 37 factory-level features, such as the number of workers.
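To make the shapes concrete, here is a standalone check with a plain bidirectional LSTM and illustrative sizes matching mine (batch of 36, hidden size 1024; input_size=37 is an assumption on my part):

import torch
import torch.nn as nn

# Standalone shape check with illustrative sizes (36 is my batch size,
# 1024 my hidden size; input_size=37 is assumed here).
src = torch.randn(168, 36, 37)                 # (seq_len, batch, input_size)
lstm = nn.LSTM(input_size=37, hidden_size=1024, bidirectional=True)
encoder_states, (hidden, cell) = lstm(src)
print(encoder_states.shape)                    # torch.Size([168, 36, 2048])
print(hidden.shape, cell.shape)                # torch.Size([2, 36, 1024]) each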

Decoder: As I understand it, the decoder should take the production level from the previous time step as input (i.e. dimension 1), along with the previous hidden and cell states. The problem is that the decoder seems to output an (almost) constant value every time, and does not appear to learn the data (the predictions are always positive).
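In other words, the single-step interface I am aiming for looks roughly like this (a sketch using an LSTMCell with made-up sizes, not my actual decoder):

import torch
import torch.nn as nn

# Sketch of the intended single decoder step: previous production level
# (dimension 1) in, one prediction out, hidden/cell states carried along.
step = nn.LSTMCell(input_size=1, hidden_size=1024)
to_level = nn.Linear(1024, 1)

y_prev = torch.randn(36, 1)                    # last production level, batch of 36
h = torch.zeros(36, 1024)
c = torch.zeros(36, 1024)
h, c = step(y_prev, (h, c))
y_next = to_level(h)                           # (36, 1): next predicted level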

Code

import random

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, p):
        super(EncoderRNN, self).__init__()

        self.lstm = nn.LSTM(input_size, hidden_size, dropout=p, bidirectional=True)  # batch_first=True
        self.fc_hidden = nn.Linear(hidden_size*2, hidden_size)
        self.fc_cell = nn.Linear(hidden_size*2, hidden_size)

    def forward(self, input):
        # input: (seq_len, batch, input_size)
        encoder_states, (hidden, cell_state) = self.lstm(input)

        # Concatenate the forward/backward direction states and project them
        # back down to hidden_size for the unidirectional decoder.
        hidden = self.fc_hidden(torch.cat((hidden[0:1], hidden[1:2]), dim=2))
        cell = self.fc_cell(torch.cat((cell_state[0:1], cell_state[1:2]), dim=2))

        return encoder_states, hidden, cell


class Decoder_LSTMwAttention(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, p):
        super(Decoder_LSTMwAttention, self).__init__()

        # Input at each step is the attention context (hidden_size*2) plus the
        # previous-step production level (input_size).
        self.rnn = nn.LSTM(hidden_size*2 + input_size, hidden_size, num_layers)  # batch_first=True

        self.energy = nn.Linear(hidden_size*3, 1)
        self.softmax = nn.Softmax(dim=0)  # normalise over the source sequence dimension
        self.dropout = nn.Dropout(p)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.fc = nn.Linear(hidden_size, output_size)

        self.attention_combine = nn.Linear(hidden_size, hidden_size)


    def forward(self, input, encoder_states, hidden, cell):

        batch_size = encoder_states.shape[1]

        # Reshape the previous-step input to (1, batch, input_size) for the
        # sequence-first LSTM.
        input = input.unsqueeze(0)
        input = input.unsqueeze(0)
        input = input.repeat(1, batch_size, 1)

        input = self.relu(self.dropout(input))

        sequence_length = encoder_states.shape[0]

        # Repeat the decoder hidden state along the source sequence so it can
        # be scored against every encoder state.
        h_reshaped = hidden.repeat(sequence_length, 1, 1)
        # observed: h_reshaped torch.Size([168, 168, 1024]),
        #           encoder_states torch.Size([168, 36, 2048])

        concatenated = torch.cat((h_reshaped, encoder_states), dim=2)

        energy = self.relu(self.energy(concatenated))
        attention = self.softmax(energy)  # (seq_len, batch, 1) attention weights

        # Weighted sum of the encoder states:
        # (seq_len, batch, 1) x (seq_len, batch, hidden*2) -> (1, batch, hidden*2)
        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)

        #print("\n")
        #print(f"Context vector size: {context_vector.shape}")
        
        rnn_input = torch.cat((context_vector,input),dim = 2)

        #print("\n")
        #print(f"RNN input size: {rnn_input.shape}")

        output,cell) = self.rnn(rnn_input,cell))
        
        #print("\n")
        #print(f"Penultimate output size: {output.shape}")
        #print(f"Hidden size: {hidden.shape}")
        #print(f"Cell size: {cell.shape}")
        output = self.tanh(self.fc(output[:,-1,:]).squeeze(0)) 

        #print("\n")
        #print(f"Final output size: {output.shape}")
        
        return output,cell


class Seq2Seq(nn.Module):
    def __init__(self,encoder,decoder):
        super(Seq2Seq,self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.1):
        batch_size = source.shape[1]
        target_len = target.shape[0]

        outputs = torch.zeros(target_len).to(device)

        encoder_states, hidden, cell = self.encoder(source)

        # First input is the <SOS> analogue: the last observed value
        x = target[0]

        for t in range(1, target_len):
            # At every time step use encoder_states and update hidden, cell
            output, hidden, cell = self.decoder(x, encoder_states, hidden, cell)

            #print("\n")
            #print(f"Seq2Seq Hidden Tensor shape: {hidden.shape}")
            #print(f"Seq2Seq Output Tensor shape: {output.shape}")
            
            #print(f"Seq2Seq Cell Tensor shape: {cell.shape}")
            #print("\n")
            # Store prediction for current time step
            outputs[t] = output

            # Get the best word the Decoder predicted (index in the vocabulary)
            best_guess = output #.argmax(1)

            # With probability of teacher_force_ratio we take the actual next word
            # otherwise we take the word that the Decoder predicted it to be.
            # Teacher Forcing is used so that the model gets used to seeing
            # similar inputs at training and testing time,if teacher forcing is 1
            # then inputs at test time might be completely different than what the
            # network is used to. This was a long comment.
            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs
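For comparison, what I am after is a decoding loop whose length is the forecast horizon rather than the input length. Here is a minimal self-contained sketch of that idea (no attention, made-up sizes, and not my actual model):

import torch
import torch.nn as nn

class TinyEncoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size)

    def forward(self, src):                    # src: (seq_len, batch, input_size)
        _, (hidden, cell) = self.lstm(src)
        return hidden, cell

class TinyDecoder(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(1, hidden_size)    # input: previous production level
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, y_prev, hidden, cell):   # y_prev: (batch, 1)
        out, (hidden, cell) = self.lstm(y_prev.unsqueeze(0), (hidden, cell))
        return self.fc(out.squeeze(0)), hidden, cell

def forecast(encoder, decoder, src, last_obs, horizon):
    # Run the decoder `horizon` times, feeding each prediction back in, so
    # the forecast length (3/5/9 steps) is independent of the input length.
    hidden, cell = encoder(src)
    y, preds = last_obs, []
    for _ in range(horizon):
        y, hidden, cell = decoder(y, hidden, cell)
        preds.append(y)
    return torch.cat(preds, dim=1)             # (batch, horizon)

src = torch.randn(168, 36, 37)                 # 168 steps, batch 36, 37 features
enc, dec = TinyEncoder(37, 64), TinyDecoder(64)
print(forecast(enc, dec, src, torch.zeros(36, 1), horizon=9).shape)  # torch.Size([36, 9])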

Here is a plot of the model's predictions:

[image: plot of the model's (nearly constant) predictions]

