How to do multi-step forecasting with an attention-based Seq2Seq encoder-decoder model
I am trying to forecast factory production data with a Seq2Seq model, using an encoder-decoder architecture augmented with attention. I am somewhat stuck because the model's output appears to be a constant, with the same sequence length as the input, whereas I actually want to be able to specify that I am predicting 3/5/9 months into the future. Could you look at my code and suggest where I am going wrong?
I have spent a lot of time on the PyTorch Seq2Seq tutorial and on the GitHub examples below, which are all geared towards NLP and machine translation, but I have not been able to adapt them to my economic data.
Goal: As far as I can tell, I am forecasting this factory's future production of a given material, so the target has dimension 1 and is, of course, an integer.

Encoder: The encoder takes a sequence of length 168 as input; each input consists of the previous 20 days of data plus 37 factory-level features, such as the number of workers.

Decoder: As I understand it, the decoder should take the previous time step's production level as input (i.e. dimension 1), along with the previous hidden and cell states. The problem is that the decoder seems to output a (nearly) constant value every time and does not appear to have learned the data (the predictions are always positive).
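To make the dimensions concrete, the tensors I am working with look roughly like this (the batch size of 36 comes from the shape printouts in the code below; I am not fully sure this layout is right, which may be part of the problem):

import torch

seq_len, batch_size, n_features = 168, 36, 37          # 168 time steps, 37 factory-level features
source = torch.randn(seq_len, batch_size, n_features)  # encoder input: (seq_len, batch, features)

horizon = 3                                             # what I want to choose freely: 3 / 5 / 9 months
target = torch.randn(horizon, batch_size)               # production level, one value per future step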
import random

import torch
import torch.nn as nn

class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, p):
        super(EncoderRNN, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, dropout=p, bidirectional=True)  # batch_first=True
        self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)
        self.fc_cell = nn.Linear(hidden_size * 2, hidden_size)

    def forward(self, input):
        # encoder_states: (seq_len, batch, hidden_size * 2) because the LSTM is bidirectional
        encoder_states, (hidden, cell_state) = self.lstm(input)
        # concatenate the forward and backward final states and project back to hidden_size
        hidden = self.fc_hidden(torch.cat((hidden[0:1], hidden[1:2]), dim=2))
        cell = self.fc_cell(torch.cat((cell_state[0:1], cell_state[1:2]), dim=2))
        return encoder_states, hidden, cell
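For reference, this is roughly how the encoder gets called on random data of the sizes described above (hidden_size = 1024 matches the 2048-wide encoder_states in my printouts; num_layers = 1 and p = 0.1 are just example values):

encoder = EncoderRNN(input_size=37, hidden_size=1024, num_layers=1, p=0.1)
states, hidden, cell = encoder(torch.randn(168, 36, 37))
print(states.shape)   # torch.Size([168, 36, 2048])  -- bidirectional, so 2 * hidden_size
print(hidden.shape)   # torch.Size([1, 36, 1024])
print(cell.shape)     # torch.Size([1, 36, 1024])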
class Decoder_LSTMwAttention(nn.Module):
    def __init__(self, output_size, p):
        super(Decoder_LSTMwAttention, self).__init__()
        self.rnn = nn.LSTM(hidden_size * 2 + input_size, hidden_size, num_layers)  # batch_first=True
        self.energy = nn.Linear(hidden_size * 3, 1)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(p)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.fc = nn.Linear(hidden_size, output_size)
        self.attention_combine = nn.Linear(hidden_size, hidden_size)

    def forward(self, input, encoder_states, hidden, cell):
        batch_size = encoder_states.shape[1]
        input = input.unsqueeze(0)
        input = input.unsqueeze(0)
        input = input.repeat(1, batch_size, 1)
        input = self.relu(self.dropout(input))
        sequence_length = encoder_states.shape[0]
        # repeat the decoder hidden state along the source time dimension
        h_reshaped = hidden.repeat(sequence_length, 1, 1)
        # print(f"h_reshaped size: {h_reshaped.shape}")          # torch.Size([168, 168, 1024])
        # print(f"Encoder states size: {encoder_states.shape}")  # torch.Size([168, 36, 2048])
        concatenated = torch.cat((h_reshaped, encoder_states), dim=2)
        energy = self.relu(self.energy(concatenated))
        attention = self.softmax(energy)
        # weighted sum of the encoder states over the source time dimension
        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)
        rnn_input = torch.cat((context_vector, input), dim=2)
        output, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        output = self.tanh(self.fc(output[:, -1, :]).squeeze(0))
        return output, hidden, cell
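In case the einsum line is unclear, this is what I understand it to compute, checked on standalone tensors of the same shapes as in my printouts:

import torch

s, n, h2 = 168, 36, 2048                  # source length, batch, 2 * hidden_size
attention = torch.rand(s, n, 1)           # one weight per source step and batch element
encoder_states = torch.rand(s, n, h2)
context = torch.einsum("snk,snl->knl", attention, encoder_states)   # (1, n, h2)
# the same weighted sum over the source time dimension, written out explicitly:
explicit = (attention * encoder_states).sum(dim=0, keepdim=True)
assert torch.allclose(context, explicit, atol=1e-5)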
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.1):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        outputs = torch.zeros(target_len).to(device)

        encoder_states, hidden, cell = self.encoder(source)

        # First input will be the <SOS> token
        x = target[0]
        for t in range(1, target_len):
            # At every time step reuse encoder_states and update hidden, cell
            output, hidden, cell = self.decoder(x, encoder_states, hidden, cell)
            # Store the prediction for the current time step
            outputs[t] = output
            best_guess = output  # .argmax(1) in the NLP version; here the output is a real value
            # With probability teacher_force_ratio feed the ground-truth next value,
            # otherwise feed the decoder's own prediction, so that the model sees
            # similar inputs at training and test time.
            x = target[t] if random.random() < teacher_force_ratio else best_guess
        return outputs
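To be explicit about what I mean by "specifying the horizon": at inference time I picture a loop like the one below, rolled forward for exactly as many months as I ask for (decoder_step here is only a stand-in function, not my actual module):

import torch

def decoder_step(x, state):
    # stand-in for one decoder call; my real decoder would go here
    return x * 0.95, state

horizon = 3                         # the part I want to control: 3 / 5 / 9 months ahead
x = torch.tensor(100.0)             # last observed production level
state = None
forecast = []
for _ in range(horizon):
    x, state = decoder_step(x, state)   # feed each prediction back in as the next input
    forecast.append(x)
print(torch.stack(forecast))        # tensor([95.0000, 90.2500, 85.7375])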
Here is a plot of the model's predictions: