How to map the outputs of a PyTorch LSTM classification model to the targets
I have a network that outputs a vector of length 2. My targets are in the form of 1 or 0, referring to the two possible classes. What is the best way to compute the loss: should I convert the targets into, say, a 2-dimensional vector, or should I convert the output of the network, e.g. take the position of the maximum number as the prediction?
My network looks like this:
import torch
import torch.nn as nn

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm1 = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, 32)
        self.fc2 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(p=0.2)
        self.batch_normalisation1 = nn.BatchNorm1d(layer_dim)
        self.batch_normalisation2 = nn.BatchNorm1d(2)
        self.activation = nn.Softmax(dim=2)  # defined but not used in forward

    def forward(self, x):
        h0, c0 = self.init_hidden(x)
        out, (hn1, cn1) = self.lstm1(x, (h0, c0))
        out = self.dropout(out)
        out = self.batch_normalisation1(out)
        h1, c1 = self.init_hidden(out)
        out, (hn2, cn2) = self.lstm2(out, (h1, c1))
        out = self.dropout(out)
        out = self.batch_normalisation1(out)
        h2, c2 = self.init_hidden(out)
        out, (hn3, cn3) = self.lstm2(out, (h2, c2))
        out = self.dropout(out)
        out = self.batch_normalisation1(out)
        out = self.fc1(out[:, -1, :])
        out = self.dropout(out)
        out = self.fc2(out)
        return out

    def init_hidden(self, x):
        # Both h0 and c0 need shape (num_layers, batch, hidden_dim)
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return h0, c0

    def pred(self, x):
        out = self(x)
        return out > 0
An example input to this network is:
tensor([[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
         [2.3597e-04, 1.1507e-02, 8.7719e-02, 6.1093e-02, 9.5556e-01],
         [2.1474e-03, 5.3805e-03, 9.6491e-02, 2.2508e-01, 8.2222e-01]]])
with shape torch.Size([1, 3, 5]), and an example output of the network is:
tensor([[0.5293, 0.4707]], grad_fn=<SoftmaxBackward>)
Update:
I can now train the model as suggested in the answer:
model = LSTMClassifier(5, 128, 1, 1)  # input_dim, hidden_dim, layer_dim, output_dim
epochs = 10
batch_size = 32
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-6)

for epoch in range(epochs):
    if epoch == 0:
        accurate = 0
        for X_instance, y_instance in zip(val_x, val_y):
            # Count a hit when the predicted class matches the label
            if int(y_instance) == int(model.pred(X_instance.view(1, -1, 5)).item()):
                accurate += 1
        print(f"Untrained accuracy test set: {accurate / len(val_x)}")
    print(f"Epoch {epoch + 1}")
    for n, (X, y) in enumerate(train_batches):
        model.train()
        optimizer.zero_grad()
        y_pred = model(X)
        # BCEWithLogitsLoss needs float targets of the same shape as the logits
        loss = criterion(y_pred, y.float().view_as(y_pred))
        loss.backward()
        optimizer.step()
    model.eval()
    accurate = 0
    for X_instance, y_instance in zip(val_x, val_y):
        if int(y_instance) == int(model.pred(X_instance.view(1, -1, 5)).item()):
            accurate += 1
    print(f"Accuracy test set: {accurate / len(val_x)}")
Solution
You should not use any activation function at the end of the network; instead, output a single neuron rather than two, and train it with BCEWithLogitsLoss.
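To make the trade-off from the question concrete, here is a small sketch (not part of the original answer) of both ways to match a two-class output to 0/1 targets; the tensors are made up for illustration:

import torch
import torch.nn as nn

logits_two = torch.randn(4, 2)        # network with two output neurons
logits_one = torch.randn(4, 1)        # network with a single output neuron
targets = torch.tensor([0, 1, 1, 0])  # 0/1 class labels

# Option 1: keep two outputs and use CrossEntropyLoss;
# targets stay as class indices (dtype long), no one-hot encoding needed
loss_ce = nn.CrossEntropyLoss()(logits_two, targets)

# Option 2 (what this answer recommends): a single output neuron with
# BCEWithLogitsLoss; targets become floats of the same shape as the logits
loss_bce = nn.BCEWithLogitsLoss()(logits_one, targets.float().unsqueeze(1))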
Below is your neural network code with comments added and the unnecessary parts removed:
class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm1 = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, 32)
        # Output 1 neuron instead of two
        self.fc2 = nn.Linear(32, 1)
        # Model should not depend on batch size
        # self.batch_size = None
        # You are not using this variable
        # self.hidden = None
        self.dropout = nn.Dropout(p=0.2)
        self.batch_normalisation1 = nn.BatchNorm1d(layer_dim)
        self.batch_normalisation2 = nn.BatchNorm1d(2)

    def forward(self, x):
        # Hidden state is initialized with zeros implicitly, no need for
        # h0, c0 = self.init_hidden(x)
        out, _ = self.lstm1(x)
        # No need for explicit initial values:
        # out, (hn1, cn1) = self.lstm1(x, (h0, c0))
        out = self.dropout(out)
        out = self.batch_normalisation1(out)
        # Same for all other cells you re-init with zeros, it's implicit
        out, _ = self.lstm2(out)
        out = self.dropout(out)
        out = self.batch_normalisation1(out)
        out, _ = self.lstm2(out)
        out = self.dropout(out)
        out = self.batch_normalisation1(out)
        out = self.fc1(out[:, -1, :])
        out = self.dropout(out)
        # No need for activation
        # out = F.softmax(self.fc2(out))
        out = self.fc2(out)
        return out

    # Return True (1) or False (0)
    def pred(self, x):
        return self(x) > 0
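As a quick sanity check, here is a sketch of running the rewritten network on an input shaped like the example from the question. Note that BatchNorm1d(layer_dim) is applied to the LSTM output of shape (batch, seq_len, hidden_dim), so layer_dim must equal the sequence length (3 here) for the shapes to line up in this sketch; the target value is made up:

model = LSTMClassifier(input_dim=5, hidden_dim=128, layer_dim=3, output_dim=1)
model.eval()                      # BatchNorm layers use running statistics

x = torch.randn(1, 3, 5)          # (batch, seq_len, features), as in the question
target = torch.tensor([[1.0]])    # hypothetical 0/1 label, float, shape (1, 1)

logit = model(x)                  # raw logit, shape (1, 1)
loss = nn.BCEWithLogitsLoss()(logit, target)
print(logit.shape, loss.item(), model.pred(x).item())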
I also added a pred method, which transforms the logits into targets (e.g. to use with some metrics). Basically, if your logit is smaller than 0 it is False, otherwise it is True. No activation is needed in this case.
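This works because BCEWithLogitsLoss applies the sigmoid internally, and sigmoid(0) = 0.5, so a positive logit corresponds to a predicted probability above 0.5. A tiny sketch of that equivalence:

logits = torch.tensor([-1.3, 0.0, 2.1])
probs = torch.sigmoid(logits)               # tensor([0.2142, 0.5000, 0.8909])
assert torch.equal(logits > 0, probs > 0.5)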