
PyTorch LSTM classification model - mapping outputs to targets


I have a network that outputs a vector of length 2. My targets are in the form of 1 or 0, referring to the two possible classes. What is the best way to compute the loss - that is, should I convert the targets into, e.g., 2-dimensional vectors, or should I convert the output of the network, e.g. by taking the position of the largest value as the prediction?
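(For reference, the "convert the targets" route corresponds to PyTorch's standard multi-class setup with nn.CrossEntropyLoss, sketched below with made-up tensors; CrossEntropyLoss takes integer class indices directly, so neither one-hot targets nor an argmax over the output are needed to compute the loss itself.)

import torch
import torch.nn as nn

# Two-output route: CrossEntropyLoss takes raw logits of shape (batch, 2)
# and integer class indices of shape (batch,); it applies log-softmax internally,
# so no softmax layer and no one-hot targets are required.
logits = torch.randn(4, 2)                 # made-up network outputs
targets = torch.tensor([1, 0, 0, 1])       # class labels as indices, not one-hot
loss = nn.CrossEntropyLoss()(logits, targets)

# argmax is only needed to turn logits into predicted classes for metrics:
predictions = logits.argmax(dim=1)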

My network looks like this:

import torch
import torch.nn as nn

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm1 = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, 32)
        self.fc2 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(p=0.2)
        self.batch_normalisation1 = nn.BatchNorm1d(layer_dim)
        self.batch_normalisation2 = nn.BatchNorm1d(2)
        self.activation = nn.Softmax(dim=2)

    def forward(self, x):
        h0, c0 = self.init_hidden(x)
        out, (hn1, cn1) = self.lstm1(x, (h0, c0))
        out = self.dropout(out)
        out = self.batch_normalisation1(out)

        h1, c1 = self.init_hidden(out)
        out, (hn2, cn2) = self.lstm2(out, (h1, c1))
        out = self.dropout(out)
        out = self.batch_normalisation1(out)

        h2, c2 = self.init_hidden(out)
        out, (hn3, cn3) = self.lstm2(out, (h2, c2))
        out = self.dropout(out)
        out = self.batch_normalisation1(out)

        out = self.fc1(out[:, -1, :])
        out = self.dropout(out)
        out = self.fc2(out)
        return out

    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return h0, c0

    def pred(self, x):
        out = self(x)
        return out > 0

An example input to this network is:

tensor([[[0.0000e+00, 0.0000e+00, 0.0000e+00], [2.3597e-04, 1.1507e-02, 8.7719e-02, 6.1093e-02, 9.5556e-01], [2.1474e-03, 5.3805e-03, 9.6491e-02, 2.2508e-01, 8.2222e-01]]])
which has shape torch.Size([1, 3, 5]). The target is currently 1 or 0. However, the network outputs a vector such as:

tensor([[0.5293, 0.4707]], grad_fn=<SoftmaxBackward>)

What is the best way to set up the loss between these targets and the network output?

Update:

I can now train the model as suggested in the answer:

model = LSTMClassifier(5, 128, 1, 1)
Epochs = 10
batch_size = 32

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-6)

for epoch in range(Epochs):
    if epoch == 0:
        accurate = 0
        for X_instance, y_instance in zip(val_x, val_y):
            if int(y_instance) == 1 and model.pred(X_instance.view(-1, 5)).item():
                accurate += 1
        print(f"Untrained accuracy test set: {accurate/len(val_x)}")
    print(f"Epoch {epoch + 1}")

    for n, (X, y) in enumerate(train_batches):
        model.train()
        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()

    model.eval()
    accurate = 0
    for X_instance, y_instance in zip(val_x, val_y):
        if int(y_instance) == 1 and model.pred(X_instance.view(-1, 5)).item():
            accurate += 1
    print(f"Accuracy test set: {accurate/len(val_x)}")

Solution

You should not use any activation at the end of your network; output a single neuron instead of two and train it with BCEWithLogitsLoss.
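A minimal sketch of what that loss setup looks like, with made-up values: the raw output of the final Linear(32, 1) layer is passed to BCEWithLogitsLoss together with float 0/1 targets of the same shape; the sigmoid is applied inside the loss, which is why no activation layer is needed:

import torch
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()

logits = torch.tensor([[0.8], [-1.2]])   # raw single-logit outputs, shape (batch, 1)
targets = torch.tensor([[1.0], [0.0]])   # float 0/1 targets, same shape as the logits

loss = criterion(logits, targets)

# Equivalent to sigmoid followed by BCELoss, but fused for numerical stability:
assert torch.allclose(loss, nn.BCELoss()(torch.sigmoid(logits), targets))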

Below is your neural network code, with comments added and the unnecessary parts removed:

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm1 = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, 32)
        # Output 1 neuron instead of two
        self.fc2 = nn.Linear(32, 1)
        # Model should not depend on batch size
        # self.batch_size = None
        # You are not using this variable
        # self.hidden = None
        self.dropout = nn.Dropout(p=0.2)
        self.batch_normalisation1 = nn.BatchNorm1d(layer_dim)
        self.batch_normalisation2 = nn.BatchNorm1d(2)

    def forward(self, x):
        # Hidden states are initialized with zeros by default
        # h0, c0 = self.init_hidden(x)
        out, _ = self.lstm1(x)
        # No need for initial values
        # out, (hn1, cn1) = self.lstm1(x, (h0, c0))
        out = self.dropout(out)
        out = self.batch_normalisation1(out)

        # Same for all other cells you re-init with zeros, it's implicit
        out, _ = self.lstm2(out)
        out = self.dropout(out)
        out = self.batch_normalisation1(out)

        out, _ = self.lstm2(out)
        out = self.dropout(out)
        out = self.batch_normalisation1(out)

        out = self.fc1(out[:, -1, :])
        out = self.dropout(out)
        # No need for activation
        # out = F.softmax(self.fc2(out))
        out = self.fc2(out)
        return out

    # Return True (1) or False (0)
    def pred(self, x):
        return self(x) > 0

I have also added the pred method, which transforms the logit into a target (e.g. to use with some metrics).

Basically, if your logit is lower than 0 the prediction is False, otherwise it is True. No activation is needed in this case.
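A quick illustration of that threshold rule with made-up logits: comparing the raw logit against 0 gives the same predictions as applying a sigmoid and comparing the probability against 0.5, since sigmoid(0) = 0.5 and sigmoid is monotonically increasing:

import torch

logits = torch.tensor([[-2.3], [0.0], [1.7]])

pred_from_logit = logits > 0                    # what the pred method does
pred_from_prob = torch.sigmoid(logits) > 0.5    # explicit sigmoid + 0.5 threshold

assert torch.equal(pred_from_logit, pred_from_prob)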
