如何解决 PyTorch Lightning 中两次测试结果不一致的问题
我使用 PyTorch Lightning,用膜翅目昆虫的照片通过迁移学习训练一个小型神经网络(灵感来自 here)。
在 test_step 方法中,我打印了真实类别(classes)和预测结果(preds)。训练结束后,我在验证集上手动做了同样的事情,却得到了不同的结果。
import torch
from torch import nn
from torch.optim import Adam,SGD
import pytorch_lightning as pl
from torchvision import models
from torch.optim import lr_scheduler
from pytorch_lightning.metrics.functional import accuracy
from pytorch_lightning.loggers import TensorBoardLogger
from hymenoptereDataModule import HymenopteraDataModule
class LitHymenoptera(pl.LightningModule):
    """ResNet-18 transfer-learning classifier with a two-class output head.

    The pretrained torchvision ResNet-18 is kept as the backbone; only its
    final fully connected layer is replaced by a new ``nn.Linear(num_ftrs, 2)``.
    """

    def __init__(self, batch_size=4):
        """Seed torch, set up the data module and build the network.

        Args:
            batch_size: Stored as ``self.batch_size``; this class does not
                read it anywhere else.
        """
        super().__init__()
        torch.manual_seed(42)
        self.batch_size = batch_size
        self.dataModule = HymenopteraDataModule()
        self.dataModule.setup()
        self.criterion = nn.CrossEntropyLoss()
        # NOTE(review): newer Lightning releases expose `logger` as a
        # Trainer-managed property; passing the logger to `pl.Trainer(logger=...)`
        # is the documented route -- confirm against the installed version.
        self.logger = TensorBoardLogger('tb_logs', name='Model')
        # Define the model
        self.model = models.resnet18(pretrained=True)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, 2)

    def forward(self, x):
        """Return the raw class logits produced by the wrapped ResNet."""
        return self.model(x)

    def _step_metrics(self, batch):
        """Compute loss, accuracy and number of correct predictions for a batch."""
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)
        num_correct = torch.eq(preds.view(-1), y.view(-1)).sum()
        return {'loss': loss, 'acc': acc, 'num_correct': num_correct}

    def training_step(self, batch, batch_idx):
        """Return the training loss and metrics for one ``(x, y)`` batch."""
        return self._step_metrics(batch)

    def training_epoch_end(self, outputs):
        """Advance the StepLR scheduler created in ``configure_optimizers``."""
        self.exp_lr_scheduler.step()

    def validation_step(self, batch, batch_idx):
        """Return the validation loss and metrics for one ``(x, y)`` batch.

        NOTE(review): the returned keys mirror ``training_step``; confirm that
        downstream consumers expect exactly these names.
        """
        return self._step_metrics(batch)

    def test_step(self, batch, batch_idx):
        """Print the true classes, predicted classes and logits of a test batch."""
        inputs, classes = batch
        logits = self(inputs)
        preds = torch.argmax(logits, dim=1)
        print('###############################')
        print('classes1 = ', classes)
        print('preds1 = ', preds)
        print(logits)

    def configure_optimizers(self):
        """Create the SGD optimizer and keep a StepLR scheduler on ``self``.

        Only the optimizer is returned; the scheduler is stepped manually in
        ``training_epoch_end``.
        """
        optimizer = SGD(self.parameters(), lr=0.001, momentum=0.9)
        # Decay LR by a factor of 0.1 every 7 epochs
        self.exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
        return optimizer
model = LitHymenoptera()
trainer = pl.Trainer(gpus=1, max_epochs=5, progress_bar_refresh_rate=100)
trainer.fit(model, model.dataModule)
trainer.test(model)

# Now, another test: push the validation batches through the model by hand.
# Outside the Trainer the module has to be switched to inference mode
# explicitly. freeze() disables gradients and puts the module in eval mode;
# skipping this call is what made preds2 disagree with preds1 from test_step.
model.freeze()
for inputs, classes in model.dataModule.val_dataloader():
    print('###############################')
    logits = model(inputs.cuda())
    preds = torch.argmax(logits, dim=1)
    print('classes2 = ', classes)
    print('preds2 = ', preds)
    print(logits)
这是 test_step 方法的第一个输出:
classes1 = tensor([0,0],device='cuda:0')
preds1 = tensor([1,0,0,0],device='cuda:0')
tensor([[0.1626,0.2195],[1.1437,0.5745],[0.9351,0.4271],[0.7365,0.5342]],device='cuda:0')
现在是验证步骤的第一个输出:
classes2 = tensor([0,0])
preds2 = tensor([1,0,1,0],device='cuda:0')
tensor([[-0.0168,0.0800],[ 0.6817,0.2949],[-0.2205,0.1009],[ 0.6126,0.4924]],device='cuda:0',grad_fn=<AddmmBackward>)
两次输出的真实类别是相同的(我检查过图像,它们也是相同的),但 preds 却不同。这种差异是从哪里来的?
解决方法
我意识到,在第二次使用模型之前,我忘记添加下面这一行:
model.freeze()
freeze() 会关闭所有参数的梯度,并把模型切换到 eval 模式,使 ResNet 中的 BatchNorm 层使用训练时累积的统计量,而不是当前批次的统计量。加上这一行之后,两次的结果就一致了。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。