如何解决损失函数不下降、验证损失远高于训练损失、F1 分数异常低的问题
我正在尝试使用一个采用图神经网络 (GNN) 的简单模型来实现二元分类。我的输入是包含图形的数据集,两组用于训练(信号、背景)和两组用于验证(信号、背景)。目标是让网络能够预测图是类似信号的(因此应该得到 1 的分数)还是类似背景(因此应该得到 0 的分数)。每个图都是全连接的,并具有多个节点和边特征。 我的模型很简单:
import dgl
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
# FIX: `optim` is used below (optim.Adam) but was never imported.
import torch.optim as optim
# FIX: torch.nn exports `BatchNorm1d`; the original `Batchnorm1d` raises ImportError.
from torch.nn import Conv2d, ReLU, MaxPool2d, Linear, BatchNorm1d

# Width of each node's learned hidden representation.
node_hidden_size = 25
class EdgeNetwork(nn.Module):
    """MLP applied to each edge: maps concatenated (dst, src, edge) features
    and hidden reps to a new edge hidden representation."""

    def __init__(self, inputsize, hidden_layer_size, output_size):
        super().__init__()
        # Single hidden layer, three times the requested width, ReLU in between.
        self.net = nn.Sequential(
            nn.Linear(inputsize, hidden_layer_size * 3),
            nn.ReLU(),
            nn.Linear(hidden_layer_size * 3, output_size),
        )

    def forward(self, x):
        # x is a DGL EdgeBatch; concatenate destination-node, source-node and
        # edge data along the feature (last) axis before running the MLP.
        combined = torch.cat(
            (
                x.dst['features'],
                x.dst['node_hidden_rep'],
                x.src['features'],
                x.src['node_hidden_rep'],
                x.data['features'],
            ),
            dim=-1,
        )
        return {'edge_hidden_rep': self.net(combined)}
class NodeNetwork(nn.Module):
    """Reduce function for DGL `update_all`: sums incoming edge messages and
    maps (message_sum, node features, node hidden rep) to a new hidden rep.

    Fixes to the original:
    * `__init__` only accepted `output_size`, but the caller passes
      `(inputsize, output_size)` — the input width is now a parameter;
    * the `nn.Sequential` was a syntax error (bare `output_size` instead of a
      second Linear layer, plus an unbalanced parenthesis);
    * DGL exposes the reduce-function mailbox as `nodes.mailbox` (lowercase),
      not `mailBox`.
    """

    def __init__(self, inputsize, output_size, hidden_layer_size=50):
        super().__init__()
        # Mirror EdgeNetwork's shape: Linear -> ReLU -> Linear, 3x-wide hidden.
        self.net = nn.Sequential(
            nn.Linear(inputsize, hidden_layer_size * 3),
            nn.ReLU(),
            nn.Linear(hidden_layer_size * 3, output_size),
        )

    def forward(self, x):
        # x is a DGL NodeBatch; mailbox holds one message per incoming edge,
        # shaped (nodes, incoming_edges, feat) — sum over the edge axis.
        message_sum = torch.sum(x.mailbox['edge_hidden_rep'], dim=1)
        input_data = torch.cat(
            (message_sum, x.data['features'], x.data['node_hidden_rep']), dim=1
        )
        return {'node_hidden_rep': self.net(input_data)}
class Classifier(nn.Module):
    """Graph-level binary classifier: GN-style message passing over a batched
    DGL graph, then an MLP on the mean-pooled global representation.

    Returns raw logits of shape (batch, 1) — pair with BCEWithLogitsLoss.
    Assumes 4 node features and 2 edge features per the comments below.
    """

    def __init__(self):
        super().__init__()
        # Initialises each node's hidden rep from its 4 raw features.
        # NOTE(review): two stacked Linears with no activation collapse to a
        # single linear map — consider a ReLU between them.
        self.node_init = nn.Sequential(
            nn.Linear(4, node_hidden_size * 3),
            nn.Linear(node_hidden_size * 3, node_hidden_size),
        )  # 4 = dimension of node_features
        # Edge MLP input = src features + src hidden + dst features + dst hidden + edge features.
        self.edge_network = EdgeNetwork(
            4 + node_hidden_size + 4 + node_hidden_size + 2, 50, node_hidden_size
        )
        # Node MLP input = summed edge messages + node features + node hidden.
        self.node_network = NodeNetwork(
            node_hidden_size + 4 + node_hidden_size, node_hidden_size
        )
        # self.edge_classifier = EdgeNetwork( 4*2 + 2*node_hidden_size + 2*1 + 2,200,1)
        # FIX: the original `nn.Linear(100, Batchnorm1d(50), 1)` was invalid —
        # a Linear cannot take a module as its out_features.  Rebuilt as a
        # plain stack ending in a BatchNorm1d(50) before the logit layer.
        # (Unused by forward(); kept for interface compatibility.)
        self.node_classifier = nn.Sequential(
            nn.Linear(node_hidden_size, 50),
            nn.Linear(50, 100),
            nn.Linear(100, 50),
            nn.BatchNorm1d(50),
            nn.Linear(50, 1),
        )
        # Responsible for the final graph-level prediction (logit).
        self.mlp = nn.Sequential(
            nn.Linear(node_hidden_size + 4, 50),
            nn.Linear(50, 1),
        )

    def forward(self, batched_g):
        batched_g.ndata['node_hidden_rep'] = self.node_init(batched_g.ndata['features'])
        GN_block_iterations = 2
        for _ in range(GN_block_iterations):
            batched_g.update_all(self.edge_network, self.node_network)
        # Per-graph mean of node hidden reps (shape: node_hidden_size per graph).
        new_global_mean = dgl.mean_nodes(batched_g, 'node_hidden_rep')
        # FIX: this assignment was accidentally swallowed into the comment on
        # the previous line in the original, leaving `broadcasted_sum` undefined.
        broadcasted_sum = dgl.broadcast_nodes(batched_g, new_global_mean)
        batched_g.ndata['global_rep'] = torch.cat(
            (broadcasted_sum, batched_g.ndata['features']), dim=1
        )
        global_rep = dgl.mean_nodes(batched_g, 'global_rep')
        return self.mlp(global_rep)
我正在使用一个常见的逻辑损失:BCEWithLogitsLoss
并且无法解决两个问题:
- 几乎没有进行有效的训练(损失几乎不下降)。
- 验证和训练损失相距甚远。
请参见图来说明这一点: validation and training losses
我做错了什么?我错过了什么?
# Binary cross-entropy on raw logits; pos_weight=10 up-weights the positive
# (signal) class 10:1 in the loss.  NOTE(review): if the signal/background
# datasets are roughly balanced, this skews predictions positive and inflates
# both the loss and the false-positive rate — confirm the imbalance is real.
loss_func = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(10.0))
# Requires `import torch.optim as optim` (not shown above) and a constructed
# model `net` (presumably net = Classifier()) — TODO confirm both exist.
optimizer = optim.Adam(net.parameters(),lr=0.001)
from tqdm import tqdm
def compute_f1_and_loss(DataLoader, net):
    """Evaluate `net` over every batch of `DataLoader`.

    A prediction is positive when its logit is > 0 (sigmoid > 0.5).
    Relies on the module-level `loss_func` (BCEWithLogitsLoss).

    Returns:
        (f1, mean_loss) — F1 score over all batches and the mean batch loss.
        Both are 0.0 when the loader is empty / F1 is undefined.
    """
    true_pos = 0
    false_pos = 0
    false_neg = 0
    total_loss = 0.0
    n_batches = 0
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        net.cuda()
    net.eval()
    with torch.no_grad():
        for batched_g, batched_label in DataLoader:
            n_batches += 1
            if use_cuda:
                batched_g = batched_g.to(torch.device('cuda'))
                batched_label = batched_label.to(torch.device('cuda'))
            # FIX: BCEWithLogitsLoss requires a float target matching pred's
            # shape; an integer label tensor raises at runtime.
            target = batched_label.unsqueeze(-1).float()
            pred = net(batched_g)
            total_loss += loss_func(pred, target).item()
            positive = pred > 0
            true_pos += int((positive & (target == 1)).sum())
            false_pos += int((positive & (target == 0)).sum())
            # FIX: the original used `pred < 0`, silently dropping pred == 0
            # cases; any non-positive logit on a signal event is a false negative.
            false_neg += int((~positive & (target == 1)).sum())
    # FIX: guard the divisions — an empty loader or zero positives previously
    # raised ZeroDivisionError.
    denom = true_pos + 0.5 * (false_pos + false_neg)
    f1 = true_pos / denom if denom > 0 else 0.0
    mean_loss = total_loss / n_batches if n_batches else 0.0
    return f1, mean_loss
最后在训练循环中使用所有内容:
from tqdm import tqdm
n_epochs = 50
# FIX: these histories were commented out in the original, yet they are read
# (len(validation_loss_vs_epoch)) and appended below -> NameError on epoch 0.
training_loss_vs_epoch = []
validation_loss_vs_epoch = []
training_f1_vs_epoch = []
validation_f1_vs_epoch = []
pbar = tqdm(range(n_epochs))
for epoch in pbar:
    if len(validation_loss_vs_epoch) > 1:
        pbar.set_description(
            ' val f1:' + '{0:.5f}'.format(validation_f1_vs_epoch[-1]) +
            'val loss:' + str(validation_loss_vs_epoch[-1])
        )
    net.train()  # training mode (enables dropout / batch-norm updates)
    for batched_g, batched_label in data_loader:
        if torch.cuda.is_available():
            batched_g = batched_g.to(torch.device('cuda'))
            batched_label = batched_label.to(torch.device('cuda'))
        optimizer.zero_grad()
        # FIX: BCEWithLogitsLoss expects a float target shaped like the logits.
        target = batched_label.unsqueeze(-1).float()
        pred = net(batched_g)
        loss = loss_func(pred, target)
        loss.backward()
        optimizer.step()
    net.eval()  # evaluation mode for the metric passes
    train_f1, train_loss = compute_f1_and_loss(data_loader, net)
    valid_f1, valid_loss = compute_f1_and_loss(validation_data_loader, net)
    training_loss_vs_epoch.append(train_loss)
    training_f1_vs_epoch.append(train_f1)
    validation_loss_vs_epoch.append(valid_loss)
    validation_f1_vs_epoch.append(valid_f1)
    # Checkpoint on the first epoch and whenever validation loss improves.
    if len(validation_loss_vs_epoch) == 1 or validation_loss_vs_epoch[-2] > validation_loss_vs_epoch[-1]:
        torch.save(net.state_dict(), 'trained_model.pt')
希望这会有所帮助。提前致谢!
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。