如何解决损失永远不会低于 0.35
我正在构建模型来对图像序列进行分类。我尝试了不同的模型,但我无法将训练或验证损失降至 0.35 以下。 我能得到的最好结果是这样的: Train_Valid_Loss
在训练普通 CNN 分类器时,我习惯于将损失值低于 0.1。 我尝试了几项改进措施,包括:
- 改变网络复杂性
- 不同的优化器
- 改变辍学
- 图像增强
- 实现学习率调度器
我仍然无法获得低于该点的损失。我正在使用 JAAD 数据库并希望对人们的行为(步行或站立)进行分类。我正在使用最后 10 帧来获取功能。我首先尝试了 CNN-LSTM,但由于我无法将损失降至 0.35 以下,我尝试了不同的方法并为每个人提取姿势。这样我就可以摆脱 cnn 并使用以关键点为特征的普通 lstm 网络。但是,我仍然无法将损失降至 0.35 以下。 我怎样才能改善损失?我在 Pytorch 中使用 nn.bceloss。 我开始认为,代码中有错误,结果不是因为数据集。 代码中最重要的部分如下:
采样器和数据加载器:
class MySampler(torch.utils.data.Sampler):
def __init__(self,end_idx,seq_length):
indices = []
for i in range(len(end_idx)-1):
start = end_idx[i]
end = end_idx[i+1] - seq_length
indices.append(torch.arange(start,end))
indices = torch.cat(indices)
self.indices = indices
def __iter__(self):
indices = self.indices[torch.randperm(len(self.indices))]
return iter(indices.tolist())
def __len__(self):
return len(self.indices)
class MyDataset(Dataset):
def __init__(self,image_paths,seq_length,length,batch):
self.image_paths = image_paths
self.seq_length = seq_length
self.length = length
self.batch=batch
def __getitem__(self,index):
start = index
end = index + self.seq_length
#print('Getting images from {} to {}'.format(start,end))
indices = list(range(start,end))
#print(indices)
images = []
for i in indices:
image_path = self.image_paths[i][0]
image = np.load(image_path)
image=torch.from_numpy(image)
images.append(image)
x = torch.stack(images)
y = torch.tensor([self.batch,self.image_paths[start][1]],dtype=torch.long)
return x,y
def __len__(self):
return self.length
BATCH=1
root_dir = 'path tofolder...\\Dataset\\JAAD\\Pose\\dataset\\Train\\'
class_paths = [d.path for d in os.scandir(root_dir) if d.is_dir]
class_image_paths = []
end_idx = []
for c,class_path in enumerate(class_paths):
for d in os.scandir(class_path):
if d.is_dir:
paths = sorted(glob.glob(os.path.join(d.path,'*.npy')))
# Add class idx to paths
paths = [(p,c) for p in paths]
class_image_paths.extend(paths)
end_idx.extend([len(paths)])
end_idx = [0,*end_idx]
end_idx = torch.cumsum(torch.tensor(end_idx),0)
seq_length = 6
sampler = MySampler(end_idx,seq_length)
dataset_train = MyDataset(image_paths=class_image_paths,seq_length=seq_length,length=len(sampler),batch=BATCH)
loader_train = DataLoader(dataset_train,batch_size=BATCH,sampler=sampler)
no_stand=0
no_walk=0
lab_list=[]
for data,target in tqdm(loader_train):
lab=int(target[0][1])
lab_list.append(lab)
if lab ==0:
no_stand+=1
elif lab==1:
no_walk+=1
arr = np.array(lab_list)
uni=np.unique(arr)
print(uni)
print('stand: '+ str(no_stand)+ '\t walk: '+ str(no_walk))
模型架构:
class Net(nn.Module):
def __init__(self):
super(Net,self).__init__()
self.lstm = nn.LSTM(input_size=31,hidden_size=20,num_layers=2,dropout=0.5,batch_first=True).cuda()
self.lstm2 = nn.LSTM(input_size=20,hidden_size=10,batch_first=True).cuda()#,hidden_size =self.hidden_size)
self.fc = nn.Linear(60,2)
nn.init.xavier_uniform_(self.fc.weight)
self.self_attn = torch.nn.MultiheadAttention(embed_dim=20,num_heads=2)
def forward(self,x):
torch.nan_to_num(x,nan=-1)
x=x.view(BATCH,6,x.shape[2]))
x,hidden = self.lstm(x)
x,weight = self.self_attn(query=x,key=x,value=x)
x,hidden = self.lstm2(x)
x = x.contiguous().view(x.shape[0],x.shape[1]*x.shape[2])
x = self.fc(x)
return x
train_on_gpu = torch.cuda.is_available()
model=Net()
if not train_on_gpu:
print('CUDA is not available. Training on cpu ...')
else:
print('CUDA is available! Training on GPU ...')
if train_on_gpu:
model.cuda()
优化器和损失:
optimizer= optim.Adadelta(model.parameters(),lr=0.01,rho=0.9,eps=1e-06,weight_decay=0.01)
criterion=nn.BCEWithLogitsLoss()
scheduler=ReduceLROnPlateau(optimizer,mode='max',factor=0.7,patience=3,verbose=True)
火车循环:
n_epochs = 50
NEW_INI = True
loss_values_train=[]
loss_values_valid=[]
if NEW_INI:
valid_loss_min = np.Inf
NEW_INI = False
print(train_on_gpu)
for epoch in range(1,n_epochs+1):
train_loss = 0.0
valid_loss = 0.0
###################
# train the model #
###################
model.train()
for data,target in tqdm(loader_train):
if train_on_gpu:
data,target = data.cuda(),target.cuda()
optimizer.zero_grad()
output = model(data.float())
#print(data.float())
#print(output)
loss = criterion(output,target.float().cuda())
#print(loss)
loss.backward()
torch.nn.utils.clip_grad_value_(model.parameters(),1)
torch.nn.utils.clip_grad_norm_(model.parameters(),0.5)
optimizer.step()
train_loss += loss.item()*data.size(0)
######################
# validate the model #
######################
model.eval()
for data,target in loader_valid:
if train_on_gpu:
data,target.cuda()
output=model(data.float())
loss = criterion(output,target.float().cuda())
valid_loss += loss.item()*data.size(0)
train_loss = train_loss/len(loader_train.sampler)
valid_loss = valid_loss/len(loader_valid.sampler)
loss_values_train.append(train_loss)
loss_values_valid.append(valid_loss)
#train_loss = train_loss/len(train_loader.sampler)
#print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch,train_loss))
print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \tlr: {:.6f}'.format(epoch,train_loss,valid_loss,optimizer.param_groups[0]['lr']))
if valid_loss <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,valid_loss))
torch.save(model.state_dict(),'model_pose-v'+str(VERSION)+'.pt')
valid_loss_min = valid_loss
#print('Epoch-{0} lr: {1}'.format(epoch,optimizer.param_groups[0]['lr']))
scheduler.step(valid_loss)
准确率为 47%。你能发现任何可能导致这个问题的东西吗? 谢谢:)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。