微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

在自定义数据集上实现 Faster R-CNN

如何解决:在自定义数据集上实现 Faster R-CNN

我有一些用 LabelMe 标记的图像数据集,它生成的 xml 文件包含每个文件中存在的各种对象的边界框信息(Pascal voc 格式)

我已经提取了边界框信息并为所有图像中存在的所有对象生成一个 csv 文件。 csv 文件中的每一行都包含类似这种格式的信息(filename,x_min,y_min,x_max,y_max,class)

然后,我应用了一些数据增强技术,并相应地调整了边界框信息,这使 csv 文件的规模有所增加。数据被划分为 80% 的训练数据和 20% 的验证数据。属于 train 或 test 的每个图像文件都被移动到相应的文件夹(train/val)。

现在,我正在尝试实现 Faster R-CNN,并希望在计算损失函数的同时计算平均精度均值(mAP)指标;其他性能指标有则更好,但不是必需的。我查看了许多 GitHub 存储库,但它们都让人感到困惑。

我已经实现并(在 Google Colab 上)运行了以下代码,但该代码只输出损失值,而没有给出平均精度均值(mAP)和准确度。如何计算 mAP 和准确度?

# Dataset preparation: copy the images into an 80/20 train/test split and
# build one annotation table per split from the combined CSV.
DIR_INPUT = '../'
DIR_TRAIN = DIR_INPUT + 'Model Dataset/train/'
DIR_TEST = DIR_INPUT + 'Model Dataset/test/'

# exist_ok=True replaces the check-then-create pattern and is safe on re-runs.
os.makedirs(DIR_TRAIN, exist_ok=True)
os.makedirs(DIR_TEST, exist_ok=True)

annotation_file = pd.read_csv(DIR_INPUT+'data files/annotation_all_with_augmented.csv')

all_image = DIR_INPUT+"/topics_in_wind_engineering_final/All images/"

# os.listdir() returns entries in arbitrary order; sorting makes the split
# reproducible from one run to the next.
all_image_names = sorted(os.listdir(all_image))
split = int(len(all_image_names) * .8)

# The first `split` images go to train and the remainder to test. Counting
# from zero gives train exactly `split` images (a 1-based counter compared
# with `<` would hand it one image fewer).
for position, image_name in enumerate(all_image_names):
    destination = DIR_TRAIN if position < split else DIR_TEST
    # Skip files that were already copied by a previous run.
    if not os.path.exists(destination + image_name):
        shutil.copy(all_image + image_name, destination)


def _annotations_for(file_names):
    """Return the annotation rows for the given image files, in file order.

    Rows are matched on the ``image_id`` column. An empty frame with the
    annotation columns is returned when ``file_names`` is empty.
    """
    frames = [
        annotation_file.loc[annotation_file['image_id'] == f_name]
        for f_name in file_names
    ]
    if not frames:
        return pd.DataFrame(columns=annotation_file.columns)
    # A single concat replaces repeated DataFrame.append calls (removed in
    # pandas 2.0) and keeps the original column dtypes.
    return pd.concat(frames)


train_df = _annotations_for(os.listdir(DIR_TRAIN))
valid_df = _annotations_for(os.listdir(DIR_TEST))

# Write the absolute-path variant from a copy: train_df must keep bare file
# names because the dataset class joins them with its image directory.
export_df = train_df.assign(
    image_id="E:/Wind_Engineering/Model Dataset/train/" + train_df['image_id']
)
export_df.to_csv('../../frcnn-from-scratch-with-keras-master/dataset.txt',index=False,header = False)

class AerialDataset(Dataset):
    """Object-detection dataset backed by a one-row-per-box annotation table.

    Each item is one image plus every bounding box recorded for it.

    Args:
        dataframe: Table with an ``image_id`` column (image file name) and
            ``xmin``, ``ymin``, ``xmax``, ``ymax`` columns, one row per box.
        image_dir: Directory that contains the image files.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with ``'image'`` and ``'bboxes'`` entries.
    """

    def __init__(self,dataframe,image_dir,transforms=None):
        super().__init__()

        # One dataset item per distinct image, not per annotation row.
        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def _build_target(self, records, index):
        """Build the target dict for one image from its annotation rows.

        Returns a dict of tensors with keys ``boxes`` (N x 4, float32, in
        xmin/ymin/xmax/ymax order), ``labels``, ``image_id``, ``area`` and
        ``iscrowd``.
        """
        # Cast explicitly: the columns may be object-typed, which torch
        # cannot convert. reshape keeps the (0, 4) shape for zero boxes.
        boxes = records[['xmin','ymin','xmax','ymax']].to_numpy(dtype=np.float32).reshape(-1, 4)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        num_boxes = boxes.shape[0]

        return {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            # there is only one class
            'labels': torch.ones((num_boxes,), dtype=torch.int64),
            'image_id': torch.tensor([index]),
            'area': torch.as_tensor(area, dtype=torch.float32),
            # suppose all instances are not crowd
            'iscrowd': torch.zeros((num_boxes,), dtype=torch.int64),
        }

    def __getitem__(self,index: int):
        """Return ``(image, target, image_id)`` for the image at ``index``.

        The image is RGB float32 scaled to [0, 1]. Without transforms it is
        returned as the array produced by OpenCV; with transforms it is
        whatever the transform returns under ``'image'``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_id = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id]

        image_path = f'{self.image_dir}/{image_id}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        target = self._build_target(records, index)

        if self.transforms:
            sample = self.transforms(
                image=image,
                bboxes=target['boxes'].numpy(),
                labels=target['labels'],
            )
            image = sample['image']
            # Convert through NumPy so an image with zero boxes still yields
            # an empty (0, 4) tensor instead of failing.
            target['boxes'] = torch.as_tensor(
                np.asarray(sample['bboxes'], dtype=np.float32)
            ).reshape(-1, 4)

        return image, target, image_id

    def __len__(self) -> int:
        """Return the number of distinct images."""
        return self.image_ids.shape[0]
   
def get_train_transform():
    """Return the training pipeline: tensor conversion only.

    Boxes are declared in Pascal VOC order (xmin, ymin, xmax, ymax) and the
    ``labels`` field is registered as the label field for the boxes.
    """
    # The Compose keyword is `bbox_params`; any other spelling is rejected.
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Return the validation pipeline: tensor conversion only.

    Boxes are declared in Pascal VOC order (xmin, ymin, xmax, ymax) and the
    ``labels`` field is registered as the label field for the boxes.
    """
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

# Build Faster R-CNN (ResNet-50 + FPN backbone) starting from pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

num_classes = 2  # 1 object class + background

# get number of input features for the classifier
# (the attribute is lowercase `box_predictor`)
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one sized for our class count
model.roi_heads.box_predictor = FastRCNNPredictor(in_features,num_classes)

class Averager:
    """Keep a running total and count, and report their arithmetic mean."""

    def __init__(self):
        # Same starting state that reset() restores.
        self.current_total = self.iterations = 0.0

    def send(self,value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the values sent since the last reset; 0 if none were sent."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Discard everything accumulated so far."""
        self.current_total = self.iterations = 0.0
        
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into per-field tuples.

    Samples are kept as tuples rather than stacked, so items of different
    sizes can share a batch.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Datasets, loaders, optimizer and bookkeeping for training.
train_dataset = AerialDataset(train_df,DIR_TRAIN,get_train_transform())
valid_dataset = AerialDataset(valid_df,DIR_TEST,get_valid_transform())

# Random permutation of the training indices.
# NOTE(review): nothing below consumes `indices`; the train/validation split
# comes from the two datasets above. Kept in case other code relies on it.
indices = torch.randperm(len(train_dataset)).tolist()

train_data_loader = DataLoader(
    train_dataset,batch_size=16,shuffle=False,num_workers=4,collate_fn=collate_fn
)

# An explicit batch size is needed here: the DataLoader default is 1, which
# would make indexing beyond the first element of a validation batch fail.
valid_data_loader = DataLoader(
    valid_dataset,batch_size=8,shuffle=False,collate_fn=collate_fn
)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Pull one batch to inspect a sample image together with its boxes.
images,targets,image_ids = next(iter(train_data_loader))
images = [image.to(device) for image in images]
targets = [{k: v.to(device) for k,v in t.items()} for t in targets]

# Prefer the third sample, but stay in range when the batch is smaller.
sample_idx = min(2, len(images) - 1)
Boxes = targets[sample_idx]['boxes'].cpu().numpy().astype(np.int32)
# Channels-first tensor -> channels-last array for display.
sample = images[sample_idx].permute(1,2,0).cpu().numpy()

model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params,lr=0.005,momentum=0.9,weight_decay=0.0005)
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1)
lr_scheduler = None

num_epochs = 5

loss_hist = Averager()  # running mean of the loss within one epoch
itr = 1                 # global iteration counter across epochs
loss_track = []         # loss of every iteration
epoch_loss = []         # mean loss of every epoch

# Training loop: one optimizer step per batch, mean loss reported per epoch.
for epoch in range(num_epochs):
    # The detection model only returns its loss dict in training mode.
    model.train()
    loss_hist.reset()

    # Every batch is (images, targets, image_ids). Unpacking all three
    # rebinds `targets` per batch, so the boxes always match the images.
    for images,targets,image_ids in train_data_loader:

        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k,v in t.items()} for t in targets]

        loss_dict = model(images,targets)

        # Total loss is the sum of the individual loss terms.
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        loss_hist.send(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        loss_track.append(loss_value)

        if itr % 50 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    print(f"Epoch #{epoch} loss: {loss_hist.value}")  
    epoch_loss.append(loss_hist.value)
    
# Run the trained model on one validation batch.
# Every batch is (images, targets, image_ids).
images,targets,image_ids = next(iter(valid_data_loader))

images = [img.to(device) for img in images]
targets = [{k: v.to(device) for k,v in t.items()} for t in targets]

# Prefer the second sample, but stay in range for a single-image batch.
sample_idx = min(1, len(images) - 1)
Boxes = targets[sample_idx]['boxes'].cpu().numpy().astype(np.int32)
# Image tensors are 3-D (channels first), so all three axes must be permuted.
sample = images[sample_idx].permute(1,2,0).cpu().numpy()

model.eval()
cpu_device = torch.device("cpu")

# No gradients are needed for inference; this avoids building the autograd
# graph and keeps memory use down.
with torch.no_grad():
    outputs = model(images)
outputs = [{k: v.to(cpu_device) for k,v in t.items()} for t in outputs]

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐


Selenium Web驱动程序和Java。元素在(x,y)点处不可单击。其他元素将获得点击?
Python-如何使用点“。” 访问字典成员?
Java 字符串是不可变的。到底是什么意思?
Java中的“ final”关键字如何工作?(我仍然可以修改对象。)
“loop:”在Java代码中。这是什么,为什么要编译?
java.lang.ClassNotFoundException:sun.jdbc.odbc.JdbcOdbcDriver发生异常。为什么?
这是用Java进行XML解析的最佳库。
Java的PriorityQueue的内置迭代器不会以任何特定顺序遍历数据结构。为什么?
如何在Java中聆听按键时移动图像。
Java“Program to an interface”。这是什么意思?