暹罗网络的损耗没有减少,准确率低

2024-10-01 11:23:51 发布

您现在位置:Python中文网/ 问答频道 /正文

我删除了更快的rcnn模型的分类部分,然后添加了另一个输出层以获得特征嵌入。我用暹罗网络训练它,用余弦距离对比损耗。 我将这个对比损失与更快的rcnn的损失一起训练,但是对比损失没有减少,而其他损失却减少了

这是faster rcnn model的链接

到目前为止,我所尝试的:

  • 将学习率从1e-3更改为1e-5
  • 只训练了几个时代后的对比损失

还是不行

这是我的头网:

class HeadNet(nn.Module):
    def __init__(self, roi_size, spatial_scale,
                 classifier):
        super(HeadNet, self).__init__()

        self.classifier = classifier
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi_pool = RoIPool((self.roi_size, self.roi_size), self.spatial_scale)

        # self.fc_4096 = nn.Sequential(
        #     nn.Linear(512 * 7 * 7, 4096),
        #     nn.ReLU(),
        #     nn.Linear(4096, 4096),
        #     nn.ReLU()
        # )

        self.fc_embedding = nn.Linear(4096, 128)
        self.fc_cls_loc = nn.Linear(4096, 8)  # For 2 classes, foreground and background
        self.fc_score = nn.Linear(4096, 2)

        normal_init(self.fc_cls_loc, 0, 0.001)
        normal_init(self.fc_score, 0, 0.01)
        normal_init(self.fc_embedding, 0, 0.01)

    def forward(self, x, rois, roi_indices, n_pos):
        """

        :param roi_indices: batch of image
        :param x: 4D image variable.
        :param rois:
        :return: roi_cls_locs, roi_scores, embeddings for filtered positive
                 proposals of each image, format:[N,8]
                 N is the number of pos.rois

        Tips: The pos_rois are only for training. It is different by testing
        """

        # precessing the input data, for all rois
        roi_indices = at.totensor(roi_indices).float()
        rois = at.totensor(rois).float()
        indices_and_rois = t.cat([roi_indices[:, None], rois], dim=1)

        xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
        indices_and_rois = xy_indices_and_rois.contiguous()
        pool = self.roi_pool(x, indices_and_rois)

        if n_pos is not None:
            pool_pos = pool[:4]
            # Get the positive part
        else:
            pool_pos = pool

        pool = pool.view(pool.size(0), -1)
        # fc_4096 = self.fc_4096(pool)
        fc_4096 = self.classifier(pool)
        pool_pos = pool_pos.view(pool_pos.size(0), -1)  # Reformat to [N,25088]
        # fc_4096_pos = self.fc_4096(pool_pos)
        fc_4096_pos = self.classifier(pool_pos)

        roi_cls_locs = self.fc_cls_loc(fc_4096)
        roi_scores = self.fc_score(fc_4096)  # [N,2]
        embeddings = self.fc_embedding(fc_4096_pos)  # Format [n_pos,128]

        return roi_scores, roi_cls_locs, embeddings

这是网络结构:

class SiameseReID(nn.Module):
    feat_stride = 16

    def __init__(self):
        super(SiameseReID, self).__init__()
        self.extractor, self.cls_layers = vgg_16()
        self.rpn = RegionProposalNetwork(512, 512, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32],
                                         feat_stride=self.feat_stride)
        self.head = HeadNet(roi_size=7, spatial_scale=(1. / self.feat_stride), classifier=self.cls_layers)
        # self.proposal_target_layer = ProposalFilter()

    def forward_once(self, im):
        """
        :param im: 4D image
        :return:
        """
        im_size = im.shape[2:]
        features = self.extractor(im)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.rpn(features, im_size, scale=1)

        roi_scores, roi_locs_reg, embeddings = self.head(features, rois, roi_indices, None)

        return rois, embeddings, roi_locs_reg, roi_scores

这是对比损失:(其余损失与我在上面发布的链接中使用的相同)

    def forward(self, img1, img2, bbox1, bbox2, target, scale):
        """
        :param target: 1, when positive pair; 0, when negative pair
        """
        embedding1, losses1 = self.forward_once(img1, bbox1, scale)
        embedding2, losses2 = self.forward_once(img2, bbox2, scale)

        # -------------------- Similarity Measure Loss ------------------------- #
        n_embd1 = embedding1.shape[0]
        n_embd2 = embedding2.shape[0]
        embd1 = at.totensor(embedding1)
        embd2 = at.totensor(embedding2)
        # if n_embd1 > 3:
        #     embd1 = embd1[0:3, :]
        # if n_embd2 > 3:
        #     embd2 = embd2[0:3, :]

        embd1.requires_grad = True
        embd2.requires_grad = True
        # embd1 = Variable(embd1.data, requires_grad = True)
        # embd2 = Variable(embd2.data, requires_grad=True)

        # check = t.eq(embd1[0], embd2[0])
        # check = at.tonumpy(check)
        # print(at.tonumpy(embd1[0])[np.where(check == False)])
        embd1 = l2_norm(embd1)
        embd2 = l2_norm(embd2)

        # cos = pairwise.linear_kernel(embd1.cpu(), embd2.cpu())  # Output size (n_embd1, n_embd2)
        cos = t.einsum('ik,jk->ij', embd1, embd2)
        # cos = t.matmul(embd1, embd2)
        cos = t.clamp(cos, min=1e-6, max=1 - 1e-6)

        # cos_dist = 1 - np.arccos(cos) / np.pi   # [0,1]
        # ang_dist = 1 - t.div(t.acos(cos), math.pi)
        ang_dist = t.div(t.acos(cos), math.pi)

        # ang_dist = t.sort(ang_dist)
        # if len(ang_dist) > 2:
        #     if target.item() == 1.:
        #         # mask = np.where(ang_dist > thres_l)
        #         # cos_dist = ang_dist[mask]
        #         ang_dist =
        #     elif target.item() == 0.:
        #         mask = np.where(ang_dist < thres_h)
        #         cos_dist = ang_dist[mask]

        # if cos_dist.size == 0:
        #     sm_loss = t.tensor([1 - 1e-4]).cuda()
        # else:
        avg_ang_dist = t.mean(ang_dist)
        sm_loss = 0.5 * (target.float() * avg_ang_dist +
                         (1 + (-1 * target)).float() * F.relu(1 - (avg_ang_dist + 1e-7).sqrt()).pow(2))
        # log_ang = t.log(avg_ang_dist)
        # neg_log_ang = t.log(1 - avg_ang_dist)
        # sm_loss = -(target * log_ang + (1 - target) * neg_log_ang)

        # Task Importance parameter
        # alpha_rpn = 0.5
        # alpha_roi = 0.5
        # alpha_sm = 1.0

        rpn_loc_loss_total = (losses1[0] + losses2[0]) / 2
        rpn_cls_loss_total = (losses1[1] + losses2[1]) / 2
        roi_loc_loss_total = (losses1[2] + losses2[2]) / 2
        roi_cls_loss_total = (losses1[3] + losses2[3]) / 2

        Losses = [rpn_loc_loss_total, rpn_cls_loss_total, roi_loc_loss_total, roi_cls_loss_total, sm_loss]
        # Losses = [rpn_cls_loss_total, roi_cls_loss_total, sm_loss]
        # LossTotal = alpha_rpn * (Losses[0] + Losses[1]) + alpha_roi * (Losses[2] + Losses[3]) + alpha_sm * Losses[4]
        LossTotal = Losses[4]
        Losses = Losses + [LossTotal]

        return LossTuple(*Losses)

最后是从第4期到第8期的训练损失图像:

enter image description here

sm_loss是对比损失


Tags: posselfsizedistcosclsfcpool