I removed the classification part of a Faster R-CNN model and added another output layer to obtain feature embeddings. I train it as a siamese network with a contrastive loss on the cosine distance. I train this contrastive loss jointly with the Faster R-CNN losses, but the contrastive loss does not decrease, while the other losses do.
Here is the link to the Faster R-CNN model.
What I have tried so far:
It still does not work.
Here is my head network:
class HeadNet(nn.Module):
    def __init__(self, roi_size, spatial_scale, classifier):
        super(HeadNet, self).__init__()
        self.classifier = classifier
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi_pool = RoIPool((self.roi_size, self.roi_size), self.spatial_scale)
        # self.fc_4096 = nn.Sequential(
        #     nn.Linear(512 * 7 * 7, 4096),
        #     nn.ReLU(),
        #     nn.Linear(4096, 4096),
        #     nn.ReLU()
        # )
        self.fc_embedding = nn.Linear(4096, 128)
        self.fc_cls_loc = nn.Linear(4096, 8)  # 2 classes (background/foreground) x 4 box coordinates
        self.fc_score = nn.Linear(4096, 2)
        normal_init(self.fc_cls_loc, 0, 0.001)
        normal_init(self.fc_score, 0, 0.01)
        normal_init(self.fc_embedding, 0, 0.01)

    def forward(self, x, rois, roi_indices, n_pos):
        """
        :param x: 4D feature map variable
        :param rois: region proposals
        :param roi_indices: image index of each RoI within the batch
        :param n_pos: number of positive proposals; None at test time
        :return: roi_scores [N, 2], roi_cls_locs [N, 8], and embeddings
            [n_pos, 128] for the filtered positive proposals of each image,
            where n_pos is the number of positive RoIs.
            Tip: the positive RoIs are only used for training; testing differs.
        """
        # Preprocess the input data for all RoIs
        roi_indices = at.totensor(roi_indices).float()
        rois = at.totensor(rois).float()
        indices_and_rois = t.cat([roi_indices[:, None], rois], dim=1)
        # Reorder (y1, x1, y2, x2) to (x1, y1, x2, y2) for RoIPool
        xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
        indices_and_rois = xy_indices_and_rois.contiguous()
        pool = self.roi_pool(x, indices_and_rois)
        if n_pos is not None:
            pool_pos = pool[:4]  # get the positive part (first 4 RoIs)
        else:
            pool_pos = pool
        pool = pool.view(pool.size(0), -1)
        # fc_4096 = self.fc_4096(pool)
        fc_4096 = self.classifier(pool)
        pool_pos = pool_pos.view(pool_pos.size(0), -1)  # reshape to [N, 25088]
        # fc_4096_pos = self.fc_4096(pool_pos)
        fc_4096_pos = self.classifier(pool_pos)
        roi_cls_locs = self.fc_cls_loc(fc_4096)
        roi_scores = self.fc_score(fc_4096)  # [N, 2]
        embeddings = self.fc_embedding(fc_4096_pos)  # [n_pos, 128]
        return roi_scores, roi_cls_locs, embeddings
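For context, here is a minimal shape check of the head as I understand it. This is a sketch, not the original training code: it assumes HeadNet and its helpers (RoIPool, normal_init, and the repo's array_tool imported as at) are importable, and the stand-in classifier below only mimics the VGG-16 fully connected stack (512*7*7 -> 4096):

import torch as t
import torch.nn as nn

# Stand-in for the VGG-16 classifier head (assumption: 512*7*7 -> 4096)
classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096), nn.ReLU(),
    nn.Linear(4096, 4096), nn.ReLU(),
)
head = HeadNet(roi_size=7, spatial_scale=1. / 16, classifier=classifier)

x = t.randn(1, 512, 38, 50)              # conv5 features of a ~600x800 image at stride 16
rois = t.tensor([[0., 0., 100., 100.],   # (y1, x1, y2, x2), assumed repo convention
                 [10., 10., 200., 150.]])
roi_indices = t.zeros(2)                 # both RoIs belong to image 0

# n_pos=None, so the embedding branch runs over all RoIs
scores, locs, embd = head(x, rois, roi_indices, None)
print(scores.shape, locs.shape, embd.shape)  # [2, 2], [2, 8], [2, 128]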
Here is the network structure:
class SiameseReID(nn.Module):
    feat_stride = 16

    def __init__(self):
        super(SiameseReID, self).__init__()
        self.extractor, self.cls_layers = vgg_16()
        self.rpn = RegionProposalNetwork(512, 512, ratios=[0.5, 1, 2],
                                         anchor_scales=[8, 16, 32],
                                         feat_stride=self.feat_stride)
        self.head = HeadNet(roi_size=7, spatial_scale=(1. / self.feat_stride),
                            classifier=self.cls_layers)
        # self.proposal_target_layer = ProposalFilter()

    def forward_once(self, im):
        """
        :param im: 4D image tensor
        :return: rois, embeddings, roi_locs_reg, roi_scores
        """
        im_size = im.shape[2:]
        features = self.extractor(im)
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.rpn(features, im_size, scale=1)
        roi_scores, roi_locs_reg, embeddings = self.head(features, rois, roi_indices, None)
        return rois, embeddings, roi_locs_reg, roi_scores
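A hypothetical usage sketch of the siamese pass (assuming SiameseReID above and its dependencies vgg_16 and RegionProposalNetwork are importable; all shapes are illustrative). Note that the contrastive-loss forward below calls a trainer-level forward_once(img, bbox, scale) that also returns the detection losses, while the network-level forward_once here returns four values:

import torch as t

net = SiameseReID().eval()
img1 = t.randn(1, 3, 600, 800)   # first image of the pair
img2 = t.randn(1, 3, 600, 800)   # second image of the pair
with t.no_grad():
    rois1, embd1, locs1, scores1 = net.forward_once(img1)
    rois2, embd2, locs2, scores2 = net.forward_once(img2)
print(embd1.shape, embd2.shape)  # [n_rois, 128] each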
Here is the contrastive loss (the remaining losses are the same as the ones used in the link I posted above):
def forward(self, img1, img2, bbox1, bbox2, target, scale):
    """
    :param target: 1 for a positive pair, 0 for a negative pair
    """
    embedding1, losses1 = self.forward_once(img1, bbox1, scale)
    embedding2, losses2 = self.forward_once(img2, bbox2, scale)
    # -------------------- Similarity Measure Loss ------------------------- #
    n_embd1 = embedding1.shape[0]
    n_embd2 = embedding2.shape[0]
    embd1 = at.totensor(embedding1)
    embd2 = at.totensor(embedding2)
    # if n_embd1 > 3:
    #     embd1 = embd1[0:3, :]
    # if n_embd2 > 3:
    #     embd2 = embd2[0:3, :]
    embd1.requires_grad = True
    embd2.requires_grad = True
    # embd1 = Variable(embd1.data, requires_grad=True)
    # embd2 = Variable(embd2.data, requires_grad=True)
    # check = t.eq(embd1[0], embd2[0])
    # check = at.tonumpy(check)
    # print(at.tonumpy(embd1[0])[np.where(check == False)])
    embd1 = l2_norm(embd1)
    embd2 = l2_norm(embd2)
    # cos = pairwise.linear_kernel(embd1.cpu(), embd2.cpu())  # output size (n_embd1, n_embd2)
    cos = t.einsum('ik,jk->ij', embd1, embd2)  # pairwise cosine similarities
    # cos = t.matmul(embd1, embd2)
    cos = t.clamp(cos, min=1e-6, max=1 - 1e-6)
    # cos_dist = 1 - np.arccos(cos) / np.pi  # [0, 1]
    # ang_dist = 1 - t.div(t.acos(cos), math.pi)
    ang_dist = t.div(t.acos(cos), math.pi)  # angular distance in [0, 1]
    # ang_dist = t.sort(ang_dist)
    # if len(ang_dist) > 2:
    #     if target.item() == 1.:
    #         # mask = np.where(ang_dist > thres_l)
    #         # cos_dist = ang_dist[mask]
    #         ang_dist =
    #     elif target.item() == 0.:
    #         mask = np.where(ang_dist < thres_h)
    #         cos_dist = ang_dist[mask]
    # if cos_dist.size == 0:
    #     sm_loss = t.tensor([1 - 1e-4]).cuda()
    # else:
    avg_ang_dist = t.mean(ang_dist)
    sm_loss = 0.5 * (target.float() * avg_ang_dist +
                     (1 + (-1 * target)).float() * F.relu(1 - (avg_ang_dist + 1e-7).sqrt()).pow(2))
    # log_ang = t.log(avg_ang_dist)
    # neg_log_ang = t.log(1 - avg_ang_dist)
    # sm_loss = -(target * log_ang + (1 - target) * neg_log_ang)
    # Task importance parameters
    # alpha_rpn = 0.5
    # alpha_roi = 0.5
    # alpha_sm = 1.0
    rpn_loc_loss_total = (losses1[0] + losses2[0]) / 2
    rpn_cls_loss_total = (losses1[1] + losses2[1]) / 2
    roi_loc_loss_total = (losses1[2] + losses2[2]) / 2
    roi_cls_loss_total = (losses1[3] + losses2[3]) / 2
    Losses = [rpn_loc_loss_total, rpn_cls_loss_total, roi_loc_loss_total, roi_cls_loss_total, sm_loss]
    # Losses = [rpn_cls_loss_total, roi_cls_loss_total, sm_loss]
    # LossTotal = alpha_rpn * (Losses[0] + Losses[1]) + alpha_roi * (Losses[2] + Losses[3]) + alpha_sm * Losses[4]
    LossTotal = Losses[4]
    Losses = Losses + [LossTotal]
    return LossTuple(*Losses)
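Written out, the similarity loss the code above computes is

\[
\bar{d} \;=\; \frac{1}{n_1 n_2}\sum_{i,j} \frac{\arccos\!\big(\hat{e}^{(1)}_i \cdot \hat{e}^{(2)}_j\big)}{\pi},
\qquad
L_{sm} \;=\; \frac{1}{2}\Big( y\,\bar{d} \;+\; (1-y)\,\max\!\big(0,\; 1-\sqrt{\bar{d}}\big)^{2} \Big),
\]

where \(\hat{e}^{(1)}, \hat{e}^{(2)}\) are the L2-normalized embeddings of the two images, \(y \in \{0, 1\}\) is the pair target, the cosine is clamped to \([10^{-6},\, 1-10^{-6}]\) before the arccos, and a small epsilon is added inside the square root. For positive pairs the loss decreases with the mean angular distance; for negative pairs it is zero once \(\sqrt{\bar{d}}\) exceeds the margin of 1.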
Finally, here is the training loss plot from epoch 4 to epoch 8 (image omitted): sm_loss is the contrastive loss.