본문 바로가기
잡지식 저장고/Pytorch

detectron2 에서 Faster R-CNN RPN에 GradCAM 붙이기

by Slate_Knowledge 2021. 3. 30.
728x90

기본적인 코드 세팅은 github.com/yizt/Grad-CAM.pytorch 를 참조한다.

이 외에 detectron2 설치 폴더 아래 modeling/proposal_generator/rpn.py 에 있는 RPN 클래스의 predict_proposals 함수에서

def predict_proposals(
        self,
        anchors: List[Boxes],
        pred_objectness_logits: List[torch.Tensor],
        pred_anchor_deltas: List[torch.Tensor],
        image_sizes: List[Tuple[int, int]],
    ):
        """
        Turn the predicted box regression deltas into proposals and keep the best ones.

        The deltas are decoded against the anchors, then the top proposals are
        selected by applying NMS and dropping boxes that are too small.

        Returns:
            proposals (list[Instances]): list of N Instances. The i-th Instances
                stores post_nms_topk object proposals for image i, sorted by their
                objectness score in descending order.
        """
        # NOTE: the ``with torch.no_grad():`` guard that used to wrap the code
        # below is intentionally disabled, so the decoding and the proposal
        # selection are no longer excluded from autograd.
        decoded_boxes = self._decode_proposals(anchors, pred_anchor_deltas)

        # ``self.training`` selects which pre-/post-NMS top-k budget applies.
        is_training = self.training
        topk_before_nms = self.pre_nms_topk[is_training]
        topk_after_nms = self.post_nms_topk[is_training]

        return find_top_rpn_proposals(
            decoded_boxes,
            pred_objectness_logits,
            image_sizes,
            self.nms_thresh,
            topk_before_nms,
            topk_after_nms,
            self.min_box_size,
            is_training,
        )

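와 같이 with torch.no_grad(): 줄을 주석 처리하고, 그 아래 코드의 들여쓰기를 한 단계 줄여준다. 이렇게 해야 proposal 의 objectness logit 으로부터 역전파(backward)를 할 수 있어 GradCAM 계산에 필요한 gradient 를 얻을 수 있다.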

그 다음 위 GitHub 레포의 demo.py 실행 방법대로 실행하면 되는데, 이때 demo.py 의 해당 부분을

# Excerpt of demo.py: compute Grad-CAM and Grad-CAM++ maps for one input and
# collect the overlays in ``image_dict``.

# The automatically detected last conv layer is replaced by a fixed layer name.
# layer_name = get_last_conv_name(model)
layer_name = 'proposal_generator.rpn_head.conv'

grad_cam = GradCAM(model, layer_name)
mask, box, class_id = grad_cam(inputs)  # cam mask
# Remove the handlers before the Grad-CAM++ object is created on the same layer.
grad_cam.remove_handlers()

image_dict = {}
# Reverse the last axis of the image (presumably BGR -> RGB; confirm against
# how ``original_image`` is loaded).
img = original_image[..., ::-1]
# The per-box crop is disabled: the whole image is used instead of the box region.
# x1, y1, x2, y2 = box
image_dict['predict_box'] = img#[y1:y2, x1:x2]
image_dict['heatmap'], _ = gen_cam(img, mask)#gen_cam(img[y1:y2, x1:x2], mask)

# Grad-CAM++
grad_cam_plus_plus = GradCamPlusPlus(model, layer_name)
mask_plus_plus = grad_cam_plus_plus(inputs)  # cam mask
image_dict['heatmap++'], _ = gen_cam(img, mask_plus_plus)#gen_cam(img[y1:y2, x1:x2], mask_plus_plus)
grad_cam_plus_plus.remove_handlers()

# Get the class names (dataset metadata); falls back to "__unused" when
# cfg.DATASETS.TEST is empty.
meta = MetadataCatalog.get(
cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
)
# The label lookup is disabled together with the per-box outputs.
# label = meta.thing_classes[class_id]

# print("label:{}".format(label))

와 같이 수정하고

grad_cam.py 에 정의되어 있는 __call__ 함수 두 개(GradCAM 클래스와 GradCamPlusPlus 클래스)를 각각

def __call__(self, inputs, index=0):
        """
        Compute a Grad-CAM map driven by the RPN objectness logits.

        The sum of the objectness logits of the proposals returned for the
        image is back-propagated, and the tensors found on ``self.feature`` and
        ``self.gradient`` afterwards (presumably stored by hooks registered
        elsewhere in this class) are combined into the activation map.

        ``self.net`` must expose ``preprocess_image``, ``backbone`` and
        ``proposal_generator``.

        :param inputs: {"image": [C,H,W], "height": height, "width": width}
        :param index: index of the bounding box; not used by this RPN variant,
            kept so existing callers keep working
        :return: tuple ``(cam, box, class_id)`` where ``cam`` is the map
            resized to 224x224 and ``box`` / ``class_id`` are always ``None``
        """
        self.net.zero_grad()
        proposal_idx = 0  # a single image is fed, so use batch entry 0

        # Flip the top-level training flag only for the duration of the pass
        # and restore it afterwards, so the caller's model state is untouched.
        was_training = self.net.training
        self.net.training = True
        try:
            # Run the backbone on the preprocessed batch so the features and
            # the images handed to the proposal generator describe the same
            # input; this also places the tensor on the model's device instead
            # of hard-coding ``.cuda()``.
            images = self.net.preprocess_image([inputs])
            features = self.net.backbone(images.tensor)
            proposals, _ = self.net.proposal_generator(images, features, None)
            torch.sum(proposals[0].objectness_logits).backward()
        finally:
            self.net.training = was_training

        gradient = self.gradient[proposal_idx].detach().cpu().numpy()  # [C,H,W]
        weight = np.mean(gradient, axis=(1, 2))  # [C]

        feature = self.feature[proposal_idx].detach().cpu().numpy()  # [C,H,W]

        cam = feature * weight[:, np.newaxis, np.newaxis]  # [C,H,W]
        cam = np.sum(cam, axis=0)  # [H,W]
        cam = np.maximum(cam, 0)  # ReLU

        # Normalise to [0, 1]; an all-zero map is left as zeros instead of
        # being divided by zero.
        cam -= np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam /= peak

        # No detection box or class is produced on the RPN path.
        box = None
        class_id = None
        cam = cv2.resize(cam, (224, 224))
        return cam, box, class_id
def __call__(self, inputs, index=0):
        """
        Compute a Grad-CAM++ style map driven by the RPN objectness logits.

        The sum of the objectness logits of the proposals returned for the
        image is back-propagated, and the tensors found on ``self.feature`` and
        ``self.gradient`` afterwards (presumably stored by hooks registered
        elsewhere in this class) are combined into the activation map.

        ``self.net`` must expose ``preprocess_image``, ``backbone`` and
        ``proposal_generator``.

        :param inputs: {"image": [C,H,W], "height": height, "width": width}
        :param index: index of the bounding box; not used by this RPN variant,
            kept so existing callers keep working
        :return: the activation map, normalised and resized to 224x224
        """
        self.net.zero_grad()
        proposal_idx = 0  # a single image is fed, so use batch entry 0

        # Flip the top-level training flag only for the duration of the pass
        # and restore it afterwards, so the caller's model state is untouched.
        was_training = self.net.training
        self.net.training = True
        try:
            # Run the backbone on the preprocessed batch so the features and
            # the images handed to the proposal generator describe the same
            # input; this also places the tensor on the model's device instead
            # of hard-coding ``.cuda()``.
            images = self.net.preprocess_image([inputs])
            features = self.net.backbone(images.tensor)
            proposals, _ = self.net.proposal_generator(images, features, None)
            torch.sum(proposals[0].objectness_logits).backward()
        finally:
            self.net.training = was_training

        gradient = self.gradient[proposal_idx].detach().cpu().numpy()  # [C,H,W]
        gradient = np.maximum(gradient, 0.)  # ReLU
        indicate = np.where(gradient > 0, 1., 0.)  # indicator function

        # Per-channel reciprocal of the summed positive gradient; channels
        # whose sum is not positive get 0 so nothing is divided by zero.
        channel_sum = np.sum(gradient, axis=(1, 2))  # [C]
        norm_factor = np.zeros_like(channel_sum)
        np.divide(1., channel_sum, out=norm_factor, where=channel_sum > 0.)
        alpha = indicate * norm_factor[:, np.newaxis, np.newaxis]  # [C,H,W]

        weight = np.sum(gradient * alpha, axis=(1, 2))  # [C]  alpha*ReLU(gradient)

        feature = self.feature[proposal_idx].detach().cpu().numpy()  # [C,H,W]

        cam = feature * weight[:, np.newaxis, np.newaxis]  # [C,H,W]
        cam = np.sum(cam, axis=0)  # [H,W]

        # Normalise to [0, 1]; a constant map is left as zeros instead of
        # being divided by zero.
        cam -= np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam /= peak
        cam = cv2.resize(cam, (224, 224))

        return cam

와 같이 수정해주면 된다. 그러면 결과물이

GradCAM overlay
GradCAM++ overlay

와 같이 나오게 된다.

728x90
반응형

댓글