前言

  本篇主要介绍mmdet/core/anchor/anchor_generator.py文件下的AnchorGenerator类。以RetinaNet的配置作为说明。

anchor_generator_cfg = dict(
    type='AnchorGenerator',
    octave_base_scale=4,        # base_anchor的大小
    scales_per_octave=3,        # 每个base_anchor有3个比例
    ratios=[0.5, 1.0, 2.0],     # 每个anchor的宽高比
    strides=[8, 16, 32, 64, 128]) # 每个特征图步长,可以理解为感受野或者下采样率。

1、base_anchors的生成

 所谓base_anchors,是在初始化AnchorGenerator类时借助gen_base_anchors方法产生的9个基础anchor,这些anchor的坐标是相对于原图的。

@ANCHOR_GENERATORS.register_module()
class AnchorGenerator(object):
    """Standard anchor generator for 2-D anchor-based detectors.

    With the RetinaNet config shown above, every feature-map location
    gets ``len(ratios) * scales_per_octave = 3 * 3 = 9`` base anchors.
    """

    def __init__(self,
                 strides,                 # per-level feature strides, e.g. [8, 16, 32, 64, 128]
                 ratios,                  # anchor aspect (h/w) ratios, e.g. [0.5, 1.0, 2.0]
                 scales=None,             # explicit anchor scales (exclusive with the octave args)
                 base_sizes=None,         # basic anchor sizes; defaults to the strides
                 scale_major=True,        # whether ws/hs are laid out ratio-major
                 octave_base_scale=None,  # e.g. 4
                 scales_per_octave=None,  # e.g. 3
                 centers=None,            # optional explicit per-level anchor centers
                 center_offset=0.):       # center offset relative to the stride
        # (stride_w, stride_h) pairs: [(8, 8), (16, 16), (32, 32), ...].
        self.strides = [_pair(stride) for stride in strides]
        # The stride roughly equals the receptive-field step, so it doubles
        # as the base anchor size unless `base_sizes` is given explicitly.
        self.base_sizes = [min(stride) for stride in self.strides
                           ] if base_sizes is None else base_sizes
        # `scales` and the (octave_base_scale, scales_per_octave) pair are
        # two mutually exclusive ways to specify the anchor scales.
        assert ((octave_base_scale is not None
                and scales_per_octave is not None) ^ (scales is not None)), \
            'scales and octave_base_scale with scales_per_octave cannot' \
            ' be set at the same time'
        if scales is not None:
            self.scales = torch.Tensor(scales)
        elif octave_base_scale is not None and scales_per_octave is not None:
            # octave_base_scale * [2**0, 2**(1/3), 2**(2/3)]
            # ~= [4.0, 5.04, 6.35] for the RetinaNet config.
            octave_scales = np.array(
                [2**(i / scales_per_octave) for i in range(scales_per_octave)])
            scales = octave_scales * octave_base_scale
            self.scales = torch.Tensor(scales)
        # BUGFIX: the remaining ctor arguments were never stored, but the
        # other methods (gen_base_anchors, gen_single_level_base_anchors,
        # grid_anchors, ...) read them from `self`; store them before
        # generating the base anchors.
        self.octave_base_scale = octave_base_scale
        self.scales_per_octave = scales_per_octave
        self.ratios = torch.Tensor(ratios)
        self.scale_major = scale_major
        self.centers = centers
        self.center_offset = center_offset
        # Generate the per-level base anchors once, up front.
        self.base_anchors = self.gen_base_anchors()

    @property
    def num_levels(self):
        """int: number of feature levels anchors are generated for."""
        return len(self.strides)

    @property
    def num_base_anchors(self):
        """list[int]: number of base anchors per feature level."""
        return [base_anchors.size(0) for base_anchors in self.base_anchors]

 现在具体看下gen_base_anchors方法:

    def gen_base_anchors(self):
        """Generate base anchors for every feature level.

        Returns:
            list(torch.Tensor): Base anchors of a feature grid in multiple
                feature levels.
        """
        anchors_per_level = []
        for level, base_size in enumerate(self.base_sizes):
            # Use an explicit per-level center only when one was configured.
            center = self.centers[level] if self.centers is not None else None
            level_anchors = self.gen_single_level_base_anchors(
                base_size,              # 8 / 16 / 32 / 64 / 128
                scales=self.scales,     # e.g. ~[4, 5, 6]
                ratios=self.ratios,     # e.g. [0.5, 1, 2]
                center=center)
            anchors_per_level.append(level_anchors)
        return anchors_per_level

    def gen_single_level_base_anchors(self,
                                      base_size,
                                      scales,
                                      ratios,
                                      center=None):
        """Generate the base anchors of a single feature level.

        Args:
            base_size (int): Base anchor size of this level (its stride).
            scales (torch.Tensor): Scale factors, e.g. ~[4, 5, 6].
            ratios (torch.Tensor): h/w ratios, e.g. [0.5, 1, 2].
            center (tuple[float], optional): Anchor center; defaults to
                ``center_offset * base_size`` (usually (0, 0)).

        Returns:
            torch.Tensor: ``(len(ratios) * len(scales), 4)`` anchors in
                [xmin, ymin, xmax, ymax] format.
        """
        w = h = base_size
        if center is not None:
            x_center, y_center = center
        else:
            # center_offset is 0 in the RetinaNet config -> center (0, 0).
            x_center = self.center_offset * w
            y_center = self.center_offset * h

        # ratio = h/w, so scaling h by sqrt(ratio) and w by 1/sqrt(ratio)
        # changes the aspect ratio while keeping the area constant.
        h_ratios = torch.sqrt(ratios)
        w_ratios = 1 / h_ratios
        if self.scale_major:
            # Outer product over (ratios, scales) flattened to one axis:
            # 8 * [3, 1] * [1, 3] -> 9 widths (and heights).
            ws = (w * w_ratios[:, None] * scales[None, :]).view(-1)
            hs = (h * h_ratios[:, None] * scales[None, :]).view(-1)
        else:
            ws = (w * scales[:, None] * w_ratios[None, :]).view(-1)
            hs = (h * scales[:, None] * h_ratios[None, :]).view(-1)

        # Convert [cx, cy, w, h] to [xmin, ymin, xmax, ymax] and stack the
        # four corner coordinates into rows.
        half_ws = 0.5 * ws
        half_hs = 0.5 * hs
        corners = [
            x_center - half_ws, y_center - half_hs,
            x_center + half_ws, y_center + half_hs
        ]
        return torch.stack(corners, dim=-1)

 其实上面代码就是下图干的事情:就是stride * scales* ratios = 9
(此处原文有一张示意图,展示 base_anchor 由 scales 与 ratios 组合生成的过程,图片未随文本导出。)
 用一个公式表示为:
$base\_anchors = cur\_stride \times scales \times ratios = 1 \times 3 \times 3 = 9$

2、grid_anchors的生成

  在生成base_anchor基础上,之后需要通过改变每个anchor的中心来广播到整张特征图上面。以grid_anchors方法实现:

    def grid_anchors(self, featmap_sizes, device='cuda'):
        """Broadcast the base anchors over every feature-map location.

        Args:
            featmap_sizes (list[tuple]): (feat_h, feat_w) of each level.
            device (str, optional): Device to place the anchors on.

        Returns:
            list[torch.Tensor]: Anchors of every feature level.
        """
        assert self.num_levels == len(featmap_sizes)
        return [
            self.single_level_grid_anchors(  # one call per pyramid level
                self.base_anchors[lvl].to(device),
                featmap_sizes[lvl],
                self.strides[lvl],
                device=device) for lvl in range(self.num_levels)
        ]

 贴下single_level_grid_anchors方法

    def _meshgrid(self, x, y, row_major=True):
        """Generate mesh grid of x and y

        Args:
            x (torch.Tensor): Grids of x dimension.
            y (torch.Tensor): Grids of y dimension.
            row_major (bool, optional): Whether to return y grids first.
                Defaults to True.

        Returns:
            tuple[torch.Tensor]: The mesh grids of x and y.
        """
        xx = x.repeat(len(y))  # 将x重复len(y)次。    [0,1,2,0,1,2]
        yy = y.view(-1, 1).repeat(1, len(x)).view(-1) # [0,0,0,1,1,1]
        if row_major:
            return xx, yy
        else:
            return yy, xx
            
    def single_level_grid_anchors(self,
                                  base_anchors,
                                  featmap_size,
                                  stride=(16, 16),
                                  device='cuda'):
        """Generate grid anchors of a single level.

        Note:
            This function is usually called by method ``self.grid_anchors``.

        Args:
            base_anchors (torch.Tensor): The base anchors of a feature grid.
            featmap_size (tuple[int]): Size of the feature maps.
            stride (tuple[int], optional): Stride of the feature map.
                Defaults to (16, 16).
            device (str, optional): Device the tensor will be put on.
                Defaults to 'cuda'.

        Returns:
            torch.Tensor: Anchors in the overall feature maps.
        """
        feat_h, feat_w = featmap_size
        # Multiplying grid indices by the stride maps them back to image
        # coordinates: [0, stride, 2*stride, ...].
        xs = torch.arange(0, feat_w, device=device) * stride[0]
        ys = torch.arange(0, feat_h, device=device) * stride[1]
        grid_xx, grid_yy = self._meshgrid(xs, ys)
        # One [x, y, x, y] offset per grid cell, matching the corner
        # layout of base_anchors, so a plain add shifts whole boxes.
        offsets = torch.stack([grid_xx, grid_yy, grid_xx, grid_yy], dim=-1)
        offsets = offsets.type_as(base_anchors)
        # Broadcast-add A base anchors (1, A, 4) to K offsets (K, 1, 4)
        # and flatten to (K*A, 4); the first feat_w * A rows belong to
        # the first row of the feature map.
        shifted = base_anchors[None, :, :] + offsets[:, None, :]
        return shifted.view(-1, 4)

3、valid_flags介绍

 简单说下这个方法作用:在模型批次训练过程中,往往会对图像进行pad,pad会产生黑边,后面撒anchor时在pad部分也会撒上anchor,其实这部分anchor应该忽略掉。故该函数就是赋予每个anchor一个标签:若anchor在有效像素位置上,则为True;否则为False。

    def valid_flags(self, featmap_sizes, pad_shape, device='cuda'):
        """Generate validity flags for the anchors of every feature level.

        Anchors placed on padded (black-border) pixels should be ignored
        during training; this marks each anchor True (valid, on real image
        content) or False (on padding).

        Args:
            featmap_sizes (list[tuple]): (feat_h, feat_w) of each level.
            pad_shape (tuple): Padded input image shape, (h, w, ...).
            device (str, optional): Device where the flags will be put on.
                Defaults to 'cuda'.

        Returns:
            list(torch.Tensor): One bool tensor per level, with exactly one
                entry per anchor of that level.
        """
        assert self.num_levels == len(featmap_sizes)
        multi_level_flags = []
        for i in range(self.num_levels):
            anchor_stride = self.strides[i]
            feat_h, feat_w = featmap_sizes[i]
            h, w = pad_shape[:2]
            # BUGFIX: strides are stored as (stride_w, stride_h) -- see
            # single_level_grid_anchors, where stride[0] scales x and
            # stride[1] scales y -- so the image height must be divided by
            # stride[1] and the width by stride[0].  The original snippet
            # had the indices swapped (harmless only for square strides).
            valid_feat_h = min(int(np.ceil(h / anchor_stride[1])), feat_h)
            valid_feat_w = min(int(np.ceil(w / anchor_stride[0])), feat_w)
            flags = self.single_level_valid_flags((feat_h, feat_w),
                                                  (valid_feat_h, valid_feat_w),
                                                  self.num_base_anchors[i],
                                                  device=device)
            multi_level_flags.append(flags)
        return multi_level_flags

    def single_level_valid_flags(self,
                                 featmap_size,
                                 valid_size,
                                 num_base_anchors,
                                 device='cuda'):
        """Generate the valid flags of anchors in a single feature map.

        Args:
            featmap_size (tuple[int]): Full (feat_h, feat_w) of the level.
            valid_size (tuple[int]): (valid_h, valid_w) covering the real
                (non-padded) image content.
            num_base_anchors (int): Number of base anchors per location.
            device (str, optional): Device where the flags will be put on.
                Defaults to 'cuda'.

        Returns:
            torch.Tensor: The valid flags of each anchor in a single level
                feature map.
        """
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        # Start everything False, then mark the leading valid span True.
        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        grid_x, grid_y = self._meshgrid(valid_x, valid_y)
        # A grid cell is valid only if both its row and column are valid.
        cell_valid = grid_x & grid_y
        # Replicate each cell flag once per base anchor at that cell.
        per_anchor = cell_valid[:, None].expand(
            cell_valid.size(0), num_base_anchors)
        return per_anchor.contiguous().view(-1)

总结

 下篇会介绍MaxIOUAssigner,敬请期待。

Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐