dtcms插件 数据库 kubeflow django wxRuby installation sdk 如何做网络营销推广 sqlalchemy ansible jqgrid vue手册 网站后台管理模板 linux内存管理 matlab停止运行命令 mysql统计数量 solr索引 当前时间减一天 yml文件注释 python创建数据库 python类和对象 python安装 java环境 java时间戳转换成时间 java继承关键字 java获取当前年月 javac javalist数组 心理学与生活下载 subprocess su版本转换器 maxtoc4d 编程语言实现模式 mac地址修改 html5下载 ip切换软件 linux安卓模拟器 asp编程 文件粉碎工具 回收站在哪个盘
当前位置: 首页 > 学习教程  > 编程语言

mxnet复现SSD之模型架构

2020/9/19 15:50:39 文章标签:

mxnet复现SSD系列文章目录

一、数据集的导入.
二、SSD模型架构.
三、训练脚本的实现.
四、损失、评价函数.
五、预测结果.


文章目录

  • mxnet复现SSD系列文章目录
  • 前言
  • 一、模型架构
  • 二、实现代码
  • 参考链接


前言

本项目按照 Pascal VOC 的格式读取数据集,数据集为 Kaggle 官网提供的口罩检测数据集(Face Mask Detection);模型架构参考自 gluoncv 的 ssd_300_vgg16_atrous_voc 源码。


一、模型架构

SSD(
  (features): VGG_atrous(
    (stages): HybridSequential(
      (0): HybridSequential(
        (0): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): Activation(relu)
      )
      (1): HybridSequential(
        (0): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): Activation(relu)
      )
      (2): HybridSequential(
        (0): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): Activation(relu)
        (4): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (5): Activation(relu)
      )
      (3): HybridSequential(
        (0): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): Activation(relu)
        (4): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (5): Activation(relu)
      )
      (4): HybridSequential(
        (0): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): Activation(relu)
        (4): Conv2D(None -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (5): Activation(relu)
      )
      (5): HybridSequential(
        (0): Conv2D(None -> 1024, kernel_size=(3, 3), stride=(1, 1), padding=(6, 6), dilation=(6, 6))
        (1): Activation(relu)
        (2): Conv2D(None -> 1024, kernel_size=(1, 1), stride=(1, 1))
        (3): Activation(relu)
      )
    )
    (norm4): Normalize(
    
    )
    (extras): HybridSequential(
      (0): HybridSequential(
        (0): Conv2D(None -> 256, kernel_size=(1, 1), stride=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (3): Activation(relu)
      )
      (1): HybridSequential(
        (0): Conv2D(None -> 128, kernel_size=(1, 1), stride=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (3): Activation(relu)
      )
      (2): HybridSequential(
        (0): Conv2D(None -> 128, kernel_size=(1, 1), stride=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1))
        (3): Activation(relu)
      )
      (3): HybridSequential(
        (0): Conv2D(None -> 128, kernel_size=(1, 1), stride=(1, 1))
        (1): Activation(relu)
        (2): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1))
        (3): Activation(relu)
      )
    )
  )
  (bbox_predictor): HybridSequential(
    (0): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2D(None -> 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): Conv2D(None -> 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): Conv2D(None -> 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (cls_predictor): HybridSequential(
    (0): Conv2D(None -> 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2D(None -> 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): Conv2D(None -> 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): Conv2D(None -> 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): Conv2D(None -> 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): Conv2D(None -> 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
)

二、实现代码

import mxnet as mx
from mxnet import nd, init
from mxnet.gluon import nn

# Keyed by 16 (presumably the VGG depth -- confirm). Each value is a pair:
# (number of conv layers per stage, output channels per stage). VGG_atrous
# zips the two lists together to build one stage per entry.
vgg_spec = {
    16: (
        [2, 2, 3, 3, 3],
        [64, 128, 256, 512, 512],
    ),
}

# Keyed by 300 (presumably the input resolution -- confirm). Each entry
# describes one extra feature block; every inner 4-tuple is unpacked by
# VGG_atrous as (f, k, s, p) and passed positionally to nn.Conv2D.
extra_spec = {
    300: [
        ((256, 1, 1, 0), (512, 3, 2, 1)),
        ((128, 1, 1, 0), (256, 3, 2, 1)),
        ((128, 1, 1, 0), (256, 3, 1, 0)),
        ((128, 1, 1, 0), (256, 3, 1, 0)),
    ],
}

# The configuration the classes below actually read.
layers = vgg_spec[16][0]
filters = vgg_spec[16][1]
extras = extra_spec[300]


class Normalize(nn.HybridBlock):
    """Normalize layer described in https://arxiv.org/abs/1512.02325.

    Applies L2 normalization in ``'channel'`` mode and multiplies the result
    by a scale parameter of shape ``(1, n_channel, 1, 1)``.

    Parameters
    ----------
    n_channel : int
        Number of channels of input.
    initial : float
        Constant the scale parameter is initialized with.
    eps : float
        Small value forwarded to the L2 normalization to avoid division by zero.

    """
    def __init__(self, n_channel, initial=1, eps=1e-5):
        super(Normalize, self).__init__()
        self.eps = eps
        with self.name_scope():
            scale_shape = (1, n_channel, 1, 1)
            scale_init = mx.init.Constant(initial)
            self.scale = self.params.get('normalize_scale', shape=scale_shape,
                                         init=scale_init)

    def hybrid_forward(self, F, x, scale):
        # Normalize first, then rescale; the scale broadcasts over the other axes.
        normalized = F.L2Normalization(x, mode='channel', eps=self.eps)
        return F.broadcast_mul(normalized, scale)


class VGG_atrous(nn.HybridBlock):
    """VGG feature extractor with a dilated stage and extra feature blocks.

    The network is assembled from the module-level ``layers``, ``filters``
    and ``extras`` configuration. ``hybrid_forward`` returns a list of six
    feature maps: the normalized output of stage 3, the output of the
    dilated stage, and one output per extra block.
    """
    def __init__(self):
        super(VGG_atrous, self).__init__()

        # Initializer settings shared by every convolution created below.
        self.init = {
            'weight_initializer': init.Xavier(
                rnd_type='gaussian', factor_type='out', magnitude=2),
            'bias_initializer': 'zeros'
        }
        conv_kwargs = self.init

        with self.name_scope():
            # Constant of shape (1, 3, 1, 1) that the input is multiplied by
            # at the start of hybrid_forward.
            scale_values = mx.nd.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1)) * 255
            self.init_scale = self.params.get_constant('init_scale', scale_values)

            # One stage per (layer count, channel count) pair: repeated
            # 3x3 convolution + ReLU.
            self.stages = nn.HybridSequential()
            for num_convs, channels in zip(layers, filters):
                vgg_stage = nn.HybridSequential(prefix='')
                with vgg_stage.name_scope():
                    for _ in range(num_convs):
                        vgg_stage.add(
                            nn.Conv2D(channels, kernel_size=3, padding=1, **conv_kwargs),
                            nn.Activation('relu'))
                self.stages.add(vgg_stage)

            # Sixth stage: a dilated 3x3 convolution followed by a 1x1 convolution.
            dilated = nn.HybridSequential(prefix='dilated_')
            with dilated.name_scope():
                dilated.add(
                    nn.Conv2D(1024, kernel_size=3, padding=6, dilation=6, **conv_kwargs),
                    nn.Activation('relu'),
                    nn.Conv2D(1024, kernel_size=1, **conv_kwargs),
                    nn.Activation('relu'))
            self.stages.add(dilated)

            # Normalization applied to the output of stages[3] in hybrid_forward.
            self.norm4 = Normalize(filters[3], 20)

            # Extra blocks, each a chain of convolution + ReLU pairs built
            # from the (f, k, s, p) tuples in ``extras``.
            self.extras = nn.HybridSequential()
            for idx, block_cfg in enumerate(extras):
                extra_block = nn.HybridSequential(prefix='extra%d_'%(idx))
                with extra_block.name_scope():
                    for channels, kernel, stride, pad in block_cfg:
                        extra_block.add(
                            nn.Conv2D(channels, kernel, stride, pad, **conv_kwargs),
                            nn.Activation('relu'))
                self.extras.add(extra_block)

    def hybrid_forward(self, F, x, init_scale):
        """Run the extractor and return the list of feature maps."""
        # Settings shared by the 2x2 / stride-2 max-pooling steps.
        pool_2x2 = dict(pool_type='max', kernel=(2, 2), stride=(2, 2),
                        pooling_convention='full')

        x = F.broadcast_mul(x, init_scale)
        assert len(self.stages) == 6

        # First three stages, each followed by a 2x2 pooling.
        for stage in self.stages[:3]:
            x = F.Pooling(stage(x), **pool_2x2)

        # Stage 3 output is normalized for the first feature map; the
        # un-normalized tensor continues through the network.
        x = self.stages[3](x)
        outputs = [self.norm4(x)]

        x = self.stages[4](F.Pooling(x, **pool_2x2))
        # This pooling uses a 3x3 kernel with stride 1 and padding 1.
        x = F.Pooling(x, pool_type='max', kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                      pooling_convention='full')
        x = self.stages[5](x)
        outputs.append(x)

        # Every extra block contributes one more feature map.
        for extra in self.extras:
            x = extra(x)
            outputs.append(x)
        return outputs


class SSD(nn.HybridBlock):
    """SSD detector: a VGG_atrous feature extractor plus per-level heads.

    Parameters
    ----------
    num_classes : int
        Number of classes; each anchor gets ``num_classes + 1`` class outputs
        (the extra one is presumably the background class -- confirm).
    """
    def __init__(self, num_classes):
        super(SSD, self).__init__()

        self.num_classes = num_classes
        # One (sizes, ratios) pair per feature map returned by the extractor.
        self.sizes = [
            [.1, .141],
            [.2, .272],
            [.37, .447],
            [.54, .619],
            [.71, .79],
            [.88, .961],
        ]
        self.ratios = [
            [1, 2, .5],
            [1, 2, .5, 3, 1. / 3],
            [1, 2, .5, 3, 1. / 3],
            [1, 2, .5, 3, 1. / 3],
            [1, 2, .5],
            [1, 2, .5],
        ]

        self.features = VGG_atrous()

        self.bbox_predictor = nn.HybridSequential()
        self.cls_predictor = nn.HybridSequential()

        outputs_per_anchor = self.num_classes + 1
        for level_sizes, level_ratios in zip(self.sizes, self.ratios):
            # Number of anchors generated per position.
            num_anchors = len(level_sizes) + len(level_ratios) - 1
            # Four box outputs per anchor.
            bbox_head = nn.Conv2D(num_anchors * 4, kernel_size=3, padding=1)
            self.bbox_predictor.add(bbox_head)
            cls_head = nn.Conv2D(num_anchors * outputs_per_anchor,
                                 kernel_size=3, padding=1)
            self.cls_predictor.add(cls_head)

    def flatten_pred(self, pred):
        """Move axis 1 to the end and flatten to 2-D (batch, everything else).

        Axis 1 is the channel axis under Conv2D's default NCHW layout; the
        common 2-D format lets predictions from different levels be
        concatenated afterwards.
        """
        channels_last = pred.transpose((0, 2, 3, 1))
        return channels_last.flatten()

    def concat_preds(self, F, preds):
        """Flatten every prediction and concatenate them along axis 1."""
        flattened = [self.flatten_pred(p) for p in preds]
        return F.concat(*flattened, dim=1)

    def hybrid_forward(self, F, x):
        """Return ``(anchors, bbox_preds, cls_preds)`` for the input batch."""
        feature_maps = self.features(x)

        anchors, cls_preds, bbox_preds = [], [], []
        for level, fmap in enumerate(feature_maps):
            # Head ``level`` and size/ratio set ``level`` belong to feature map ``level``.
            cls_preds.append(self.cls_predictor[level](fmap))
            bbox_preds.append(self.bbox_predictor[level](fmap))
            anchors.append(F.contrib.MultiBoxPrior(
                fmap, sizes=self.sizes[level], ratios=self.ratios[level]))

        bbox_preds = self.concat_preds(F, bbox_preds)
        # Regroup the flat class scores so the last axis holds num_classes + 1 values.
        cls_preds = self.concat_preds(F, cls_preds).reshape((0, -1, self.num_classes + 1))
        anchors = F.concat(*anchors, dim=1)

        return anchors, bbox_preds, cls_preds


def get_model(num_classes, pretrained_model=None, pretrained=False, pretrained_base=False, ctx=mx.gpu()):
    """Build an SSD network and optionally load weights into it.

    Parameters
    ----------
    num_classes : int
        Forwarded to the ``SSD`` constructor.
    pretrained_model : str, optional
        Parameter file loaded into the whole network when ``pretrained`` is set.
    pretrained : bool
        Load ``pretrained_model`` into the whole network. Only consulted when
        ``pretrained_base`` is false.
    pretrained_base : bool
        Initialize the network with Xavier, then load
        ``model/vgg16_atrous_300.params`` into the feature extractor only.
    ctx : mxnet Context
        Device passed to ``initialize`` and to the full-model ``load_parameters``.

    Returns
    -------
    SSD
        The constructed network. NOTE(review): when both flags are false,
        neither ``initialize`` nor ``load_parameters`` is called here, so the
        caller presumably has to initialize it -- confirm.
    """
    net = SSD(num_classes)

    if pretrained_base:
        # Initialize everything first; the base weights then overwrite the
        # feature extractor, and allow_missing tolerates parameters that the
        # file does not contain.
        net.initialize(init=init.Xavier(), ctx=ctx)
        base_weights = 'model/vgg16_atrous_300.params'
        net.features.load_parameters(base_weights, allow_missing=True)
        return net

    if pretrained:
        net.load_parameters(pretrained_model, ctx=ctx)
    return net

代码中加载的预训练模型(model/vgg16_atrous_300.params)为 gluoncv 官方提供的权重文件。

模型大致分为三部分:特征提取层、边界框预测层和类别预测层。

参考链接

https://zh.d2l.ai/chapter_computer-vision/ssd.html
https://gluon-cv.mxnet.io/model_zoo/detection.html#ssd


本文链接: http://www.dtmao.cc/news_show_200328.shtml

附件下载

相关教程

    暂无相关的数据...

共有条评论 网友评论

验证码: 看不清楚?