基于Pytorch SSD模型分析


Posted in Python onFebruary 18, 2020

本文参考github上SSD实现,对模型进行分析,主要分析模型组成及输入输出大小.SSD网络结构如下图:

基于Pytorch SSD模型分析

每输入的图像有8732个框输出;

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
#from layers import *
from data import voc, coco
import os
base = {
 '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
   512, 512, 512],
 '512': [],
}
extras = {
 '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
 '512': [],
}
mbox = {
 '300': [4, 6, 6, 6, 4, 4], # number of boxes per feature map location
 '512': [],
}

VGG基础网络结构:

def vgg(cfg, i, batch_norm=False):
 layers = []
 in_channels = i
 for v in cfg:
  if v == 'M':
   layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
  elif v == 'C':
   layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
  else:
   conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
   if batch_norm:
    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
   else:
    layers += [conv2d, nn.ReLU(inplace=True)]
   in_channels = v
 pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
 conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
 conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
 layers += [pool5, conv6,
    nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
 return layers
size=300
vgg=vgg(base[str(size)], 3)
print(vgg)

输出为:

Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(6, 6), dilation=(6, 6))
ReLU(inplace)
Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1))
ReLU(inplace)

SSD中添加的网络

add_extras函数构建基本的卷积层

def add_extras(cfg, i, batch_norm=False):
 # Extra layers added to VGG for feature scaling
 layers = []
 in_channels = i
 flag = False
 for k, v in enumerate(cfg):
  if in_channels != 'S':
   if v == 'S':
    layers += [nn.Conv2d(in_channels, cfg[k + 1],
       kernel_size=(1, 3)[flag], stride=2, padding=1)]
   else:
    layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
   flag = not flag
  in_channels = v
 return layers
extra_layers=add_extras(extras[str(size)], 1024)
for layer in extra_layers:
 print(layer)

输出为:

Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))

multibox函数得到每个特征图的默认box的位置计算网络和分类得分网络

def multibox(vgg, extra_layers, cfg, num_classes):
 loc_layers = []
 conf_layers = []
 vgg_source = [21, -2]
 for k, v in enumerate(vgg_source):
  loc_layers += [nn.Conv2d(vgg[v].out_channels,
         cfg[k] * 4, kernel_size=3, padding=1)]
  conf_layers += [nn.Conv2d(vgg[v].out_channels,
      cfg[k] * num_classes, kernel_size=3, padding=1)]
 for k, v in enumerate(extra_layers[1::2], 2):
  loc_layers += [nn.Conv2d(v.out_channels, cfg[k]
         * 4, kernel_size=3, padding=1)]
  conf_layers += [nn.Conv2d(v.out_channels, cfg[k]
         * num_classes, kernel_size=3, padding=1)]
 return vgg, extra_layers, (loc_layers, conf_layers)
base_, extras_, head_ = multibox(vgg(base[str(size)], 3), ## 产生vgg19基本模型
          add_extras(extras[str(size)], 1024), 
          mbox[str(size)], num_classes)
#mbox[str(size)]为:[4, 6, 6, 6, 4, 4]

得到的输出为:

base_为上述描述的vgg网络,extras_为extra_layers网络,head_为:

([Conv2d(512, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(1024, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(512, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(256, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(256, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(256, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))],
 [Conv2d(512, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(1024, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(512, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(256, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))])

SSD网络及forward函数为:

class SSD(nn.Module):
 """Single Shot Multibox Architecture
 The network is composed of a base VGG network followed by the
 added multibox conv layers. Each multibox layer branches into
  1) conv2d for class conf scores
  2) conv2d for localization predictions
  3) associated priorbox layer to produce default bounding
   boxes specific to the layer's feature map size.
 See: https://arxiv.org/pdf/1512.02325.pdf for more details.

 Args:
  phase: (string) Can be "test" or "train"
  size: input image size
  base: VGG16 layers for input, size of either 300 or 500
  extras: extra layers that feed to multibox loc and conf layers
  head: "multibox head" consists of loc and conf conv layers
 """

 def __init__(self, phase, size, base, extras, head, num_classes):
  super(SSD, self).__init__()
  self.phase = phase
  self.num_classes = num_classes 
  self.cfg = (coco, voc)[num_classes == 21]
  self.priorbox = PriorBox(self.cfg)
  self.priors = Variable(self.priorbox.forward(), volatile=True)
  self.size = size

  # SSD network
  self.vgg = nn.ModuleList(base)
  # Layer learns to scale the l2 normalized features from conv4_3
  self.L2Norm = L2Norm(512, 20)
  self.extras = nn.ModuleList(extras)

  self.loc = nn.ModuleList(head[0])
  self.conf = nn.ModuleList(head[1])

  if phase == 'test':
   self.softmax = nn.Softmax(dim=-1)
   self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

 def forward(self, x):
  """Applies network layers and ops on input image(s) x.

  Args:
   x: input image or batch of images. Shape: [batch,3,300,300].

  Return:
   Depending on phase:
   test:
    Variable(tensor) of output class label predictions,
    confidence score, and corresponding location predictions for
    each object detected. Shape: [batch,topk,7]

   train:
    list of concat outputs from:
     1: confidence layers, Shape: [batch*num_priors,num_classes]
     2: localization layers, Shape: [batch,num_priors*4]
     3: priorbox layers, Shape: [2,num_priors*4]
  """
  sources = list()
  loc = list()
  conf = list()

  # apply vgg up to conv4_3 relu
  for k in range(23):
   x = self.vgg[k](x) ##得到的x尺度为[1,512,38,38]

  s = self.L2Norm(x)
  sources.append(s)

  # apply vgg up to fc7
  for k in range(23, len(self.vgg)):
   x = self.vgg[k](x) ##得到的x尺寸为[1,1024,19,19]
  sources.append(x)

  # apply extra layers and cache source layer outputs
  for k, v in enumerate(self.extras):
   x = F.relu(v(x), inplace=True)
   if k % 2 == 1:
    sources.append(x)
  '''
  上述得到的x输出分别为:
  torch.Size([1, 512, 10, 10])
  torch.Size([1, 256, 5, 5])
  torch.Size([1, 256, 3, 3])
  torch.Size([1, 256, 1, 1])
  '''

  # apply multibox head to source layers
  for (x, l, c) in zip(sources, self.loc, self.conf):
   loc.append(l(x).permute(0, 2, 3, 1).contiguous())
   conf.append(c(x).permute(0, 2, 3, 1).contiguous())

  loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
  conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
  if self.phase == "test":
   output = self.detect(
    loc.view(loc.size(0), -1, 4),     # loc preds
    self.softmax(conf.view(conf.size(0), -1,
        self.num_classes)),    # conf preds
    self.priors.type(type(x.data))     # default boxes
   )
  else:
   output = (
    loc.view(loc.size(0), -1, 4), #[1,8732,4]
    conf.view(conf.size(0), -1, self.num_classes),#[1,8732,21]
    self.priors
   )
  return output

上述代码中sources中保存的数据输出如下,即用于边框提取的特征图:

torch.Size([1, 512, 38, 38])
torch.Size([1, 1024, 19, 19])
torch.Size([1, 512, 10, 10])
torch.Size([1, 256, 5, 5])
torch.Size([1, 256, 3, 3])
torch.Size([1, 256, 1, 1])

模型输入为

x=Variable(torch.randn(1,3,300,300))

以上这篇基于Pytorch SSD模型分析就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持三水点靠木。

Python 相关文章推荐
Python中map,reduce,filter和sorted函数的使用方法
Aug 17 Python
Python中Class类用法实例分析
Nov 12 Python
Python内建函数之raw_input()与input()代码解析
Oct 26 Python
python 3.7.0 下pillow安装方法
Aug 27 Python
python3安装speech语音模块的方法
Dec 24 Python
Python+selenium点击网页上指定坐标的实例
Jul 05 Python
Python3爬虫关于识别点触点选验证码的实例讲解
Jul 30 Python
python图片合成的示例
Nov 09 Python
Python基于Faker假数据构造库
Nov 30 Python
用Python可视化新冠疫情数据
Jan 18 Python
OpenCV项目实践之停车场车位实时检测
Apr 11 Python
Python中np.random.randint()参数详解及用法实例
Sep 23 Python
Python3使用腾讯云文字识别(腾讯OCR)提取图片中的文字内容实例详解
Feb 18 #Python
Python动态导入模块和反射机制详解
Feb 18 #Python
pytorch进行上采样的种类实例
Feb 18 #Python
new_zeros() pytorch版本的转换方式
Feb 18 #Python
对pytorch的函数中的group参数的作用介绍
Feb 18 #Python
基于python3实现倒叙字符串
Feb 18 #Python
Python日期格式和字符串格式相互转换的方法
Feb 18 #Python
You might like
php使用str_shuffle()函数生成随机字符串的方法分析
2017/02/17 PHP
基于JQuery 的消息提示框效果代码
2011/07/31 Javascript
JavaScript在XHTML中的用法详解
2013/04/11 Javascript
javascript动态添加、修改、删除对象的属性与方法详解
2014/01/27 Javascript
Jquery弹出层插件ThickBox的使用方法
2014/12/09 Javascript
jQuery中extend()和fn.extend()方法详解
2015/06/03 Javascript
JS+CSS实现的蓝色table选项卡效果
2015/10/08 Javascript
js实现弹窗居中的简单实例
2016/10/09 Javascript
基于JavaScript实现自定义滚动条
2017/01/25 Javascript
元素全屏的设置与监听实例
2017/11/28 Javascript
详解vue通过NGINX部署在子目录或者二级目录实践
2018/09/03 Javascript
PostgreSQL Node.js实现函数计算方法示例
2019/02/12 Javascript
node.js的http.createServer过程深入解析
2019/06/06 Javascript
javascript如何实现create方法
2019/11/04 Javascript
Python中Continue语句的用法的举例详解
2015/05/14 Python
python定时利用QQ邮件发送天气预报的实例
2017/11/17 Python
TensorFlow实现简单卷积神经网络
2018/05/24 Python
python 文件转成16进制数组的实例
2018/07/09 Python
Django安装配置mysql的方法步骤
2018/10/15 Python
Python操作SQLite数据库过程解析
2019/09/02 Python
TensorFlow绘制loss/accuracy曲线的实例
2020/01/21 Python
Python如何把多个PDF文件合并代码实例
2020/02/13 Python
解决Python数据可视化中文部分显示方块问题
2020/05/16 Python
使用numpngw和matplotlib生成png动画的示例代码
2021/01/24 Python
Python之多进程与多线程的使用
2021/02/23 Python
CSS3实现的文本3D效果附图
2014/09/03 HTML / CSS
使用html2canvas.js实现页面截图并显示或上传的示例代码
2018/12/18 HTML / CSS
西班牙创意礼品和小工具网上商店:Curiosite
2016/07/26 全球购物
小学教师听课制度
2014/02/01 职场文书
2014年乡镇植树节活动方案
2014/02/28 职场文书
保护环境倡议书
2014/04/14 职场文书
三八妇女节超市活动方案
2014/08/18 职场文书
小学生纪念九一八事变演讲稿
2014/09/14 职场文书
初中成绩单评语
2014/12/29 职场文书
感谢信的格式
2015/01/21 职场文书
vue本地构建热更新卡顿的问题“75 advanced module optimization”完美解决方案
2022/08/05 Vue.js