[enco] Shape inference on Pooling layers with Padding (#1354)
author박종현/동작제어Lab(SR)/Staff Engineer/삼성전자 <jh1302.park@samsung.com>
Thu, 6 Sep 2018 01:04:52 +0000 (10:04 +0900)
committerGitHub Enterprise <noreply-CODE@samsung.com>
Thu, 6 Sep 2018 01:04:52 +0000 (10:04 +0900)
This commit revises PoolingSpec to infer the output shape of pooling
layers with padding.

Signed-off-by: Jonghyun Park <jh1302.park@samsung.com>
contrib/enco/frontend/caffe/src/PoolingSpec.cpp
contrib/enco/frontend/caffe/src/PoolingSpec.h
contrib/enco/frontend/caffe/src/PoolingSpec.test.cpp

index 232ba4d..0a0cbaa 100644 (file)
@@ -58,6 +58,38 @@ uint32_t PoolingSpec::window_width(void) const
   return _param.kernel_size();
 }
 
+uint32_t PoolingSpec::vertical_pad(void) const // Padding that ofm_shape() applies along the height axis
+{
+  if (_param.has_pad_h()) // An explicit 'pad_h' is checked first, so it wins over 'pad'
+  {
+    return _param.pad_h();
+  }
+
+  if (_param.has_pad()) // Otherwise use the common 'pad' value when it is set
+  {
+    return _param.pad();
+  }
+
+  // NOTE Neither 'pad_h' nor 'pad' is set, so the padding falls back to the default of 0
+  return 0;
+}
+
+uint32_t PoolingSpec::horizontal_pad(void) const // Padding that ofm_shape() applies along the width axis
+{
+  if (_param.has_pad_w()) // An explicit 'pad_w' is checked first, so it wins over 'pad'
+  {
+    return _param.pad_w();
+  }
+
+  if (_param.has_pad()) // Otherwise use the common 'pad' value when it is set
+  {
+    return _param.pad();
+  }
+
+  // NOTE Neither 'pad_w' nor 'pad' is set, so the padding falls back to the default of 0
+  return 0;
+}
+
 uint32_t PoolingSpec::vertical_stride(void) const
 {
   if (_param.has_stride_h())
@@ -102,10 +134,12 @@ nncc::core::ADT::tensor::Shape PoolingSpec::ofm_shape(void) const
   res.dim(1) = _ifm_shape.dim(1);
 
   // H and W are derived from IFM, Window, and Padding
+  const auto effective_input_height = _ifm_shape.dim(2) + 2 * vertical_pad() - window_height();
+  const auto effective_input_width = _ifm_shape.dim(3) + 2 * horizontal_pad() - window_width();
   // TODO Remove the following asserts
-  assert((_ifm_shape.dim(2) - window_height()) % vertical_stride() == 0);
-  assert((_ifm_shape.dim(3) - window_width()) % horizontal_stride() == 0);
-  res.dim(2) = (_ifm_shape.dim(2) - window_height()) / vertical_stride() + 1;
-  res.dim(3) = (_ifm_shape.dim(3) - window_width()) / horizontal_stride() + 1;
+  assert(effective_input_height % vertical_stride() == 0);
+  assert(effective_input_width % horizontal_stride() == 0);
+  res.dim(2) = effective_input_height / vertical_stride() + 1;
+  res.dim(3) = effective_input_width / horizontal_stride() + 1;
   return res;
 }
index 13fd67e..39d611c 100644 (file)
@@ -28,6 +28,10 @@ public:
   uint32_t window_width(void) const;
 
 public:
+  uint32_t vertical_pad(void) const; // 'pad_h' if set, else 'pad' if set, else 0
+  uint32_t horizontal_pad(void) const; // 'pad_w' if set, else 'pad' if set, else 0
+
+public:
   uint32_t vertical_stride(void) const;
   uint32_t horizontal_stride(void) const;
 
index c1e3ad6..687589f 100644 (file)
@@ -160,6 +160,51 @@ TEST_F(PoolingSpecTest, kernel_size_same_for_all)
   }
 }
 
+TEST_F(PoolingSpecTest, pad_for_all) // A lone 'pad' (no pad_h / pad_w) must be reported for both axes
+{
+  const tensor::Shape ifm_shape{1, 3, 15, 15}; // ofm_shape() reads dim(2) / dim(3) as height / width
+
+  ::caffe::NetParameter param;
+  {
+    SequentialBuilder builder{&param};
+
+    builder.addInputLayer(ifm_shape);
+
+    // clang-format off
+    const char *prototxt = STRING(
+      name : "pool"
+      type : "Pooling"
+      bottom : "data"
+      top : "pool"
+      pooling_param {
+        pool: MAX
+        kernel_size : 3
+        pad: 2
+      }
+    );
+    // clang-format on
+
+    builder.addLayer(prototxt);
+  }
+
+  ::caffe::Net<float> net{param}; // Built from 'param'; its "pool" blob supplies the reference shape below
+
+  PoolingSpec spec{param.layer(1).pooling_param()}; // layer(1): presumably the "pool" layer added after the input layer - TODO confirm
+
+  spec.ifm_shape(ifm_shape);
+
+  ASSERT_EQ(spec.vertical_pad(), 2); // Only 'pad: 2' is given in the prototxt above
+  ASSERT_EQ(spec.horizontal_pad(), 2);
+
+  // Check 'ofm_shape' against the shape of the "pool" blob held by 'net'
+  {
+    auto expected = as_tensor_shape(net.blob_by_name("pool")->shape());
+    auto obtained = spec.ofm_shape();
+
+    ASSERT_EQ(expected, obtained);
+  }
+}
+
 TEST_F(PoolingSpecTest, stride_for_all)
 {
   const tensor::Shape ifm_shape{1, 3, 15, 15};