Add a flag to manage average pooling with padding
author Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Wed, 14 Feb 2018 13:56:31 +0000 (16:56 +0300)
committer Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Wed, 14 Feb 2018 13:56:31 +0000 (16:56 +0300)
modules/dnn/include/opencv2/dnn/all_layers.hpp
modules/dnn/src/layers/pooling_layer.cpp
modules/dnn/src/tensorflow/tf_importer.cpp
modules/dnn/src/torch/torch_importer.cpp
modules/dnn/test/test_layers.cpp

index 0704f6b..4219108 100644 (file)
@@ -239,6 +239,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         bool computeMaxIdx;
         String padMode;
         bool ceilMode;
+        // If true, average pooling with padding divides every output region by the
+        // whole kernel area. Otherwise, zero-padded values are excluded and the sum
+        // is divided by the number of real values.
+        bool avePoolPaddedArea;
         // ROIPooling parameters.
         Size pooledSize;
         float spatialScale;
index 7cd8a34..2f5f041 100644 (file)
@@ -106,6 +106,7 @@ public:
         setParamsFrom(params);
         ceilMode = params.get<bool>("ceil_mode", true);
         spatialScale = params.get<float>("spatial_scale", 1);
+        avePoolPaddedArea = params.get<bool>("ave_pool_padded_area", true);
     }
 
 #ifdef HAVE_OPENCL
@@ -259,7 +260,7 @@ public:
         const Mat* src, *rois;
         Mat *dst, *mask;
         Size kernel, stride, pad;
-        String padMode;
+        bool avePoolPaddedArea;
         int nstripes;
         bool computeMaxIdx;
         std::vector<int> ofsbuf;
@@ -270,7 +271,7 @@ public:
                            computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}
 
         static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
-                        Size stride, Size pad, String padMode, int poolingType, float spatialScale,
+                        Size stride, Size pad, bool avePoolPaddedArea, int poolingType, float spatialScale,
                         bool computeMaxIdx, int nstripes)
         {
             CV_Assert(src.isContinuous(), dst.isContinuous(),
@@ -289,7 +290,7 @@ public:
             p.kernel = kernel;
             p.stride = stride;
             p.pad = pad;
-            p.padMode = padMode;
+            p.avePoolPaddedArea = avePoolPaddedArea;
             p.nstripes = nstripes;
             p.computeMaxIdx = computeMaxIdx;
             p.poolingType = poolingType;
@@ -369,6 +370,7 @@ public:
                     yend = min(ystart + kernel_h, inp_height + pad_h);
                     srcData = src->ptr<float>(n, c);
                 }
+                int ydelta = yend - ystart;
                 ystart = max(ystart, 0);
                 yend = min(yend, inp_height);
                 float *dstData = dst->ptr<float>(n, c, y0);
@@ -532,14 +534,14 @@ public:
                     }
                 else if (poolingType == AVE)
                 {
-                    bool isSamePad = padMode == "SAME";
                     for( ; x0 < x1; x0++ )
                     {
                         int xstart = x0 * stride_w - pad_w;
                         int xend = min(xstart + kernel_w, inp_width + pad_w);
+                        int xdelta = xend - xstart;
                         xstart = max(xstart, 0);
                         xend = min(xend, inp_width);
-                        float inv_kernel_area = isSamePad ? (yend - ystart) * (xend - xstart) : kernel.area();
+                        float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta : ((yend - ystart) * (xend - xstart));
                         inv_kernel_area = 1.0 / inv_kernel_area;
 #if CV_SIMD128
                         if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
@@ -651,21 +653,21 @@ public:
     {
         const int nstripes = getNumThreads();
         Mat rois;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     void avePooling(Mat &src, Mat &dst)
     {
         const int nstripes = getNumThreads();
         Mat rois, mask;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     void roiPooling(const Mat &src, const Mat &rois, Mat &dst)
     {
         const int nstripes = getNumThreads();
         Mat mask;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
index bc112d3..274a234 100644 (file)
@@ -1078,6 +1078,7 @@ void TFImporter::populateNet(Net dstNet)
         else if (type == "AvgPool")
         {
             layerParams.set("pool", "ave");
+            layerParams.set("ave_pool_padded_area", false);
 
             setKSize(layerParams, layer);
             setStrides(layerParams, layer);
index b664d79..db660ff 100644 (file)
@@ -559,7 +559,11 @@ struct TorchImporter
                     layerParams.set("indices_blob_id", tensorParams["indices"].first);
                 }
                 if (nnName == "SpatialAveragePooling")
+                {
                     layerParams.set("pool", "AVE");
+                    layerParams.set("ave_pool_padded_area", scalarParams.has("count_include_pad") &&
+                                                            scalarParams.get<bool>("count_include_pad"));
+                }
                 convertTorchKernelsParams(scalarParams, layerParams);
 
                 CV_Assert(scalarParams.has("ceil_mode"));
index 856f06c..784c13c 100644 (file)
@@ -806,4 +806,29 @@ INSTANTIATE_TEST_CASE_P(Layer_Test, Crop, Combine(
 /*offset value*/        Values(3, 4)
 ));
 
+// Check that, by default, the average pooling layer does not count zero-padded
+// values in the normalization area.
+TEST(Layer_Test_Average_pooling_kernel_area, Accuracy)
+{
+    LayerParams lp;
+    lp.name = "testAvePool";
+    lp.type = "Pooling";
+    lp.set("kernel_size", 2);
+    lp.set("stride", 2);
+    lp.set("pool", "AVE");
+
+    Net net;
+    net.addLayerToPrev(lp.name, lp.type, lp);
+    // 1 2 | 3
+    // 4 5 | 6
+    // ----+--
+    // 7 8 | 9
+    Mat inp = (Mat_<float>(3, 3) << 1, 2, 3, 4, 5, 6, 7, 8, 9);
+    Mat target = (Mat_<float>(2, 2) << (1 + 2 + 4 + 5) / 4.f, (3 + 6) / 2.f, (7 + 8) / 2.f, 9);
+    Mat inpBlob = blobFromImage(inp);  // Build the input blob once and reuse it.
+    net.setInput(inpBlob);
+    Mat out = net.forward();
+    normAssert(out, blobFromImage(target));
+}
+
 }} // namespace