1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "opencv2/core/hal/intrin.hpp"
46 #include "op_halide.hpp"
47 #include "opencl_kernels_dnn.hpp"
53 using namespace cv::dnn::ocl4dnn;
// Scales an ROI coordinate and rounds the result to the nearest integer,
// with halfway cases rounded away from zero.
//
// f     - coordinate value to scale.
// scale - multiplier applied to f before rounding.
// Returns f * scale rounded half away from zero and converted to int.
//
// The rounding offset is selected from the sign of the scaled value, not from
// the sign of f alone. For non-negative scales the two signs agree, so the
// result is unchanged; for a negative scale this keeps the rounding symmetric
// instead of degrading to truncation toward zero.
static inline int scaleAndRoundRoi(float f, float scale)
{
    const float scaled = f * scale;
    return static_cast<int>(scaled + (scaled >= 0.f ? 0.5f : -0.5f));
}
// Concrete pooling layer. The constructor below parses the layer parameters;
// the methods that follow implement the forward passes and backend hooks.
65 class PoolingLayerImpl : public PoolingLayer
// Builds the layer from `params`.
// Either a "pool" entry (pooling kind plus kernel geometry) or at least one of
// "pooled_w" / "pooled_h" / "spatial_scale" selects how the layer is configured.
// NOTE(review): the assignments to `type` are not visible in this excerpt.
68 PoolingLayerImpl(const LayerParams& params)
72 globalPooling = false;
74 if (params.has("pool"))
// The pooling name is lower-cased before comparison, so matching is case-insensitive.
76 String pool = params.get<String>("pool").toLowerCase();
79 else if (pool == "ave")
81 else if (pool == "stochastic")
// Raises StsBadArg quoting the offending name (presumably the fall-through of
// the chain above - the final `else` is not visible here).
84 CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
// Reads kernel, pad, stride, padMode and the global-pooling flag from params.
85 getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
86 pad.height, pad.width, stride.height, stride.width, padMode);
// No "pool" entry: the presence of any ROI-style parameter selects this branch.
88 else if (params.has("pooled_w") || params.has("pooled_h") || params.has("spatial_scale"))
// Index tracking is switched off in this branch.
91 computeMaxIdx = false;
93 setParamsFrom(params);
// Defaults: ceil_mode = true, pooled_w = pooled_h = 1, spatial_scale = 1.
94 ceilMode = params.get<bool>("ceil_mode", true);
95 pooledSize.width = params.get<uint32_t>("pooled_w", 1);
96 pooledSize.height = params.get<uint32_t>("pooled_h", 1);
97 spatialScale = params.get<float>("spatial_scale", 1);
// OCL4DNN pooling primitive; assigned in forward_ocl().
101 Ptr<OCL4DNNPool<float> > poolOp;
// Finalization hook: requires at least one input and resolves the paddings
// from the spatial sizes of the first input and the first output.
104 void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
106 CV_Assert(!inputs.empty());
// Sizes are built as (width, height) from dimensions 3 and 2 of the blobs.
108 cv::Size inp(inputs[0]->size[3], inputs[0]->size[2]),
109 out(outputs[0].size[3], outputs[0].size[2]);
// `pad` is passed last and is presumably updated in place according to padMode;
// Size(1, 1) is a fixed argument (presumably the dilation - confirm against
// getConvPoolPaddings).
116 getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad);
119 virtual bool supportBackend(int backendId)
121 return backendId == DNN_BACKEND_DEFAULT ||
122 backendId == DNN_BACKEND_HALIDE && haveHalide() &&
123 (type == MAX || type == AVE && !pad.width && !pad.height);
// OpenCL forward pass built on the OCL4DNN pooling primitive.
// NOTE(review): the guards and return statements of this method are not
// visible in this excerpt; only the visible statements are described.
127 bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
129 std::vector<UMat> inputs;
130 std::vector<UMat> outputs;
132 inps.getUMatVector(inputs);
133 outs.getUMatVector(outputs);
// Describe the pooling problem using the shapes of the first input/output.
137 OCL4DNNPoolConfig config;
139 config.in_shape = shape(inputs[0]);
140 config.out_shape = shape(outputs[0]);
141 config.kernel = kernel;
143 config.stride = stride;
144 config.channels = inputs[0].size[1];
// Map the layer's pooling kind to the LIBDNN method; anything that is neither
// MAX nor AVE maps to the stochastic method.
145 config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
146 (type == AVE ? LIBDNN_POOLING_METHOD_AVE :
147 LIBDNN_POOLING_METHOD_STO);
148 poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
// MAX pooling uses two outputs per input (values at 2*ii, mask at 2*ii + 1);
// the other kinds use one output per input and an empty mask.
151 for (size_t ii = 0; ii < inputs.size(); ii++)
153 UMat& inpMat = inputs[ii];
154 int out_index = (type == MAX) ? 2 : 1;
155 UMat& outMat = outputs[out_index * ii];
156 UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
// The primitive is only invoked on buffers that start at offset 0.
158 CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
// NOTE(review): the handling of a failed Forward() call is not visible here.
160 if (!poolOp->Forward(inpMat, outMat, maskMat))
// Array-based forward entry point.
// Tries the OpenCL implementation when the preferable target is OpenCL and the
// performance check for an Intel default device passes; afterwards the generic
// Layer::forward_fallback() is invoked (presumably only when the OpenCL path
// did not handle the call - this depends on CV_OCL_RUN's semantics).
168 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
171 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
173 CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
174 OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
175 forward_ocl(inputs_arr, outputs_arr, internals_arr))
177 Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
// CPU forward pass on Mat blobs.
// Each visible case checks the expected number of inputs/outputs and then
// delegates to the matching pooling routine:
//   maxPooling - 1 input, 2 outputs (values and mask)
//   avePooling - 1 input, 1 output
//   roiPooling - 2 inputs (data and ROIs), 1 output
// NOTE(review): the dispatch on the pooling type is not visible in this
// excerpt; presumably any other type ends in the StsNotImplemented error.
180 void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
183 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
188 CV_Assert(inputs.size() == 1, outputs.size() == 2);
189 maxPooling(*inputs[0], outputs[0], outputs[1]);
192 CV_Assert(inputs.size() == 1, outputs.size() == 1);
193 avePooling(*inputs[0], outputs[0]);
196 CV_Assert(inputs.size() == 2, outputs.size() == 1);
197 roiPooling(*inputs[0], *inputs[1], outputs[0]);
200 CV_Error(Error::StsNotImplemented, "Not implemented");
// Creates the Halide backend node for this layer by delegating to the
// max- or average-pooling builder; an empty node is returned otherwise.
// NOTE(review): the first condition of the chain is not visible in this
// excerpt (presumably `type == MAX`).
205 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
208 return initMaxPoolingHalide(inputs);
209 else if (type == AVE)
210 return initAvePoolingHalide(inputs);
212 return Ptr<BackendNode>();
// Parallel loop body that performs the CPU pooling work for one stripe of the
// output; it is configured and launched through the static run() helper.
215 class PoolingInvoker : public ParallelLoopBody
// Source blob and ROI blob (the latter is only read by the ROI pooling paths).
218 const Mat* src, *rois;
220 Size kernel, stride, pad;
// Offset of every kernel tap relative to the top-left tap; filled in run().
223 std::vector<int> ofsbuf;
// Default state: null blob pointers, zero stripes, index tracking off,
// MAX pooling, zero spatial scale.
227 PoolingInvoker() : src(0), rois(0), dst(0), mask(0), nstripes(0),
228 computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}
// Validates the blobs, configures an invoker and runs it in parallel.
//
// src/dst must be continuous 4-dimensional CV_32F blobs with equal channel
// counts. The batch sizes must match, except for ROI pooling where dst's
// first dimension may instead equal the number of rows in `rois`. A non-empty
// mask must have the same type as src and the same size as dst.
// NOTE(review): the assignments of the blob pointers and of kernel/stride/pad
// to the invoker are not visible in this excerpt.
230 static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
231 Size stride, Size pad, int poolingType, float spatialScale,
232 bool computeMaxIdx, int nstripes)
234 CV_Assert(src.isContinuous() && dst.isContinuous() &&
235 src.type() == CV_32F && src.type() == dst.type() &&
236 src.dims == 4 && dst.dims == 4 &&
237 (poolingType == ROI && dst.size[0] == rois.size[0] ||
238 src.size[0] == dst.size[0]) && src.size[1] == dst.size[1] &&
239 (mask.empty() || (mask.type() == src.type() && mask.size == dst.size)));
250 p.nstripes = nstripes;
251 p.computeMaxIdx = computeMaxIdx;
252 p.poolingType = poolingType;
253 p.spatialScale = spatialScale;
// Precompute, for each kernel tap (i, j), its element offset inside one input
// plane relative to the window's top-left corner: i * input_width + j.
257 p.ofsbuf.resize(kernel.width*kernel.height);
258 for( int i = 0; i < kernel.height; i++ )
259 for( int j = 0; j < kernel.width; j++ )
260 p.ofsbuf[i*kernel.width + j] = src.size[3]*i + j;
// One range unit per stripe; operator() maps a stripe to its output elements.
263 parallel_for_(Range(0, nstripes), p, nstripes);
// Computes the output elements that belong to stripes [r.start, r.end).
// The output blob is treated as a flat array of dst->total() elements that is
// split into `nstripes` chunks of ceil(total / nstripes) elements each.
// NOTE(review): many lines of this method (braces, preprocessor guards, a few
// statements) are not visible in this excerpt; comments describe only what the
// visible statements establish and hedge the rest.
266 void operator()(const Range& r) const
268 int channels = dst->size[1], width = dst->size[3], height = dst->size[2];
269 int inp_width = src->size[3], inp_height = src->size[2];
270 size_t total = dst->total();
271 size_t stripeSize = (total + nstripes - 1)/nstripes;
272 size_t stripeStart = r.start*stripeSize;
// The last stripe is clipped to the number of output elements.
273 size_t stripeEnd = std::min(r.end*stripeSize, total);
274 int kernel_w = kernel.width, kernel_h = kernel.height;
275 int pad_w = pad.width, pad_h = pad.height;
276 int stride_w = stride.width, stride_h = stride.height;
277 bool compMaxIdx = computeMaxIdx;
// Constants for the vectorized paths: idx00 holds the input-index offsets of
// four consecutive outputs (multiples of stride_w).
280 const int* ofsptr = &ofsbuf[0];
281 v_float32x4 idx00(0.f, (float)stride_w, (float)(stride_w*2), (float)(stride_w*3));
282 v_float32x4 ones = v_setall_f32(1.f);
283 v_float32x4 idx_delta = v_setall_f32((float)(inp_width - kernel_w));
// Walk the stripe; the increment of ofs0 is not visible here (presumably it
// advances by the number of outputs handled in the current row).
286 for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
// Decompose the flat offset into (n, c, y0, x0); the divisions between these
// steps are not visible in this excerpt.
289 int x0 = (int)(ofs % width);
291 int y0 = (int)(ofs % height);
293 int c = (int)(ofs % channels);
294 int n = (int)(ofs / channels);
297 const float *srcData;
298 if (poolingType == ROI)
// ROI pooling: the row range comes from ROI fields 2 and 4 scaled by
// spatialScale; the ROI height is at least 1 and is divided evenly among the
// output rows.
300 const float *roisData = rois->ptr<float>(n);
301 int ystartROI = scaleAndRoundRoi(roisData[2], spatialScale);
302 int yendROI = scaleAndRoundRoi(roisData[4], spatialScale);
303 int roiHeight = std::max(yendROI - ystartROI + 1, 1);
304 float roiRatio = (float)roiHeight / height;
306 ystart = ystartROI + y0 * roiRatio;
307 yend = ystartROI + std::ceil((y0 + 1) * roiRatio);
// ROI field 0 selects the source batch item; only its upper bound is checked.
309 CV_Assert(roisData[0] < src->size[0]);
310 srcData = src->ptr<float>(roisData[0], c);
// Regular pooling: the window rows follow from stride and padding; yend may
// extend into the bottom padding at this point.
314 ystart = y0 * stride_h - pad_h;
315 yend = min(ystart + kernel_h, inp_height + pad_h);
316 srcData = src->ptr<float>(n, c);
// ydelta keeps the window height including padding (used as part of the
// averaging divisor below); the range is then clipped to the image rows.
318 int ydelta = yend - ystart;
319 ystart = max(ystart, 0);
320 yend = min(yend, inp_height);
321 float *dstData = dst->ptr<float>(n, c, y0);
322 float *dstMaskData = mask->data ? mask->ptr<float>(n, c, y0) : 0;
// Number of outputs left in this output row that also belong to this stripe
// (presumably x1 = x0 + delta; its definition is not visible here).
324 int delta = std::min((int)(stripeEnd - ofs0), width - x0);
// ---- MAX pooling ----
328 if( poolingType == MAX)
329 for( ; x0 < x1; x0++ )
// Horizontal window, clipped to the image on both sides.
331 int xstart = x0 * stride_w - pad_w;
332 int xend = min(xstart + kernel_w, inp_width);
333 xstart = max(xstart, 0);
// Empty window: the mask entry (when tracked) is set to -1; the value written
// to dstData in this case is not visible here.
334 if (xstart >= xend || ystart >= yend)
337 if (compMaxIdx && dstMaskData)
338 dstMaskData[x0] = -1;
// Vectorized path: handle 8 consecutive outputs at once when the first window
// is not clipped on the left and the eighth window ends inside the row.
342 if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
// Variant with index tracking: idx0/idx1 carry, as floats, the flat input
// index of the current tap for each of the 8 lanes.
346 v_float32x4 max_val0 = v_setall_f32(-FLT_MAX);
347 v_float32x4 max_val1 = max_val0;
348 v_float32x4 max_idx0 = v_setall_f32(-1.f);
349 v_float32x4 max_idx1 = max_idx0;
350 int index0 = ystart * inp_width + xstart;
351 v_float32x4 idx0 = idx00 + v_setall_f32((float)index0);
352 v_float32x4 idx1 = idx0 + v_setall_f32((float)(stride_w*4));
354 for (int y = ystart; y < yend; ++y)
356 for (int x = xstart; x < xend; ++x, idx0 += ones, idx1 += ones)
// Gather the same tap for 8 neighbouring windows (spaced stride_w apart).
358 const int index = y * inp_width + x;
359 v_float32x4 v0(srcData[index], srcData[index + stride_w],
360 srcData[index + stride_w*2], srcData[index + stride_w*3]);
361 v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],
362 srcData[index + stride_w*6], srcData[index + stride_w*7]);
// Strict comparison: the index is replaced only by a strictly larger value,
// so the first occurrence of the maximum is kept.
363 max_idx0 = v_select(v0 > max_val0, idx0, max_idx0);
364 max_idx1 = v_select(v1 > max_val1, idx1, max_idx1);
365 max_val0 = v_max(max_val0, v0);
366 max_val1 = v_max(max_val1, v1);
371 v_store(dstData + x0, max_val0);
372 v_store(dstData + x0 + 4, max_val1);
375 v_store(dstMaskData + x0, max_idx0);
376 v_store(dstMaskData + x0 + 4, max_idx1);
// Variant without index tracking.
382 v_float32x4 max_val0 = v_setall_f32(-FLT_MAX);
383 v_float32x4 max_val1 = max_val0;
// Window not clipped vertically: iterate over the precomputed tap offsets.
385 if( yend - ystart == kernel_h )
387 const float* srcData1 = srcData + ystart*inp_width + xstart;
// Contiguous loads of 8 neighbouring inputs per tap (presumably the
// stride_w == 1 case; the guarding condition is not visible here).
389 for (int k = 0; k < kernel_w*kernel_h; k++)
391 int index = ofsptr[k];
392 v_float32x4 v0 = v_load(srcData1 + index);
393 v_float32x4 v1 = v_load(srcData1 + index + 4);
394 max_val0 = v_max(max_val0, v0);
395 max_val1 = v_max(max_val1, v1);
// stride_w == 2: load 16 contiguous inputs and keep the even-indexed elements
// of each pair of vectors via the SSE shuffle.
398 else if( stride_w == 2 )
399 for (int k = 0; k < kernel_w*kernel_h; k++)
401 int index = ofsptr[k];
402 v_float32x4 v00 = v_load(srcData1 + index), v01 = v_load(srcData1 + index + 4);
403 v_float32x4 v0(_mm_shuffle_ps(v00.val, v01.val, _MM_SHUFFLE(2, 0, 2, 0)));
404 v_float32x4 v10 = v_load(srcData1 + index + 8), v11 = v_load(srcData1 + index + 12);
405 v_float32x4 v1(_mm_shuffle_ps(v10.val, v11.val, _MM_SHUFFLE(2, 0, 2, 0)));
406 max_val0 = v_max(max_val0, v0);
407 max_val1 = v_max(max_val1, v1);
// Remaining strides: gather element by element.
411 for (int k = 0; k < kernel_w*kernel_h; k++)
413 int index = ofsptr[k];
414 v_float32x4 v0(srcData1[index], srcData1[index + stride_w],
415 srcData1[index + stride_w*2], srcData1[index + stride_w*3]);
416 v_float32x4 v1(srcData1[index + stride_w*4], srcData1[index + stride_w*5],
417 srcData1[index + stride_w*6], srcData1[index + stride_w*7]);
418 max_val0 = v_max(max_val0, v0);
419 max_val1 = v_max(max_val1, v1);
// Window clipped vertically: explicit loops over the valid rows and columns.
424 for (int y = ystart; y < yend; ++y)
426 for (int x = xstart; x < xend; ++x)
428 const int index = y * inp_width + x;
429 v_float32x4 v0(srcData[index], srcData[index + stride_w],
430 srcData[index + stride_w*2], srcData[index + stride_w*3]);
431 v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],
432 srcData[index + stride_w*6], srcData[index + stride_w*7]);
433 max_val0 = v_max(max_val0, v0);
434 max_val1 = v_max(max_val1, v1);
438 v_store(dstData + x0, max_val0);
439 v_store(dstData + x0 + 4, max_val1);
// Scalar path with index tracking (the comparison that updates max_val and
// max_index is not visible in this excerpt).
446 float max_val = -FLT_MAX;
450 for (int y = ystart; y < yend; ++y)
451 for (int x = xstart; x < xend; ++x)
453 const int index = y * inp_width + x;
454 float val = srcData[index];
462 dstData[x0] = max_val;
464 dstMaskData[x0] = max_index;
// Scalar path without index tracking.
468 for (int y = ystart; y < yend; ++y)
469 for (int x = xstart; x < xend; ++x)
471 const int index = y * inp_width + x;
472 float val = srcData[index];
473 max_val = std::max(max_val, val);
476 dstData[x0] = max_val;
// ---- AVE pooling ----
480 else if (poolingType == AVE)
482 for( ; x0 < x1; x0++ )
// xdelta, like ydelta, is taken before clipping to the image, so the divisor
// counts the padded part of the window as well.
484 int xstart = x0 * stride_w - pad_w;
485 int xend = min(xstart + kernel_w, inp_width + pad_w);
486 int xdelta = xend - xstart;
487 xstart = max(xstart, 0);
488 xend = min(xend, inp_width);
489 float inv_kernel_area = 1.f/(ydelta*xdelta);
// Vectorized path for 8 consecutive outputs (same condition as for MAX); the
// accumulation statements are not visible in this excerpt.
492 if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
494 v_float32x4 sum_val0 = v_setzero_f32(), sum_val1 = v_setzero_f32();
495 v_float32x4 ikarea = v_setall_f32(inv_kernel_area);
497 for (int y = ystart; y < yend; ++y)
499 for (int x = xstart; x < xend; ++x)
501 const int index = y * inp_width + x;
502 v_float32x4 v0(srcData[index], srcData[index + stride_w],
503 srcData[index + stride_w*2], srcData[index + stride_w*3]);
504 v_float32x4 v1(srcData[index + stride_w*4], srcData[index + stride_w*5],
505 srcData[index + stride_w*6], srcData[index + stride_w*7]);
510 v_store(dstData + x0, sum_val0*ikarea);
511 v_store(dstData + x0 + 4, sum_val1*ikarea);
// Scalar path: the sum over the valid window is scaled by 1 / (ydelta * xdelta).
518 for (int y = ystart; y < yend; ++y)
519 for (int x = xstart; x < xend; ++x)
521 const int index = y * inp_width + x;
522 float val = srcData[index];
526 dstData[x0] = sum_val*inv_kernel_area;
// ---- ROI pooling (presumably the remaining branch; its condition is not
// visible here) ----
// The column range comes from ROI fields 1 and 3, mirroring the row handling.
532 const float *roisData = rois->ptr<float>(n);
533 int xstartROI = scaleAndRoundRoi(roisData[1], spatialScale);
534 int xendROI = scaleAndRoundRoi(roisData[3], spatialScale);
535 int roiWidth = std::max(xendROI - xstartROI + 1, 1);
536 float roiRatio = (float)roiWidth / width;
537 for( ; x0 < x1; x0++ )
539 int xstart = xstartROI + x0 * roiRatio;
540 int xend = xstartROI + std::ceil((x0 + 1) * roiRatio);
541 xstart = max(xstart, 0);
542 xend = min(xend, inp_width);
// Empty bin: same mask handling as in the MAX branch.
543 if (xstart >= xend || ystart >= yend)
546 if (compMaxIdx && dstMaskData)
547 dstMaskData[x0] = -1;
// The output is the maximum over the bin.
550 float max_val = -FLT_MAX;
551 for (int y = ystart; y < yend; ++y)
552 for (int x = xstart; x < xend; ++x)
554 const int index = y * inp_width + x;
555 float val = srcData[index];
556 max_val = std::max(max_val, val);
558 dstData[x0] = max_val;
// Max pooling of `src` into `dst`, with `mask` passed along as the index blob.
// Uses one stripe per available thread.
// NOTE(review): the declaration of `rois` is not visible in this excerpt
// (presumably an empty local Mat).
565 void maxPooling(Mat &src, Mat &dst, Mat &mask)
567 const int nstripes = getNumThreads();
569 PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, type, spatialScale, computeMaxIdx, nstripes);
// Average pooling of `src` into `dst`, using one stripe per available thread.
// NOTE(review): the declarations of `rois` and `mask` are not visible in this
// excerpt (presumably empty local Mats).
572 void avePooling(Mat &src, Mat &dst)
574 const int nstripes = getNumThreads();
576 PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, type, spatialScale, computeMaxIdx, nstripes);
// ROI pooling of `src` over the regions listed in `rois`, written to `dst`.
// Uses one stripe per available thread.
// NOTE(review): the declaration of `mask` is not visible in this excerpt
// (presumably an empty local Mat).
579 void roiPooling(const Mat &src, const Mat &rois, Mat &dst)
581 const int nstripes = getNumThreads();
583 PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, type, spatialScale, computeMaxIdx, nstripes);
// Builds the Halide pipeline for max pooling.
// The resulting function yields a pair per output element: the maximum value
// and, cast to float, the flat index (row * input_width + column) of the
// maximum inside the input plane.
// When Halide support is compiled out (presumably - the opening guard is not
// visible here) an empty node is returned.
586 virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
589 Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
590 const int inWidth = inputBuffer.width();
591 const int inHeight = inputBuffer.height();
593 Halide::Var x("x"), y("y"), c("c"), n("n");
594 Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
// Reduction domain over the kernel window.
595 Halide::RDom r(0, kernel.width, 0, kernel.height);
// With padding, sample coordinates are clamped to the image on both sides;
// without padding only the upper bound needs limiting.
597 if (pad.width || pad.height)
599 kx = clamp(x * stride.width + r.x - pad.width, 0, inWidth - 1);
600 ky = clamp(y * stride.height + r.y - pad.height, 0, inHeight - 1);
604 kx = min(x * stride.width + r.x, inWidth - 1);
605 ky = min(y * stride.height + r.y, inHeight - 1);
608 // Halide::argmax returns tuple (r.x, r.y, max).
609 Halide::Tuple res = argmax(inputBuffer(kx, ky, c, n));
611 // Compute offset from argmax in range [0, kernel_size).
612 Halide::Expr max_index;
// Convert the in-window argmax back to input coordinates with the same
// clamping as above, then flatten to row * inWidth + column.
613 if (pad.width || pad.height)
615 max_index = clamp(y * stride.height + res[1] - pad.height,
616 0, inHeight - 1) * inWidth +
617 clamp(x * stride.width + res[0] - pad.width,
622 max_index = min(y * stride.height + res[1], inHeight - 1) * inWidth +
623 min(x * stride.width + res[0], inWidth - 1);
625 top(x, y, c, n) = { res[2], Halide::cast<float>(max_index) };
626 return Ptr<BackendNode>(new HalideBackendNode(top));
627 #endif // HAVE_HALIDE
628 return Ptr<BackendNode>();
// Builds the Halide pipeline for average pooling.
// Only the case where the kernel tiles the input exactly is supported; any
// other geometry raises StsNotImplemented.
// When Halide support is compiled out (presumably - the opening guard is not
// visible here) an empty node is returned.
631 virtual Ptr<BackendNode> initAvePoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
634 Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
636 const int inW = inputBuffer.width(), inH = inputBuffer.height();
// A non-zero remainder means the last window would be partial.
637 if ((inW - kernel.width) % stride.width || (inH - kernel.height) % stride.height)
639 CV_Error(cv::Error::StsNotImplemented,
640 "Halide backend for average pooling with partial "
641 "kernels is not implemented");
// Every window is complete, so the divisor is always the full kernel area.
644 const float norm = 1.0f / (kernel.width * kernel.height);
646 Halide::Var x("x"), y("y"), c("c"), n("n");
647 Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
648 Halide::RDom r(0, kernel.width, 0, kernel.height);
649 top(x, y, c, n) = sum(
650 inputBuffer(x * stride.width + r.x,
651 y * stride.height + r.y, c, n)) * norm;
652 return Ptr<BackendNode>(new HalideBackendNode(top));
653 #endif // HAVE_HALIDE
654 return Ptr<BackendNode>();
// Applies a manual Halide schedule to the pooling function.
// Non-CPU targets are handed to the base-class scheduler. For the CPU target
// the schedule is picked from the output size: outputs narrower or shorter
// than 8 fuse whole dimensions into one `tile` variable, larger outputs are
// first split into blocks of 8 along x and y.
// NOTE(review): the conditions selecting between the channel-split and
// non-split variants, and the remainder of each schedule chain, are not
// visible in this excerpt.
657 virtual void applyHalideScheduler(Ptr<BackendNode>& node,
658 const std::vector<Mat*> &inputs,
659 const std::vector<Mat> &outputs,
663 if (targetId != DNN_TARGET_CPU)
665 Layer::applyHalideScheduler(node, inputs, outputs, targetId);
668 Halide::Var x("x"), y("y"), c("c"), n("n"), tile("tile"),
669 xi("xi"), yi("yi"), ci("ci"), xo("xo"), yo("yo"), co("co");
// The schedule is applied to the last function stored in the backend node.
670 Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();
672 int outW, outH, outC, outN;
673 getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
// Small outputs: no spatial tiling.
675 if (outW < 8 || outH < 8)
678 top.split(c, co, ci, 8)
679 .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
684 top.fuse(y, c, tile).fuse(n, tile, tile)
// Larger outputs: 8x8 spatial blocks, optionally with channels split by 8.
693 top.split(x, xo, xi, 8).split(y, yo, yi, 8).split(c, co, ci, 8)
694 .fuse(xo, yo, tile).fuse(co, tile, tile).fuse(n, tile, tile)
698 top.split(x, xo, xi, 8).split(y, yo, yi, 8)
699 .fuse(xo, yo, tile).fuse(c, tile, tile).fuse(n, tile, tile)
703 #endif // HAVE_HALIDE
// Computes the output blob shapes from the input shapes.
// The output keeps the batch and channel dimensions of the first input (for
// ROI pooling the batch dimension becomes the number of ROIs) and gets the
// spatial size determined below. MAX pooling produces two outputs of the same
// shape; every other kind produces one.
// NOTE(review): the first branch of the size selection and the return
// statement are not visible in this excerpt.
706 bool getMemoryShapes(const std::vector<MatShape> &inputs,
707 const int requiredOutputs,
708 std::vector<MatShape> &outputs,
709 std::vector<MatShape> &internals) const
711 CV_Assert(inputs.size() != 0);
// `in` is (width, height) of the first input.
712 Size in(inputs[0][3], inputs[0][2]), out;
// ROI pooling: the output size is the configured pooled size.
719 else if (type == ROI)
721 out.height = pooledSize.height;
722 out.width = pooledSize.width;
// Explicit padding (no padMode): out = 1 + round((in + 2*pad - kernel) / stride),
// rounding up when ceilMode is set and down otherwise.
724 else if (padMode.empty())
726 float height = (float)(in.height + 2 * pad.height - kernel.height) / stride.height;
727 float width = (float)(in.width + 2 * pad.width - kernel.width) / stride.width;
728 out.height = 1 + (ceilMode ? ceil(height) : floor(height));
729 out.width = 1 + (ceilMode ? ceil(width) : floor(width));
731 if (pad.height || pad.width)
733 // If we have padding, ensure that the last pooling starts strictly
734 // inside the image (instead of at the padding); otherwise clip the last.
// The statements that shrink `out` are not visible here; the assertions below
// confirm the resulting invariant.
735 if ((out.height - 1) * stride.height >= in.height + pad.height)
737 if ((out.width - 1) * stride.width >= in.width + pad.width)
739 CV_Assert((out.height - 1) * stride.height < in.height + pad.height);
740 CV_Assert((out.width - 1) * stride.width < in.width + pad.width);
// Named padding mode: the helper derives the output size.
745 getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);
748 int dims[] = {inputs[0][0], inputs[0][1], out.height, out.width};
// Presumably guarded by a check for ROI pooling (the condition is not visible):
// the second input supplies the proposals.
751 CV_Assert(inputs.size() == 2);
752 dims[0] = inputs[1][0]; // Number of proposals;
754 outputs.assign(type == MAX ? 2 : 1, shape(dims));
// Estimates the operation count of the layer from its output shapes.
// Two per-output formulas are visible: total * kernel_area and
// total * (kernel_area + 1).
// NOTE(review): the accumulator declaration, the conditions choosing between
// the two formulas and the return statement are not visible in this excerpt.
// NOTE(review): the loop compares a signed `int` index with the unsigned
// result of outputs.size().
758 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
759 const std::vector<MatShape> &outputs) const
761 (void)inputs; // suppress unused variable warning
764 for(int i = 0; i < outputs.size(); i++)
769 flops += total(outputs[i])*kernel.area();
773 flops += total(outputs[i])*(kernel.area() + 1);
788 Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
790 return Ptr<PoolingLayer>(new PoolingLayerImpl(params));