1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
// Implementation of the Max Unpooling layer.
12 #include "../precomp.hpp"
13 #include "layers_common.hpp"
14 #include "../op_cuda.hpp"
15 #include "../op_halide.hpp"
16 #include <opencv2/dnn/shape_utils.hpp>
19 #include "../cuda4dnn/primitives/max_unpooling.hpp"
20 using namespace cv::dnn::cuda4dnn;
28 class MaxUnpoolLayerImpl CV_FINAL : public MaxUnpoolLayer
31 MaxUnpoolLayerImpl(const LayerParams& params)
33 setParamsFrom(params);
34 poolKernel = Size(params.get<int>("pool_k_w"), params.get<int>("pool_k_h"));
35 poolPad = Size(params.get<int>("pool_pad_w"), params.get<int>("pool_pad_h"));
36 poolStride = Size(params.get<int>("pool_stride_w"), params.get<int>("pool_stride_h"));
39 virtual bool supportBackend(int backendId) CV_OVERRIDE
41 return backendId == DNN_BACKEND_OPENCV ||
42 backendId == DNN_BACKEND_CUDA ||
43 (backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
46 bool getMemoryShapes(const std::vector<MatShape> &inputs,
47 const int requiredOutputs,
48 std::vector<MatShape> &outputs,
49 std::vector<MatShape> &internals) const CV_OVERRIDE
51 CV_Assert(inputs.size() == 2 || inputs.size() == 3);
52 CV_Assert(total(inputs[0]) == total(inputs[1]));
55 if (inputs.size() == 2)
58 outShape[2] = (outShape[2] - 1) * poolStride.height + poolKernel.height - 2 * poolPad.height;
59 outShape[3] = (outShape[3] - 1) * poolStride.width + poolKernel.width - 2 * poolPad.width;
65 outputs.push_back(outShape);
70 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
73 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
75 if (inputs_arr.depth() == CV_16S)
77 forward_fallback(inputs_arr, outputs_arr, internals_arr);
81 std::vector<Mat> inputs, outputs;
82 inputs_arr.getMatVector(inputs);
83 outputs_arr.getMatVector(outputs);
85 CV_Assert(inputs.size() == 2 || inputs.size() == 3);
86 Mat& input = inputs[0];
87 Mat& indices = inputs[1];
89 CV_Assert(input.total() == indices.total());
90 CV_Assert(input.size[0] == 1);
91 CV_Assert(input.isContinuous());
93 for(int i_n = 0; i_n < outputs.size(); i_n++)
95 Mat& outBlob = outputs[i_n];
97 CV_Assert(input.size[1] == outBlob.size[1]);
98 int outPlaneTotal = outBlob.size[2]*outBlob.size[3];
100 for (int i_c = 0; i_c < input.size[1]; i_c++)
102 Mat outPlane = getPlane(outBlob, 0, i_c);
103 int wh_area = input.size[2]*input.size[3];
104 const float* inptr = input.ptr<float>(0, i_c);
105 const float* idxptr = indices.ptr<float>(0, i_c);
106 float* outptr = outPlane.ptr<float>();
108 for(int i_wh = 0; i_wh < wh_area; i_wh++)
110 int index = idxptr[i_wh];
111 if (!(0 <= index && index < outPlaneTotal))
114 << "i_n=" << i_n << std::endl
115 << "i_c=" << i_c << std::endl
116 << "i_wh=" << i_wh << std::endl
117 << "index=" << index << std::endl
118 << "maxval=" << inptr[i_wh] << std::endl
119 << "outPlaneTotal=" << outPlaneTotal << std::endl
120 << "input.size=" << input.size << std::endl
121 << "indices.size=" << indices.size << std::endl
122 << "outBlob=" << outBlob.size << std::endl
124 CV_Assert(0 <= index && index < outPlaneTotal);
126 outptr[index] = inptr[i_wh];
133 Ptr<BackendNode> initCUDA(
135 const std::vector<Ptr<BackendWrapper>>& inputs,
136 const std::vector<Ptr<BackendWrapper>>& outputs
139 auto context = reinterpret_cast<csl::CSLContext*>(context_);
141 cuda4dnn::MaxUnpoolingConfiguration config;
142 auto& window_size = config.window_size;
143 window_size.resize(2);
144 window_size[0] = poolKernel.height;
145 window_size[1] = poolKernel.width;
147 auto& strides = config.strides;
149 strides[0] = poolStride.height;
150 strides[1] = poolStride.width;
152 auto& pads_begin = config.pads_begin;
153 pads_begin.resize(2);
154 pads_begin[0] = poolPad.height;
155 pads_begin[1] = poolPad.width;
157 return make_cuda_node<cuda4dnn::MaxUnpoolingOp>(preferableTarget, std::move(context->stream), config);
161 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
164 // Meaningless operation if false because if kernel > stride
165 // it is not deterministic and if kernel < stride we just
166 // skip a part of input data (you'd better change your model).
167 if (poolKernel.width != poolStride.width ||
168 poolKernel.height != poolStride.height)
169 CV_Error(cv::Error::StsNotImplemented,
170 "Halide backend for maximum unpooling "
171 "is not support cases when kernel != stride");
173 Halide::Var x("x"), y("y"), c("c"), n("n");
174 Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
175 Halide::Buffer<float> inputBuffer = halideBuffer(input[0]);
176 Halide::Buffer<float> indices = halideBuffer(input[1]);
178 Halide::Expr pooledX = x / poolKernel.width;
179 Halide::Expr pooledY = y / poolKernel.height;
181 const int outW = inputBuffer.width() * poolKernel.width;
182 top(x, y, c, n) = select(y * outW + x == indices(pooledX, pooledY, c, n),
183 inputBuffer(pooledX, pooledY, c, n), 0.0f);
184 return Ptr<BackendNode>(new HalideBackendNode(top));
185 #endif // HAVE_HALIDE
186 return Ptr<BackendNode>();
190 Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
192 return Ptr<MaxUnpoolLayer>(new MaxUnpoolLayerImpl(params));