// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

/*
Implementation of the Max Unpooling layer.
*/
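
/*
The layer reverses a max pooling step: it scatters each pooled value back to the
position recorded in the accompanying argmax indices blob and leaves every other
output element at zero.
*/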

#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include <opencv2/dnn/shape_utils.hpp>

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/max_unpooling.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv
{
namespace dnn
{

class MaxUnpoolLayerImpl CV_FINAL : public MaxUnpoolLayer
{
public:
    MaxUnpoolLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
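        // The pooling geometry is expected to mirror the max pooling layer that
        // produced the indices; otherwise the recorded indices do not map onto
        // the reconstructed output.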
        poolKernel = Size(params.get<int>("pool_k_w"), params.get<int>("pool_k_h"));
        poolPad = Size(params.get<int>("pool_pad_w"), params.get<int>("pool_pad_h"));
        poolStride = Size(params.get<int>("pool_stride_w"), params.get<int>("pool_stride_h"));
    }
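
    /*
    A minimal usage sketch, assuming the parameters are set by hand rather than
    filled in by a model importer; the values are illustrative, mirroring a
    2x2 / stride-2 pooling with no padding:

        LayerParams lp;
        lp.set("pool_k_w", 2);      lp.set("pool_k_h", 2);
        lp.set("pool_stride_w", 2); lp.set("pool_stride_h", 2);
        lp.set("pool_pad_w", 0);    lp.set("pool_pad_h", 0);
        Ptr<MaxUnpoolLayer> layer = MaxUnpoolLayer::create(lp);
    */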

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
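        // Halide is only used when there is no padding; padded unpooling is not
        // implemented for that backend.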
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
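        // inputs[0] holds the pooled values and inputs[1] the argmax indices; an
        // optional third input carries a blob whose shape defines the unpooled output.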
        CV_Assert(inputs.size() == 2 || inputs.size() == 3);
        CV_Assert(total(inputs[0]) == total(inputs[1]));

        MatShape outShape;
        if (inputs.size() == 2)
        {
            outShape = inputs[0];
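            // Invert the pooling shape formula pooled = (unpooled + 2*pad - kernel)/stride + 1
            // to recover the spatial size of the unpooled output.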
            outShape[2] = (outShape[2] - 1) * poolStride.height + poolKernel.height - 2 * poolPad.height;
            outShape[3] = (outShape[3] - 1) * poolStride.width + poolKernel.width - 2 * poolPad.width;
        }
        else
            outShape = inputs[2];

        outputs.clear();
        outputs.push_back(outShape);

        return false;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

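        // FP16 blobs (stored as CV_16S) are routed to the generic fallback, which
        // performs the computation in FP32.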
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(inputs.size() == 2 || inputs.size() == 3);
        Mat& input = inputs[0];
        Mat& indices = inputs[1];

        CV_Assert(input.total() == indices.total());
        CV_Assert(input.size[0] == 1);
        CV_Assert(input.isContinuous());

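        // Zero-fill each output blob, then scatter every pooled value to the flat
        // per-plane offset recorded in the indices blob.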
        for(int i_n = 0; i_n < outputs.size(); i_n++)
        {
            Mat& outBlob = outputs[i_n];
            outBlob.setTo(0);
            CV_Assert(input.size[1] == outBlob.size[1]);
            int outPlaneTotal = outBlob.size[2]*outBlob.size[3];

            for (int i_c = 0; i_c < input.size[1]; i_c++)
            {
                Mat outPlane = getPlane(outBlob, 0, i_c);
                int wh_area = input.size[2]*input.size[3];
                const float* inptr = input.ptr<float>(0, i_c);
                const float* idxptr = indices.ptr<float>(0, i_c);
                float* outptr = outPlane.ptr<float>();

                for(int i_wh = 0; i_wh < wh_area; i_wh++)
                {
                    int index = idxptr[i_wh];
                    if (!(0 <= index && index < outPlaneTotal))
                    {
                        std::cerr
                            << "i_n=" << i_n << std::endl
                            << "i_c=" << i_c << std::endl
                            << "i_wh=" << i_wh << std::endl
                            << "index=" << index << std::endl
                            << "maxval=" << inptr[i_wh] << std::endl
                            << "outPlaneTotal=" << outPlaneTotal << std::endl
                            << "input.size=" << input.size << std::endl
                            << "indices.size=" << indices.size << std::endl
                            << "outBlob=" << outBlob.size << std::endl
                            ;
                        CV_Assert(0 <= index && index < outPlaneTotal);
                    }
                    outptr[index] = inptr[i_wh];
                }
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

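        // Pack the kernel, stride and padding into the cuda4dnn configuration,
        // height first, matching the NCHW spatial ordering.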
        cuda4dnn::MaxUnpoolingConfiguration config;
        auto& window_size = config.window_size;
        window_size.resize(2);
        window_size[0] = poolKernel.height;
        window_size[1] = poolKernel.width;

        auto& strides = config.strides;
        strides.resize(2);
        strides[0] = poolStride.height;
        strides[1] = poolStride.width;

        auto& pads_begin = config.pads_begin;
        pads_begin.resize(2);
        pads_begin[0] = poolPad.height;
        pads_begin[1] = poolPad.width;

        return make_cuda_node<cuda4dnn::MaxUnpoolingOp>(preferableTarget, std::move(context->stream), config);
    }
#endif

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        // The operation is only well-defined when kernel == stride: if kernel > stride
        // the result is not deterministic, and if kernel < stride part of the input
        // data is simply skipped (you'd better change your model).
        if (poolKernel.width != poolStride.width ||
            poolKernel.height != poolStride.height)
            CV_Error(cv::Error::StsNotImplemented,
                     "Halide backend for maximum unpooling "
                     "does not support cases when kernel != stride");

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Buffer<float> inputBuffer = halideBuffer(input[0]);
        Halide::Buffer<float> indices = halideBuffer(input[1]);

        Halide::Expr pooledX = x / poolKernel.width;
        Halide::Expr pooledY = y / poolKernel.height;

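        // An output pixel receives the pooled value only when its flattened offset
        // matches the recorded argmax index for its pooling window; every other
        // pixel is set to zero.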
        const int outW = inputBuffer.width() * poolKernel.width;
        top(x, y, c, n) = select(y * outW + x == indices(pooledX, pooledY, c, n),
                                 inputBuffer(pooledX, pooledY, c, n), 0.0f);
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }
};

Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
{
    return Ptr<MaxUnpoolLayer>(new MaxUnpoolLayerImpl(params));
}

}  // namespace dnn
}  // namespace cv