Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / layers / scale_layer.cpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
7
8 /*
9 Implementation of Scale layer.
10 */
11
12 #include "../precomp.hpp"
13 #include "layers_common.hpp"
14 #include "../op_cuda.hpp"
15 #include "../op_halide.hpp"
16 #include "../op_inf_engine.hpp"
17 #include <opencv2/dnn/shape_utils.hpp>
18
19 #ifdef HAVE_CUDA
20 #include "../cuda4dnn/primitives/scale_shift.hpp"
21 using namespace cv::dnn::cuda4dnn;
22 #endif
23
24 namespace cv
25 {
26 namespace dnn
27 {
28
29 class ScaleLayerImpl CV_FINAL : public ScaleLayer
30 {
31 public:
32     ScaleLayerImpl(const LayerParams& params)
33     {
34         setParamsFrom(params);
35         hasBias = params.get<bool>("bias_term", false);
36         axis = params.get<int>("axis", 1);
37         hasWeights = false;
38     }
39
40     bool getMemoryShapes(const std::vector<MatShape> &inputs,
41                          const int requiredOutputs,
42                          std::vector<MatShape> &outputs,
43                          std::vector<MatShape> &internals) const CV_OVERRIDE
44     {
45         outputs.assign(1, inputs[0]);
46         return true;
47     }
48
49     virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
50     {
51         std::vector<Mat> inputs;
52         inputs_arr.getMatVector(inputs);
53         hasWeights = blobs.size() == 2 || (blobs.size() == 1 && !hasBias);
54         CV_Assert((inputs.size() == 2 && blobs.empty()) || blobs.size() == (int)hasWeights + (int)hasBias);
55     }
56
57     virtual bool supportBackend(int backendId) CV_OVERRIDE
58     {
59         return backendId == DNN_BACKEND_OPENCV ||
60                backendId == DNN_BACKEND_CUDA ||
61                backendId == DNN_BACKEND_HALIDE ||
62                (backendId == DNN_BACKEND_INFERENCE_ENGINE && axis == 1);
63     }
64
65     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
66     {
67         CV_TRACE_FUNCTION();
68         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
69
70         if (inputs_arr.depth() == CV_16S)
71         {
72             forward_fallback(inputs_arr, outputs_arr, internals_arr);
73             return;
74         }
75
76         std::vector<Mat> inputs, outputs;
77         inputs_arr.getMatVector(inputs);
78         outputs_arr.getMatVector(outputs);
79
80         CV_Assert_N(outputs.size() == 1, !blobs.empty() || inputs.size() == 2);
81
82         Mat &inpBlob = inputs[0];
83         Mat &outBlob = outputs[0];
84         // There is a mode when we multiply a first blob by a second one
85         // instead of trainable weights.
86         Mat weights = blobs.empty() ? inputs[1] : (hasWeights ? blobs[0] : Mat());
87         Mat bias = hasBias ? blobs.back().reshape(1, 1) : Mat();
88         if (!weights.empty())
89             weights = weights.reshape(1, 1);
90         MatShape inpShape = shape(inpBlob);
91         const int numWeights = !weights.empty() ? weights.total() : bias.total();
92         CV_Assert(numWeights != 0);
93         if (hasWeights && hasBias)
94             CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs");
95
96         int endAxis;
97         for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
98         {
99             if (total(inpShape, axis, endAxis) == numWeights)
100                 break;
101         }
102         CV_Assert(total(inpShape, axis, endAxis) == numWeights);
103         CV_Assert(!hasBias || numWeights == bias.total());
104         CV_CheckTypeEQ(inpBlob.type(), CV_32FC1, ""); CV_CheckTypeEQ(outBlob.type(), CV_32FC1, "");
105
106         int numSlices = total(inpShape, 0, axis);
107         float* inpData = (float*)inpBlob.data;
108         float* outData = (float*)outBlob.data;
109
110         if (endAxis != inpBlob.dims)
111         {
112             float* weightsData = !weights.empty() ? (float*)weights.data : 0;
113             float* biasesData = hasBias ? (float*)bias.data : 0;
114             int spatialSize = total(inpShape, endAxis);  // spatialSize != 1
115             for (int i = 0; i < numSlices; ++i)
116             {
117                 for (int j = 0; j < numWeights; ++j)
118                 {
119                     float w = weightsData ? weightsData[j] : 1;
120                     float b = biasesData ? biasesData[j] : 0;
121                     Mat inpSlice(1, spatialSize, CV_32F, inpData);
122                     Mat outSlice(1, spatialSize, CV_32F, outData);
123                     inpSlice.convertTo(outSlice, CV_32F, w, b);
124                     inpData += spatialSize;
125                     outData += spatialSize;
126                 }
127             }
128         }
129         else
130         {
131             for (int i = 0; i < numSlices; ++i)
132             {
133                 Mat inpSlice(1, numWeights, CV_32F, inpData);
134                 Mat outSlice(1, numWeights, CV_32F, outData);
135                 if (!weights.empty())
136                 {
137                     multiply(inpSlice, weights, outSlice);
138                     if (hasBias)
139                         add(outSlice, bias, outSlice);
140                 }
141                 else if (hasBias)
142                     add(inpSlice, bias, outSlice);
143                 inpData += numWeights;
144                 outData += numWeights;
145             }
146         }
147     }
148
149 #ifdef HAVE_CUDA
150     Ptr<BackendNode> initCUDA(
151         void *context_,
152         const std::vector<Ptr<BackendWrapper>>& inputs,
153         const std::vector<Ptr<BackendWrapper>>& outputs
154     ) override
155     {
156         auto context = reinterpret_cast<csl::CSLContext*>(context_);
157
158         CV_Assert(!blobs.empty() || inputs.size() == 2);
159
160         cv::Mat weightsMat = hasWeights ? blobs[0] : Mat();
161
162         /* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
163          * in either case, it is at the end of the blobs vector => bias = blobs.back()
164          */
165         cv::Mat biasMat = hasBias ? blobs.back() : Mat();
166
167         return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), axis, weightsMat, biasMat);
168     }
169 #endif
170
171     virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
172     {
173         switch (node->backendId)
174         {
175             case DNN_BACKEND_HALIDE:
176             {
177 #ifdef HAVE_HALIDE
178                 auto base = node.dynamicCast<HalideBackendNode>();
179                 Halide::Func& input = base->funcs.back();
180                 Halide::Var x("x"), y("y"), c("c"), n("n");
181                 Halide::Func top = attachHalide(input(x, y, c, n));
182                 return Ptr<BackendNode>(new HalideBackendNode(base, top));
183 #endif  // HAVE_HALIDE
184                 break;
185             }
186         }
187         return Ptr<BackendNode>();
188     }
189
190     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
191     {
192 #ifdef HAVE_HALIDE
193         Halide::Buffer<float> input = halideBuffer(inputs[0]);
194         Halide::Var x("x"), y("y"), c("c"), n("n");
195         Halide::Func top = attachHalide(input(x, y, c, n));
196         return Ptr<BackendNode>(new HalideBackendNode(top));
197 #endif  // HAVE_HALIDE
198         return Ptr<BackendNode>();
199     }
200
201 #ifdef HAVE_HALIDE
202     // attachHalide can work both with Halide::Buffer and Halide::Func. In the
203     // second case it will be a fusion.
204     Halide::Func attachHalide(const Halide::Expr& input)
205     {
206         Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
207         Halide::Var x("x"), y("y"), c("c"), n("n");
208
209         const int numChannels = blobs[0].total();
210
211         Halide::Expr topExpr = input;
212         if (hasWeights)
213         {
214             auto weights = wrapToHalideBuffer(blobs[0], {numChannels});
215             topExpr *= weights(c);
216         }
217         if (hasBias)
218         {
219             auto bias = wrapToHalideBuffer(blobs.back(), {numChannels});
220             topExpr += bias(c);
221         }
222         top(x, y, c, n) = topExpr;
223         return top;
224     }
225 #endif  // HAVE_HALIDE
226
227 #ifdef HAVE_INF_ENGINE
228     virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
229     {
230         InferenceEngine::Builder::Layer l = InferenceEngine::Builder::ScaleShiftLayer(name);
231
232         CV_Assert(!blobs.empty());
233         const size_t numChannels = blobs[0].total();
234         if (hasWeights)
235         {
236             addConstantData("weights", wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C), l);
237         }
238         else
239         {
240             auto weights = InferenceEngine::make_shared_blob<float>({
241                                InferenceEngine::Precision::FP32, {(size_t)numChannels},
242                                InferenceEngine::Layout::C
243                            });
244             weights->allocate();
245             float* buf = weights->buffer().as<float*>();
246             std::fill(buf, buf + numChannels, 1);
247             addConstantData("weights", weights, l);
248         }
249         if (hasBias)
250             addConstantData("biases", wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C), l);
251         return Ptr<BackendNode>(new InfEngineBackendNode(l));
252     }
253 #endif  // HAVE_INF_ENGINE
254
255     void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
256     {
257         scale = hasWeights ? blobs[0] : Mat();
258         shift = hasBias ? blobs.back() : Mat();
259     }
260
261     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
262                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
263     {
264         CV_UNUSED(outputs); // suppress unused variable warning
265         long flops = 0;
266         for(int i = 0; i < inputs.size(); i++)
267         {
268             flops += 2*total(inputs[i]);
269         }
270         return flops;
271     }
272
273 private:
274     bool hasWeights;
275 };
276
277
278 Ptr<ScaleLayer> ScaleLayer::create(const LayerParams& params)
279 {
280     return Ptr<ScaleLayer>(new ScaleLayerImpl(params));
281 }
282
283 Ptr<Layer> ShiftLayer::create(const LayerParams& params)
284 {
285     LayerParams scaleParams;
286     scaleParams.name = params.name;
287     scaleParams.type = "Scale";
288     scaleParams.blobs = params.blobs;
289     scaleParams.set("bias_term", true);
290     scaleParams.set("axis", 0);
291     return Ptr<ScaleLayer>(new ScaleLayerImpl(scaleParams));
292 }
293
294 }  // namespace dnn
295 }  // namespace cv