1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2016, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
// Implementation of the Scale layer.
12 #include "../precomp.hpp"
13 #include "layers_common.hpp"
14 #include "../op_cuda.hpp"
15 #include "../op_halide.hpp"
16 #include "../op_inf_engine.hpp"
17 #include <opencv2/dnn/shape_utils.hpp>
20 #include "../cuda4dnn/primitives/scale_shift.hpp"
21 using namespace cv::dnn::cuda4dnn;
29 class ScaleLayerImpl CV_FINAL : public ScaleLayer
32 ScaleLayerImpl(const LayerParams& params)
34 setParamsFrom(params);
35 hasBias = params.get<bool>("bias_term", false);
36 axis = params.get<int>("axis", 1);
40 bool getMemoryShapes(const std::vector<MatShape> &inputs,
41 const int requiredOutputs,
42 std::vector<MatShape> &outputs,
43 std::vector<MatShape> &internals) const CV_OVERRIDE
45 outputs.assign(1, inputs[0]);
49 virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
51 std::vector<Mat> inputs;
52 inputs_arr.getMatVector(inputs);
53 hasWeights = blobs.size() == 2 || (blobs.size() == 1 && !hasBias);
54 CV_Assert((inputs.size() == 2 && blobs.empty()) || blobs.size() == (int)hasWeights + (int)hasBias);
57 virtual bool supportBackend(int backendId) CV_OVERRIDE
59 return backendId == DNN_BACKEND_OPENCV ||
60 backendId == DNN_BACKEND_CUDA ||
61 backendId == DNN_BACKEND_HALIDE ||
62 (backendId == DNN_BACKEND_INFERENCE_ENGINE && axis == 1);
65 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
68 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
70 if (inputs_arr.depth() == CV_16S)
72 forward_fallback(inputs_arr, outputs_arr, internals_arr);
76 std::vector<Mat> inputs, outputs;
77 inputs_arr.getMatVector(inputs);
78 outputs_arr.getMatVector(outputs);
80 CV_Assert_N(outputs.size() == 1, !blobs.empty() || inputs.size() == 2);
82 Mat &inpBlob = inputs[0];
83 Mat &outBlob = outputs[0];
84 // There is a mode when we multiply a first blob by a second one
85 // instead of trainable weights.
86 Mat weights = blobs.empty() ? inputs[1] : (hasWeights ? blobs[0] : Mat());
87 Mat bias = hasBias ? blobs.back().reshape(1, 1) : Mat();
89 weights = weights.reshape(1, 1);
90 MatShape inpShape = shape(inpBlob);
91 const int numWeights = !weights.empty() ? weights.total() : bias.total();
92 CV_Assert(numWeights != 0);
93 if (hasWeights && hasBias)
94 CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs");
97 for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
99 if (total(inpShape, axis, endAxis) == numWeights)
102 CV_Assert(total(inpShape, axis, endAxis) == numWeights);
103 CV_Assert(!hasBias || numWeights == bias.total());
104 CV_CheckTypeEQ(inpBlob.type(), CV_32FC1, ""); CV_CheckTypeEQ(outBlob.type(), CV_32FC1, "");
106 int numSlices = total(inpShape, 0, axis);
107 float* inpData = (float*)inpBlob.data;
108 float* outData = (float*)outBlob.data;
110 if (endAxis != inpBlob.dims)
112 float* weightsData = !weights.empty() ? (float*)weights.data : 0;
113 float* biasesData = hasBias ? (float*)bias.data : 0;
114 int spatialSize = total(inpShape, endAxis); // spatialSize != 1
115 for (int i = 0; i < numSlices; ++i)
117 for (int j = 0; j < numWeights; ++j)
119 float w = weightsData ? weightsData[j] : 1;
120 float b = biasesData ? biasesData[j] : 0;
121 Mat inpSlice(1, spatialSize, CV_32F, inpData);
122 Mat outSlice(1, spatialSize, CV_32F, outData);
123 inpSlice.convertTo(outSlice, CV_32F, w, b);
124 inpData += spatialSize;
125 outData += spatialSize;
131 for (int i = 0; i < numSlices; ++i)
133 Mat inpSlice(1, numWeights, CV_32F, inpData);
134 Mat outSlice(1, numWeights, CV_32F, outData);
135 if (!weights.empty())
137 multiply(inpSlice, weights, outSlice);
139 add(outSlice, bias, outSlice);
142 add(inpSlice, bias, outSlice);
143 inpData += numWeights;
144 outData += numWeights;
#ifdef HAVE_CUDA
// Creates the CUDA backend node for this layer.
//   context_ - opaque pointer that is reinterpreted as csl::CSLContext*.
//   inputs   - backend wrappers of the inputs (only their count is checked).
//   outputs  - backend wrappers of the outputs (unused here).
Ptr<BackendNode> initCUDA(
    void *context_,
    const std::vector<Ptr<BackendWrapper>>& inputs,
    const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
    auto context = reinterpret_cast<csl::CSLContext*>(context_);

    CV_Assert(!blobs.empty() || inputs.size() == 2);

    // An empty Mat is passed when the layer has no stored weights.
    cv::Mat weightsMat;
    if (hasWeights)
        weightsMat = blobs[0];

    // With weights present the bias is blobs[1], otherwise blobs[0]; in both
    // cases it is the last stored blob.
    cv::Mat biasMat;
    if (hasBias)
        biasMat = blobs.back();

    return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), axis, weightsMat, biasMat);
}
#endif
171 virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
173 switch (node->backendId)
175 case DNN_BACKEND_HALIDE:
178 auto base = node.dynamicCast<HalideBackendNode>();
179 Halide::Func& input = base->funcs.back();
180 Halide::Var x("x"), y("y"), c("c"), n("n");
181 Halide::Func top = attachHalide(input(x, y, c, n));
182 return Ptr<BackendNode>(new HalideBackendNode(base, top));
183 #endif // HAVE_HALIDE
187 return Ptr<BackendNode>();
190 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
193 Halide::Buffer<float> input = halideBuffer(inputs[0]);
194 Halide::Var x("x"), y("y"), c("c"), n("n");
195 Halide::Func top = attachHalide(input(x, y, c, n));
196 return Ptr<BackendNode>(new HalideBackendNode(top));
197 #endif // HAVE_HALIDE
198 return Ptr<BackendNode>();
#ifdef HAVE_HALIDE
// Builds a Halide function that applies the stored scale and/or bias to
// `input`, indexed by the channel variable `c`.
// attachHalide can work both with Halide::Buffer and Halide::Func. In the
// second case it will be a fusion.
// Requires at least one stored blob: the coefficients are read from `blobs`.
Halide::Func attachHalide(const Halide::Expr& input)
{
    // blobs[0] is indexed below. finalize() also accepts the two-input mode
    // with no stored blobs, so fail with a clear assertion here instead of
    // reading past the end of an empty vector.
    CV_Assert(!blobs.empty());

    Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
    Halide::Var x("x"), y("y"), c("c"), n("n");

    // blobs[0] is the weights when present, otherwise the bias; its element
    // count is used as the buffer length for both.
    const int numChannels = static_cast<int>(blobs[0].total());

    Halide::Expr topExpr = input;
    if (hasWeights)
    {
        auto weights = wrapToHalideBuffer(blobs[0], {numChannels});
        topExpr *= weights(c);
    }
    if (hasBias)
    {
        // The bias is always the last stored blob.
        auto bias = wrapToHalideBuffer(blobs.back(), {numChannels});
        topExpr += bias(c);
    }
    top(x, y, c, n) = topExpr;
    return top;
}
#endif  // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
// Creates the Inference Engine ScaleShift builder layer for this layer.
// A "weights" constant is always attached: the stored weights, or a vector
// of ones when the layer only carries a bias. "biases" is attached only
// when the layer has a bias. The wrapper list argument is unused.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
    InferenceEngine::Builder::Layer l = InferenceEngine::Builder::ScaleShiftLayer(name);

    CV_Assert(!blobs.empty());
    const size_t numChannels = blobs[0].total();

    if (!hasWeights)
    {
        // No stored scale: supply a neutral one so that only the bias acts.
        auto ones = InferenceEngine::make_shared_blob<float>({
                        InferenceEngine::Precision::FP32, {(size_t)numChannels},
                        InferenceEngine::Layout::C
                    });
        ones->allocate();
        float* onesData = ones->buffer().as<float*>();
        std::fill_n(onesData, numChannels, 1.0f);
        addConstantData("weights", ones, l);
    }
    else
    {
        addConstantData("weights", wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C), l);
    }

    if (hasBias)
    {
        // The bias is always the last stored blob.
        addConstantData("biases", wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C), l);
    }
    return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#endif  // HAVE_INF_ENGINE
255 void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
257 scale = hasWeights ? blobs[0] : Mat();
258 shift = hasBias ? blobs.back() : Mat();
261 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
262 const std::vector<MatShape> &outputs) const CV_OVERRIDE
264 CV_UNUSED(outputs); // suppress unused variable warning
266 for(int i = 0; i < inputs.size(); i++)
268 flops += 2*total(inputs[i]);
278 Ptr<ScaleLayer> ScaleLayer::create(const LayerParams& params)
280 return Ptr<ScaleLayer>(new ScaleLayerImpl(params));
283 Ptr<Layer> ShiftLayer::create(const LayerParams& params)
285 LayerParams scaleParams;
286 scaleParams.name = params.name;
287 scaleParams.type = "Scale";
288 scaleParams.blobs = params.blobs;
289 scaleParams.set("bias_term", true);
290 scaleParams.set("axis", 0);
291 return Ptr<ScaleLayer>(new ScaleLayerImpl(scaleParams));