1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "../op_halide.hpp"
46 #include "../op_inf_engine.hpp"
47 #include "../op_vkcom.hpp"
53 #include "opencl_kernels_dnn.hpp"
54 using namespace cv::dnn::ocl4dnn;
// Softmax layer implementation derived from SoftmaxLayer.
// Its constructor reads the "axis" (default 1) and "log_softmax" (default false) parameters.
// Besides the default OpenCV backend, supportBackend() also lists Halide, Inference Engine
// and Vulkan, each under its own conditions.
62 class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer
66 SoftMaxLayerImpl(const LayerParams& params)
68 axisRaw = params.get<int>("axis", 1);
69 logSoftMax = params.get<bool>("log_softmax", false);
70 setParamsFrom(params);
// OpenCL softmax primitive (ocl4dnn). forward_ocl() creates it on first use, while it is still empty,
// from the shape of the first input, and then calls its Forward() method.
74 Ptr<OCL4DNNSoftmax<float> > softmaxOp;
// Computes the output shapes and the shape of the single internal scratch buffer.
//
// inputs          - shapes of the layer inputs; only inputs[0] is read here.
// requiredOutputs - forwarded unchanged to the base implementation.
// outputs         - filled by Layer::getMemoryShapes().
// internals       - receives exactly one shape, derived from inputs[0].
//
// NOTE(review): this listing skips source lines 85 and 87. The use of cAxis and the
// return statement are therefore not visible here. Presumably the shape is modified
// along cAxis before being assigned, and `inplace` is returned - confirm against the full file.
77 bool getMemoryShapes(const std::vector<MatShape> &inputs,
78 const int requiredOutputs,
79 std::vector<MatShape> &outputs,
80 std::vector<MatShape> &internals) const CV_OVERRIDE
// Let the base class derive the output shapes; its result is kept in `inplace`.
82 bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
// Start from a copy of the first input shape.
83 MatShape shape = inputs[0];
// Resolve the raw axis against the rank of that shape.
84 int cAxis = clamp(axisRaw, shape.size());
// A single internal buffer is requested.
86 internals.assign(1, shape);
90 virtual bool supportBackend(int backendId) CV_OVERRIDE
92 return backendId == DNN_BACKEND_OPENCV ||
93 (backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) ||
94 (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !logSoftMax) ||
95 (backendId == DNN_BACKEND_VKCOM && haveVulkan());
// Finalization hook that receives the input and output blobs.
// NOTE(review): the body (source lines 100-102) is not part of this listing, so what it does
// cannot be stated from here; presumably it resets OpenCL state such as softmaxOp - confirm.
99 virtual void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs) CV_OVERRIDE
// OpenCL implementation of the forward pass.
//
// inputs_    - input blobs; only the first one is processed.
// outputs_   - output blobs; the result is written to the first one.
// internals_ - scratch blobs; the first one is used as the per-position max/sum buffer.
// Returns a bool; forward() passes this call to CV_OCL_RUN.
//
// NOTE(review): this listing skips the lines that follow each `if` below (for example source
// lines 132-133, 144-145, 165-166), so the statements executed on success or failure
// are not visible here. Presumably they are early returns - confirm against the full file.
104 bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
106 std::vector<UMat> inputs;
107 std::vector<UMat> outputs;
108 std::vector<UMat> internals;
// Inputs of depth CV_16S select the "half" variant of the kernels further down.
110 bool use_half = (inputs_.depth() == CV_16S);
111 inputs_.getUMatVector(inputs);
112 outputs_.getUMatVector(outputs);
113 internals_.getUMatVector(internals);
115 UMat& src = inputs[0];
116 UMat& dstMat = outputs[0];
// Resolve the raw axis against the number of dimensions of the input.
117 int axis = clamp(axisRaw, src.dims);
// Create the ocl4dnn primitive once; later calls reuse it while it stays non-empty.
119 if (softmaxOp.empty())
121 OCL4DNNSoftmaxConfig config;
122 config.in_shape = shape(inputs[0]);
// NOTE(review): source line 123 is skipped here; `axis` is probably stored in the config there.
124 config.channels = inputs[0].size[axis];
125 config.logsoftmax = logSoftMax;
126 config.use_half = use_half;
128 softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
// First attempt: the ocl4dnn primitive.
131 if (softmaxOp->Forward(src, dstMat))
// Remaining code: generic kernels from softmax_oclsrc, using internals[0] as scratch.
134 UMat& bufMat = internals[0];
135 MatShape s = shape(src);
// The input is treated as [outerSize, channels, innerSize] around the softmax axis.
136 size_t outerSize = total(s, 0, axis);
137 size_t channels = src.size[axis];
138 size_t innerSize = total(s, axis + 1);
// The element type of the kernels is chosen through the -DT build option.
140 String buildOpts = format("-DT=%s", use_half ? "half" : "float");
141 ocl::Kernel kmax, ksub, ksum, kdiv;
143 if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
146 if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
149 if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
// Only the final kernel is built with LOG_SOFTMAX defined, and only when log-softmax is requested.
152 if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";
153 if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
// Two launch sizes: one work item per scratch element, or one per input element.
156 size_t bufSize = internals[0].total();
157 size_t totalSize = src.total();
159 size_t internal_globalSize[1] = { bufSize };
160 size_t total_globalSize[1] = { totalSize };
// Step 1: kernel_channel_max reads src and writes bufMat.
162 kmax.args((int)outerSize, (int)channels, (int)innerSize,
163 ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadWrite(bufMat));
164 if (!kmax.run(1, internal_globalSize, NULL, false))
// Step 2: kernel_channel_subtract reads bufMat and src, writes dstMat.
167 ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
168 ocl::KernelArg::PtrReadOnly(bufMat),
169 ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(dstMat));
170 if (!ksub.run(1, total_globalSize, NULL, false))
// Step 3: kernel_channel_sum reads dstMat and writes bufMat.
173 ksum.args((int)outerSize, (int)channels, (int)innerSize,
174 ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
175 if (!ksum.run(1, internal_globalSize, NULL, false))
// Step 4: kernel_channel_div reads bufMat and updates dstMat in place.
178 kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
179 ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
180 if (!kdiv.run(1, total_globalSize, NULL, false))
// Forward pass entry point: tries the OpenCL path, then runs the CPU implementation below.
//
// inputs_arr    - input blobs; only the first one is processed on the CPU path.
// outputs_arr   - output blobs; the result is written to the first one.
// internals_arr - scratch blobs; the first one holds one float per (outer, inner) position.
//
// NOTE(review): several source lines are skipped in this listing (for example 198-200, 249-253,
// 277-278), so some control flow is not visible. Comments below state only what the
// visible lines do and mark the gaps.
187 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
190 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Offer the call to forward_ocl() when the preferable target is an OpenCL one.
192 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
193 forward_ocl(inputs_arr, outputs_arr, internals_arr))
// CV_16S inputs are delegated to forward_fallback(); what follows that call is not visible here.
195 if (inputs_arr.depth() == CV_16S)
197 forward_fallback(inputs_arr, outputs_arr, internals_arr);
201 std::vector<Mat> inputs, outputs, internals;
202 inputs_arr.getMatVector(inputs);
203 outputs_arr.getMatVector(outputs);
204 internals_arr.getMatVector(internals);
206 const Mat &src = inputs[0];
207 Mat &dst = outputs[0];
// The input is treated as [outerSize, channels, innerSize] around the resolved axis.
209 int axis = clamp(axisRaw, src.dims);
210 size_t outerSize = src.total(0, axis), channels = src.size[axis],
211 innerSize = src.total(axis + 1);
// The raw-pointer arithmetic below requires float data stored contiguously.
213 CV_Assert(src.type() == CV_32F);
214 CV_Assert(src.isContinuous() && dst.isContinuous());
216 const float *srcPtr = src.ptr<float>();
217 float *dstPtr = dst.ptr<float>();
218 float *bufPtr = internals[0].ptr<float>();
// outerStep: elements per outer slice; cnStep: elements per channel within a slice.
220 size_t outerStep = src.total(axis);
221 size_t cnStep = src.total(axis + 1);
223 //compute max along axis
224 for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
226 size_t srcOffset = outerDim * outerStep;
227 size_t bufOffset = outerDim * cnStep;
// Seed the running maximum with channel 0, then fold in the remaining channels.
229 memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
231 for (size_t cnDim = 1; cnDim < channels; cnDim++)
233 for (size_t i = 0; i < innerSize; i++)
234 bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
// Second pass: dst = src - max, using the per-position maximum stored in the buffer.
239 for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
241 size_t srcOffset = outerDim * outerStep;
242 size_t bufOffset = outerDim * cnStep;
244 for (size_t cnDim = 0; cnDim < channels; cnDim++)
// NOTE(review): `offset` is int while its operands are size_t; the value narrows for
// tensors with more than INT_MAX elements. Consider size_t (also in the loops below).
246 const int offset = srcOffset + cnDim * cnStep;
247 for (size_t i = 0; i < innerSize; i++)
248 dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
// NOTE(review): source lines 249-253 are skipped here. The sums below operate on dst, so an
// exponentiation of dst presumably happens in that gap - confirm against the full file.
254 for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
256 size_t srcOffset = outerDim * outerStep;
257 size_t bufOffset = outerDim * cnStep;
// Reuse the scratch buffer as the accumulator: reset it, then add up dst over all channels.
260 for (size_t i = 0; i < innerSize; i++)
261 bufPtr[bufOffset + i] = 0.f;
263 for (size_t cnDim = 0; cnDim < channels; cnDim++)
265 const int offset = srcOffset + cnDim * cnStep;
266 for (size_t i = 0; i < innerSize; i++)
267 bufPtr[bufOffset + i] += dstPtr[offset + i];
270 //divide by computed sum
271 for (size_t cnDim = 0; cnDim < channels; cnDim++)
273 const int offset = srcOffset + cnDim * cnStep;
274 for (size_t i = 0; i < innerSize; i++)
275 dstPtr[offset + i] /= bufPtr[bufOffset + i];
// Replace every value with its natural logarithm.
// NOTE(review): the guard on source lines 277-278 is not visible; presumably this loop
// only runs when logSoftMax is set - confirm.
279 for (size_t cnDim = 0; cnDim < channels; cnDim++)
281 const int offset = srcOffset + cnDim * cnStep;
282 for (size_t i = 0; i < innerSize; i++)
283 dstPtr[offset + i] = log(dstPtr[offset + i]);
289 virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
292 vkcom::Tensor in = VkComTensor(inputs[0]);
293 int cAxis = clamp(axisRaw, in.dimNum());
294 std::shared_ptr<vkcom::OpBase> op(new vkcom::OpSoftmax(cAxis, logSoftMax));
295 return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
296 #endif // HAVE_VULKAN
297 return Ptr<BackendNode>();
301 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
304 Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
305 int inW, inH, inC, inN;
306 getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
308 if (inW != 1 || inH != 1)
309 CV_Error(cv::Error::StsNotImplemented,
310 "Halide backend for SoftMax with spatial size "
311 "more than 1x1 is not implemented");
313 Halide::Var x("x"), y("y"), c("c"), n("n");
314 Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
316 Halide::Func expInput("expInput");
317 Halide::RDom r(0, inW, 0, inH, 0, inC);
318 expInput(x, y, c, n) = exp(inputBuffer(x, y, c, n));
319 Halide::Expr globalSum = sum(expInput(r.x, r.y, r.z, n));
320 top(x, y, c, n) = expInput(x, y, c, n) / globalSum;
321 return Ptr<BackendNode>(new HalideBackendNode(top));
322 #endif // HAVE_HALIDE
323 return Ptr<BackendNode>();
#ifdef HAVE_INF_ENGINE
// Creates the Inference Engine backend node for this layer.
//
// inputs: backend wrappers of the layer inputs; the dimensions of the first one are used
//         to resolve the softmax axis.
// Returns an InfEngineBackendNode built from a Builder::SoftMaxLayer that carries the
// layer name and the resolved axis.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
    InferenceEngine::DataPtr inputData = infEngineDataNode(inputs[0]);

    InferenceEngine::Builder::SoftMaxLayer softmaxBuilder(name);
    const int resolvedAxis = clamp(axisRaw, inputData->getDims().size());
    softmaxBuilder.setAxis(resolvedAxis);

    return Ptr<BackendNode>(new InfEngineBackendNode(softmaxBuilder));
}
#endif // HAVE_INF_ENGINE
// Estimates the cost of the layer: each input contributes 4 times its element count.
//
// inputs  - shapes of the layer inputs.
// outputs - unused.
//
// NOTE(review): the declaration of `flops` (source lines 342-343) and the return statement
// are not part of this listing; presumably an accumulator starting at 0 is returned - confirm.
338 int64 getFLOPS(const std::vector<MatShape> &inputs,
339 const std::vector<MatShape> &outputs) const CV_OVERRIDE
341 CV_UNUSED(outputs); // suppress unused variable warning
// NOTE(review): `i` is int while inputs.size() is unsigned; a size_t index would avoid the
// signed/unsigned comparison.
344 for (int i = 0; i < inputs.size(); i++)
346 flops += 4*total(inputs[i]);
355 Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)
357 return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(params));