1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "op_halide.hpp"
46 #include "opencl_kernels_dnn.hpp"
51 using namespace cv::dnn::ocl4dnn;
59 class SoftMaxLayerImpl : public SoftmaxLayer
63 SoftMaxLayerImpl(const LayerParams& params)
65 axisRaw = params.get<int>("axis", 1);
66 logSoftMax = params.get<int>("log_softmax", false);
67 setParamsFrom(params);
71 Ptr<OCL4DNNSoftmax<float> > softmaxOp;
74 bool getMemoryShapes(const std::vector<MatShape> &inputs,
75 const int requiredOutputs,
76 std::vector<MatShape> &outputs,
77 std::vector<MatShape> &internals) const
79 bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
80 MatShape shape = inputs[0];
81 int cAxis = clamp(axisRaw, shape.size());
83 internals.assign(1, shape);
87 virtual bool supportBackend(int backendId)
89 return backendId == DNN_BACKEND_DEFAULT ||
90 backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1;
94 bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
96 if (softmaxOp.empty())
98 OCL4DNNSoftmaxConfig config;
100 config.in_shape = shape(*inputs[0]);
101 config.axis = axisRaw;
102 config.channels = inputs[0]->size[axisRaw];
103 config.logsoftmax = logSoftMax;
105 softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
109 srcMat = inputs[0]->getUMat(ACCESS_READ);
110 dstMat = outputs[0].getUMat(ACCESS_WRITE);
112 if (softmaxOp->Forward(srcMat, dstMat))
115 const Mat &src = *inputs[0];
116 UMat bufMat = internals[0].getUMat(ACCESS_WRITE);
117 srcMat.copyTo(dstMat);
119 int axis = clamp(axisRaw, src.dims);
120 size_t outerSize = src.total(0, axis);
121 size_t channels = src.size[axis];
122 size_t innerSize = src.total(axis + 1);
124 String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
125 ocl::Kernel kmax, ksub, ksum, kdiv;
127 if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
130 if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
133 if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
136 if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";
137 if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
140 size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
141 size_t bufSize = internals[0].total();
142 size_t totalSize = src.total();
144 kmax.args((int)outerSize, (int)channels, (int)innerSize,
145 ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
146 if (!kmax.run(1, &bufSize, &wgSize, false))
149 ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
150 ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
151 if (!ksub.run(1, &totalSize, &wgSize, false))
154 cv::exp(dstMat, dstMat);
156 ksum.args((int)outerSize, (int)channels, (int)innerSize,
157 ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
158 if (!ksum.run(1, &bufSize, &wgSize, false))
161 kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
162 ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
163 if (!kdiv.run(1, &totalSize, &wgSize, false))
170 void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
173 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
175 CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
176 OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
177 forward_ocl(inputs, outputs, internals))
179 const Mat &src = *inputs[0];
180 Mat &dst = outputs[0];
182 int axis = clamp(axisRaw, src.dims);
183 size_t outerSize = src.total(0, axis), channels = src.size[axis],
184 innerSize = src.total(axis + 1);
186 CV_Assert(src.type() == CV_32F);
187 CV_Assert(src.isContinuous() && dst.isContinuous());
189 const float *srcPtr = src.ptr<float>();
190 float *dstPtr = dst.ptr<float>();
191 float *bufPtr = internals[0].ptr<float>();
193 size_t outerStep = src.total(axis);
194 size_t cnStep = src.total(axis + 1);
196 //compute max along axis
197 for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
199 size_t srcOffset = outerDim * outerStep;
200 size_t bufOffset = outerDim * cnStep;
202 memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
204 for (size_t cnDim = 1; cnDim < channels; cnDim++)
206 for (size_t i = 0; i < innerSize; i++)
207 bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
212 for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
214 size_t srcOffset = outerDim * outerStep;
215 size_t bufOffset = outerDim * cnStep;
217 for (size_t cnDim = 0; cnDim < channels; cnDim++)
219 const int offset = srcOffset + cnDim * cnStep;
220 for (size_t i = 0; i < innerSize; i++)
221 dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
227 for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
229 size_t srcOffset = outerDim * outerStep;
230 size_t bufOffset = outerDim * cnStep;
233 for (size_t i = 0; i < innerSize; i++)
234 bufPtr[bufOffset + i] = 0.f;
236 for (size_t cnDim = 0; cnDim < channels; cnDim++)
238 const int offset = srcOffset + cnDim * cnStep;
239 for (size_t i = 0; i < innerSize; i++)
240 bufPtr[bufOffset + i] += dstPtr[offset + i];
243 //divide by computed sum
244 for (size_t cnDim = 0; cnDim < channels; cnDim++)
246 const int offset = srcOffset + cnDim * cnStep;
247 for (size_t i = 0; i < innerSize; i++)
248 dstPtr[offset + i] /= bufPtr[bufOffset + i];
252 for (size_t cnDim = 0; cnDim < channels; cnDim++)
254 const int offset = srcOffset + cnDim * cnStep;
255 for (size_t i = 0; i < innerSize; i++)
256 dstPtr[offset + i] = log(dstPtr[offset + i]);
262 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
265 Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
266 int inW, inH, inC, inN;
267 getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
269 if (inW != 1 || inH != 1)
270 CV_Error(cv::Error::StsNotImplemented,
271 "Halide backend for SoftMax with spatial size "
272 "more than 1x1 is not implemented");
274 Halide::Var x("x"), y("y"), c("c"), n("n");
275 Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
277 Halide::Func expInput("expInput");
278 Halide::RDom r(0, inW, 0, inH, 0, inC);
279 expInput(x, y, c, n) = exp(inputBuffer(x, y, c, n));
280 Halide::Expr globalSum = sum(expInput(r.x, r.y, r.z, n));
281 top(x, y, c, n) = expInput(x, y, c, n) / globalSum;
282 return Ptr<BackendNode>(new HalideBackendNode(top));
283 #endif // HAVE_HALIDE
284 return Ptr<BackendNode>();
287 int64 getFLOPS(const std::vector<MatShape> &inputs,
288 const std::vector<MatShape> &outputs) const
290 (void)outputs; // suppress unused variable warning
293 for (int i = 0; i < inputs.size(); i++)
295 flops += 4*total(inputs[i]);
304 Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)
306 return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(params));