1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "../op_cuda.hpp"
46 #include "../op_halide.hpp"
47 #include "../op_inf_engine.hpp"
48 #include "../op_vkcom.hpp"
49 #include <opencv2/dnn/shape_utils.hpp>
53 #include "opencl_kernels_dnn.hpp"
57 #include "../cuda4dnn/primitives/activation.hpp"
58 using namespace cv::dnn::cuda4dnn;
// ElementWiseLayer<Func>: generic DNN layer template that applies an
// element-wise activation functor `Func` to CV_32F tensors on the CPU and
// delegates backend setup (Halide / Inference Engine / Vulkan / CUDA / OpenCL)
// to the functor.
// NOTE(review): this view of the file is missing interleaved lines (braces,
// member declarations, #ifdef openers); comments describe only what the
// visible lines establish.
71 template<typename Func>
72 class ElementWiseLayer : public Func::Layer
// Parallel worker for cv::parallel_for_: splits each sample's plane range
// into `nstripes` stripes processed concurrently.
75 class PBody : public cv::ParallelLoopBody
83 PBody(const Func &func, const Mat &src, Mat& dst, int nstripes)
91 void operator()(const Range &r) const CV_OVERRIDE
93 int nstripes = nstripes_, nsamples = 1, outCn = 1;
// Dim 0 / dim 1 of src_ are read as sample count and channel count here;
// the branching between the two outCn assignments is not visible in this
// view — presumably it depends on src_->dims (TODO confirm).
98 nsamples = src_->size[0];
99 outCn = src_->size[1];
102 outCn = src_->size[0];
// planeSize accumulates the product of all trailing (spatial) dimensions.
104 for (int i = 2; i < src_->dims; ++i)
105 planeSize *= src_->size[i];
// Ceil-divide the plane into stripes; clamp the last stripe to planeSize.
107 size_t stripeSize = (planeSize + nstripes - 1)/nstripes;
108 size_t stripeStart = r.start*stripeSize;
109 size_t stripeEnd = std::min(r.end*stripeSize, planeSize);
// Apply the functor independently to each sample's stripe.
111 for( int i = 0; i < nsamples; i++ )
113 const float* srcptr = src_->ptr<float>(i) + stripeStart;
114 float* dstptr = dst_->ptr<float>(i) + stripeStart;
115 func_->apply(srcptr, dstptr, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
120 ElementWiseLayer(const Func &f=Func()) : run_parallel(false) { func = f; }
// Backend support is decided entirely by the functor.
122 virtual bool supportBackend(int backendId) CV_OVERRIDE
124 return func.supportBackend(backendId, this->preferableTarget);
// Fuses this activation onto an existing Halide graph node when possible;
// falls through to an empty node for other backends.
127 virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
129 switch (node->backendId)
131 case DNN_BACKEND_HALIDE:
134 auto base = node.dynamicCast<HalideBackendNode>();
135 Halide::Func& input = base->funcs.back();
136 Halide::Var x("x"), y("y"), c("c"), n("n");
137 Halide::Func top = (this->name.empty() ? Halide::Func() : Halide::Func(this->name));
138 func.attachHalide(input(x, y, c, n), top);
139 return Ptr<BackendNode>(new HalideBackendNode(base, top));
140 #endif // HAVE_HALIDE
144 return Ptr<BackendNode>();
// Builds a standalone Halide node from the first input wrapper.
147 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
150 Halide::Buffer<float> input = halideBuffer(inputs[0]);
151 Halide::Var x("x"), y("y"), c("c"), n("n");
152 Halide::Func top = (this->name.empty() ? Halide::Func() : Halide::Func(this->name));
153 func.attachHalide(input(x, y, c, n), top);
154 return Ptr<BackendNode>(new HalideBackendNode(top));
155 #endif // HAVE_HALIDE
156 return Ptr<BackendNode>();
159 #ifdef HAVE_INF_ENGINE
// Wraps the functor's Inference Engine builder layer, stamping this
// layer's name onto it.
160 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
162 InferenceEngine::Builder::Layer ieLayer = func.initInfEngineBuilderAPI();
163 ieLayer.setName(this->name);
164 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
166 #endif // HAVE_INF_ENGINE
168 virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
171 return Ptr<BackendNode>(new VkComBackendNode(inputs, func.initVkCom()));
172 #endif // HAVE_VULKAN
173 return Ptr<BackendNode>();
// Fusion of a following layer and scale/shift extraction are also
// delegated to the functor.
176 virtual bool tryFuse(Ptr<dnn::Layer>& top) CV_OVERRIDE
178 return func.tryFuse(top);
181 void getScaleShift(Mat& scale_, Mat& shift_) const CV_OVERRIDE
183 func.getScaleShift(scale_, shift_);
// Element-wise layers keep the input shapes (base-class behavior).
186 bool getMemoryShapes(const std::vector<MatShape> &inputs,
187 const int requiredOutputs,
188 std::vector<MatShape> &outputs,
189 std::vector<MatShape> &internals) const CV_OVERRIDE
191 Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
// CPU forward pass. Tries the OpenCL path first; CV_16S inputs fall back
// to the generic base implementation.
195 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
199 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget),
200 func.applyOCL(inputs_arr, outputs_arr, internals_arr))
202 if (inputs_arr.depth() == CV_16S)
204 Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
208 std::vector<Mat> inputs, outputs;
209 inputs_arr.getMatVector(inputs);
210 outputs_arr.getMatVector(outputs);
212 for (size_t i = 0; i < inputs.size(); i++)
214 const Mat &src = inputs[i];
215 Mat &dst = outputs[i];
// Requires continuous CV_32F tensors of identical shape — PBody indexes
// raw float planes.
216 CV_Assert(src.size == dst.size && src.type() == dst.type() &&
217 src.isContinuous() && dst.isContinuous() && src.type() == CV_32F);
219 const int nstripes = getNumThreads();
220 PBody body(func, src, dst, nstripes);
221 parallel_for_(Range(0, nstripes), body, nstripes);
// In-place slice hook used by fused layers.
225 void forwardSlice(const float* src, float* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
227 func.apply(src, dst, len, planeSize, cn0, cn1);
231 Ptr<BackendNode> initCUDA(
233 const std::vector<Ptr<BackendWrapper>>& inputs,
234 const std::vector<Ptr<BackendWrapper>>& outputs
// context_ is an opaque pointer cast back to the CUDA context holder.
237 auto context = reinterpret_cast<csl::CSLContext*>(context_);
238 return func.initCUDA(Layer::preferableTarget, context->stream);
// FLOPS estimate: per-element cost (from the functor) times output size.
242 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
243 const std::vector<MatShape> &outputs) const CV_OVERRIDE
246 for (int i = 0; i < outputs.size(); i++)
248 flops += total(outputs[i]) * func.getFLOPSPerElement();
// Builds the OpenCL build options defining the element type macros
// (-DT=<type> -Dconvert_T=convert_<type>) for the activation kernels.
125 static String oclGetTMacro(const UMat &m)
126 String str_name = ocl::typeToStr(m.type());
// Special-cases "short" — the taken branch's body is not visible in this
// view; presumably it remaps the name for FP16 kernels (TODO confirm).
127 if (str_name == "short")
128 return format("-DT=%s -Dconvert_T=convert_%s ", str_name.c_str(), str_name.c_str());
// ReLUFunctor: f(x) = x for x >= 0, slope*x otherwise (leaky ReLU when
// slope != 0). Used through ElementWiseLayer<ReLUFunctor>.
129 typedef ReLULayer Layer;
130 explicit ReLUFunctor(float slope_=1.f) : slope(slope_) {}
131 bool supportBackend(int backendId, int)
132 #ifdef HAVE_INF_ENGINE
// IE 2019R1 rejects negative slopes; other IE versions accept any slope.
133 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
134 return slope >= 0 || !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1);
135 return backendId == DNN_BACKEND_OPENCV ||
136 backendId == DNN_BACKEND_CUDA ||
137 backendId == DNN_BACKEND_HALIDE ||
138 backendId == DNN_BACKEND_VKCOM;
// CPU path: SIMD main loop over 16 floats per iteration, scalar tail.
139 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
140 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
141 v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();
142 for( ; i <= len - 16; i += 16 )
143 v_float32x4 x0 = v_load(srcptr + i);
144 v_float32x4 x1 = v_load(srcptr + i + 4);
145 v_float32x4 x2 = v_load(srcptr + i + 8);
146 v_float32x4 x3 = v_load(srcptr + i + 12);
// select keeps x where x >= 0, otherwise multiplies by the slope.
147 x0 = v_select(x0 >= z, x0, x0*s4);
148 x1 = v_select(x1 >= z, x1, x1*s4);
149 x2 = v_select(x2 >= z, x2, x2*s4);
150 x3 = v_select(x3 >= z, x3, x3*s4);
151 v_store(dstptr + i, x0);
152 v_store(dstptr + i + 4, x1);
153 v_store(dstptr + i + 8, x2);
154 v_store(dstptr + i + 12, x3);
// Scalar tail for the remaining < 16 elements.
155 for( ; i < len; i++ )
156 dstptr[i] = x >= 0.f ? x : s*x;
157 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
158 return make_cuda_node<cuda4dnn::ReLUOp>(target, stream, slope);
// Configures the "ReLUForward" OpenCL kernel; -DRELU_NO_SLOPE selects the
// branch-free variant when slope == 0.
159 bool initKernel(ocl::Kernel &ker, const UMat &src) const
160 const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
161 String buildopt = oclGetTMacro(src) + buildoptSlope;
162 if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
163 ker.set(3, (float)slope);
164 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
165 std::vector<UMat> inputs;
166 std::vector<UMat> outputs;
167 inps.getUMatVector(inputs);
168 outs.getUMatVector(outputs);
169 for (size_t i = 0; i < inputs.size(); i++)
170 UMat& src = inputs[i];
171 UMat& dst = outputs[i];
// The kernel treats buffers as flat arrays, hence the continuity and
// zero-offset requirements.
172 CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
173 CV_Assert(initKernel(kernel, src));
174 kernel.set(0, (int)src.total());
175 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
176 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
// One work item per element.
177 size_t gSize = src.total();
178 CV_Assert(kernel.run(1, &gSize, NULL, false));
// Halide expression; visible branches suggest a slope / no-slope split,
// though the guarding condition is not visible in this view.
179 void attachHalide(const Halide::Expr& input, Halide::Func& top)
180 Halide::Var x("x"), y("y"), c("c"), n("n");
181 top(x, y, c, n) = select(input >= 0.0f, input, slope * input);
182 top(x, y, c, n) = max(input, 0.0f);
183 #endif // HAVE_HALIDE
184 #ifdef HAVE_INF_ENGINE
185 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
186 return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(slope);
187 #endif // HAVE_INF_ENGINE
188 std::shared_ptr<vkcom::OpBase> initVkCom()
189 std::shared_ptr<vkcom::OpBase> op(new vkcom::OpReLU(slope));
190 #endif // HAVE_VULKAN
// ReLU fuses nothing and exposes no affine scale/shift.
191 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
192 void getScaleShift(Mat&, Mat&) const {}
193 int64 getFLOPSPerElement() const { return 1; }
// ReLU6Functor: clamps input to [minValue, maxValue] (defaults [0, 6]).
194 typedef ReLU6Layer Layer;
195 float minValue, maxValue;
196 ReLU6Functor(float minValue_ = 0.0f, float maxValue_ = 6.0f)
197 : minValue(minValue_), maxValue(maxValue_)
// An inverted range would make the clamp meaningless.
198 CV_Assert(minValue <= maxValue);
199 bool supportBackend(int backendId, int)
200 return backendId == DNN_BACKEND_OPENCV ||
201 backendId == DNN_BACKEND_CUDA ||
202 backendId == DNN_BACKEND_HALIDE ||
203 backendId == DNN_BACKEND_INFERENCE_ENGINE;
// CPU path: SIMD min/max clamp over 16 floats per iteration, scalar tail.
204 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
205 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
206 v_float32x4 minV = v_setall_f32(minValue), maxV = v_setall_f32(maxValue);
207 for( ; i <= len - 16; i += 16 )
208 v_float32x4 x0 = v_load(srcptr + i);
209 v_float32x4 x1 = v_load(srcptr + i + 4);
210 v_float32x4 x2 = v_load(srcptr + i + 8);
211 v_float32x4 x3 = v_load(srcptr + i + 12);
// clamp(x) = min(max(minV, x), maxV)
212 x0 = v_min(v_max(minV, x0), maxV);
213 x1 = v_min(v_max(minV, x1), maxV);
214 x2 = v_min(v_max(minV, x2), maxV);
215 x3 = v_min(v_max(minV, x3), maxV);
216 v_store(dstptr + i, x0);
217 v_store(dstptr + i + 4, x1);
218 v_store(dstptr + i + 8, x2);
219 v_store(dstptr + i + 12, x3);
// Scalar tail: upper clamp on one branch, lower clamp on the other (the
// guarding if/else lines are not visible in this view).
220 for( ; i < len; i++ )
221 dstptr[i] = x <= maxValue ? x : maxValue;
222 dstptr[i] = minValue;
223 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
224 std::vector<UMat> inputs;
225 std::vector<UMat> outputs;
226 inps.getUMatVector(inputs);
227 outs.getUMatVector(outputs);
228 String buildopt = oclGetTMacro(inputs[0]);
229 for (size_t i = 0; i < inputs.size(); i++)
230 UMat& src = inputs[i];
231 UMat& dst = outputs[i];
232 ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
233 kernel.set(0, (int)src.total());
234 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
235 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
// Clamp bounds are passed as kernel arguments 3 and 4.
236 kernel.set(3, (float)minValue);
237 kernel.set(4, (float)maxValue);
238 size_t gSize = src.total();
239 CV_Assert(kernel.run(1, &gSize, NULL, false));
240 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
241 return make_cuda_node<cuda4dnn::ClippedReLUOp>(target, stream, minValue, maxValue);
242 void attachHalide(const Halide::Expr& input, Halide::Func& top)
243 Halide::Var x("x"), y("y"), c("c"), n("n");
244 top(x, y, c, n) = clamp(input, minValue, maxValue);
245 #endif // HAVE_HALIDE
246 #ifdef HAVE_INF_ENGINE
247 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
248 return InferenceEngine::Builder::ClampLayer("").setMinValue(minValue).setMaxValue(maxValue);
249 #endif // HAVE_INF_ENGINE
250 std::shared_ptr<vkcom::OpBase> initVkCom()
251 // TODO: add vkcom implementation
252 return std::shared_ptr<vkcom::OpBase>();
253 #endif // HAVE_VULKAN
254 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
255 void getScaleShift(Mat&, Mat&) const {}
// Two comparisons per element.
256 int64 getFLOPSPerElement() const { return 2; }
// TanHFunctor: f(x) = tanh(x).
257 typedef TanHLayer Layer;
258 bool supportBackend(int backendId, int)
259 return backendId == DNN_BACKEND_OPENCV ||
260 backendId == DNN_BACKEND_CUDA ||
261 backendId == DNN_BACKEND_HALIDE ||
262 backendId == DNN_BACKEND_INFERENCE_ENGINE;
// Scalar CPU loop; the per-element tanh statement is not visible in this
// view.
263 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
264 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
265 for( int i = 0; i < len; i++ )
// OpenCL path: one "TanHForward" launch per input, one work item per
// element.
266 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
267 std::vector<UMat> inputs;
268 std::vector<UMat> outputs;
269 inps.getUMatVector(inputs);
270 outs.getUMatVector(outputs);
271 String buildopt = oclGetTMacro(inputs[0]);
272 for (size_t i = 0; i < inputs.size(); i++)
273 UMat& src = inputs[i];
274 UMat& dst = outputs[i];
275 ocl::Kernel kernel("TanHForward", ocl::dnn::activations_oclsrc, buildopt);
276 kernel.set(0, (int)src.total());
277 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
278 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
279 size_t gSize = src.total();
280 CV_Assert(kernel.run(1, &gSize, NULL, false));
281 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
282 return make_cuda_node<cuda4dnn::TanHOp>(target, stream);
283 void attachHalide(const Halide::Expr& input, Halide::Func& top)
284 Halide::Var x("x"), y("y"), c("c"), n("n");
285 top(x, y, c, n) = tanh(input);
286 #endif // HAVE_HALIDE
287 #ifdef HAVE_INF_ENGINE
288 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
289 return InferenceEngine::Builder::TanHLayer("");
290 #endif // HAVE_INF_ENGINE
291 std::shared_ptr<vkcom::OpBase> initVkCom()
292 // TODO: add vkcom implementation
293 return std::shared_ptr<vkcom::OpBase>();
294 #endif // HAVE_VULKAN
295 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
296 void getScaleShift(Mat&, Mat&) const {}
297 int64 getFLOPSPerElement() const { return 1; }
// SigmoidFunctor: f(x) = 1 / (1 + exp(-x)).
298 struct SigmoidFunctor
299 typedef SigmoidLayer Layer;
300 bool supportBackend(int backendId, int)
301 return backendId == DNN_BACKEND_OPENCV ||
302 backendId == DNN_BACKEND_CUDA ||
303 backendId == DNN_BACKEND_HALIDE ||
304 backendId == DNN_BACKEND_INFERENCE_ENGINE;
// Scalar CPU loop over each channel plane.
305 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
306 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
307 for( int i = 0; i < len; i++ )
308 dstptr[i] = 1.f/(1.f + exp(-x));
// OpenCL path: one "SigmoidForward" launch per input.
309 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
310 std::vector<UMat> inputs;
311 std::vector<UMat> outputs;
312 inps.getUMatVector(inputs);
313 outs.getUMatVector(outputs);
314 String buildopt = oclGetTMacro(inputs[0]);
315 for (size_t i = 0; i < inputs.size(); i++)
316 UMat& src = inputs[i];
317 UMat& dst = outputs[i];
318 ocl::Kernel kernel("SigmoidForward", ocl::dnn::activations_oclsrc, buildopt);
319 kernel.set(0, (int)src.total());
320 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
321 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
322 size_t gSize = src.total();
323 CV_Assert(kernel.run(1, &gSize, NULL, false));
324 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
325 return make_cuda_node<cuda4dnn::SigmoidOp>(target, stream);
326 void attachHalide(const Halide::Expr& input, Halide::Func& top)
327 Halide::Var x("x"), y("y"), c("c"), n("n");
328 top(x, y, c, n) = 1.0f / (1.0f + exp(-input));
329 #endif // HAVE_HALIDE
330 #ifdef HAVE_INF_ENGINE
331 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
332 return InferenceEngine::Builder::SigmoidLayer("");
333 #endif // HAVE_INF_ENGINE
334 std::shared_ptr<vkcom::OpBase> initVkCom()
335 // TODO: add vkcom implementation
336 return std::shared_ptr<vkcom::OpBase>();
337 #endif // HAVE_VULKAN
338 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
339 void getScaleShift(Mat&, Mat&) const {}
// exp + add + divide.
340 int64 getFLOPSPerElement() const { return 3; }
// ELUFunctor: f(x) = x for x >= 0, exp(x) - 1 otherwise.
341 typedef ELULayer Layer;
342 explicit ELUFunctor() {}
343 bool supportBackend(int backendId, int)
344 return backendId == DNN_BACKEND_OPENCV ||
345 backendId == DNN_BACKEND_CUDA ||
346 backendId == DNN_BACKEND_HALIDE ||
347 backendId == DNN_BACKEND_INFERENCE_ENGINE;
// Scalar CPU loop over each channel plane.
348 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
349 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
350 for(int i = 0; i < len; i++ )
351 dstptr[i] = x >= 0.f ? x : exp(x) - 1;
// OpenCL path: one "ELUForward" launch per input.
352 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
353 std::vector<UMat> inputs;
354 std::vector<UMat> outputs;
355 inps.getUMatVector(inputs);
356 outs.getUMatVector(outputs);
357 String buildopt = oclGetTMacro(inputs[0]);
358 for (size_t i = 0; i < inputs.size(); i++)
359 UMat& src = inputs[i];
360 UMat& dst = outputs[i];
361 ocl::Kernel kernel("ELUForward", ocl::dnn::activations_oclsrc, buildopt);
362 kernel.set(0, (int)src.total());
363 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
364 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
365 size_t gSize = src.total();
366 CV_Assert(kernel.run(1, &gSize, NULL, false));
367 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
368 return make_cuda_node<cuda4dnn::ELUOp>(target, stream);
369 void attachHalide(const Halide::Expr& input, Halide::Func& top)
370 Halide::Var x("x"), y("y"), c("c"), n("n");
371 top(x, y, c, n) = select(input >= 0.0f, input, exp(input) - 1);
372 #endif // HAVE_HALIDE
373 #ifdef HAVE_INF_ENGINE
374 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
375 return InferenceEngine::Builder::ELULayer("");
376 #endif // HAVE_INF_ENGINE
377 std::shared_ptr<vkcom::OpBase> initVkCom()
378 // TODO: add vkcom implementation
379 return std::shared_ptr<vkcom::OpBase>();
380 #endif // HAVE_VULKAN
381 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
382 void getScaleShift(Mat&, Mat&) const {}
383 int64 getFLOPSPerElement() const { return 2; }
// AbsValFunctor: f(x) = |x|.
384 typedef AbsLayer Layer;
385 bool supportBackend(int backendId, int)
386 #ifdef HAVE_INF_ENGINE
// IE 2019R1 is explicitly excluded (see also the ReLU-based emulation
// below, which that release rejects).
387 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
388 return !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1);
389 return backendId == DNN_BACKEND_OPENCV ||
390 backendId == DNN_BACKEND_CUDA ||
391 backendId == DNN_BACKEND_HALIDE;
// Scalar CPU loop; the per-element abs statement is not visible in this
// view.
392 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
393 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
394 for( int i = 0; i < len; i++ )
// OpenCL path: one "AbsValForward" launch per input.
395 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
396 std::vector<UMat> inputs;
397 std::vector<UMat> outputs;
398 inps.getUMatVector(inputs);
399 outs.getUMatVector(outputs);
400 String buildopt = oclGetTMacro(inputs[0]);
401 for (size_t i = 0; i < inputs.size(); i++)
402 UMat& src = inputs[i];
403 UMat& dst = outputs[i];
404 ocl::Kernel kernel("AbsValForward", ocl::dnn::activations_oclsrc, buildopt);
405 kernel.set(0, (int)src.total());
406 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
407 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
408 size_t gSize = src.total();
409 CV_Assert(kernel.run(1, &gSize, NULL, false));
410 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
411 return make_cuda_node<cuda4dnn::AbsValOp>(target, stream);
412 void attachHalide(const Halide::Expr& input, Halide::Func& top)
413 Halide::Var x("x"), y("y"), c("c"), n("n");
414 top(x, y, c, n) = abs(input);
415 #endif // HAVE_HALIDE
416 #ifdef HAVE_INF_ENGINE
417 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
// abs is emulated via leaky ReLU with slope ~= -1 (exactly -1 appears to
// be disallowed; hence -0.999999f — TODO confirm against IE docs).
418 return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(-0.999999f);
419 #endif // HAVE_INF_ENGINE
420 std::shared_ptr<vkcom::OpBase> initVkCom()
421 // TODO: add vkcom implementation
422 return std::shared_ptr<vkcom::OpBase>();
423 #endif // HAVE_VULKAN
424 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
425 void getScaleShift(Mat&, Mat&) const {}
426 int64 getFLOPSPerElement() const { return 1; }
// BNLLFunctor: binomial normal log-likelihood, f(x) = log(1 + exp(x)),
// computed in the overflow-stable split form used by Caffe.
427 typedef BNLLLayer Layer;
428 bool supportBackend(int backendId, int)
429 return backendId == DNN_BACKEND_OPENCV ||
430 backendId == DNN_BACKEND_CUDA ||
431 backendId == DNN_BACKEND_HALIDE;
432 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
433 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
434 for( int i = 0; i < len; i++ )
// Stable form: for x > 0 use x + log(1 + exp(-x)) to avoid exp overflow.
435 // https://github.com/BVLC/caffe/blame/1.0/src/caffe/layers/bnll_layer.cpp#L17
436 dstptr[i] = x > 0 ? x + log(1. + exp(-x)) : log(1. + exp(x));
// OpenCL path: one "BNLLForward" launch per input.
437 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
438 std::vector<UMat> inputs;
439 std::vector<UMat> outputs;
440 inps.getUMatVector(inputs);
441 outs.getUMatVector(outputs);
442 String buildopt = oclGetTMacro(inputs[0]);
443 for (size_t i = 0; i < inputs.size(); i++)
444 UMat& src = inputs[i];
445 UMat& dst = outputs[i];
446 ocl::Kernel kernel("BNLLForward", ocl::dnn::activations_oclsrc, buildopt);
447 kernel.set(0, (int)src.total());
448 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
449 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
450 size_t gSize = src.total();
451 CV_Assert(kernel.run(1, &gSize, NULL, false));
452 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
453 return make_cuda_node<cuda4dnn::BNLLOp>(target, stream);
454 void attachHalide(const Halide::Expr& input, Halide::Func& top)
455 Halide::Var x("x"), y("y"), c("c"), n("n");
// Equivalent branch-free formulation: max(x,0) + log(1 + exp(-|x|)).
456 // https://github.com/BVLC/caffe/blame/1.0/src/caffe/layers/bnll_layer.cpp#L17
457 top(x, y, c, n) = max(input, 0) + log(1.0f + exp(-abs(input)));
458 #endif // HAVE_HALIDE
459 #ifdef HAVE_INF_ENGINE
// No Inference Engine equivalent is provided.
460 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
461 CV_Error(Error::StsNotImplemented, "");
462 #endif // HAVE_INF_ENGINE
463 std::shared_ptr<vkcom::OpBase> initVkCom()
464 // TODO: add vkcom implementation
465 return std::shared_ptr<vkcom::OpBase>();
466 #endif // HAVE_VULKAN
467 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
468 void getScaleShift(Mat&, Mat&) const {}
469 int64 getFLOPSPerElement() const { return 5; }
// PowerFunctor: f(x) = (scale*x + shift)^power, with a fast affine-only
// path when power == 1.
470 typedef PowerLayer Layer;
471 explicit PowerFunctor(float power_ = 1.f, float scale_ = 1.f, float shift_ = 0.f)
472 : power(power_), scale(scale_), shift(shift_) {}
473 bool supportBackend(int backendId, int targetId)
// On IE OpenCL targets, only power 1 (affine) or 0.5 (sqrt) is supported.
474 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
475 return (targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) || power == 1.0 || power == 0.5;
476 return backendId == DNN_BACKEND_OPENCV ||
477 backendId == DNN_BACKEND_CUDA ||
478 backendId == DNN_BACKEND_HALIDE;
479 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
480 float a = scale, b = shift, p = power;
// First loop: affine-only fast path (dst = a*x + b); the p == 1 guard
// between the two loops is not visible in this view.
481 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
482 for( int i = 0; i < len; i++ )
483 float x = srcptr[i];
484 dstptr[i] = a*x + b;
// General path: dst = pow(a*x + b, p).
485 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
486 for( int i = 0; i < len; i++ )
487 float x = srcptr[i];
488 dstptr[i] = pow(a*x + b, p);
// OpenCL path: "PowForward" receives power/scale/shift as args 3-5.
489 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
490 std::vector<UMat> inputs;
491 std::vector<UMat> outputs;
492 inps.getUMatVector(inputs);
493 outs.getUMatVector(outputs);
494 String buildopt = oclGetTMacro(inputs[0]);
495 for (size_t i = 0; i < inputs.size(); i++)
496 UMat& src = inputs[i];
497 UMat& dst = outputs[i];
498 ocl::Kernel kernel("PowForward", ocl::dnn::activations_oclsrc, buildopt);
499 kernel.set(0, (int)src.total());
500 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
501 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
502 kernel.set(3, (float)power);
503 kernel.set(4, (float)scale);
504 kernel.set(5, (float)shift);
505 size_t gSize = src.total();
506 CV_Assert(kernel.run(1, &gSize, NULL, false));
507 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
508 return make_cuda_node<cuda4dnn::PowerOp>(target, stream, power, scale, shift);
// Halide: builds the expression incrementally, skipping no-op scale /
// shift / power stages (the shift and power == 1 guards are not visible
// in this view).
509 void attachHalide(const Halide::Expr& input, Halide::Func& top)
510 Halide::Var x("x"), y("y"), c("c"), n("n");
511 Halide::Expr topExpr = (scale == 1.0f ? input : input * scale);
512 topExpr = pow(topExpr, power);
513 top(x, y, c, n) = topExpr;
514 #endif // HAVE_HALIDE
515 #ifdef HAVE_INF_ENGINE
516 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
517 return InferenceEngine::Builder::PowerLayer("").setPower(power)
518 #endif // HAVE_INF_ENGINE
519 std::shared_ptr<vkcom::OpBase> initVkCom()
520 // TODO: add vkcom implementation
521 return std::shared_ptr<vkcom::OpBase>();
522 #endif // HAVE_VULKAN
// Tries to fold a following 1x1 scale/shift layer into this one.
523 bool tryFuse(Ptr<dnn::Layer>& top)
// (s*x)^p * w == (s^p * w) * x^p only holds when shift == 0 or p == 1,
// hence this guard (the rejecting return is not visible in this view).
524 if (power != 1.0f && shift != 0.0f)
525 top->getScaleShift(w, b);
// Only scalar (single-element) scale/shift can be folded.
526 if ((w.empty() && b.empty()) || w.total() > 1 || b.total() > 1)
527 float nextScale = w.empty() ? 1.0f : w.at<float>(0);
528 float nextShift = b.empty() ? 0.0f : b.at<float>(0);
529 scale = std::pow(scale, power) * nextScale;
530 shift = nextScale * shift + nextShift;
// Exposes the functor's affine parameters as 1x1 Mats for fusion.
531 void getScaleShift(Mat& _scale, Mat& _shift) const
532 _scale = Mat(1, 1, CV_32F, Scalar(scale));
533 _shift = Mat(1, 1, CV_32F, Scalar(shift));
534 int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; }
// ChannelsPReLUFunctor: per-channel parametric ReLU. `scale` holds one
// learned negative slope per channel: f(x) = x for x >= 0, scale[cn]*x
// otherwise.
535 struct ChannelsPReLUFunctor
536 typedef ChannelsPReLULayer Layer;
537 explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
538 bool supportBackend(int backendId, int)
539 return backendId == DNN_BACKEND_OPENCV ||
540 backendId == DNN_BACKEND_CUDA ||
541 backendId == DNN_BACKEND_HALIDE ||
542 backendId == DNN_BACKEND_INFERENCE_ENGINE;
543 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
// Slopes are read as a flat float array, so the Mat must be continuous
// and there must be at least one slope per processed channel.
544 CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
545 const float* scaleptr = scale.ptr<float>();
546 CV_Assert( 0 <= cn0 && cn0 < cn1 && cn1 <= (int)scale.total() );
547 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
// Per-channel slope; the SIMD body below matches ReLUFunctor::apply with
// s varying per channel.
548 float s = scaleptr[cn];
549 v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();
550 for( ; i <= len - 16; i += 16 )
551 v_float32x4 x0 = v_load(srcptr + i);
552 v_float32x4 x1 = v_load(srcptr + i + 4);
553 v_float32x4 x2 = v_load(srcptr + i + 8);
554 v_float32x4 x3 = v_load(srcptr + i + 12);
555 x0 = v_select(x0 >= z, x0, x0*s4);
556 x1 = v_select(x1 >= z, x1, x1*s4);
557 x2 = v_select(x2 >= z, x2, x2*s4);
558 x3 = v_select(x3 >= z, x3, x3*s4);
559 v_store(dstptr + i, x0);
560 v_store(dstptr + i + 4, x1);
561 v_store(dstptr + i + 8, x2);
562 v_store(dstptr + i + 12, x3);
// Scalar tail.
563 for( ; i < len; i++ )
564 float x = srcptr[i];
565 dstptr[i] = x >= 0.f ? x : s*x;
566 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
// Lazily upload the slopes to the GPU once, then reuse the cached UMat.
567 if (scale_umat.empty())
568 scale.copyTo(scale_umat);
569 std::vector<UMat> inputs;
570 std::vector<UMat> outputs;
571 inps.getUMatVector(inputs);
572 outs.getUMatVector(outputs);
573 String buildopt = oclGetTMacro(inputs[0]);
574 for (size_t i = 0; i < inputs.size(); i++)
575 UMat& src = inputs[i];
576 UMat& dst = outputs[i];
577 ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);
// Args: total elements, channel count (dim 1), plane size (product of
// dims >= 2), then src/dst/slope buffers.
578 kernel.set(0, (int)src.total());
579 kernel.set(1, (int)src.size[1]);
580 kernel.set(2, (int)total(shape(src), 2));
581 kernel.set(3, ocl::KernelArg::PtrReadOnly(src));
582 kernel.set(4, ocl::KernelArg::PtrWriteOnly(dst));
583 kernel.set(5, ocl::KernelArg::PtrReadOnly(scale_umat));
584 size_t gSize = src.total();
585 CV_Assert(kernel.run(1, &gSize, NULL, false));
586 Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
587 return make_cuda_node<cuda4dnn::ChannelwiseReLUOp>(target, stream, scale)
588 void attachHalide(const Halide::Expr& input, Halide::Func& top)
589 Halide::Var x("x"), y("y"), c("c"), n("n");
// Slopes become a Halide buffer indexed by the channel variable c.
590 auto weights = wrapToHalideBuffer(scale, {(int)scale.total()});
591 top(x, y, c, n) = select(input >= 0.0f, input, weights(c) * input);
592 #endif // HAVE_HALIDE
593 #ifdef HAVE_INF_ENGINE
594 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
595 InferenceEngine::Builder::Layer l = InferenceEngine::Builder::PReLULayer("");
596 const size_t numChannels = scale.total();
597 addConstantData("weights", wrapToInfEngineBlob(scale, {numChannels}, InferenceEngine::Layout::C), l);
598 #endif // HAVE_INF_ENGINE
599 std::shared_ptr<vkcom::OpBase> initVkCom()
600 // TODO: add vkcom implementation
601 return std::shared_ptr<vkcom::OpBase>();
602 #endif // HAVE_VULKAN
603 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
604 void getScaleShift(Mat&, Mat&) const {}
605 int64 getFLOPSPerElement() const { return 1; }
// Convenience macro: stamps out a parameterless factory
// `Ptr<_Layer> _Layer::create()` that returns an ElementWiseLayer wrapping
// a default-constructed _Functor.
// Fix: the previous expansion contained a duplicated keyword
// ("return return ..."), which is a syntax error at every macro
// instantiation site.
#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
Ptr<_Layer> _Layer::create() { \
    return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
// Factory: reads "negative_slope" (default 0 = plain ReLU) from params and
// builds an ElementWiseLayer<ReLUFunctor>, mirroring the slope onto the
// public layer field.
609 Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
610 float negativeSlope = params.get<float>("negative_slope", 0.f);
611 Ptr<ReLULayer> l(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
612 l->setParamsFrom(params);
613 l->negativeSlope = negativeSlope;
// Factory: reads "min_value"/"max_value" (defaults 0 and 6) and builds an
// ElementWiseLayer<ReLU6Functor>, mirroring the bounds onto the layer.
614 Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
615 float minValue = params.get<float>("min_value", 0.0f);
616 float maxValue = params.get<float>("max_value", 6.0f);
617 Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
618 l->setParamsFrom(params);
619 l->minValue = minValue;
620 l->maxValue = maxValue;
// Factory: parameterless tanh activation layer.
621 Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
622 Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>());
623 l->setParamsFrom(params);
// Factory: parameterless sigmoid activation layer.
624 Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)
625 Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>());
626 l->setParamsFrom(params);
// Factory: parameterless ELU activation layer.
627 Ptr<ELULayer> ELULayer::create(const LayerParams& params)
628 Ptr<ELULayer> l(new ElementWiseLayer<ELUFunctor>(ELUFunctor()));
629 l->setParamsFrom(params);
// Factory: parameterless absolute-value activation layer.
630 Ptr<AbsLayer> AbsLayer::create(const LayerParams& params)
631 Ptr<AbsLayer> l(new ElementWiseLayer<AbsValFunctor>());
632 l->setParamsFrom(params);
// Factory: parameterless BNLL (softplus-style) activation layer.
633 Ptr<BNLLLayer> BNLLLayer::create(const LayerParams& params)
634 Ptr<BNLLLayer> l(new ElementWiseLayer<BNLLFunctor>());
635 l->setParamsFrom(params);
// Factory: reads "power"/"scale"/"shift" (defaults 1, 1, 0 — identity) and
// builds an ElementWiseLayer<PowerFunctor>.
636 Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
637 float power = params.get<float>("power", 1.0f);
638 float scale = params.get<float>("scale", 1.0f);
639 float shift = params.get<float>("shift", 0.0f);
640 Ptr<PowerLayer> l(new ElementWiseLayer<PowerFunctor>(PowerFunctor(power, scale, shift)));
641 l->setParamsFrom(params);
// Factory: builds a per-channel PReLU from the single weights blob.
// Degenerate case: a single shared slope is equivalent to a leaky ReLU,
// so it is rewritten as a ReLULayer with that negative_slope.
642 Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)
643 CV_Assert(params.blobs.size() == 1);
644 if (params.blobs[0].total() == 1)
645 LayerParams reluParams = params;
646 reluParams.set("negative_slope", params.blobs[0].at<float>(0));
647 return ReLULayer::create(reluParams);
// General case: one slope per channel, taken from the blob.
648 Ptr<ChannelsPReLULayer> l(new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(params.blobs[0])));
649 l->setParamsFrom(params);