1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "../op_halide.hpp"
46 #include "../op_inf_engine.hpp"
47 #include "../op_vkcom.hpp"
48 #include <opencv2/dnn/shape_utils.hpp>
52 #include "opencl_kernels_dnn.hpp"
65 template<typename Func>
// Generic element-wise activation layer. The Func policy type supplies the
// per-element transform (apply), backend support queries, backend graph
// builders (Halide / Inference Engine / Vulkan), the OpenCL path and the
// fusion hooks. NOTE(review): many interior lines are elided in this view;
// comments below describe only what is visible.
66 class ElementWiseLayer : public Func::Layer
// Parallel worker: applies the functor to one stripe of each plane per job.
69 class PBody : public cv::ParallelLoopBody
77 PBody(const Func &func, const Mat &src, Mat& dst, int nstripes)
85 void operator()(const Range &r) const CV_OVERRIDE
87 int nstripes = nstripes_, nsamples = 1, outCn = 1;
// Presumably the >=2-D case: dim 0 = samples, dim 1 = channels ...
92 nsamples = src_->size[0];
93 outCn = src_->size[1];
// ... otherwise dim 0 is treated as the channel count — TODO confirm
// against the elided branch condition.
96 outCn = src_->size[0];
// Plane size = product of the remaining (spatial) dimensions.
98 for (int i = 2; i < src_->dims; ++i)
99 planeSize *= src_->size[i];
// Divide each plane into nstripes chunks; the last stripe is clamped so
// stripeEnd never runs past the plane.
101 size_t stripeSize = (planeSize + nstripes - 1)/nstripes;
102 size_t stripeStart = r.start*stripeSize;
103 size_t stripeEnd = std::min(r.end*stripeSize, planeSize);
105 for( int i = 0; i < nsamples; i++ )
107 const float* srcptr = src_->ptr<float>(i) + stripeStart;
108 float* dstptr = dst_->ptr<float>(i) + stripeStart;
109 func_->apply(srcptr, dstptr, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
114 ElementWiseLayer(const Func &f=Func()) : run_parallel(false) { func = f; }
// Backend support is delegated to the functor (may depend on the target).
116 virtual bool supportBackend(int backendId) CV_OVERRIDE
118 return func.supportBackend(backendId, this->preferableTarget);
// Attempts to fuse this activation into an existing backend node
// (currently only the Halide case is visible here).
121 virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
123 switch (node->backendId)
125 case DNN_BACKEND_HALIDE:
128 auto base = node.dynamicCast<HalideBackendNode>();
// Chain the activation onto the last Func of the preceding node.
129 Halide::Func& input = base->funcs.back();
130 Halide::Var x("x"), y("y"), c("c"), n("n");
131 Halide::Func top = (this->name.empty() ? Halide::Func() : Halide::Func(this->name));
132 func.attachHalide(input(x, y, c, n), top);
133 return Ptr<BackendNode>(new HalideBackendNode(base, top));
134 #endif // HAVE_HALIDE
// Unsupported backend: empty node tells the caller that no fusion happened.
138 return Ptr<BackendNode>();
141 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
144 Halide::Buffer<float> input = halideBuffer(inputs[0]);
145 Halide::Var x("x"), y("y"), c("c"), n("n");
146 Halide::Func top = (this->name.empty() ? Halide::Func() : Halide::Func(this->name));
147 func.attachHalide(input(x, y, c, n), top);
148 return Ptr<BackendNode>(new HalideBackendNode(top));
149 #endif // HAVE_HALIDE
150 return Ptr<BackendNode>();
153 #ifdef HAVE_INF_ENGINE
154 virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
156 InferenceEngine::Builder::Layer ieLayer = func.initInfEngineBuilderAPI();
157 ieLayer.setName(this->name);
158 return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
160 #endif // HAVE_INF_ENGINE
162 virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
165 return Ptr<BackendNode>(new VkComBackendNode(inputs, func.initVkCom()));
166 #endif // HAVE_VULKAN
167 return Ptr<BackendNode>()
// Fusion with a following layer is also delegated to the functor.
170 virtual bool tryFuse(Ptr<dnn::Layer>& top) CV_OVERRIDE
172 return func.tryFuse(top);
175 void getScaleShift(Mat& scale_, Mat& shift_) const CV_OVERRIDE
177 func.getScaleShift(scale_, shift_);
// Element-wise layers preserve input shapes; defer to the base class.
180 bool getMemoryShapes(const std::vector<MatShape> &inputs,
181 const int requiredOutputs,
182 std::vector<MatShape> &outputs,
183 std::vector<MatShape> &internals) const CV_OVERRIDE
185 Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
189 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
// On OpenCL targets, try the functor's OpenCL implementation first.
193 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget),
194 func.applyOCL(inputs_arr, outputs_arr, internals_arr))
// CV_16S input (FP16 stored as short) goes through the generic fallback.
196 if (inputs_arr.depth() == CV_16S)
198 Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
202 std::vector<Mat> inputs, outputs;
203 inputs_arr.getMatVector(inputs);
204 outputs_arr.getMatVector(outputs);
206 for (size_t i = 0; i < inputs.size(); i++)
208 const Mat &src = inputs[i];
209 Mat &dst = outputs[i];
// PBody requires dense, continuous CV_32F data with matching layout.
210 CV_Assert(src.size == dst.size && src.type() == dst.type() &&
211 src.isContinuous() && dst.isContinuous() && src.type() == CV_32F);
213 const int nstripes = getNumThreads();
214 PBody body(func, src, dst, nstripes);
215 parallel_for_(Range(0, nstripes), body, nstripes);
// Used by fused layers to run the activation in place over a sub-range.
219 void forwardSlice(const float* src, float* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
221 func.apply(src, dst, len, planeSize, cn0, cn1);
224 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
225 const std::vector<MatShape> &outputs) const CV_OVERRIDE
// FLOPs = total output elements times the functor's per-element cost.
228 for (int i = 0; i < outputs.size(); i++)
230 flops += total(outputs[i]) * func.getFLOPSPerElement();
// Builds the "-DT=<type> -Dconvert_T=convert_<type>" OpenCL build options
// for the element type of m. The "short" case is special-cased (its
// remapping, presumably to half, is in a line elided from this view).
240 static String oclGetTMacro(const UMat &m)
242 String str_name = ocl::typeToStr(m.type());
244 if (str_name == "short")
247 return format("-DT=%s -Dconvert_T=convert_%s ", str_name.c_str(), str_name.c_str());
// ReLU / Leaky-ReLU functor: f(x) = x for x >= 0, slope*x otherwise.
253 typedef ReLULayer Layer;
256 explicit ReLUFunctor(float slope_=1.f) : slope(slope_) {}
258 bool supportBackend(int backendId, int)
260 #ifdef HAVE_INF_ENGINE
261 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
// Negative slopes are rejected on IE 2019R1 only (version workaround).
262 return slope >= 0 || !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1);
264 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
265 backendId == DNN_BACKEND_VKCOM;
268 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
271 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
// SIMD path: 16 floats (4 vectors) per iteration.
275 v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();
276 for( ; i <= len - 16; i += 16 )
278 v_float32x4 x0 = v_load(srcptr + i);
279 v_float32x4 x1 = v_load(srcptr + i + 4);
280 v_float32x4 x2 = v_load(srcptr + i + 8);
281 v_float32x4 x3 = v_load(srcptr + i + 12);
282 x0 = v_select(x0 >= z, x0, x0*s4);
283 x1 = v_select(x1 >= z, x1, x1*s4);
284 x2 = v_select(x2 >= z, x2, x2*s4);
285 x3 = v_select(x3 >= z, x3, x3*s4);
286 v_store(dstptr + i, x0);
287 v_store(dstptr + i + 4, x1);
288 v_store(dstptr + i + 8, x2);
289 v_store(dstptr + i + 12, x3);
// Scalar tail for the remaining elements.
292 for( ; i < len; i++ )
295 dstptr[i] = x >= 0.f ? x : s*x;
// Creates the OpenCL kernel; RELU_NO_SLOPE selects the slope-free variant.
301 bool initKernel(ocl::Kernel &ker, const UMat &src) const
303 const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
304 String buildopt = oclGetTMacro(src) + buildoptSlope;
306 if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
// Arg 3 is the slope; presumably only set on the sloped variant (the
// guard is elided from this view).
310 ker.set(3, (float)slope);
315 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
317 std::vector<UMat> inputs;
318 std::vector<UMat> outputs;
320 inps.getUMatVector(inputs);
321 outs.getUMatVector(outputs);
323 for (size_t i = 0; i < inputs.size(); i++)
325 UMat& src = inputs[i];
326 UMat& dst = outputs[i];
// Kernel indexes flat buffers, so views/offsets are not supported.
327 CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
330 CV_Assert(initKernel(kernel, src));
331 kernel.set(0, (int)src.total());
332 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
333 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
335 size_t gSize = src.total();
336 CV_Assert(kernel.run(1, &gSize, NULL, false));
344 void attachHalide(const Halide::Expr& input, Halide::Func& top)
346 Halide::Var x("x"), y("y"), c("c"), n("n");
// Leaky variant (the slope guard between the two forms is elided) ...
349 top(x, y, c, n) = select(input >= 0.0f, input, slope * input);
// ... plain ReLU otherwise.
353 top(x, y, c, n) = max(input, 0.0f);
356 #endif // HAVE_HALIDE
358 #ifdef HAVE_INF_ENGINE
359 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
361 return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(slope);
363 #endif // HAVE_INF_ENGINE
366 std::shared_ptr<vkcom::OpBase> initVkCom()
368 std::shared_ptr<vkcom::OpBase> op(new vkcom::OpReLU(slope));
371 #endif // HAVE_VULKAN
// ReLU neither fuses with a following layer nor exposes scale/shift.
375 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
377 void getScaleShift(Mat&, Mat&) const {}
379 int64 getFLOPSPerElement() const { return 1; }
// ReLU6 / clip functor: f(x) = min(max(x, minValue), maxValue).
384 typedef ReLU6Layer Layer;
385 float minValue, maxValue;
387 ReLU6Functor(float minValue_ = 0.0f, float maxValue_ = 6.0f)
388 : minValue(minValue_), maxValue(maxValue_)
390 CV_Assert(minValue <= maxValue);
393 bool supportBackend(int backendId, int)
395 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
396 backendId == DNN_BACKEND_INFERENCE_ENGINE;
399 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
401 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
// SIMD path: clamp 16 floats (4 vectors) per iteration.
405 v_float32x4 minV = v_setall_f32(minValue), maxV = v_setall_f32(maxValue);
406 for( ; i <= len - 16; i += 16 )
408 v_float32x4 x0 = v_load(srcptr + i);
409 v_float32x4 x1 = v_load(srcptr + i + 4);
410 v_float32x4 x2 = v_load(srcptr + i + 8);
411 v_float32x4 x3 = v_load(srcptr + i + 12);
412 x0 = v_min(v_max(minV, x0), maxV);
413 x1 = v_min(v_max(minV, x1), maxV);
414 x2 = v_min(v_max(minV, x2), maxV);
415 x3 = v_min(v_max(minV, x3), maxV);
416 v_store(dstptr + i, x0);
417 v_store(dstptr + i + 4, x1);
418 v_store(dstptr + i + 8, x2);
419 v_store(dstptr + i + 12, x3);
// Scalar tail; the branch structure (presumably an x >= minValue guard)
// is partially elided from this view.
422 for( ; i < len; i++ )
426 dstptr[i] = x <= maxValue ? x : maxValue;
428 dstptr[i] = minValue;
434 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
436 std::vector<UMat> inputs;
437 std::vector<UMat> outputs;
439 inps.getUMatVector(inputs);
440 outs.getUMatVector(outputs);
441 String buildopt = oclGetTMacro(inputs[0]);
443 for (size_t i = 0; i < inputs.size(); i++)
445 UMat& src = inputs[i];
446 UMat& dst = outputs[i];
448 ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
449 kernel.set(0, (int)src.total());
450 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
451 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
452 kernel.set(3, (float)minValue);
453 kernel.set(4, (float)maxValue);
455 size_t gSize = src.total();
456 CV_Assert(kernel.run(1, &gSize, NULL, false));
464 void attachHalide(const Halide::Expr& input, Halide::Func& top)
466 Halide::Var x("x"), y("y"), c("c"), n("n");
467 top(x, y, c, n) = clamp(input, minValue, maxValue);
469 #endif // HAVE_HALIDE
471 #ifdef HAVE_INF_ENGINE
472 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
474 return InferenceEngine::Builder::ClampLayer("").setMinValue(minValue).setMaxValue(maxValue);
476 #endif // HAVE_INF_ENGINE
479 std::shared_ptr<vkcom::OpBase> initVkCom()
481 // TODO: add vkcom implementation
482 return std::shared_ptr<vkcom::OpBase>();
484 #endif // HAVE_VULKAN
486 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
488 void getScaleShift(Mat&, Mat&) const {}
// One min plus one max per element.
490 int64 getFLOPSPerElement() const { return 2; }
// Hyperbolic-tangent activation functor: f(x) = tanh(x).
495 typedef TanHLayer Layer;
497 bool supportBackend(int backendId, int)
499 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
500 backendId == DNN_BACKEND_INFERENCE_ENGINE;
503 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
505 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
// Per-element body (presumably dstptr[i] = tanh(srcptr[i])) is elided
// from this view.
507 for( int i = 0; i < len; i++ )
516 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
518 std::vector<UMat> inputs;
519 std::vector<UMat> outputs;
521 inps.getUMatVector(inputs);
522 outs.getUMatVector(outputs);
523 String buildopt = oclGetTMacro(inputs[0]);
525 for (size_t i = 0; i < inputs.size(); i++)
527 UMat& src = inputs[i];
528 UMat& dst = outputs[i];
530 ocl::Kernel kernel("TanHForward", ocl::dnn::activations_oclsrc, buildopt);
531 kernel.set(0, (int)src.total());
532 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
533 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
535 size_t gSize = src.total();
536 CV_Assert(kernel.run(1, &gSize, NULL, false));
544 void attachHalide(const Halide::Expr& input, Halide::Func& top)
546 Halide::Var x("x"), y("y"), c("c"), n("n");
547 top(x, y, c, n) = tanh(input);
549 #endif // HAVE_HALIDE
551 #ifdef HAVE_INF_ENGINE
552 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
554 return InferenceEngine::Builder::TanHLayer("");
556 #endif // HAVE_INF_ENGINE
559 std::shared_ptr<vkcom::OpBase> initVkCom()
561 // TODO: add vkcom implementation
562 return std::shared_ptr<vkcom::OpBase>();
564 #endif // HAVE_VULKAN
566 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
568 void getScaleShift(Mat&, Mat&) const {}
570 int64 getFLOPSPerElement() const { return 1; }
// Logistic-sigmoid activation functor: f(x) = 1 / (1 + exp(-x)).
573 struct SigmoidFunctor
575 typedef SigmoidLayer Layer;
577 bool supportBackend(int backendId, int)
579 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
580 backendId == DNN_BACKEND_INFERENCE_ENGINE;
583 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
585 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
587 for( int i = 0; i < len; i++ )
590 dstptr[i] = 1.f/(1.f + exp(-x));
596 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
598 std::vector<UMat> inputs;
599 std::vector<UMat> outputs;
601 inps.getUMatVector(inputs);
602 outs.getUMatVector(outputs);
603 String buildopt = oclGetTMacro(inputs[0]);
605 for (size_t i = 0; i < inputs.size(); i++)
607 UMat& src = inputs[i];
608 UMat& dst = outputs[i];
610 ocl::Kernel kernel("SigmoidForward", ocl::dnn::activations_oclsrc, buildopt);
611 kernel.set(0, (int)src.total());
612 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
613 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
615 size_t gSize = src.total();
616 CV_Assert(kernel.run(1, &gSize, NULL, false));
624 void attachHalide(const Halide::Expr& input, Halide::Func& top)
626 Halide::Var x("x"), y("y"), c("c"), n("n");
627 top(x, y, c, n) = 1.0f / (1.0f + exp(-input));
629 #endif // HAVE_HALIDE
631 #ifdef HAVE_INF_ENGINE
632 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
634 return InferenceEngine::Builder::SigmoidLayer("");
636 #endif // HAVE_INF_ENGINE
639 std::shared_ptr<vkcom::OpBase> initVkCom()
641 // TODO: add vkcom implementation
642 return std::shared_ptr<vkcom::OpBase>();
644 #endif // HAVE_VULKAN
646 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
648 void getScaleShift(Mat&, Mat&) const {}
// Roughly: negate, exp, divide.
650 int64 getFLOPSPerElement() const { return 3; }
// ELU activation functor: f(x) = x for x >= 0, exp(x) - 1 otherwise.
655 typedef ELULayer Layer;
657 explicit ELUFunctor() {}
659 bool supportBackend(int backendId, int)
661 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
662 backendId == DNN_BACKEND_INFERENCE_ENGINE;
665 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
667 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
669 for(int i = 0; i < len; i++ )
672 dstptr[i] = x >= 0.f ? x : exp(x) - 1;
678 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
680 std::vector<UMat> inputs;
681 std::vector<UMat> outputs;
683 inps.getUMatVector(inputs);
684 outs.getUMatVector(outputs);
685 String buildopt = oclGetTMacro(inputs[0]);
687 for (size_t i = 0; i < inputs.size(); i++)
689 UMat& src = inputs[i];
690 UMat& dst = outputs[i];
692 ocl::Kernel kernel("ELUForward", ocl::dnn::activations_oclsrc, buildopt);
693 kernel.set(0, (int)src.total());
694 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
695 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
697 size_t gSize = src.total();
698 CV_Assert(kernel.run(1, &gSize, NULL, false));
706 void attachHalide(const Halide::Expr& input, Halide::Func& top)
708 Halide::Var x("x"), y("y"), c("c"), n("n");
709 top(x, y, c, n) = select(input >= 0.0f, input, exp(input) - 1);
711 #endif // HAVE_HALIDE
713 #ifdef HAVE_INF_ENGINE
714 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
716 return InferenceEngine::Builder::ELULayer("");
718 #endif // HAVE_INF_ENGINE
721 std::shared_ptr<vkcom::OpBase> initVkCom()
723 // TODO: add vkcom implementation
724 return std::shared_ptr<vkcom::OpBase>();
726 #endif // HAVE_VULKAN
728 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
730 void getScaleShift(Mat&, Mat&) const {}
732 int64 getFLOPSPerElement() const { return 2; }
// Absolute-value activation functor: f(x) = |x|.
737 typedef AbsLayer Layer;
739 bool supportBackend(int backendId, int)
741 #ifdef HAVE_INF_ENGINE
// Disabled on IE 2019R1 only (version-specific workaround).
742 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
743 return !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1);
745 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
748 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
750 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
// Per-element body (presumably dstptr[i] = abs(srcptr[i])) is elided
// from this view.
752 for( int i = 0; i < len; i++ )
761 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
763 std::vector<UMat> inputs;
764 std::vector<UMat> outputs;
766 inps.getUMatVector(inputs);
767 outs.getUMatVector(outputs);
768 String buildopt = oclGetTMacro(inputs[0]);
770 for (size_t i = 0; i < inputs.size(); i++)
772 UMat& src = inputs[i];
773 UMat& dst = outputs[i];
775 ocl::Kernel kernel("AbsValForward", ocl::dnn::activations_oclsrc, buildopt);
776 kernel.set(0, (int)src.total());
777 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
778 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
780 size_t gSize = src.total();
781 CV_Assert(kernel.run(1, &gSize, NULL, false));
789 void attachHalide(const Halide::Expr& input, Halide::Func& top)
791 Halide::Var x("x"), y("y"), c("c"), n("n");
792 top(x, y, c, n) = abs(input);
794 #endif // HAVE_HALIDE
796 #ifdef HAVE_INF_ENGINE
797 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
// No native Abs layer in the IE builder API here: |x| is approximated by
// a leaky ReLU with negative slope close to -1.
799 return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(-0.999999f);
801 #endif // HAVE_INF_ENGINE
804 std::shared_ptr<vkcom::OpBase> initVkCom()
806 // TODO: add vkcom implementation
807 return std::shared_ptr<vkcom::OpBase>();
809 #endif // HAVE_VULKAN
811 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
813 void getScaleShift(Mat&, Mat&) const {}
815 int64 getFLOPSPerElement() const { return 1; }
// BNLL (binomial normal log-likelihood / softplus) functor:
// f(x) = log(1 + exp(x)), computed in a numerically stable form.
820 typedef BNLLLayer Layer;
822 bool supportBackend(int backendId, int)
824 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
827 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
829 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
831 for( int i = 0; i < len; i++ )
// Stable softplus: keep exp() argument non-positive to avoid overflow.
834 // https://github.com/BVLC/caffe/blame/1.0/src/caffe/layers/bnll_layer.cpp#L17
835 dstptr[i] = x > 0 ? x + log(1. + exp(-x)) : log(1. + exp(x));
841 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
843 std::vector<UMat> inputs;
844 std::vector<UMat> outputs;
846 inps.getUMatVector(inputs);
847 outs.getUMatVector(outputs);
848 String buildopt = oclGetTMacro(inputs[0]);
850 for (size_t i = 0; i < inputs.size(); i++)
852 UMat& src = inputs[i];
853 UMat& dst = outputs[i];
855 ocl::Kernel kernel("BNLLForward", ocl::dnn::activations_oclsrc, buildopt);
856 kernel.set(0, (int)src.total());
857 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
858 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
860 size_t gSize = src.total();
861 CV_Assert(kernel.run(1, &gSize, NULL, false));
869 void attachHalide(const Halide::Expr& input, Halide::Func& top)
871 Halide::Var x("x"), y("y"), c("c"), n("n");
872 // https://github.com/BVLC/caffe/blame/1.0/src/caffe/layers/bnll_layer.cpp#L17
873 top(x, y, c, n) = max(input, 0) + log(1.0f + exp(-abs(input)));
875 #endif // HAVE_HALIDE
877 #ifdef HAVE_INF_ENGINE
// BNLL has no IE builder equivalent.
878 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
880 CV_Error(Error::StsNotImplemented, "");
882 #endif // HAVE_INF_ENGINE
885 std::shared_ptr<vkcom::OpBase> initVkCom()
887 // TODO: add vkcom implementation
888 return std::shared_ptr<vkcom::OpBase>();
890 #endif // HAVE_VULKAN
892 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
894 void getScaleShift(Mat&, Mat&) const {}
896 int64 getFLOPSPerElement() const { return 5; }
// Power activation functor: f(x) = (scale*x + shift) ^ power.
901 typedef PowerLayer Layer;
907 explicit PowerFunctor(float power_ = 1.f, float scale_ = 1.f, float shift_ = 0.f)
908 : power(power_), scale(scale_), shift(shift_) {}
910 bool supportBackend(int backendId, int targetId)
// IE OpenCL targets only handle power 1 (linear) and 0.5 (sqrt).
912 if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
913 return (targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) || power == 1.0 || power == 0.5;
915 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
918 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
920 float a = scale, b = shift, p = power;
// First branch: presumably the fast path for p == 1 (pure a*x + b);
// its guard and body are elided from this view ...
923 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
925 for( int i = 0; i < len; i++ )
// ... general case: full pow() per element.
934 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
936 for( int i = 0; i < len; i++ )
939 dstptr[i] = pow(a*x + b, p);
946 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
948 std::vector<UMat> inputs;
949 std::vector<UMat> outputs;
951 inps.getUMatVector(inputs);
952 outs.getUMatVector(outputs);
953 String buildopt = oclGetTMacro(inputs[0]);
955 for (size_t i = 0; i < inputs.size(); i++)
957 UMat& src = inputs[i];
958 UMat& dst = outputs[i];
960 ocl::Kernel kernel("PowForward", ocl::dnn::activations_oclsrc, buildopt);
961 kernel.set(0, (int)src.total());
962 kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
963 kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
964 kernel.set(3, (float)power);
965 kernel.set(4, (float)scale);
966 kernel.set(5, (float)shift);
968 size_t gSize = src.total();
969 CV_Assert(kernel.run(1, &gSize, NULL, false));
977 void attachHalide(const Halide::Expr& input, Halide::Func& top)
979 Halide::Var x("x"), y("y"), c("c"), n("n");
// Skip no-op multiplications; shift handling and the power != 1 guard
// are elided from this view.
980 Halide::Expr topExpr = (scale == 1.0f ? input : input * scale);
987 topExpr = pow(topExpr, power);
989 top(x, y, c, n) = topExpr;
991 #endif // HAVE_HALIDE
993 #ifdef HAVE_INF_ENGINE
994 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
996 return InferenceEngine::Builder::PowerLayer("").setPower(power)
1000 #endif // HAVE_INF_ENGINE
1003 std::shared_ptr<vkcom::OpBase> initVkCom()
1005 // TODO: add vkcom implementation
1006 return std::shared_ptr<vkcom::OpBase>();
1008 #endif // HAVE_VULKAN
// Absorb a following 1x1 scale/shift layer into this functor's params.
1010 bool tryFuse(Ptr<dnn::Layer>& top)
// Fusion is valid only when power == 1 or shift == 0, so that
// (a*x)^p * s == (a^p * s) * x^p holds.
1012 if (power != 1.0f && shift != 0.0f)
1016 top->getScaleShift(w, b);
// Only scalar (single-value) scale/shift can be folded in.
1017 if ((w.empty() && b.empty()) || w.total() > 1 || b.total() > 1)
1020 float nextScale = w.empty() ? 1.0f : w.at<float>(0);
1021 float nextShift = b.empty() ? 0.0f : b.at<float>(0);
1022 scale = std::pow(scale, power) * nextScale;
1023 shift = nextScale * shift + nextShift;
// Exposes the parameters as 1x1 Mats (used when power == 1 — the guard,
// if any, is elided from this view).
1027 void getScaleShift(Mat& _scale, Mat& _shift) const
1031 _scale = Mat(1, 1, CV_32F, Scalar(scale));
1032 _shift = Mat(1, 1, CV_32F, Scalar(shift));
1036 int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; }
// Channel-wise PReLU functor: f(x) = x for x >= 0, scale[channel]*x
// otherwise, with one learned slope per channel stored in `scale`.
1040 struct ChannelsPReLUFunctor
1042 typedef ChannelsPReLULayer Layer;
1048 explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
1052 bool supportBackend(int backendId, int)
1054 return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
1055 backendId == DNN_BACKEND_INFERENCE_ENGINE;
1058 void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
1060 CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
1062 const float* scaleptr = scale.ptr<float>();
// Every requested channel must have a slope entry.
1063 CV_Assert( 0 <= cn0 && cn0 < cn1 && cn1 <= (int)scale.total() );
1065 for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
1067 float s = scaleptr[cn];
// SIMD path: 16 floats (4 vectors) per iteration, same shape as ReLU.
1070 v_float32x4 s4 = v_setall_f32(s), z = v_setzero_f32();
1071 for( ; i <= len - 16; i += 16 )
1073 v_float32x4 x0 = v_load(srcptr + i);
1074 v_float32x4 x1 = v_load(srcptr + i + 4);
1075 v_float32x4 x2 = v_load(srcptr + i + 8);
1076 v_float32x4 x3 = v_load(srcptr + i + 12);
1077 x0 = v_select(x0 >= z, x0, x0*s4);
1078 x1 = v_select(x1 >= z, x1, x1*s4);
1079 x2 = v_select(x2 >= z, x2, x2*s4);
1080 x3 = v_select(x3 >= z, x3, x3*s4);
1081 v_store(dstptr + i, x0);
1082 v_store(dstptr + i + 4, x1);
1083 v_store(dstptr + i + 8, x2);
1084 v_store(dstptr + i + 12, x3);
// Scalar tail for the remaining elements.
1087 for( ; i < len; i++ )
1089 float x = srcptr[i];
1090 dstptr[i] = x >= 0.f ? x : s*x;
1096 bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
// Lazily upload the slope weights to the device on first use.
1098 if (scale_umat.empty())
1099 scale.copyTo(scale_umat);
1101 std::vector<UMat> inputs;
1102 std::vector<UMat> outputs;
1104 inps.getUMatVector(inputs);
1105 outs.getUMatVector(outputs);
1106 String buildopt = oclGetTMacro(inputs[0]);
1108 for (size_t i = 0; i < inputs.size(); i++)
1110 UMat& src = inputs[i];
1111 UMat& dst = outputs[i];
1113 ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);
1114 kernel.set(0, (int)src.total());
// Arg 1 = channel count, arg 2 = per-channel plane size.
1115 kernel.set(1, (int)src.size[1]);
1116 kernel.set(2, (int)total(shape(src), 2));
1117 kernel.set(3, ocl::KernelArg::PtrReadOnly(src));
1118 kernel.set(4, ocl::KernelArg::PtrWriteOnly(dst));
1119 kernel.set(5, ocl::KernelArg::PtrReadOnly(scale_umat));
1121 size_t gSize = src.total();
1122 CV_Assert(kernel.run(1, &gSize, NULL, false));
1130 void attachHalide(const Halide::Expr& input, Halide::Func& top)
1132 Halide::Var x("x"), y("y"), c("c"), n("n");
1133 auto weights = wrapToHalideBuffer(scale, {(int)scale.total()});
1134 top(x, y, c, n) = select(input >= 0.0f, input, weights(c) * input);
1136 #endif // HAVE_HALIDE
1138 #ifdef HAVE_INF_ENGINE
1139 InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
1141 InferenceEngine::Builder::Layer l = InferenceEngine::Builder::PReLULayer("");
1142 const size_t numChannels = scale.total();
1143 addConstantData("weights", wrapToInfEngineBlob(scale, {numChannels}, InferenceEngine::Layout::C), l);
1146 #endif // HAVE_INF_ENGINE
1149 std::shared_ptr<vkcom::OpBase> initVkCom()
1151 // TODO: add vkcom implementation
1152 return std::shared_ptr<vkcom::OpBase>();
1154 #endif // HAVE_VULKAN
1156 bool tryFuse(Ptr<dnn::Layer>&) { return false; }
1158 void getScaleShift(Mat&, Mat&) const {}
1160 int64 getFLOPSPerElement() const { return 1; }
// Convenience macro: defines a parameterless _Layer::create() factory that
// wraps a default-constructed _Functor in an ElementWiseLayer.
// Fixed: the body contained a duplicated "return return" token, which would
// be a syntax error at any instantiation site.
1163 #define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
1164 Ptr<_Layer> _Layer::create() { \
1165 return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
// Factory: reads "negative_slope" (default 0 = plain ReLU) and builds a
// ReLU / Leaky-ReLU element-wise layer.
1168 Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
1170 float negativeSlope = params.get<float>("negative_slope", 0.f);
1171 Ptr<ReLULayer> l(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
1172 l->setParamsFrom(params);
// Mirror the slope on the public layer attribute as well.
1173 l->negativeSlope = negativeSlope;
// Factory: reads "min_value"/"max_value" (defaults 0 and 6) and builds a
// clip (ReLU6) element-wise layer.
1178 Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
1180 float minValue = params.get<float>("min_value", 0.0f);
1181 float maxValue = params.get<float>("max_value", 6.0f);
1182 Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
1183 l->setParamsFrom(params);
// Mirror the bounds on the public layer attributes as well.
1184 l->minValue = minValue;
1185 l->maxValue = maxValue;
// Factory for the tanh element-wise layer (no parameters beyond the name).
1190 Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
1192 Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>());
1193 l->setParamsFrom(params);
// Factory for the sigmoid element-wise layer (no extra parameters).
1198 Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)
1200 Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>());
1201 l->setParamsFrom(params);
// Factory for the ELU element-wise layer (no extra parameters).
1206 Ptr<ELULayer> ELULayer::create(const LayerParams& params)
1208 Ptr<ELULayer> l(new ElementWiseLayer<ELUFunctor>(ELUFunctor()));
1209 l->setParamsFrom(params);
// Factory for the absolute-value element-wise layer (no extra parameters).
1214 Ptr<AbsLayer> AbsLayer::create(const LayerParams& params)
1216 Ptr<AbsLayer> l(new ElementWiseLayer<AbsValFunctor>());
1217 l->setParamsFrom(params);
// Factory for the BNLL (softplus) element-wise layer (no extra parameters).
1222 Ptr<BNLLLayer> BNLLLayer::create(const LayerParams& params)
1224 Ptr<BNLLLayer> l(new ElementWiseLayer<BNLLFunctor>());
1225 l->setParamsFrom(params);
// Factory: reads "power"/"scale"/"shift" (defaults 1, 1, 0) and builds a
// (scale*x + shift)^power element-wise layer.
1230 Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
1232 float power = params.get<float>("power", 1.0f);
1233 float scale = params.get<float>("scale", 1.0f);
1234 float shift = params.get<float>("shift", 0.0f);
1235 Ptr<PowerLayer> l(new ElementWiseLayer<PowerFunctor>(PowerFunctor(power, scale, shift)));
1236 l->setParamsFrom(params);
1244 Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)
1246 CV_Assert(params.blobs.size() == 1);
1247 if (params.blobs[0].total() == 1)
1249 LayerParams reluParams = params;
1250 reluParams.set("negative_slope", params.blobs[0].at<float>(0));
1251 return ReLULayer::create(reluParams);
1253 Ptr<ChannelsPReLULayer> l(new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(params.blobs[0])));
1254 l->setParamsFrom(params);