1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include <opencv2/dnn/shape_utils.hpp>
46 #include "math_functions.hpp"
47 #include "opencl_kernels_dnn.hpp"
// Mean-variance normalization (MVN) layer implementation.
// Configuration is read from LayerParams in the constructor; the class offers
// an OpenCL path (forward_ocl) and a CPU path (forward on std::vector<Mat*>).
// NOTE(review): in this excerpt some short lines (braces, a few declarations,
// early-return checks) are not visible; the comments below describe only the
// statements that are shown.
54 class MVNLayerImpl : public MVNLayer
// Builds the layer from `params`:
//   "normalize_variance" (bool,   default true)  -> normVariance
//   "across_channels"    (bool,   default false) -> acrossChannels
//   "eps"                (double, default 1e-9)  -> eps
57 MVNLayerImpl(const LayerParams& params)
59 setParamsFrom(params);
60 normVariance = params.get<bool>("normalize_variance", true);
61 acrossChannels = params.get<bool>("across_channels", false);
62 eps = params.get<double>("eps", 1e-9);
// OpenCL forward pass.
// inputs_ / outputs_ are unpacked into vectors of UMat and processed pairwise
// by index; internals_ is not referenced in the visible lines.
// Returns bool; `ret` collects the status of each GEMV call / kernel launch.
// NOTE(review): the checks on `ret` and the final return are not visible here.
66 bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
68 std::vector<UMat> inputs;
69 std::vector<UMat> outputs;
71 inputs_.getUMatVector(inputs);
72 outputs_.getUMatVector(outputs);
74 for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
76 UMat &inpBlob = inputs[inpIdx];
77 UMat &outBlob = outputs[inpIdx];
// The first `splitDim` dimensions are folded into the row count:
// 1 dimension when acrossChannels is set, otherwise 2.
// NOTE(review): declarations of `i` and `newRows` are not visible in this
// excerpt; presumably newRows starts at 1 - confirm.
79 int splitDim = (acrossChannels) ? 1 : 2;
81 for( i = 0; i < splitDim; i++ )
82 newRows *= inpBlob.size[i];
// s[0] = number of rows, s[1] = elements per row.
84 MatShape s = shape(newRows, inpBlob.total() / newRows);
85 UMat& inpMat = inpBlob;
86 UMat& outMat = outBlob;
// Work buffers (all CV_32F): a column of ones with s[1] entries, one mean and
// one deviation value per row, and a scratch buffer of s[0] x s[1] elements.
87 UMat oneMat = UMat::ones(s[1], 1, CV_32F);
88 UMat meanMat = UMat(s[0], 1, CV_32F);
89 UMat devMat = UMat(s[0], 1, CV_32F);
90 UMat tmpMat = UMat(s[0], s[1], CV_32F);
91 float alpha = 1.0f / s[1];
// GEMV of inpMat with the ones vector, scaled by alpha = 1/s[1], into meanMat.
// Presumably this yields the per-row mean (y = alpha*A*x + 0*y) - confirm
// against the ocl4dnnGEMV contract.
93 bool ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
94 inpMat, 0, oneMat, 0, 0.0f, meanMat, 0);
// `number` is 8, 4 or 1 depending on which of these divides s[1]; it selects
// the kernel name suffix, is passed as -DNUM, and divides the second global
// work dimension.
98 int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
99 size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
100 String buildopt = format("-DNUM=%d ", number);
// Kernel "calc_mean<number>" from mvn_oclsrc: reads inpMat and meanMat, writes
// tmpMat. Presumably it stores squared deviations from the mean - the kernel
// source is not visible here, confirm.
103 String kname = format("calc_mean%d", number);
104 ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
108 kernel.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
109 kernel.set(1, (int)s[0]);
110 kernel.set(2, (int)s[1]);
111 kernel.set(3, ocl::KernelArg::PtrReadOnly(meanMat));
112 kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmpMat));
113 ret = kernel.run(2, global, NULL, false);
// Same GEMV as above, applied to tmpMat, result stored in devMat.
117 ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
118 tmpMat, 0, oneMat, 0, 0.0f, devMat, 0);
// Kernel "mvn<number>": receives inpMat, the row/column counts, eps (narrowed
// to float), meanMat and devMat, and writes outMat.
// NOTE(review): the condition guarding the "-DNORM_VARIANCE" append is not
// visible in this excerpt.
123 String kname = format("mvn%d", number);
125 buildopt += "-DNORM_VARIANCE";
126 ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
129 kernel1.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
130 kernel1.set(1, (int)s[0]);
131 kernel1.set(2, (int)s[1]);
132 kernel1.set(3, (float)eps);
133 kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat));
134 kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat));
135 kernel1.set(6, ocl::KernelArg::PtrWriteOnly(outMat));
136 ret = kernel1.run(2, global, NULL, false);
// Generic forward entry point.
// Offers the call to forward_ocl when preferableTarget is DNN_TARGET_OPENCL
// and the default OpenCL device reports isIntel(); afterwards
// Layer::forward_fallback is invoked with the same arguments.
// NOTE(review): CV_OCL_RUN presumably returns early when forward_ocl
// succeeds - confirm against the macro definition.
144 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
147 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
149 CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
150 OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
151 forward_ocl(inputs_arr, outputs_arr, internals_arr))
153 Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
// CPU forward pass: inputs[k] is normalized into outputs[k] for every k.
// `internals` is not referenced in the visible lines.
156 void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
159 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
161 for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
163 Mat &inpBlob = *inputs[inpIdx];
164 Mat &outBlob = outputs[inpIdx];
// Same row folding as in forward_ocl: 1 leading dimension when
// acrossChannels is set, otherwise 2.
// NOTE(review): declarations of `i` and `newRows` are not visible here.
166 int splitDim = (acrossChannels) ? 1 : 2;
168 for( i = 0; i < splitDim; i++ )
169 newRows *= inpBlob.size[i];
// Both blobs are reshaped to single-channel matrices with newRows rows.
170 Mat inpMat = inpBlob.reshape(1, newRows);
171 Mat outMat = outBlob.reshape(1, newRows);
// Each row is normalized on its own.
// NOTE(review): declarations of `mean` and `dev` are not visible here.
174 for ( i = 0; i < newRows; i++)
176 Mat inpRow = inpMat.row(i);
177 Mat outRow = outMat.row(i);
// The standard deviation is requested only when normVariance is set.
179 cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
// alpha is 1/(eps + stddev) when normVariance is set, otherwise 1; convertTo
// is given scale alpha and offset -mean*alpha, i.e. (x - mean) * alpha.
180 double alpha = (normVariance) ? 1/(eps + dev[0]) : 1;
181 inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha);
// FLOPS estimate: for every input shape adds
//   6 * total(shape) + 3 * total(shape, 0, normVariance ? 2 : 1).
// `outputs` is intentionally unused.
// NOTE(review): the declaration of `flops` and the return statement are not
// visible in this excerpt.
186 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
187 const std::vector<MatShape> &outputs) const
189 (void)outputs; // suppress unused variable warning
// NOTE(review): `i` is int while inputs.size() is unsigned (sign-compare).
191 for(int i = 0; i < inputs.size(); i++)
193 flops += 6*total(inputs[i]) + 3*total(inputs[i], 0, normVariance ? 2 : 1);
// Factory for the MVN layer: allocates an MVNLayerImpl constructed from
// `params` and returns it wrapped in a Ptr<MVNLayer>.
199 Ptr<MVNLayer> MVNLayer::create(const LayerParams& params)
201 return Ptr<MVNLayer>(new MVNLayerImpl(params));