#include "opencv2/core/hal/hal.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include <iostream>
+#include "opencl_kernels_dnn.hpp"
#ifdef HAVE_OPENCL
using namespace cv::dnn::ocl4dnn;
{
public:
Mat weightsMat, biasesMat;
+ UMat umat_weights;
+ UMat umat_biases;
MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const
{
}
};
+#ifdef HAVE_OPENCL
+ bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+ {
+ std::vector<UMat> inputs;
+ std::vector<UMat> outputs;
+ std::vector<UMat> internals;
+
+ inputs_.getUMatVector(inputs);
+ outputs_.getUMatVector(outputs);
+ internals_.getUMatVector(internals);
+
+ int outCn = numOutput;
+ int inpCn = inputs[0].size[1];
+
+ if (is1x1())
+ return false;
+
+ if (umat_weights.empty())
+ {
+ transpose(blobs[0].reshape(1, inpCn), umat_weights);
+ umat_biases = hasBias() ? blobs[1].reshape(1, outCn).getUMat(ACCESS_READ) :
+ UMat::zeros(outCn, 1, CV_32F);
+ }
+
+ String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type()));
+ buildopt += format("-DPAD_H=%d -DPAD_W=%d -DKERNEL_H=%d -DKERNEL_W=%d -DSTRIDE_H=%d -DSTRIDE_W=%d ",
+ pad.height, pad.width, kernel.height, kernel.width, stride.height, stride.width);
+
+ for (size_t ii = 0; ii < outputs.size(); ii++)
+ {
+ int ngroups = outCn / blobs[0].size[1];
+ int inpGroupCn = inpCn / ngroups;
+ int outGroupCn = blobs[0].size[1];
+ const UMat& inp = inputs[ii];
+ UMat& out = outputs[ii];
+ int numImg = inp.size[0];
+ int inpH = inp.size[2], inpW = inp.size[3];
+ int outH = out.size[2], outW = out.size[3];
+
+ MatShape inpshape = shape(numImg*inpCn, inpH*inpW);
+ MatShape outshape = shape(numImg*outCn, outH*outW);
+ UMat convBlob = inputs[ii].reshape(1, inpshape.size(), &inpshape[0]);
+ UMat decnBlob = out.reshape(1, outshape.size(), &outshape[0]);
+ int rows = internals[0].rows / ngroups;
+
+ for (int n = 0; n < numImg; n++)
+ {
+ for (int g = 0; g < ngroups; g++)
+ {
+ UMat colMat = internals[0].rowRange(_Range(g * rows, rows));
+ UMat convMat = convBlob.rowRange(_Range((g + n * ngroups) * inpGroupCn, inpGroupCn));
+ UMat wghtMat = umat_weights.colRange(_Range(g * inpGroupCn, inpGroupCn));
+ gemm(wghtMat, convMat, 1, noArray(), 0, colMat, 0);
+ }
+
+ for (int g = 0; g < ngroups; g++)
+ {
+ int total = outGroupCn * decnBlob.cols;
+ int index = 0;
+ int height_col = (outH + 2 * pad.height - kernel.height) / stride.height + 1;
+ int width_col = (outW + 2 * pad.width - kernel.width) / stride.width + 1;
+ int coeff_h = (1 - stride.height * kernel.width * height_col) * width_col;
+ int coeff_w = (1 - stride.width * height_col * width_col);
+
+ ocl::Kernel k("col2im", ocl::dnn::col2im_oclsrc, buildopt);
+ k.set(index++, total);
+ k.set(index++, ocl::KernelArg::PtrReadOnly(internals[0]));
+ k.set(index++, (int)(g * rows * internals[0].cols));
+ k.set(index++, outGroupCn);
+ k.set(index++, outH);
+ k.set(index++, outW);
+ k.set(index++, height_col);
+ k.set(index++, width_col);
+ k.set(index++, coeff_h);
+ k.set(index++, coeff_w);
+ k.set(index++, ocl::KernelArg::PtrReadOnly(umat_biases));
+ k.set(index++, (int)(g * outGroupCn * umat_biases.cols));
+ k.set(index++, ocl::KernelArg::PtrWriteOnly(decnBlob));
+ k.set(index++, (int)((g + n * ngroups) * outGroupCn * decnBlob.cols));
+
+ size_t global[] = { (size_t)total };
+ bool ret = k.run(1, global, NULL, false);
+ if (!ret)
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+#endif
+
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+ CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+ OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
-/*************************************************************************************
- * Copyright (c) 2015, Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation and/or
- * other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
- * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- **************************************************************************************/
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
-__kernel void col2im(const int n, __global const T* data_col, const int col_offset,
- const int height, const int width, const int channels,
- const int patch_h, const int patch_w,
- const int pad_h, const int pad_w,
- const int stride_h, const int stride_w,
- const int height_col, const int width_col,
- __global T* data_im, const int img_offset)
+__kernel void col2im(const int n, __global const T* data_col,
+ const int data_col_offset,
+ const int channels,
+ const int height, const int width,
+ const int height_col, const int width_col,
+ const int coeff_h, const int coeff_w,
+ __global const T* biasvec,
+ const int bias_offset,
+ __global T* data_im,
+ const int data_im_offset)
{
- data_col = data_col + col_offset;
- data_im = data_im + img_offset;
- int index = get_global_id(0);
- if(index < n) {
- T val = 0;
- int w = index % width + pad_w;
- int h = (index / width) % height + pad_h;
- int c = index / (width * height);
+ data_col = data_col + data_col_offset;
+ biasvec = biasvec + bias_offset;
+ data_im = data_im + data_im_offset;
+ int index = get_global_id(0);
- // compute the start and end of the output
- int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1;
- int w_col_end = min(w / stride_w + 1, width_col);
- int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1;
- int h_col_end = min(h / stride_h + 1, height_col);
+ if(index < n)
+ {
+ T val = 0.f;
+ int w = index % width + PAD_W;
+ int h = (index / width) % height + PAD_H;
+ int c = index / (width * height);
+ int h_col_start = (h < KERNEL_H) ? 0 : (h - KERNEL_H) / STRIDE_H + 1;
+ int h_col_end = min(h / STRIDE_H + 1, height_col);
+ int plane_size_col = height_col * width_col;
+ int offset = (c * KERNEL_H * KERNEL_W + h * KERNEL_W + w) * plane_size_col;
- // equivalent implementation
- int offset =
- (c * patch_h * patch_w + h * patch_w + w) * height_col * width_col;
- int coeff_h_col = (1 - stride_h * patch_w * height_col) * width_col;
- int coeff_w_col = (1 - stride_w * height_col * width_col);
- for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
- for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
- val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
- }
+ int w_col_start = (w < KERNEL_W) ? 0 : (w - KERNEL_W) / STRIDE_W + 1;
+ int w_col_end = min(w / STRIDE_W + 1, width_col);
+
+ for (int h_col = h_col_start; h_col < h_col_end; ++h_col)
+ for (int w_col = w_col_start; w_col < w_col_end; ++w_col)
+ val += data_col[offset + h_col * coeff_h + w_col * coeff_w];
+
+ data_im[index] = val + biasvec[c];
}
- data_im[index] = val;
- }
}