#include "../precomp.hpp"
#include "layers_common.hpp"
+#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
+#include "../op_vkcom.hpp"
#include "opencv2/core/hal/hal.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include <iostream>
using namespace cv::dnn::ocl4dnn;
#endif
+#ifdef HAVE_CUDA
+#include "../cuda4dnn/primitives/convolution.hpp"
+#include "../cuda4dnn/primitives/transpose_convolution.hpp"
+using namespace cv::dnn::cuda4dnn;
+#endif
+
namespace cv
{
namespace dnn
BaseConvolutionLayerImpl(const LayerParams ¶ms)
{
setParamsFrom(params);
- getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode);
+ getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
numOutput = params.get<int>("num_output");
int ngroups = params.get<int>("group", 1);
pad = Size(pads_begin[1], pads_begin[0]);
dilation = Size(dilations[1], dilations[0]);
- adjust_pads.push_back(params.get<int>("adj_h", 0));
- adjust_pads.push_back(params.get<int>("adj_w", 0));
-
adjustPad.height = adjust_pads[0];
adjustPad.width = adjust_pads[1];
- CV_Assert(adjustPad.width < stride.width &&
- adjustPad.height < stride.height);
}
+
+ for (int i = 0; i < adjust_pads.size(); i++) {
+ CV_Assert(adjust_pads[i] < strides[i]);
+ }
+
fusedWeights = false;
fusedBias = false;
}
MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
{ // Returns shape(inpD * inpH * inpW, ksize); the patched body no longer reads outShape.
- Size out(outShape[3], outShape[2]);
+ int dims = inpShape.size(); // number of entries in the input shape
+ int inpD = dims == 5 ? inpShape[2] : 1; // depth comes from index 2 only for 5-entry shapes, otherwise 1
+ int inpH = inpShape[dims - 2]; // second-to-last entry
+ int inpW = inpShape.back(); // last entry
int inpGroupCn = blobs[0].size[1];
- int ksize = inpGroupCn * kernel.height * kernel.width;
- return shape(out.area(), ksize);
+ int ksize = inpGroupCn * std::accumulate(kernel_size.begin(), kernel_size.end(), // product of every kernel_size entry
+ 1, std::multiplies<size_t>()); // init value 1 is an int, so the running product is kept as int
+ return shape(inpD * inpH * inpW, ksize);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
+ if (backendId == DNN_BACKEND_CUDA)
+ {
+ /* only convolution 2d and 3d supported */
+ if(kernel_size.size() == 2 || kernel_size.size() == 3)
+ return true;
+
+ return false;
+ }
+
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
}
else
#endif
- return (kernel_size.size() == 3 && preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV) ||
- (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE));
+ {
+ if (kernel_size.size() == 3)
+ return (preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV);
+ else if (kernel_size.size() == 2)
+ return backendId == DNN_BACKEND_OPENCV ||
+ backendId == DNN_BACKEND_HALIDE ||
+ (backendId == DNN_BACKEND_VKCOM && haveVulkan());
+ else
+ return false;
+ }
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
}
+ virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE // builds the VkCom backend node for this layer
+ { // With HAVE_VULKAN: returns a VkComBackendNode wrapping a vkcom::OpConv; otherwise (or when group != 1) returns an empty Ptr.
+#ifdef HAVE_VULKAN
+ int out_channel = blobs[0].size[0]; // first dimension of the weights blob
+ bool has_bias = hasBias() || fusedBias; // a bias blob is attached below when either holds
+ int filter_size[2] = {kernel.height, kernel.width}; // all four arrays are ordered {height, width}
+ int pad_size[2] = {pad.height, pad.width};
+ int stride_size[2] = {stride.height, stride.width};
+ int dilation_size[2] = {dilation.height, dilation.width};
+ int activation = 0; // forwarded unchanged to OpConv; presumably means "no activation" - TODO confirm against vkcom
+ vkcom::Tensor input_tensor = VkComTensor(inputs[0]); // only the first input is consulted
+ int in_channel = input_tensor.dimSize(1);
+ int group = in_channel / blobs[0].size[1]; // input channels divided by the weights' second dimension
+
+ // TODO: support group > 1
+ if (group != 1)
+ return Ptr<BackendNode>(); // empty node: grouped convolution is not handled here
+
+ int padding_mode; // assigned in every branch below except the erroring one
+ if (padMode.empty())
+ {
+ padding_mode = vkcom::kPaddingModeCaffe; // no padMode string given
+ }
+ else if (padMode == "VALID")
+ {
+ padding_mode = vkcom::kPaddingModeValid;
+ }
+ else if (padMode == "SAME")
+ {
+ padding_mode = vkcom::kPaddingModeSame;
+ }
+ else
+ CV_Error(Error::StsError, "Unsupported padding mode " + padMode); // any other padMode value is reported as an error
+
+ std::shared_ptr<vkcom::OpBase> op(new vkcom::OpConv(out_channel, has_bias,
+ filter_size, pad_size,
+ stride_size, dilation_size,
+ activation, group,
+ padding_mode));
+
+ std::vector<Ptr<BackendWrapper> > blobsWrapper; // weights first, then (optionally) bias
+
+ if (fusedWeights)
+ {
+ Mat wm;
+ weightsMat.copyTo(wm); // to handle the case of isContinuous() == false
+ wm = wm.reshape(1, blobs[0].dims, blobs[0].size); // give the copy the same dims/sizes as blobs[0]
+ blobsWrapper.push_back(Ptr<BackendWrapper>(new VkComBackendWrapper(wm)));
+ }
+ else
+ {
+ blobsWrapper.push_back(Ptr<BackendWrapper>(new VkComBackendWrapper(blobs[0]))); // weights were not fused: use the original blob
+ }
+
+ if (has_bias)
+ {
+ Mat biasesMat({out_channel}, CV_32F, &biasvec[0]); // Mat header of out_channel floats over biasvec's buffer
+ blobsWrapper.push_back(Ptr<BackendWrapper>(new VkComBackendWrapper(biasesMat)));
+ }
+
+ return Ptr<BackendNode>(new VkComBackendNode(inputs, op, blobsWrapper));
+#endif // HAVE_VULKAN
+ return Ptr<BackendNode>(); // only reachable when HAVE_VULKAN is not defined
+ }
+
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
- CV_Assert(input->dims.size() == 4 || input->dims.size() == 5);
-
- const int inpCn = input->dims[input->dims.size() - 2]; // NOTE: input->dims are reversed (WHIO or WHDIO)
+ std::vector<size_t> dims = input->getDims();
+ CV_Assert(dims.size() == 4 || dims.size() == 5);
+ const int inpCn = dims[1];
const int outCn = blobs[0].size[0];
const int inpGroupCn = blobs[0].size[1];
const int group = inpCn / inpGroupCn;
-
- InferenceEngine::Layout layout = (input->dims.size() == 4) ? InferenceEngine::Layout::OIHW :
- InferenceEngine::Layout::NCDHW;
+ InferenceEngine::Layout layout = (dims.size() == 4) ? InferenceEngine::Layout::OIHW :
+ InferenceEngine::Layout::NCDHW;
auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
if (fusedWeights)
}
else
{
- ieWeights = InferenceEngine::make_shared_blob<float>(
- InferenceEngine::Precision::FP32, layout,
- ieWeights->dims());
+ ieWeights = InferenceEngine::make_shared_blob<float>({
+ InferenceEngine::Precision::FP32,
+ ieWeights->getTensorDesc().getDims(), layout
+ });
ieWeights->allocate();
Mat newWeights = infEngineBlobToMat(ieWeights).reshape(1, outCn);
kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
}
+#ifdef HAVE_CUDA
+ Ptr<BackendNode> initCUDA( // builds the cuda4dnn ConvolutionOp node for this layer
+ void *context_, // opaque pointer; reinterpreted below as csl::CSLContext*
+ const std::vector<Ptr<BackendWrapper>>& inputs, // must hold exactly one wrapper (asserted)
+ const std::vector<Ptr<BackendWrapper>>& outputs // must hold exactly one wrapper (asserted)
+ ) override
+ {
+ auto context = reinterpret_cast<csl::CSLContext*>(context_);
+
+ CV_Assert(inputs.size() == 1);
+ auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>(); // NOTE(review): result is dereferenced without a null check
+ auto input_shape = input_wrapper->getShape();
+
+ CV_Assert(outputs.size() == 1);
+ auto output_wrapper = outputs[0].dynamicCast<CUDABackendWrapper>(); // NOTE(review): result is dereferenced without a null check
+ auto output_shape = output_wrapper->getShape();
+
+ const auto output_feature_maps = blobs[0].size[0]; // first dimension of the weights blob
+ const auto input_feature_maps = input_shape[1]; // second entry of the input shape
+ const auto input_feature_maps_per_group = blobs[0].size[1]; // second dimension of the weights blob
+ const auto groups = input_feature_maps / input_feature_maps_per_group;
+
+ ConvolutionConfiguration config;
+ config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size));
+ config.dilations.assign(std::begin(dilations), std::end(dilations));
+ config.strides.assign(std::begin(strides), std::end(strides));
+
+ if (padMode.empty())
+ {
+ config.padMode = ConvolutionConfiguration::PaddingMode::MANUAL; // explicit pads are copied only in this mode
+ config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
+ config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
+ }
+ else if (padMode == "VALID")
+ {
+ config.padMode = ConvolutionConfiguration::PaddingMode::VALID;
+ }
+ else if (padMode == "SAME")
+ {
+ config.padMode = ConvolutionConfiguration::PaddingMode::SAME;
+ }
+ else
+ {
+ CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by ConvolutionLayer"); // any other padMode string
+ }
+
+ config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
+ config.output_shape.assign(std::begin(output_shape), std::end(output_shape));
+ config.groups = groups;
+
+ Mat filtersMat = fusedWeights ? weightsMat : blobs[0]; // fused weights take precedence over the original blob
+ Mat biasMat = (hasBias() || fusedBias) ? Mat(output_feature_maps, 1, CV_32F, biasvec.data()) : Mat(); // header over biasvec, or empty
+ if (countNonZero(biasMat) == 0) // NOTE(review): also evaluated when biasMat is already empty - confirm countNonZero accepts an empty Mat
+ biasMat = Mat(); // an all-zero bias is handed on as an empty Mat
+
+ return make_cuda_node<cuda4dnn::ConvolutionOp>(
+ preferableTarget, std::move(context->stream), std::move(context->cudnn_handle), config, filtersMat, biasMat); // NOTE(review): moves from the context's members - confirm they stay usable afterwards
+ }
+#endif
+
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
{ // Returns shape(ksize, inpD * inpH * inpW), with ksize = output channels per group times the product of kernel_size.
+ int dims = inpShape.size(); // number of entries in the input shape
int inpCn = inpShape[1];
- int inpH = inpShape[2];
- int inpW = inpShape[3];
+ int inpD = dims == 5 ? inpShape[2] : 1; // depth comes from index 2 only for 5-entry shapes, otherwise 1
+ int inpH = inpShape[dims - 2]; // second-to-last entry
+ int inpW = inpShape.back(); // last entry
int outCn = outShape[1];
int ngroups = inpCn / blobs[0].size[0];
int outGroupCn = outCn / ngroups;
- int ksize = outGroupCn * kernel.height * kernel.width;
- return shape(ksize, inpH * inpW);
+ int ksize = outGroupCn * std::accumulate(kernel_size.begin(), kernel_size.end(), // product of every kernel_size entry
+ 1, std::multiplies<size_t>()); // init value 1 is an int, so the running product is kept as int
+ return shape(ksize, inpD * inpH * inpW);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
+ if (backendId == DNN_BACKEND_CUDA)
+ {
+ /* only deconvolution 2d and 3d supported */
+ if (kernel_size.size() == 2 || kernel_size.size() == 3)
+ return true;
+
+ return false;
+ }
+
#ifdef HAVE_INF_ENGINE
- const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
+ const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW or IODHW layout
const int group = numOutput / outGroupCn;
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
- if (kernel_size.size() == 3)
- CV_Error(Error::StsNotImplemented, "Unsupported deconvolution3D layer");
+ if (kernel_size.size() == 3 && preferableTarget != DNN_TARGET_CPU) {
+ return false;
+ }
- if (adjustPad.height || adjustPad.width)
+ if (std::accumulate(adjust_pads.begin(), adjust_pads.end(), 0, std::plus<size_t>()) > 0)
{
if (padMode.empty())
{
if (preferableTarget != DNN_TARGET_CPU && group != 1)
{
- if ((adjustPad.height && pad.height) || (adjustPad.width && pad.width))
+ for (int i = 0; i < adjust_pads.size(); i++) {
+ if (adjust_pads[i] && pads_begin[i])
+ return false;
+ }
+ }
+ for (int i = 0; i < adjust_pads.size(); i++) {
+ if (pads_end[i] < adjust_pads[i])
return false;
}
- return pad.width >= adjustPad.width && pad.height >= adjustPad.height;
+ return true;
}
else if (padMode == "SAME")
{
- return kernel.width >= pad.width + 1 + adjustPad.width &&
- kernel.height >= pad.height + 1 + adjustPad.height;
+ for (int i = 0; i < adjust_pads.size(); i++) {
+ if (kernel_size[i] < pads_begin[i] + 1 + adjust_pads[i])
+ return false;
+ }
+ return true;
}
else if (padMode == "VALID")
return false;
return preferableTarget == DNN_TARGET_CPU;
}
if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16)
- return dilation.width == 1 && dilation.height == 1;
+ return std::accumulate(dilations.begin(), dilations.end(), 1, std::multiplies<size_t>()) == 1;
return true;
}
else
#endif // HAVE_INF_ENGINE
- return kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE);
+ return backendId == DNN_BACKEND_CUDA ||
+ (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE));
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
}
}
+#ifdef HAVE_CUDA
+ Ptr<BackendNode> initCUDA( // builds the cuda4dnn TransposeConvolutionOp node for this layer
+ void *context_, // opaque pointer; reinterpreted below as csl::CSLContext*
+ const std::vector<Ptr<BackendWrapper>>& inputs, // must hold exactly one wrapper (asserted)
+ const std::vector<Ptr<BackendWrapper>>& outputs // must hold exactly one wrapper (asserted)
+ ) override
+ {
+ auto context = reinterpret_cast<csl::CSLContext*>(context_);
+
+ CV_Assert(inputs.size() == 1);
+ auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>(); // NOTE(review): result is dereferenced without a null check
+ auto input_shape = input_wrapper->getShape();
+
+ CV_Assert(outputs.size() == 1);
+ auto output_wrapper = outputs[0].dynamicCast<CUDABackendWrapper>(); // NOTE(review): result is dereferenced without a null check
+ auto output_shape = output_wrapper->getShape();
+
+ const auto output_feature_maps = numOutput;
+ const auto output_feature_maps_per_group = blobs[0].size[1]; // second dimension of the weights blob
+ const auto groups = output_feature_maps / output_feature_maps_per_group;
+
+ TransposeConvolutionConfiguration config;
+ config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size));
+ config.dilations.assign(std::begin(dilations), std::end(dilations));
+ config.strides.assign(std::begin(strides), std::end(strides));
+
+ if (padMode.empty())
+ {
+ config.padMode = TransposeConvolutionConfiguration::PaddingMode::MANUAL; // explicit pads are copied only in this mode
+ config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
+ config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
+ }
+ else if (padMode == "VALID")
+ {
+ config.padMode = TransposeConvolutionConfiguration::PaddingMode::VALID;
+ }
+ else if (padMode == "SAME")
+ {
+ config.padMode = TransposeConvolutionConfiguration::PaddingMode::SAME;
+ }
+ else
+ {
+ CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by DeconvolutionLayer"); // any other padMode string
+ }
+
+ config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
+ config.output_shape.assign(std::begin(output_shape), std::end(output_shape));
+ config.groups = groups;
+
+ CV_Assert(blobs.size() >= 1); // NOTE(review): blobs[0] was already read above when computing groups
+ Mat filtersMat = fusedWeights ? weightsMat.t() : blobs[0]; // fused weights are passed transposed; otherwise the original blob
+
+ Mat biasMat = (hasBias() || fusedBias) ? biasesMat : Mat(); // biasesMat is used only when a bias exists or was fused
+ if (countNonZero(biasMat) == 0) // NOTE(review): also evaluated when biasMat is already empty - confirm countNonZero accepts an empty Mat
+ biasMat = Mat(); // an all-zero bias is handed on as an empty Mat
+
+ return make_cuda_node<cuda4dnn::TransposeConvolutionOp>(
+ preferableTarget, std::move(context->stream), std::move(context->cudnn_handle), config, filtersMat, biasMat); // NOTE(review): moves from the context's members - confirm they stay usable afterwards
+ }
+#endif
+
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &) CV_OVERRIDE
{
- auto ieWeights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
+ InferenceEngine::Layout layout = blobs[0].dims == 5? InferenceEngine::Layout::NCDHW :
+ InferenceEngine::Layout::OIHW;
+
+ auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
if (fusedWeights)
{
- ieWeights = InferenceEngine::make_shared_blob<float>(
- InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
- ieWeights->dims());
+ ieWeights = InferenceEngine::make_shared_blob<float>({
+ InferenceEngine::Precision::FP32,
+ ieWeights->getTensorDesc().getDims(), layout
+ });
ieWeights->allocate();
int inpCn = blobs[0].size[0];
transpose(weightsMat, newWeights);
}
- const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
+ const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW or IODHW layout
const int group = numOutput / outGroupCn;
InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);
if (padMode.empty())
{
- ieLayer.setPaddingsEnd({pads_end[0] - adjust_pads[0], pads_end[1] - adjust_pads[1]});
+ std::vector<size_t> paddings_end;
+ for (int i = 0; i < pads_end.size(); i++) {
+ paddings_end.push_back(pads_end[i] - adjust_pads[i]);
+ }
+ ieLayer.setPaddingsEnd(paddings_end);
}
else if (padMode == "SAME")
{
- ieLayer.setPaddingsEnd({kernel_size[0] - pads_begin[0] - 1 - adjust_pads[0],
- kernel_size[1] - pads_begin[1] - 1 - adjust_pads[1]});
+ std::vector<size_t> paddings_end;
+ for (int i = 0; i < pads_begin.size(); i++) {
+ paddings_end.push_back(kernel_size[i] - pads_begin[i] - 1 - adjust_pads[i]);
+ }
+ ieLayer.setPaddingsEnd(paddings_end);
}
ieLayer.setGroup((size_t)group);
ieLayer.setOutDepth((size_t)numOutput);
float flops = 0;
int outChannels = blobs[0].size[0];
+ size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(),
+ 1, std::multiplies<size_t>());
for (int i = 0; i < inputs.size(); i++)
{
- flops += CV_BIG_INT(2)*outChannels*kernel.area()*total(inputs[i]);
+ flops += CV_BIG_INT(2)*outChannels*karea*total(inputs[i]);
}
return flops;