X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=modules%2Fdnn%2Fsrc%2Flayers%2Fconvolution_layer.cpp;h=09bdd9360117b5ab763564b43cf8ea40aafb7162;hb=613c12e59015f4bd7909916ceee195edd7ef88d0;hp=83e881381c5346e59a55b34847b30a89c425f0dd;hpb=426482e05b9db0e891539d01177dc5be120af370;p=platform%2Fupstream%2Fopencv.git

diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 83e8813..09bdd93 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -42,6 +42,7 @@
 
 #include "../precomp.hpp"
 #include "layers_common.hpp"
+#include "../op_cuda.hpp"
 #include "../op_halide.hpp"
 #include "../op_inf_engine.hpp"
 #include "../op_vkcom.hpp"
@@ -55,6 +56,12 @@
 using namespace cv::dnn::ocl4dnn;
 #endif
 
+#ifdef HAVE_CUDA
+#include "../cuda4dnn/primitives/convolution.hpp"
+#include "../cuda4dnn/primitives/transpose_convolution.hpp"
+using namespace cv::dnn::cuda4dnn;
+#endif
+
 namespace cv
 {
 namespace dnn
@@ -241,14 +248,27 @@ public:
 
     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
     {
-        Size out(outShape[3], outShape[2]);
+        int dims = inpShape.size();
+        int inpD = dims == 5 ? inpShape[2] : 1;
+        int inpH = inpShape[dims - 2];
+        int inpW = inpShape.back();
         int inpGroupCn = blobs[0].size[1];
-        int ksize = inpGroupCn * kernel.height * kernel.width;
-        return shape(out.area(), ksize);
+        int ksize = inpGroupCn * std::accumulate(kernel_size.begin(), kernel_size.end(),
+                                                 1, std::multiplies<size_t>());
+        return shape(inpD * inpH * inpW, ksize);
     }
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+        if (backendId == DNN_BACKEND_CUDA)
+        {
+            /* only convolution 2d and 3d supported */
+            if(kernel_size.size() == 2 || kernel_size.size() == 3)
+                return true;
+
+            return false;
+        }
+
 #ifdef HAVE_INF_ENGINE
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
         {
@@ -487,8 +507,6 @@ public:
         return Ptr<BackendNode>();
     }
 
-
-
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
     {
 #ifdef HAVE_HALIDE
@@ -541,15 +559,14 @@ public:
     virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
     {
         InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
-        CV_Assert(input->dims.size() == 4 || input->dims.size() == 5);
-
-        const int inpCn = input->dims[input->dims.size() - 2];  // NOTE: input->dims are reversed (WHIO or WHDIO)
+        std::vector<size_t> dims = input->getDims();
+        CV_Assert(dims.size() == 4 || dims.size() == 5);
+        const int inpCn = dims[1];
         const int outCn = blobs[0].size[0];
         const int inpGroupCn = blobs[0].size[1];
         const int group = inpCn / inpGroupCn;
-
-        InferenceEngine::Layout layout = (input->dims.size() == 4) ? InferenceEngine::Layout::OIHW :
-                                                                     InferenceEngine::Layout::NCDHW;
+        InferenceEngine::Layout layout = (dims.size() == 4) ? InferenceEngine::Layout::OIHW :
+                                                              InferenceEngine::Layout::NCDHW;
 
         auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
         if (fusedWeights)
@@ -561,9 +578,10 @@ public:
         }
         else
         {
-            ieWeights = InferenceEngine::make_shared_blob<float>(
-                                InferenceEngine::Precision::FP32, layout,
-                                ieWeights->dims());
+            ieWeights = InferenceEngine::make_shared_blob<float>({
+                            InferenceEngine::Precision::FP32,
+                            ieWeights->getTensorDesc().getDims(), layout
+                        });
             ieWeights->allocate();
 
             Mat newWeights = infEngineBlobToMat(ieWeights).reshape(1, outCn);
@@ -1277,6 +1295,66 @@ public:
                           kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
     }
 
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(
+        void *context_,
+        const std::vector<Ptr<BackendWrapper>>& inputs,
+        const std::vector<Ptr<BackendWrapper>>& outputs
+    ) override
+    {
+        auto context = reinterpret_cast<csl::CSLContext*>(context_);
+
+        CV_Assert(inputs.size() == 1);
+        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
+        auto input_shape = input_wrapper->getShape();
+
+        CV_Assert(outputs.size() == 1);
+        auto output_wrapper = outputs[0].dynamicCast<CUDABackendWrapper>();
+        auto output_shape = output_wrapper->getShape();
+
+        const auto output_feature_maps = blobs[0].size[0];
+        const auto input_feature_maps = input_shape[1];
+        const auto input_feature_maps_per_group = blobs[0].size[1];
+        const auto groups = input_feature_maps / input_feature_maps_per_group;
+
+        ConvolutionConfiguration config;
+        config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size));
+        config.dilations.assign(std::begin(dilations), std::end(dilations));
+        config.strides.assign(std::begin(strides), std::end(strides));
+
+        if (padMode.empty())
+        {
+            config.padMode = ConvolutionConfiguration::PaddingMode::MANUAL;
+            config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
+            config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
+        }
+        else if (padMode == "VALID")
+        {
+            config.padMode = ConvolutionConfiguration::PaddingMode::VALID;
+        }
+        else if (padMode == "SAME")
+        {
+            config.padMode = ConvolutionConfiguration::PaddingMode::SAME;
+        }
+        else
+        {
+            CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by ConvolutionLayer");
+        }
+
+        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
+        config.output_shape.assign(std::begin(output_shape), std::end(output_shape));
+        config.groups = groups;
+
+        Mat filtersMat = fusedWeights ? weightsMat : blobs[0];
+        Mat biasMat = (hasBias() || fusedBias) ? Mat(output_feature_maps, 1, CV_32F, biasvec.data()) : Mat();
+        if (countNonZero(biasMat) == 0)
+            biasMat = Mat();
+
+        return make_cuda_node<cuda4dnn::ConvolutionOp>(
+            preferableTarget, std::move(context->stream), std::move(context->cudnn_handle), config, filtersMat, biasMat);
+    }
+#endif
+
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
     {
@@ -1304,18 +1382,30 @@ public:
 
     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
     {
+        int dims = inpShape.size();
         int inpCn = inpShape[1];
-        int inpH = inpShape[2];
-        int inpW = inpShape[3];
+        int inpD = dims == 5 ? inpShape[2] : 1;
+        int inpH = inpShape[dims - 2];
+        int inpW = inpShape.back();
         int outCn = outShape[1];
         int ngroups = inpCn / blobs[0].size[0];
         int outGroupCn = outCn / ngroups;
-        int ksize = outGroupCn * kernel.height * kernel.width;
-        return shape(ksize, inpH * inpW);
+        int ksize = outGroupCn * std::accumulate(kernel_size.begin(), kernel_size.end(),
+                                                 1, std::multiplies<size_t>());
+        return shape(ksize, inpD * inpH * inpW);
     }
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+        if (backendId == DNN_BACKEND_CUDA)
+        {
+            /* only deconvolution 2d and 3d supported */
+            if (kernel_size.size() == 2 || kernel_size.size() == 3)
+                return true;
+
+            return false;
+        }
+
 #ifdef HAVE_INF_ENGINE
         const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW or IODHW layout
         const int group = numOutput / outGroupCn;
@@ -1365,7 +1455,8 @@ public:
         }
         else
 #endif  // HAVE_INF_ENGINE
-        return kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE);
+        return backendId == DNN_BACKEND_CUDA ||
+               (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE));
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -1891,6 +1982,67 @@ public:
         }
     }
 
+#ifdef HAVE_CUDA
+    Ptr<BackendNode> initCUDA(
+        void *context_,
+        const std::vector<Ptr<BackendWrapper>>& inputs,
+        const std::vector<Ptr<BackendWrapper>>& outputs
+    ) override
+    {
+        auto context = reinterpret_cast<csl::CSLContext*>(context_);
+
+        CV_Assert(inputs.size() == 1);
+        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
+        auto input_shape = input_wrapper->getShape();
+
+        CV_Assert(outputs.size() == 1);
+        auto output_wrapper = outputs[0].dynamicCast<CUDABackendWrapper>();
+        auto output_shape = output_wrapper->getShape();
+
+        const auto output_feature_maps = numOutput;
+        const auto output_feature_maps_per_group = blobs[0].size[1];
+        const auto groups = output_feature_maps / output_feature_maps_per_group;
+
+        TransposeConvolutionConfiguration config;
+        config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size));
+        config.dilations.assign(std::begin(dilations), std::end(dilations));
+        config.strides.assign(std::begin(strides), std::end(strides));
+
+        if (padMode.empty())
+        {
+            config.padMode = TransposeConvolutionConfiguration::PaddingMode::MANUAL;
+            config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
+            config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
+        }
+        else if (padMode == "VALID")
+        {
+            config.padMode = TransposeConvolutionConfiguration::PaddingMode::VALID;
+        }
+        else if (padMode == "SAME")
+        {
+            config.padMode = TransposeConvolutionConfiguration::PaddingMode::SAME;
+        }
+        else
+        {
+            CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by DeconvolutionLayer");
+        }
+
+        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
+        config.output_shape.assign(std::begin(output_shape), std::end(output_shape));
+        config.groups = groups;
+
+        CV_Assert(blobs.size() >= 1);
+        Mat filtersMat = fusedWeights ? weightsMat.t() : blobs[0];
+
+        Mat biasMat = (hasBias() || fusedBias) ? biasesMat : Mat();
+        if (countNonZero(biasMat) == 0)
+            biasMat = Mat();
+
+        return make_cuda_node<cuda4dnn::TransposeConvolutionOp>(
+            preferableTarget, std::move(context->stream), std::move(context->cudnn_handle), config, filtersMat, biasMat);
+    }
+#endif
+
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
     {
 #ifdef HAVE_HALIDE
@@ -1953,9 +2105,10 @@ public:
         auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
         if (fusedWeights)
         {
-            ieWeights = InferenceEngine::make_shared_blob<float>(
-                                InferenceEngine::Precision::FP32, layout,
-                                ieWeights->dims());
+            ieWeights = InferenceEngine::make_shared_blob<float>({
+                            InferenceEngine::Precision::FP32,
+                            ieWeights->getTensorDesc().getDims(), layout
+                        });
             ieWeights->allocate();
 
             int inpCn = blobs[0].size[0];
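
Reviewer note, not part of the patch: the hunks above wire 2D/3D convolution and deconvolution into the new DNN_BACKEND_CUDA backend, but selecting that backend is left to the caller. A minimal usage sketch against the public cv::dnn API follows; the model filename and input size are hypothetical placeholders, and an OpenCV build with CUDA/cuDNN support is assumed.

    #include <opencv2/dnn.hpp>

    int main()
    {
        // "model.onnx" is a placeholder for any network the dnn module can read.
        cv::dnn::Net net = cv::dnn::readNet("model.onnx");

        // Route supported layers (such as the convolutions handled by initCUDA above)
        // to the CUDA backend; unsupported layers fall back to the default backend.
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);

        // Dummy 1x3x224x224 NCHW blob; real code would use cv::dnn::blobFromImage().
        cv::Mat blob(std::vector<int>{1, 3, 224, 224}, CV_32F, cv::Scalar(0));
        net.setInput(blob);
        cv::Mat out = net.forward();
        return 0;
    }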