result;
result.d = 0;
NAryMatIterator it(arrays, ptrs);
- int j, total = (int)it.size, blockSize = total;
- bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
- ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
- int isum = 0;
- int *ibuf = &result.i;
- AutoBuffer<float> fltbuf_;
- float* fltbuf = 0;
- size_t esz = 0;
-
- if( blockSum )
- {
- esz = src.elemSize();
+ CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
- if( depth == CV_16F )
- {
- blockSize = std::min(blockSize, 1024);
- fltbuf_.allocate(blockSize);
- fltbuf = fltbuf_.data();
- }
- else
+ if ((normType == NORM_L1 && depth <= CV_16S) ||
+ ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
+ {
+ // special case to handle "integer" overflow in accumulator
+ const size_t esz = src.elemSize();
+ const int total = (int)it.size;
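+ // pick a block small enough that the running 32-bit partial sum cannot overflow before it is flushed below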
+ const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
+ const int blockSize = std::min(total, intSumBlockSize);
+ int isum = 0;
+ int count = 0;
+
+ for (size_t i = 0; i < it.nplanes; i++, ++it)
{
- int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
- blockSize = std::min(blockSize, intSumBlockSize);
- ibuf = &isum;
+ for (int j = 0; j < total; j += blockSize)
+ {
+ int bsz = std::min(total - j, blockSize);
+ func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn);
+ count += bsz;
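+ // flush the int partial sum into the double result before the next block could overflow, and after the final block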
+ if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
+ {
+ result.d += isum;
+ isum = 0;
+ count = 0;
+ }
+ ptrs[0] += bsz*esz;
+ if (ptrs[1])
+ ptrs[1] += bsz;
+ }
}
}
-
- for( size_t i = 0; i < it.nplanes; i++, ++it )
++ else if (depth == CV_16F)
+ {
- for( j = 0; j < total; j += blockSize )
++ const size_t esz = src.elemSize();
++ const int total = (int)it.size;
++ const int blockSize = std::min(total, divUp(1024, cn));
++ AutoBuffer<float, 1024> fltbuf(blockSize * cn);
++ float* data0 = fltbuf.data();
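++ // process in blocks: widen each FP16 block to FP32, then run the FP32 kernel on it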
++ for (size_t i = 0; i < it.nplanes; i++, ++it)
+ {
- int bsz = std::min(total - j, blockSize);
- const uchar* data = ptrs[0];
- if( depth == CV_16F )
- {
- hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
- data = (const uchar*)fltbuf;
- }
- func( data, ptrs[1], (uchar*)ibuf, bsz, cn );
- if( blockSum && depth != CV_16F )
++ for (int j = 0; j < total; j += blockSize)
+ {
- result.d += isum;
- isum = 0;
++ int bsz = std::min(total - j, blockSize);
++ hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
++ func((uchar*)data0, ptrs[1], (uchar*)&result.d, bsz, cn);
++ ptrs[0] += bsz*esz;
++ if (ptrs[1])
++ ptrs[1] += bsz;
+ }
- ptrs[0] += bsz*esz;
- if( ptrs[1] )
- ptrs[1] += bsz;
++ }
++ }
+ else
+ {
+ // generic implementation
+ for (size_t i = 0; i < it.nplanes; i++, ++it)
+ {
+ func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn);
}
}
if( normType == NORM_INF )
{
-- if( depth == CV_64F )
- ;
- else if( depth == CV_32F )
- result.d = result.f;
++ if (depth == CV_64F || depth == CV_16F)
+ return result.d;
++ else if (depth == CV_32F)
+ return result.f;
else
- result.d = result.i;
+ return result.i;
}
else if( normType == NORM_L2 )
- result.d = std::sqrt(result.d);
+ return std::sqrt(result.d);
return result.d;
}
result;
result.d = 0;
NAryMatIterator it(arrays, ptrs);
- int j, total = (int)it.size, blockSize = total;
- bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
- ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
- unsigned isum = 0;
- unsigned *ibuf = &result.u;
- AutoBuffer<float> fltbuf_;
- float* fltbuf = 0;
- size_t esz = 0;
-
- if( blockSum )
- {
- esz = src1.elemSize();
+ CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
- if( depth == CV_16F )
- {
- blockSize = std::min(blockSize, 1024);
- fltbuf_.allocate(blockSize*2);
- fltbuf = fltbuf_.data();
- }
- else
+ if ((normType == NORM_L1 && depth <= CV_16S) ||
+ ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
+ {
+ // special case to handle "integer" overflow in accumulator
+ const size_t esz = src1.elemSize();
+ const int total = (int)it.size;
+ const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
+ const int blockSize = std::min(total, intSumBlockSize);
+ int isum = 0;
+ int count = 0;
+
+ for (size_t i = 0; i < it.nplanes; i++, ++it)
{
- int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
- blockSize = std::min(blockSize, intSumBlockSize);
- ibuf = &isum;
+ for (int j = 0; j < total; j += blockSize)
+ {
+ int bsz = std::min(total - j, blockSize);
+ func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn);
+ count += bsz;
+ if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
+ {
+ result.d += isum;
+ isum = 0;
+ count = 0;
+ }
+ ptrs[0] += bsz*esz;
+ ptrs[1] += bsz*esz;
+ if (ptrs[2])
+ ptrs[2] += bsz;
+ }
}
}
-
- for( size_t i = 0; i < it.nplanes; i++, ++it )
++ else if (depth == CV_16F)
+ {
- for( j = 0; j < total; j += blockSize )
++ const size_t esz = src1.elemSize();
++ const int total = (int)it.size;
++ const int blockSize = std::min(total, divUp(512, cn));
++ AutoBuffer<float, 1024> fltbuf(blockSize * cn * 2);
++ float* data0 = fltbuf.data();
++ float* data1 = fltbuf.data() + blockSize * cn;
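++ // widen both FP16 source blocks to FP32 before invoking the FP32 kernel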
++ for (size_t i = 0; i < it.nplanes; i++, ++it)
+ {
- int bsz = std::min(total - j, blockSize);
- const uchar *data0 = ptrs[0], *data1 = ptrs[1];
- if( depth == CV_16F )
- {
- hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
- hal::cvt16f32f((const float16_t*)ptrs[1], fltbuf + bsz, bsz);
- data0 = (const uchar*)fltbuf;
- data1 = (const uchar*)(fltbuf + bsz);
- }
- func( data0, data1, ptrs[2], (uchar*)ibuf, bsz, cn );
- if( blockSum && depth != CV_16F )
++ for (int j = 0; j < total; j += blockSize)
+ {
- result.d += isum;
- isum = 0;
++ int bsz = std::min(total - j, blockSize);
++ hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
++ hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
++ func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.d, bsz, cn);
++ ptrs[0] += bsz*esz;
++ ptrs[1] += bsz*esz;
++ if (ptrs[2])
++ ptrs[2] += bsz;
+ }
- ptrs[0] += bsz*esz;
- ptrs[1] += bsz*esz;
- if( ptrs[2] )
- ptrs[2] += bsz;
++ }
++ }
+ else
+ {
+ // generic implementation
+ for (size_t i = 0; i < it.nplanes; i++, ++it)
+ {
+ func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn);
}
}
if( normType == NORM_INF )
{
-- if( depth == CV_64F )
- ;
- else if( depth == CV_32F )
- result.d = result.f;
++ if (depth == CV_64F || depth == CV_16F)
+ return result.d;
++ else if (depth == CV_32F)
+ return result.f;
else
- result.d = result.u;
+ return result.u;
}
else if( normType == NORM_L2 )
- result.d = std::sqrt(result.d);
+ return std::sqrt(result.d);
return result.d;
}
inputs[i].copyTo(outputs[i]);
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
- return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
- }
- #endif
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
return Ptr<BackendNode>(new InfEngineNgraphNode(blank));
}
#endif // HAVE_DNN_NGRAPH
++
++
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++ return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
++ }
++#endif
++
};
Ptr<Layer> BlankLayer::create(const LayerParams& params)
blobs[0].copyTo(outputs[0]);
}
++
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
blobs[0].data);
return Ptr<BackendNode>(new InfEngineNgraphNode(node));
}
- #endif // HAVE_DNN_IE_NN_BUILDER_2019
-#endif // HAVE_NGRAPH
++#endif // HAVE_DNN_NGRAPH
++
+
+#ifdef HAVE_CUDA
+ Ptr<BackendNode> initCUDA(
+ void *context_,
+ const std::vector<Ptr<BackendWrapper>>& inputs,
+ const std::vector<Ptr<BackendWrapper>>& outputs
+ ) override
+ {
+ auto context = reinterpret_cast<csl::CSLContext*>(context_);
+
+ CV_Assert(blobs.size() == 1);
+ return make_cuda_node<cuda4dnn::ConstOp>(preferableTarget, std::move(context->stream), blobs[0]);
+ }
+#endif
};
}
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
- return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
- }
- #endif
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
++
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
}
#endif // HAVE_DNN_NGRAPH
++
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++ return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
++ }
++#endif
++
++
int _startAxis;
int _endAxis;
};
}
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
-
- if(pnorm != 1 && pnorm != 2)
- CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");
-
- auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
- auto input_shape = input_wrapper->getShape();
-
- NormalizeConfiguration<float> config;
- config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
- config.axis_start = clamp(startAxis, input_shape.size());
- config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
- config.norm = pnorm;
- config.eps = epsilon;
-
- const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
- return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
- }
- #endif
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
++
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
}
#endif // HAVE_DNN_NGRAPH
++
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++
++ if(pnorm != 1 && pnorm != 2)
++ CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");
++
++ auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
++ auto input_shape = input_wrapper->getShape();
++
++ NormalizeConfiguration<float> config;
++ config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
++ config.axis_start = clamp(startAxis, input_shape.size());
++ config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
++ config.norm = pnorm;
++ config.eps = epsilon;
++
++ const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
++ return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
++ }
++#endif
++
++
private:
int startAxis, endAxis;
};
}
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
- return make_cuda_node<cuda4dnn::PermuteOp>(preferableTarget, std::move(context->stream), _order);
- }
- #endif
-
- virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
- {
- #ifdef HAVE_VULKAN
- CV_Assert(!_order.empty());
- std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPermute(_order));
- return Ptr<BackendNode>(new VkComBackendNode(input, op));
- #endif // HAVE_VULKAN
- return Ptr<BackendNode>();
- }
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
++
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
}
#endif // HAVE_DNN_NGRAPH
++
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++ return make_cuda_node<cuda4dnn::PermuteOp>(preferableTarget, std::move(context->stream), _order);
++ }
++#endif
++
++
++#ifdef HAVE_VULKAN
++ virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
++ {
++ CV_Assert(!_order.empty());
++ std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPermute(_order));
++ return Ptr<BackendNode>(new VkComBackendNode(input, op));
++ }
++#endif // HAVE_VULKAN
++
++
size_t _count;
std::vector<size_t> _order;
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
+ if (backendId == DNN_BACKEND_CUDA)
+ {
+ return type == MAX || type == AVE || type == ROI;
+ }
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
- else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+ if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (computeMaxIdx)
return false;
{
return !computeMaxIdx && type != STOCHASTIC;
}
- else if (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_VKCOM)
- else if (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE)
++ if (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_VKCOM)
{
if (kernel_size.size() == 3)
return (backendId == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU);
}
}
- #ifdef HAVE_VULKAN
+#ifdef HAVE_CUDA
+ Ptr<BackendNode> initCUDA(
+ void *context_,
+ const std::vector<Ptr<BackendWrapper>>& inputs,
+ const std::vector<Ptr<BackendWrapper>>& outputs
+ ) override
+ {
+ auto context = reinterpret_cast<csl::CSLContext*>(context_);
+ if (type == ROI)
+ return make_cuda_node<cuda4dnn::ROIPoolingOp>(preferableTarget, std::move(context->stream), spatialScale);
+
+ auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
+ auto input_shape = input_wrapper->getShape();
+
+ /* storing max indices is a special case and we deal with it separately */
+ if (computeMaxIdx) {
+ CV_Assert(type == MAX);
+
+ cuda4dnn::MaxPoolingConfiguration config;
+ config.window_size.assign(std::begin(kernel_size), std::end(kernel_size));
+ config.strides.assign(std::begin(strides), std::end(strides));
+
+ if (padMode.empty())
+ {
+ config.padMode = MaxPoolingConfiguration::PaddingMode::MANUAL;
+ config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
+ }
+ else if (padMode == "VALID")
+ {
+ config.padMode = MaxPoolingConfiguration::PaddingMode::VALID;
+ }
+ else if (padMode == "SAME")
+ {
+ config.padMode = MaxPoolingConfiguration::PaddingMode::SAME;
+ }
+ else
+ {
+ CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by PoolingLayer");
+ }
+
+ config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
+
+ return make_cuda_node<cuda4dnn::MaxPoolingOp>(preferableTarget, std::move(context->stream), config);
+ }
+
+ PoolingConfiguration config;
+ if (type == MAX)
+ {
+ config.poolMode = PoolingConfiguration::PoolingMode::MAX;
+ }
+ else if (type == AVE && !avePoolPaddedArea)
+ {
+ config.poolMode = PoolingConfiguration::PoolingMode::AVERAGE_EXCLUDE_PADDING;
+ }
+ else if (type == AVE && avePoolPaddedArea)
+ {
+ config.poolMode = PoolingConfiguration::PoolingMode::AVERAGE_INCLUDE_PADDING;
+ }
+ else
+ {
+ CV_Error(Error::StsNotImplemented, "Unsupported pooling mode");
+ }
+
+ config.window_size.assign(std::begin(kernel_size), std::end(kernel_size));
+ config.strides.assign(std::begin(strides), std::end(strides));
+
+ if (padMode.empty())
+ {
+ config.padMode = PoolingConfiguration::PaddingMode::MANUAL;
+ config.pads_begin.assign(std::begin(pads_begin), std::end(pads_begin));
+ config.pads_end.assign(std::begin(pads_end), std::end(pads_end));
+ }
+ else if (padMode == "VALID")
+ {
+ config.padMode = PoolingConfiguration::PaddingMode::VALID;
+ }
+ else if (padMode == "SAME")
+ {
+ config.padMode = PoolingConfiguration::PaddingMode::SAME;
+ }
+ else
+ {
+ CV_Error(Error::StsNotImplemented, padMode + " padding mode not supported by PoolingLayer");
+ }
+
+ if (ceilMode)
+ config.roundMode = PoolingConfiguration::RoundingMode::CEIL;
+ else
+ config.roundMode = PoolingConfiguration::RoundingMode::FLOOR;
+
+ config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
+
+ return make_cuda_node<cuda4dnn::PoolingOp>(preferableTarget, std::move(context->cudnn_handle), config);
+ }
+#endif
+
++
++#ifdef HAVE_VULKAN
+ virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
+ {
- #endif
- return Ptr<BackendNode>();
+ int padding_mode;
+ vkcom::PoolType pool_type;
+ int filter_size[2] = {kernel.height, kernel.width};
+ int pad_size[2] = {pad.height, pad.width};
+ int stride_size[2] = {stride.height, stride.width};
+ pool_type = type == MAX ? vkcom::kPoolTypeMax:
+ (type == AVE ? vkcom::kPoolTypeAvg:
+ vkcom::kPoolTypeNum);
+
+ if (padMode.empty())
+ {
+ padding_mode = vkcom::kPaddingModeCaffe;
+ }
+ else if (padMode == "VALID")
+ {
+ padding_mode = vkcom::kPaddingModeValid;
+ }
+ else if (padMode == "SAME")
+ {
+ padding_mode = vkcom::kPaddingModeSame;
+ }
+ else
+ CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
+
+ std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPool(filter_size, pad_size,
+ stride_size, padding_mode,
+ pool_type, avePoolPaddedArea));
+ return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
+ }
++#endif
++
+
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
if (type == MAX)
#endif // HAVE_DNN_IE_NN_BUILDER_2019
--
#ifdef HAVE_DNN_NGRAPH
--virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
-- const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
--{
-- CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE)) || inputs.size() == 2, nodes.size() == inputs.size());
-- auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
--
-- ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
-- if (!padMode.empty())
-- pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
--
-- auto rounding_type = ceilMode ? ngraph::op::RoundingType::CEIL : ngraph::op::RoundingType::FLOOR;
-- if (type == AVE) {
-- auto exclude_pad = !avePoolPaddedArea;
-- auto ave_pool = std::make_shared<ngraph::op::v1::AvgPool>(ieInpNode, ngraph::Strides(strides),
-- ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
-- exclude_pad, rounding_type, pad_type);
-- return Ptr<BackendNode>(new InfEngineNgraphNode(ave_pool));
-- }
-- else if (type == MAX) {
-- auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
-- ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
-- rounding_type, pad_type);
-- return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool));
-- }
-- else if (type == ROI) {
-- auto& coords = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
-- auto roi = std::make_shared<ngraph::op::ROIPooling>(ieInpNode, coords,
-- ngraph::Shape{(size_t)pooledSize.height, (size_t)pooledSize.width}, spatialScale, "max");
-- return Ptr<BackendNode>(new InfEngineNgraphNode(roi));
-- }
-- else if (type == PSROI) {
-- auto& coords = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
-- auto psroi = std::make_shared<ngraph::op::PSROIPooling>(ieInpNode, coords,
-- (size_t)psRoiOutChannels, (size_t)pooledSize.width, spatialScale, 1, 1, "average");
-- return Ptr<BackendNode>(new InfEngineNgraphNode(psroi));
++ virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
++ const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
++ {
++ CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE)) || inputs.size() == 2, nodes.size() == inputs.size());
++ auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
++
++ ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
++ if (!padMode.empty())
++ pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
++
++ auto rounding_type = ceilMode ? ngraph::op::RoundingType::CEIL : ngraph::op::RoundingType::FLOOR;
++ if (type == AVE) {
++ auto exclude_pad = !avePoolPaddedArea;
++ auto ave_pool = std::make_shared<ngraph::op::v1::AvgPool>(ieInpNode, ngraph::Strides(strides),
++ ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
++ exclude_pad, rounding_type, pad_type);
++ return Ptr<BackendNode>(new InfEngineNgraphNode(ave_pool));
++ }
++ else if (type == MAX) {
++ auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
++ ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
++ rounding_type, pad_type);
++ return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool));
++ }
++ else if (type == ROI) {
++ auto& coords = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
++ auto roi = std::make_shared<ngraph::op::ROIPooling>(ieInpNode, coords,
++ ngraph::Shape{(size_t)pooledSize.height, (size_t)pooledSize.width}, spatialScale, "max");
++ return Ptr<BackendNode>(new InfEngineNgraphNode(roi));
++ }
++ else if (type == PSROI) {
++ auto& coords = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
++ auto psroi = std::make_shared<ngraph::op::PSROIPooling>(ieInpNode, coords,
++ (size_t)psRoiOutChannels, (size_t)pooledSize.width, spatialScale, 1, 1, "average");
++ return Ptr<BackendNode>(new InfEngineNgraphNode(psroi));
++ }
++ else
++ CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
}
-- else
-- CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
--}
#endif // HAVE_DNN_NGRAPH
}
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
-
- auto feature_map_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
- auto feature_map_shape = feature_map_wrapper->getShape();
-
- auto image_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
- auto image_shape = image_wrapper->getShape();
-
- PriorBoxConfiguration config;
- config.feature_map_width = feature_map_shape.rbegin()[0];
- config.feature_map_height = feature_map_shape.rbegin()[1];
- config.image_width = image_shape.rbegin()[0];
- config.image_height = image_shape.rbegin()[1];
-
- config.num_priors = _numPriors;
- config.box_widths = _boxWidths;
- config.box_heights = _boxHeights;
- config.offsets_x = _offsetsX;
- config.offsets_y = _offsetsY;
- config.stepX = _stepX;
- config.stepY = _stepY;
-
- config.variance = _variance;
-
- config.clip = _clip;
- config.normalize = _bboxesNormalized;
-
- return make_cuda_node<cuda4dnn::PriorBoxOp>(preferableTarget, std::move(context->stream), config);
- }
- #endif
-
- virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
- {
- #ifdef HAVE_VULKAN
- std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
- _clip, _numPriors,
- _variance, _offsetsX,
- _offsetsY, _boxWidths,
- _boxHeights));
- return Ptr<BackendNode>(new VkComBackendNode(input, op));
- #endif // HAVE_VULKAN
- return Ptr<BackendNode>();
- }
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
++
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
#endif // HAVE_DNN_NGRAPH
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++
++ auto feature_map_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
++ auto feature_map_shape = feature_map_wrapper->getShape();
++
++ auto image_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
++ auto image_shape = image_wrapper->getShape();
++
++ PriorBoxConfiguration config;
++ config.feature_map_width = feature_map_shape.rbegin()[0];
++ config.feature_map_height = feature_map_shape.rbegin()[1];
++ config.image_width = image_shape.rbegin()[0];
++ config.image_height = image_shape.rbegin()[1];
++
++ config.num_priors = _numPriors;
++ config.box_widths = _boxWidths;
++ config.box_heights = _boxHeights;
++ config.offsets_x = _offsetsX;
++ config.offsets_y = _offsetsY;
++ config.stepX = _stepX;
++ config.stepY = _stepY;
++
++ config.variance = _variance;
++
++ config.clip = _clip;
++ config.normalize = _bboxesNormalized;
++
++ return make_cuda_node<cuda4dnn::PriorBoxOp>(preferableTarget, std::move(context->stream), config);
++ }
++#endif
++
++
++#ifdef HAVE_VULKAN
++ virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
++ {
++ std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
++ _clip, _numPriors,
++ _variance, _offsetsX,
++ _offsetsY, _boxWidths,
++ _boxHeights));
++ return Ptr<BackendNode>(new VkComBackendNode(input, op));
++ }
++#endif // HAVE_VULKAN
++
++
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
permute->forward(inputs, outputs, internals_arr);
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
- return make_cuda_node<cuda4dnn::ReorgOp>(preferableTarget, std::move(context->stream), reorgStride);
- }
- #endif
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
++
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
}
#endif // HAVE_DNN_NGRAPH
++
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++ return make_cuda_node<cuda4dnn::ReorgOp>(preferableTarget, std::move(context->stream), reorgStride);
++ }
++#endif
++
++
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
}
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
- return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
- }
- #endif
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
++
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
}
#endif // HAVE_DNN_NGRAPH
++
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++ return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
++ }
++#endif
++
++
private:
std::vector<MatShape> outShapes;
};
CV_Error(Error::StsNotImplemented, "Unknown interpolation: " + interpolation);
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
-
- cuda4dnn::InterpolationType itype;
- if (interpolation == "nearest")
- itype = InterpolationType::NEAREST_NEIGHBOUR;
- else if (interpolation == "bilinear")
- itype = InterpolationType::BILINEAR;
- else
- CV_Error(Error::StsNotImplemented, "Requested interpolation mode is not available in resize layer.");
-
- return make_cuda_node<cuda4dnn::ResizeOp>(preferableTarget, std::move(context->stream), itype, scaleHeight, scaleWidth);
- }
- #endif
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
}
#endif // HAVE_DNN_NGRAPH
++
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++
++ cuda4dnn::InterpolationType itype;
++ if (interpolation == "nearest")
++ itype = InterpolationType::NEAREST_NEIGHBOUR;
++ else if (interpolation == "bilinear")
++ itype = InterpolationType::BILINEAR;
++ else
++ CV_Error(Error::StsNotImplemented, "Requested interpolation mode is not available in resize layer.");
++
++ return make_cuda_node<cuda4dnn::ResizeOp>(preferableTarget, std::move(context->stream), itype, scaleHeight, scaleWidth);
++ }
++#endif
++
++
protected:
int outWidth, outHeight;
const int zoomFactorWidth, zoomFactorHeight;
}
}
- #ifdef HAVE_CUDA
- Ptr<BackendNode> initCUDA(
- void *context_,
- const std::vector<Ptr<BackendWrapper>>& inputs,
- const std::vector<Ptr<BackendWrapper>>& outputs
- ) override
- {
- auto context = reinterpret_cast<csl::CSLContext*>(context_);
-
- std::vector<std::vector<std::size_t>> offsets;
- for (const auto& ranges : sliceRanges)
- {
- std::vector<std::size_t> offsets_i;
- for (const auto& range : ranges)
- offsets_i.push_back(range.start);
- offsets.push_back(std::move(offsets_i));
- }
-
- return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
- }
- #endif
+
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
#endif
#endif
++
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
}
#endif // HAVE_DNN_NGRAPH
++
++#ifdef HAVE_CUDA
++ Ptr<BackendNode> initCUDA(
++ void *context_,
++ const std::vector<Ptr<BackendWrapper>>& inputs,
++ const std::vector<Ptr<BackendWrapper>>& outputs
++ ) override
++ {
++ auto context = reinterpret_cast<csl::CSLContext*>(context_);
++
++ std::vector<std::vector<std::size_t>> offsets;
++ for (const auto& ranges : sliceRanges)
++ {
++ std::vector<std::size_t> offsets_i;
++ for (const auto& range : ranges)
++ offsets_i.push_back(range.start);
++ offsets.push_back(std::move(offsets_i));
++ }
++
++ return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
++ }
++#endif
++
};
class CropLayerImpl CV_FINAL : public SliceLayerImpl