From cf93df41fc4f4d81f46cb0bd54cb32b69324a7b2 Mon Sep 17 00:00:00 2001
From: YashasSamaga
Date: Mon, 16 Dec 2019 15:38:12 +0530
Subject: [PATCH] enable tensor cores for fp16 convolutions

---
 modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp b/modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp
index 792776e..679429b 100644
--- a/modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp
+++ b/modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp
@@ -224,6 +224,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu
                     );
                 }
                 CUDA4DNN_CHECK_CUDNN(cudnnSetConvolutionGroupCount(descriptor, group_count));
+                if (std::is_same<T, half>::value)
+                    CUDA4DNN_CHECK_CUDNN(cudnnSetConvolutionMathType(descriptor, CUDNN_TENSOR_OP_MATH));
             } catch (...) {
                 /* cudnnDestroyConvolutionDescriptor will not fail for a valid desriptor object */
                 CUDA4DNN_CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(descriptor));
-- 
2.7.4