enable tensor cores for fp16 convolutions
author YashasSamaga <yashas_2010@yahoo.com>
Mon, 16 Dec 2019 10:08:12 +0000 (15:38 +0530)
committer YashasSamaga <yashas_2010@yahoo.com>
Mon, 16 Dec 2019 10:08:12 +0000 (15:38 +0530)
modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp

index 792776e..679429b 100644 (file)
@@ -224,6 +224,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu
                     );
                 }
                 CUDA4DNN_CHECK_CUDNN(cudnnSetConvolutionGroupCount(descriptor, group_count));
+                if (std::is_same<T, half>::value)
+                    CUDA4DNN_CHECK_CUDNN(cudnnSetConvolutionMathType(descriptor, CUDNN_TENSOR_OP_MATH));
             } catch (...) {
                 /* cudnnDestroyConvolutionDescriptor will not fail for a valid desriptor object */
                 CUDA4DNN_CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(descriptor));