From 65a6674c6ed8096bde0132a13daf1055283e7677 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Thu, 1 Feb 2018 11:35:35 +0300 Subject: [PATCH] ocl4dnnGEMV in case of row_size < 4 --- modules/dnn/src/ocl4dnn/src/math_functions.cpp | 38 ++++++++++++++------------ 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/modules/dnn/src/ocl4dnn/src/math_functions.cpp b/modules/dnn/src/ocl4dnn/src/math_functions.cpp index 5fe52ac..c52a8a9 100644 --- a/modules/dnn/src/ocl4dnn/src/math_functions.cpp +++ b/modules/dnn/src/ocl4dnn/src/math_functions.cpp @@ -451,23 +451,27 @@ bool ocl4dnnGEMV(const CBLAS_TRANSPOSE TransA, uint row_size = M; uint col_size = N; - size_t localsize[] = { 128 }; - size_t globalsize[] = { row_size / 4 * localsize[0] }; - - uint argId = 0; - k.set(argId++, ocl::KernelArg::PtrReadOnly(A)); - k.set(argId++, offA); - k.set(argId++, cl_uint(col_size)); - k.set(argId++, cl_uint(col_size%4)); - k.set(argId++, ocl::KernelArg::PtrReadOnly(x)); - k.set(argId++, offx); - k.set(argId++, alpha); - k.set(argId++, beta); - k.set(argId++, ocl::KernelArg::PtrWriteOnly(y)); - k.set(argId++, offy); - k.set(argId++, NULL, localsize[0] * sizeof(cl_float4)); - - ret = k.run(1, globalsize, localsize, false); + + if (row_size >= 4) + { + size_t localsize[] = { 128 }; + size_t globalsize[] = { row_size / 4 * localsize[0] }; + + uint argId = 0; + k.set(argId++, ocl::KernelArg::PtrReadOnly(A)); + k.set(argId++, offA); + k.set(argId++, cl_uint(col_size)); + k.set(argId++, cl_uint(col_size%4)); + k.set(argId++, ocl::KernelArg::PtrReadOnly(x)); + k.set(argId++, offx); + k.set(argId++, alpha); + k.set(argId++, beta); + k.set(argId++, ocl::KernelArg::PtrWriteOnly(y)); + k.set(argId++, offy); + k.set(argId++, NULL, localsize[0] * sizeof(cl_float4)); + + ret = k.run(1, globalsize, localsize, false); + } if ((row_size % 4) != 0 && ret) { -- 2.7.4