From 050cca0422d461fdb0c0a1c5567eb42b8cef06c8 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Mon, 3 Mar 2014 19:04:32 +0400
Subject: [PATCH] improved performance of cv::flip (flip cols)

---
 modules/core/src/copy.cpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp
index 3c051e6..2e72d0d 100644
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@@ -481,14 +481,13 @@ enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS
 
 static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
 {
-    int type = _src.type(), cn = CV_MAT_CN(type);
+    CV_Assert(flipCode >= - 1 && flipCode <= 1);
+    int type = _src.type(), cn = CV_MAT_CN(type), flipType;
 
     if (cn > 4 || cn == 3)
         return false;
 
     const char * kernelName;
-    int flipType;
-
     if (flipCode == 0)
         kernelName = "arithm_flip_rows", flipType = FLIP_ROWS;
     else if (flipCode > 0)
@@ -514,11 +513,16 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
     _dst.create(size, type);
     UMat src = _src.getUMat(), dst = _dst.getUMat();
 
-    cols = flipType == FLIP_COLS ? ((cols+1)/2) : cols;
-    rows = flipType & FLIP_ROWS ? ((rows+1)/2) : rows;
+    cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols;
+    rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows;
+
+    k.args(ocl::KernelArg::ReadOnlyNoSize(src),
+           ocl::KernelArg::WriteOnly(dst), rows, cols);
 
-    size_t globalsize[2] = { cols, rows };
-    return k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), rows, cols).run(2, globalsize, NULL, false);
+    size_t maxWorkGroupSize = ocl::Device::getDefault().maxWorkGroupSize();
+    CV_Assert(maxWorkGroupSize % 4 == 0);
+    size_t globalsize[2] = { cols, rows }, localsize[2] = { maxWorkGroupSize / 4, 4 };
+    return k.run(2, globalsize, flipType == FLIP_COLS ? localsize : NULL, false);
 }
 
 #endif
-- 
2.7.4