From 9c8b9fc73384ca3945962a7850eb5e58837bab80 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 27 Jun 2014 12:44:32 +0400 Subject: [PATCH] cv::transpose --- modules/core/src/matrix.cpp | 4 ++-- modules/core/src/opencl/transpose.cl | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 653efe6..c27c961 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -3002,8 +3002,8 @@ static bool ocl_transpose( InputArray _src, OutputArray _dst ) k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(dst)); - size_t localsize[3] = { TILE_DIM, BLOCK_ROWS, 1 }; - size_t globalsize[3] = { src.cols, inplace ? src.rows : divUp(src.rows, TILE_DIM) * BLOCK_ROWS, 1 }; + size_t localsize[2] = { TILE_DIM, BLOCK_ROWS }; + size_t globalsize[2] = { src.cols, inplace ? src.rows : divUp(src.rows, TILE_DIM) * BLOCK_ROWS }; return k.run(2, globalsize, localsize, false); } diff --git a/modules/core/src/opencl/transpose.cl b/modules/core/src/opencl/transpose.cl index b5ec4b6..d56e499 100644 --- a/modules/core/src/opencl/transpose.cl +++ b/modules/core/src/opencl/transpose.cl @@ -53,7 +53,7 @@ #define TSIZE ((int)sizeof(T1)*3) #endif -#define LDS_STEP TILE_DIM +#define LDS_STEP (TILE_DIM + 1) __kernel void transpose(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dstptr, int dst_step, int dst_offset) @@ -90,6 +90,7 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off { int index_src = mad24(y, src_step, mad24(x, TSIZE, src_offset)); + #pragma unroll for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) if (y + i < src_rows) { @@ -103,6 +104,7 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off { int index_dst = mad24(y_index, dst_step, mad24(x_index, TSIZE, dst_offset)); + #pragma unroll for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) if ((y_index + i) < src_cols) { -- 2.7.4