From febe528363bf158500c4ef355927934822d530d1 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 16 Dec 2013 12:55:14 +0400 Subject: [PATCH] Added ocl::pyrUp to T-API --- modules/imgproc/src/opencl/pyr_down.cl | 1021 ++-------------------------- modules/imgproc/src/opencl/pyr_up.cl | 159 +++++ modules/imgproc/src/pyramids.cpp | 82 ++- modules/imgproc/test/ocl/test_pyramids.cpp | 89 ++- 4 files changed, 325 insertions(+), 1026 deletions(-) create mode 100644 modules/imgproc/src/opencl/pyr_up.cl diff --git a/modules/imgproc/src/opencl/pyr_down.cl b/modules/imgproc/src/opencl/pyr_down.cl index 015c0b0..745813f 100644 --- a/modules/imgproc/src/opencl/pyr_down.cl +++ b/modules/imgproc/src/opencl/pyr_down.cl @@ -43,10 +43,12 @@ // //M*/ -#if cn == 4 -#define FT float4 -#else -#define FT float +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif #endif #define noconvert @@ -81,266 +83,32 @@ inline int idx_col(int x, int last_col) return idx_col_low(idx_col_high(x, last_col), last_col); } -__kernel void pyrDown(__global const uchar * srcData, int src_step, int src_offset, int src_rows, int src_cols, +__kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols) { const int x = get_global_id(0); const int y = get_group_id(1); __local FT smem[256 + 4]; + __global T * dstData = (__global T *)(dst + dst_offset); + __global const uchar * srcData = (__global const uchar*)(src + src_offset); FT sum; - FT co1 = 0.375f; FT co2 = 0.25f; FT co3 = 0.0625f; const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) - { - sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * srcStep / cn))[x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * srcStep / cn))[x]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * srcStep / cn))[x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * srcStep / cn))[x]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * srcStep / cn))[x]); - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * srcStep / cn))[left_x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * srcStep / cn))[left_x]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * srcStep / cn))[left_x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * srcStep / cn))[left_x]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * srcStep / cn))[left_x]); - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * srcStep / cn))[right_x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * srcStep / cn))[right_x]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * srcStep / cn))[right_x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * srcStep / cn))[right_x]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * srcStep / cn))[right_x]); - - smem[4 + get_local_id(0)] = sum; - } - } - else - { - int col = idx_col(x, last_col); - - sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * srcStep / cn))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * srcStep / cn))[col]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * srcStep / cn))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * srcStep / cn))[col]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * srcStep / cn))[col]); - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - col = idx_col(left_x, last_col); - - sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * srcStep / cn))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * srcStep / cn))[col]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * srcStep / cn))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * srcStep / cn))[col]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * srcStep / cn))[col]); - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); - - sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * srcStep / cn))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * srcStep / cn))[col]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * srcStep / cn))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * srcStep / cn))[col]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * srcStep / cn))[col]); - - smem[4 + get_local_id(0)] = sum; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (get_local_id(0) < 128) - { - const int tid2 = get_local_id(0) * 2; - - sum = co3 * smem[2 + tid2 - 2]; - sum = sum + co2 * smem[2 + tid2 - 1]; - sum = sum + co1 * smem[2 + tid2 ]; - sum = sum + co2 * smem[2 + tid2 + 1]; - sum = sum + co3 * smem[2 + tid2 + 2]; - - const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - - if (dst_x < dstCols) - dst[y * dstStep + dst_x] = convertToT(sum); - } - -} - -/////////////////////////////////////////////////////////////////////// -////////////////////////// CV_8UC1 /////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - -__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstCols) -{ - const int x = get_global_id(0); - const int y = get_group_id(1); - - __local float smem[256 + 4]; - - float sum; - - const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) - { - sum = 0.0625f * (((srcData + (src_y - 2) * srcStep))[x]); - sum = sum + 0.25f * (((srcData + (src_y - 1) * srcStep))[x]); - sum = sum + 0.375f * (((srcData + (src_y ) * srcStep))[x]); - sum = sum + 0.25f * (((srcData + (src_y + 1) * srcStep))[x]); - sum = sum + 0.0625f * (((srcData + (src_y + 2) * srcStep))[x]); - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - sum = 0.0625f * (((srcData + (src_y - 2) * srcStep))[left_x]); - sum = sum + 0.25f * (((srcData + (src_y - 1) * srcStep))[left_x]); - sum = sum + 0.375f * (((srcData + (src_y ) * srcStep))[left_x]); - sum = sum + 0.25f * (((srcData + (src_y + 1) * srcStep))[left_x]); - sum = sum + 0.0625f * (((srcData + (src_y + 2) * srcStep))[left_x]); - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - sum = 0.0625f * (((srcData + (src_y - 2) * srcStep))[right_x]); - sum = sum + 0.25f * (((srcData + (src_y - 1) * srcStep))[right_x]); - sum = sum + 0.375f * (((srcData + (src_y ) * srcStep))[right_x]); - sum = sum + 0.25f * (((srcData + (src_y + 1) * srcStep))[right_x]); - sum = sum + 0.0625f * (((srcData + (src_y + 2) * srcStep))[right_x]); - - smem[4 + get_local_id(0)] = sum; - } - } - else - { - int col = idx_col(x, last_col); - - sum = 0.0625f * (((srcData + idx_row(src_y - 2, last_row) * srcStep))[col]); - sum = sum + 0.25f * (((srcData + idx_row(src_y - 1, last_row) * srcStep))[col]); - sum = sum + 0.375f * (((srcData + idx_row(src_y , last_row) * srcStep))[col]); - sum = sum + 0.25f * (((srcData + idx_row(src_y + 1, last_row) * srcStep))[col]); - sum = sum + 0.0625f * (((srcData + idx_row(src_y + 2, last_row) * srcStep))[col]); - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - col = idx_col(left_x, last_col); - - sum = 0.0625f * (((srcData + idx_row(src_y - 2, last_row) * srcStep))[col]); - sum = sum + 0.25f * (((srcData + idx_row(src_y - 1, last_row) * srcStep))[col]); - sum = sum + 0.375f * (((srcData + idx_row(src_y , last_row) * srcStep))[col]); - sum = sum + 0.25f * (((srcData + idx_row(src_y + 1, last_row) * srcStep))[col]); - sum = sum + 0.0625f * (((srcData + idx_row(src_y + 2, last_row) * srcStep))[col]); - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); - - sum = 0.0625f * (((srcData + idx_row(src_y - 2, last_row) * srcStep))[col]); - sum = sum + 0.25f * (((srcData + idx_row(src_y - 1, last_row) * srcStep))[col]); - sum = sum + 0.375f * (((srcData + idx_row(src_y , last_row) * srcStep))[col]); - sum = sum + 0.25f * (((srcData + idx_row(src_y + 1, last_row) * srcStep))[col]); - sum = sum + 0.0625f * (((srcData + idx_row(src_y + 2, last_row) * srcStep))[col]); - - smem[4 + get_local_id(0)] = sum; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (get_local_id(0) < 128) - { - const int tid2 = get_local_id(0) * 2; - - sum = 0.0625f * smem[2 + tid2 - 2]; - sum = sum + 0.25f * smem[2 + tid2 - 1]; - sum = sum + 0.375f * smem[2 + tid2 ]; - sum = sum + 0.25f * smem[2 + tid2 + 1]; - sum = sum + 0.0625f * smem[2 + tid2 + 2]; - - const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - - if (dst_x < dstCols) - dst[y * dstStep + dst_x] = convert_uchar_sat_rte(sum); - } -} - -/////////////////////////////////////////////////////////////////////// -////////////////////////// CV_8UC4 /////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - -__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstCols) -{ - const int x = get_global_id(0); - const int y = get_group_id(1); - - __local float4 smem[256 + 4]; - - float4 sum; - - const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; + const int last_row = src_rows - 1; + const int last_col = src_cols - 1; - float4 co1 = 0.375f; - float4 co2 = 0.25f; - float4 co3 = 0.0625f; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) + if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2) { - sum = co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[x])); - sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[x])); - sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[x])); - sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[x])); - sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[x])); + sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[x]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[x]); + sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[x]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[x]); + sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[x]); smem[2 + get_local_id(0)] = sum; @@ -348,11 +116,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, { const int left_x = x - 2; - sum = co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[left_x])); - sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[left_x])); - sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[left_x])); - sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[left_x])); - sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[left_x])); + sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[left_x]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[left_x]); + sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[left_x]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[left_x]); + sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[left_x]); smem[get_local_id(0)] = sum; } @@ -361,11 +129,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, { const int right_x = x + 2; - sum = co3 * convert_float4((((srcData + (src_y - 2) * srcStep / 4))[right_x])); - sum = sum + co2 * convert_float4((((srcData + (src_y - 1) * srcStep / 4))[right_x])); - sum = sum + co1 * convert_float4((((srcData + (src_y ) * srcStep / 4))[right_x])); - sum = sum + co2 * convert_float4((((srcData + (src_y + 1) * srcStep / 4))[right_x])); - sum = sum + co3 * convert_float4((((srcData + (src_y + 2) * srcStep / 4))[right_x])); + sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[right_x]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[right_x]); + sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[right_x]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[right_x]); + sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[right_x]); smem[4 + get_local_id(0)] = sum; } @@ -374,11 +142,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, { int col = idx_col(x, last_col); - sum = co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col])); - sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col])); - sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col])); - sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col])); - sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col])); + sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]); + sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]); + sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]); smem[2 + get_local_id(0)] = sum; @@ -388,11 +156,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, col = idx_col(left_x, last_col); - sum = co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col])); - sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col])); - sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col])); - sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col])); - sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col])); + sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]); + sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]); + sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]); smem[get_local_id(0)] = sum; } @@ -403,11 +171,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, col = idx_col(right_x, last_col); - sum = co3 * convert_float4((((srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col])); - sum = sum + co2 * convert_float4((((srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col])); - sum = sum + co1 * convert_float4((((srcData + idx_row(src_y , last_row) * srcStep / 4))[col])); - sum = sum + co2 * convert_float4((((srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col])); - sum = sum + co3 * convert_float4((((srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col])); + sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]); + sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]); + sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]); + sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]); smem[4 + get_local_id(0)] = sum; } @@ -427,709 +195,8 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows, const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - if (dst_x < dstCols) - dst[y * dstStep / 4 + dst_x] = convert_uchar4_sat_rte(sum); - } -} - -/////////////////////////////////////////////////////////////////////// -////////////////////////// CV_16UC1 ////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - -__kernel void pyrDown_C1_D2(__global ushort * srcData, int srcStep, int srcRows, int srcCols, __global ushort *dst, int dstStep, int dstCols) -{ - const int x = get_global_id(0); - const int y = get_group_id(1); - - __local float smem[256 + 4]; - - float sum; - - const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) - { - sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[x]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[x]; - sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[x]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[x]; - sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[x]; - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x]; - sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[left_x]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x]; - sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x]; - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - sum = 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x]; - sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + (src_y ) * srcStep))[right_x]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x]; - sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x]; - - smem[4 + get_local_id(0)] = sum; - } - } - else - { - int col = idx_col(x, last_col); - - sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - col = idx_col(left_x, last_col); - - sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); - - sum = 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global ushort*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global ushort*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[4 + get_local_id(0)] = sum; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (get_local_id(0) < 128) - { - const int tid2 = get_local_id(0) * 2; - - sum = 0.0625f * smem[2 + tid2 - 2]; - sum = sum + 0.25f * smem[2 + tid2 - 1]; - sum = sum + 0.375f * smem[2 + tid2 ]; - sum = sum + 0.25f * smem[2 + tid2 + 1]; - sum = sum + 0.0625f * smem[2 + tid2 + 2]; - - const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - - if (dst_x < dstCols) - dst[y * dstStep / 2 + dst_x] = convert_ushort_sat_rte(sum); - } -} - -/////////////////////////////////////////////////////////////////////// -////////////////////////// CV_16UC4 ////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - -__kernel void pyrDown_C4_D2(__global ushort4 * srcData, int srcStep, int srcRows, int srcCols, __global ushort4 *dst, int dstStep, int dstCols) -{ - const int x = get_global_id(0); - const int y = get_group_id(1); - - __local float4 smem[256 + 4]; - - float4 sum; - - const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; - - float4 co1 = 0.375f; - float4 co2 = 0.25f; - float4 co3 = 0.0625f; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) - { - sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]); - sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]); - sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]); - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]); - sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]); - sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]); - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]); - sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]); - sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]); - - smem[4 + get_local_id(0)] = sum; - } + if (dst_x < dst_cols) + dstData[y * dst_step / ((int)sizeof(T)) + dst_x] = convertToT(sum); } - else - { - int col = idx_col(x, last_col); - - sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]); - sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]); - sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]); - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - col = idx_col(left_x, last_col); - - sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]); - sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]); - sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]); - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); - - sum = co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]); - sum = sum + co1 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]); - sum = sum + co3 * convert_float4(((__global ushort4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]); - - smem[4 + get_local_id(0)] = sum; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (get_local_id(0) < 128) - { - const int tid2 = get_local_id(0) * 2; - - sum = co3 * smem[2 + tid2 - 2]; - sum = sum + co2 * smem[2 + tid2 - 1]; - sum = sum + co1 * smem[2 + tid2 ]; - sum = sum + co2 * smem[2 + tid2 + 1]; - sum = sum + co3 * smem[2 + tid2 + 2]; - - const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - - if (dst_x < dstCols) - dst[y * dstStep / 8 + dst_x] = convert_ushort4_sat_rte(sum); - } -} - -/////////////////////////////////////////////////////////////////////// -////////////////////////// CV_16SC1 ////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - -__kernel void pyrDown_C1_D3(__global short * srcData, int srcStep, int srcRows, int srcCols, __global short *dst, int dstStep, int dstCols) -{ - const int x = get_global_id(0); - const int y = get_group_id(1); - - __local float smem[256 + 4]; - - float sum; - - const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) - { - sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[x]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[x]; - sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[x]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[x]; - sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[x]; - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x]; - sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[left_x]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x]; - sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x]; - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - sum = 0.0625f * ((__global short*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x]; - sum = sum + 0.375f * ((__global short*)((__global char*)srcData + (src_y ) * srcStep))[right_x]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x]; - sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x]; - - smem[4 + get_local_id(0)] = sum; - } - } - else - { - int col = idx_col(x, last_col); - - sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - col = idx_col(left_x, last_col); - - sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); - - sum = 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global short*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global short*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global short*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[4 + get_local_id(0)] = sum; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (get_local_id(0) < 128) - { - const int tid2 = get_local_id(0) * 2; - - sum = 0.0625f * smem[2 + tid2 - 2]; - sum = sum + 0.25f * smem[2 + tid2 - 1]; - sum = sum + 0.375f * smem[2 + tid2 ]; - sum = sum + 0.25f * smem[2 + tid2 + 1]; - sum = sum + 0.0625f * smem[2 + tid2 + 2]; - - const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - - if (dst_x < dstCols) - dst[y * dstStep / 2 + dst_x] = convert_short_sat_rte(sum); - } -} - -/////////////////////////////////////////////////////////////////////// -////////////////////////// CV_16SC4 ////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - -__kernel void pyrDown_C4_D3(__global short4 * srcData, int srcStep, int srcRows, int srcCols, __global short4 *dst, int dstStep, int dstCols) -{ - const int x = get_global_id(0); - const int y = get_group_id(1); - - __local float4 smem[256 + 4]; - - float4 sum; - - const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; - - float4 co1 = 0.375f; - float4 co2 = 0.25f; - float4 co3 = 0.0625f; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) - { - sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]); - sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]); - sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]); - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]); - sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]); - sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]); - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]); - sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]); - sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]); - - smem[4 + get_local_id(0)] = sum; - } - } - else - { - int col = idx_col(x, last_col); - - sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]); - sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]); - sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]); - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - col = idx_col(left_x, last_col); - - sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]); - sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]); - sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]); - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); - - sum = co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]); - sum = sum + co1 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]); - sum = sum + co2 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]); - sum = sum + co3 * convert_float4(((__global short4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]); - - smem[4 + get_local_id(0)] = sum; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (get_local_id(0) < 128) - { - const int tid2 = get_local_id(0) * 2; - - sum = co3 * smem[2 + tid2 - 2]; - sum = sum + co2 * smem[2 + tid2 - 1]; - sum = sum + co1 * smem[2 + tid2 ]; - sum = sum + co2 * smem[2 + tid2 + 1]; - sum = sum + co3 * smem[2 + tid2 + 2]; - - const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - - if (dst_x < dstCols) - dst[y * dstStep / 8 + dst_x] = convert_short4_sat_rte(sum); - } -} - -/////////////////////////////////////////////////////////////////////// -////////////////////////// CV_32FC1 ////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - -__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcRows, int srcCols, __global float *dst, int dstStep, int dstCols) -{ - const int x = get_global_id(0); - const int y = get_group_id(1); - - __local float smem[256 + 4]; - - float sum; - - const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) - { - sum = 0.0625f * ((__global float*)((__global char*)srcData + (src_y - 2) * srcStep))[x]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + (src_y - 1) * srcStep))[x]; - sum = sum + 0.375f * ((__global float*)((__global char*)srcData + (src_y ) * srcStep))[x]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + (src_y + 1) * srcStep))[x]; - sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + (src_y + 2) * srcStep))[x]; - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - sum = 0.0625f * ((__global float*)((__global char*)srcData + (src_y - 2) * srcStep))[left_x]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + (src_y - 1) * srcStep))[left_x]; - sum = sum + 0.375f * ((__global float*)((__global char*)srcData + (src_y ) * srcStep))[left_x]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + (src_y + 1) * srcStep))[left_x]; - sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + (src_y + 2) * srcStep))[left_x]; - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - sum = 0.0625f * ((__global float*)((__global char*)srcData + (src_y - 2) * srcStep))[right_x]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + (src_y - 1) * srcStep))[right_x]; - sum = sum + 0.375f * ((__global float*)((__global char*)srcData + (src_y ) * srcStep))[right_x]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + (src_y + 1) * srcStep))[right_x]; - sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + (src_y + 2) * srcStep))[right_x]; - - smem[4 + get_local_id(0)] = sum; - } - } - else - { - int col = idx_col(x, last_col); - - sum = 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - col = idx_col(left_x, last_col); - - sum = 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); - - sum = 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[col]; - sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[col]; - sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[col]; - sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[col]; - - smem[4 + get_local_id(0)] = sum; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (get_local_id(0) < 128) - { - const int tid2 = get_local_id(0) * 2; - - sum = 0.0625f * smem[2 + tid2 - 2]; - sum = sum + 0.25f * smem[2 + tid2 - 1]; - sum = sum + 0.375f * smem[2 + tid2 ]; - sum = sum + 0.25f * smem[2 + tid2 + 1]; - sum = sum + 0.0625f * smem[2 + tid2 + 2]; - - const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - - if (dst_x < dstCols) - dst[y * dstStep / 4 + dst_x] = sum; - } -} - -/////////////////////////////////////////////////////////////////////// -////////////////////////// CV_32FC4 ////////////////////////////////// -/////////////////////////////////////////////////////////////////////// - -__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstCols) -{ - const int x = get_global_id(0); - const int y = get_group_id(1); - - __local float4 smem[256 + 4]; - - float4 sum; - - const int src_y = 2*y; - const int last_row = srcRows - 1; - const int last_col = srcCols - 1; - - float4 co1 = 0.375f; - float4 co2 = 0.25f; - float4 co3 = 0.0625f; - - if (src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2) - { - sum = co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[x]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[x]; - sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[x]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[x]; - sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[x]; - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - sum = co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[left_x]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[left_x]; - sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[left_x]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[left_x]; - sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[left_x]; - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - sum = co3 * ((__global float4*)((__global char4*)srcData + (src_y - 2) * srcStep / 4))[right_x]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y - 1) * srcStep / 4))[right_x]; - sum = sum + co1 * ((__global float4*)((__global char4*)srcData + (src_y ) * srcStep / 4))[right_x]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + (src_y + 1) * srcStep / 4))[right_x]; - sum = sum + co3 * ((__global float4*)((__global char4*)srcData + (src_y + 2) * srcStep / 4))[right_x]; - - smem[4 + get_local_id(0)] = sum; - } - } - else - { - int col = idx_col(x, last_col); - - sum = co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]; - sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]; - sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]; - - smem[2 + get_local_id(0)] = sum; - - if (get_local_id(0) < 2) - { - const int left_x = x - 2; - - col = idx_col(left_x, last_col); - - sum = co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]; - sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]; - sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]; - - smem[get_local_id(0)] = sum; - } - - if (get_local_id(0) > 253) - { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); - - sum = co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[col]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[col]; - sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[col]; - sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[col]; - sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[col]; - - smem[4 + get_local_id(0)] = sum; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (get_local_id(0) < 128) - { - const int tid2 = get_local_id(0) * 2; - - sum = co3 * smem[2 + tid2 - 2]; - sum = sum + co2 * smem[2 + tid2 - 1]; - sum = sum + co1 * smem[2 + tid2 ]; - sum = sum + co2 * smem[2 + tid2 + 1]; - sum = sum + co3 * smem[2 + tid2 + 2]; - - const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; - - if (dst_x < dstCols) - dst[y * dstStep / 16 + dst_x] = sum; - } } diff --git a/modules/imgproc/src/opencl/pyr_up.cl b/modules/imgproc/src/opencl/pyr_up.cl new file mode 100644 index 0000000..b8cf6ab --- /dev/null +++ b/modules/imgproc/src/opencl/pyr_up.cl @@ -0,0 +1,159 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Zhang Chunpeng chunpeng@multicorewareinc.com +// Dachuan Zhao, dachuan@multicorewareinc.com +// Yao Wang, yao@multicorewareinc.com +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/////////////////////////////////////////////////////////////////////// +//////////////////////// Generic PyrUp ////////////////////////////// +/////////////////////////////////////////////////////////////////////// + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + +#define noconvert + + +__kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + const int lsizex = get_local_size(0); + const int lsizey = get_local_size(1); + + const int tidx = get_local_id(0); + const int tidy = get_local_id(1); + + __local FT s_srcPatch[10][10]; + __local FT s_dstPatch[20][16]; + + __global T * dstData = (__global T *)(dst + dst_offset); + __global const T * srcData = (__global const T *)(src + src_offset); + + if( tidx < 10 && tidy < 10 ) + { + int srcx = mad24((int)get_group_id(0), lsizex>>1, tidx) - 1; + int srcy = mad24((int)get_group_id(1), lsizey>>1, tidy) - 1; + + srcx = abs(srcx); + srcx = min(src_cols - 1, srcx); + + srcy = abs(srcy); + srcy = min(src_rows - 1, srcy); + + s_srcPatch[tidy][tidx] = convertToFT(srcData[srcx + srcy * src_step / (int) sizeof(T)]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + FT sum = 0.f; + const FT evenFlag = (FT)((tidx & 1) == 0); + const FT oddFlag = (FT)((tidx & 1) != 0); + const bool eveny = ((tidy & 1) == 0); + + const FT co1 = 0.375f; + const FT co2 = 0.25f; + const FT co3 = 0.0625f; + + if(eveny) + { + sum = ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 1) >> 1)]; + sum = sum + ( evenFlag* co1 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 1) >> 1)]; + sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[2 + tidy][tidx] = sum; + + if (tidy < 2) + { + sum = 0; + + if (eveny) + { + sum = (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)]; + } + + s_dstPatch[tidy][tidx] = sum; + } + + if (tidy > 13) + { + sum = 0; + + if (eveny) + { + sum = (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx - 2) >> 1)]; + sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * co1) * s_srcPatch[lsizey-7][1 + ((tidx ) >> 1)]; + sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx + 2) >> 1)]; + } + s_dstPatch[4 + tidy][tidx] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + sum = co3 * s_dstPatch[2 + tidy - 2][tidx]; + sum = sum + co2 * s_dstPatch[2 + tidy - 1][tidx]; + sum = sum + co1 * s_dstPatch[2 + tidy ][tidx]; + sum = sum + co2 * s_dstPatch[2 + tidy + 1][tidx]; + sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx]; + + if ((x < dst_cols) && (y < dst_rows)) + dstData[x + y * dst_step / (int)sizeof(T)] = convertToT(4.0f * sum); +} diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp index 45f7a7c..6802e9e 100644 --- a/modules/imgproc/src/pyramids.cpp +++ b/modules/imgproc/src/pyramids.cpp @@ -401,23 +401,25 @@ pyrUp_( const Mat& _src, Mat& _dst, int) typedef void (*PyrFunc)(const Mat&, Mat&, int); -} - -namespace cv -{ - static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) { int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); - if (depth != CV_8U && depth != CV_16U && depth != CV_16S && depth != CV_32F) + if (((channels != 1) && (channels != 2) && (channels != 4)) + || (borderType != BORDER_DEFAULT)) return false; - if (channels != 1 && channels != 3 && channels != 4) + + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + if ((depth == CV_64F) && !(doubleSupport)) return false; - double doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + Size ssize = _src.size(); + Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz; + CV_Assert( ssize.width > 0 && ssize.height > 0 && + std::abs(dsize.width*2 - ssize.width) <= 2 && + std::abs(dsize.height*2 - ssize.height) <= 2 ); UMat src = _src.getUMat(); - Size dsize = cv::Size((src.rows + 1) / 2, (src.cols + 1) / 2); _dst.create( dsize, src.type() ); UMat dst = _dst.getUMat(); @@ -425,31 +427,73 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in ocl::ProgramSource2 program = ocl::imgproc::pyr_down_oclsrc; ocl::Kernel k; + int float_depth = depth == CV_64F ? CV_64F : CV_32F; char cvt[2][50]; k.create(kernelName, program, - format("-D T=%s -D cn=%d -D convertToT=%s%s -D convertToFT=%s%s", - ocl::typeToStr(type), channels, - ocl::convertTypeStr(CV_32F, depth, channels, cvt[0]), - ocl::convertTypeStr(depth, CV_32F, channels, cvt[1]), + format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s", + ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), + ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), + ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); - + size_t localThreads[2] = { 256, 1 }; size_t globalThreads[2] = { src.cols, dst.rows }; return k.run(2, globalThreads, localThreads, false); } +static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); + + if (((channels != 1) && (channels != 2) && (channels != 4)) + || (borderType != BORDER_DEFAULT)) + return false; + + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + if ((depth == CV_64F) && !(doubleSupport)) + return false; + + Size ssize = _src.size(); + if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2))) + return false; + + UMat src = _src.getUMat(); + Size dsize = Size(ssize.width * 2, ssize.height * 2); + _dst.create( dsize, src.type() ); + UMat dst = _dst.getUMat(); + + const char * const kernelName = "pyrUp"; + ocl::ProgramSource2 program = ocl::imgproc::pyr_up_oclsrc; + ocl::Kernel k; + + int float_depth = depth == CV_64F ? CV_64F : CV_32F; + char cvt[2][50]; + k.create(kernelName, program, + format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s", + ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), + ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), + ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), + doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); + size_t globalThreads[2] = {dst.cols, dst.rows}; + size_t localThreads[2] = {16, 16}; + + return k.run(2, globalThreads, localThreads, false); +} + } void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType ) { if (ocl::useOpenCL() && _dst.isUMat() && ocl_pyrDown(_src, _dst, _dsz, borderType)) - return; + return; Mat src = _src.getMat(); - Size dsz = _dsz == Size() ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz; + Size dsz = _dsz.area() == 0 ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz; _dst.create( dsz, src.type() ); Mat dst = _dst.getMat(); @@ -478,8 +522,12 @@ void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borde void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType ) { + if (ocl::useOpenCL() && _dst.isUMat() && + ocl_pyrUp(_src, _dst, _dsz, borderType)) + return; + Mat src = _src.getMat(); - Size dsz = _dsz == Size() ? Size(src.cols*2, src.rows*2) : _dsz; + Size dsz = _dsz.area() == 0 ? Size(src.cols*2, src.rows*2) : _dsz; _dst.create( dsz, src.type() ); Mat dst = _dst.getMat(); diff --git a/modules/imgproc/test/ocl/test_pyramids.cpp b/modules/imgproc/test/ocl/test_pyramids.cpp index 2d861b6..3dd7a41 100644 --- a/modules/imgproc/test/ocl/test_pyramids.cpp +++ b/modules/imgproc/test/ocl/test_pyramids.cpp @@ -45,73 +45,98 @@ #include "test_precomp.hpp" -#include +#include "opencv2/ts/ocl_test.hpp" #ifdef HAVE_OPENCL -using namespace cv; -using namespace testing; -using namespace std; +namespace cvtest { +namespace ocl { -PARAM_TEST_CASE(PyrBase, MatDepth, Channels) +PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool) { - int depth; - int channels; + int depth, channels; + bool use_roi; - Mat dst_cpu; - ocl::oclMat gdst; + TEST_DECLARE_INPUT_PARAMETER(src) + TEST_DECLARE_OUTPUT_PARAMETER(dst) virtual void SetUp() { depth = GET_PARAM(0); channels = GET_PARAM(1); + use_roi = GET_PARAM(2); + } + + void generateTestData(Size src_roiSize, Size dst_roiSize) + { + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, src_roiSize, srcBorder, CV_MAKETYPE(depth, channels), -MAX_VALUE, MAX_VALUE); + + Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, dst_roiSize, dstBorder, CV_MAKETYPE(depth, channels), -MAX_VALUE, MAX_VALUE); + + UMAT_UPLOAD_INPUT_PARAMETER(src) + UMAT_UPLOAD_OUTPUT_PARAMETER(dst) + } + + void Near(double threshold = 0.0) + { + OCL_EXPECT_MATS_NEAR(dst, threshold); } }; /////////////////////// PyrDown ////////////////////////// -typedef PyrBase PyrDown; +typedef PyrTestBase PyrDown; OCL_TEST_P(PyrDown, Mat) { - for (int j = 0; j < LOOP_TIMES; j++) + for (int j = 0; j < test_loop_times; j++) { - Size size(MWIDTH, MHEIGHT); - Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255); - ocl::oclMat gsrc(src); + Size src_roiSize = randomSize(1, MAX_VALUE); + Size dst_roiSize = Size(randomInt((src_roiSize.width - 1) / 2, (src_roiSize.width + 3) / 2), + randomInt((src_roiSize.height - 1) / 2, (src_roiSize.height + 3) / 2)); + dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize; + generateTestData(src_roiSize, dst_roiSize); - pyrDown(src, dst_cpu); - ocl::pyrDown(gsrc, gdst); + OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize)); + OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize)); - EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), depth == CV_32F ? 1e-4f : 1.0f); + Near(depth == CV_32F ? 1e-4f : 1.0f); } } -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine( - Values(CV_8U, CV_16U, CV_16S, CV_32F), - Values(1, 3, 4))); +OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine( + Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), + Values(1, 2, 4), + Bool() + )); /////////////////////// PyrUp ////////////////////////// -typedef PyrBase PyrUp; +typedef PyrTestBase PyrUp; -OCL_TEST_P(PyrUp, Accuracy) +OCL_TEST_P(PyrUp, Mat) { - for (int j = 0; j < LOOP_TIMES; j++) + for (int j = 0; j < test_loop_times; j++) { - Size size(MWIDTH, MHEIGHT); - Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255); - ocl::oclMat gsrc(src); + Size src_roiSize = randomSize(1, MAX_VALUE); + Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height); + generateTestData(src_roiSize, dst_roiSize); - pyrUp(src, dst_cpu); - ocl::pyrUp(gsrc, gdst); + OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize)); + OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize)); - EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (depth == CV_32F ? 1e-4f : 1.0)); + Near(depth == CV_32F ? 1e-4f : 1.0f); } } +OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine( + Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), + Values(1, 2, 4), + Bool() + )); + +} } // namespace cvtest::ocl -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, Combine( - Values(CV_8U, CV_16U, CV_16S, CV_32F), - Values(1, 3, 4))); #endif // HAVE_OPENCL -- 2.7.4