From df3997b108600d95835944c26cb98e7bbdcdbf18 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 15 May 2013 08:51:21 +0800 Subject: [PATCH] Fix ocl::pyrUp Use predefined OpenCL function to convert integers to floating points. This is more accurate than before as it enables: 1. saturate cast 2. customized rounding --- modules/ocl/src/opencl/pyr_up.cl | 82 +++++++------------------------- 1 file changed, 16 insertions(+), 66 deletions(-) diff --git a/modules/ocl/src/opencl/pyr_up.cl b/modules/ocl/src/opencl/pyr_up.cl index 0b7f0c9025..ef41c9408d 100644 --- a/modules/ocl/src/opencl/pyr_up.cl +++ b/modules/ocl/src/opencl/pyr_up.cl @@ -18,6 +18,7 @@ // Zhang Chunpeng chunpeng@multicorewareinc.com // Dachuan Zhao, dachuan@multicorewareinc.com // Yao Wang, yao@multicorewareinc.com +// Peng Xiao, pengxiao@outlook.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -47,7 +48,7 @@ //#pragma OPENCL EXTENSION cl_amd_printf : enable -uchar get_valid_uchar(uchar data) +uchar get_valid_uchar(float data) { return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0); } @@ -142,7 +143,7 @@ __kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst, sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][tidx]; if ((x < dstCols) && (y < dstRows)) - dst[x + y * dstStep] = (float)(4.0f * sum); + dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum); } @@ -244,7 +245,7 @@ __kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst, sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; if ((x < dstCols) && (y < dstRows)) - dst[x + y * dstStep] = (float)(4.0f * sum); + dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum); } @@ -351,31 +352,6 @@ __kernel void pyrUp_C1_D5(__global float* src,__global float* dst, /////////////////////////////////////////////////////////////////////// ////////////////////////// CV_8UC4 ////////////////////////////////// /////////////////////////////////////////////////////////////////////// -float4 covert_uchar4_to_float4(uchar4 data) -{ - float4 f4Data = {0,0,0,0}; - - f4Data.x = (float)data.x; - f4Data.y = (float)data.y; - f4Data.z = (float)data.z; - f4Data.w = (float)data.w; - - return f4Data; -} - - -uchar4 convert_float4_to_uchar4(float4 data) -{ - uchar4 u4Data; - - u4Data.x = get_valid_uchar(data.x); - u4Data.y = get_valid_uchar(data.y); - u4Data.z = get_valid_uchar(data.z); - u4Data.w = get_valid_uchar(data.w); - - return u4Data; -} - __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, int srcRows,int dstRows,int srcCols,int dstCols, int srcOffset,int dstOffset,int srcStep,int dstStep) @@ -406,7 +382,7 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, srcy = abs(srcy); srcy = min(srcRows -1 ,srcy); - s_srcPatch[tidy][tidx] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]); + s_srcPatch[tidy][tidx] = convert_float4(src[srcx + srcy * srcStep]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -476,38 +452,12 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, if ((x < dstCols) && (y < dstRows)) { - dst[x + y * dstStep] = convert_float4_to_uchar4(4.0f * sum); + dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum); } } /////////////////////////////////////////////////////////////////////// ////////////////////////// CV_16UC4 ////////////////////////////////// /////////////////////////////////////////////////////////////////////// -float4 covert_ushort4_to_float4(ushort4 data) -{ - float4 f4Data = {0,0,0,0}; - - f4Data.x = (float)data.x; - f4Data.y = (float)data.y; - f4Data.z = (float)data.z; - f4Data.w = (float)data.w; - - return f4Data; -} - - -ushort4 convert_float4_to_ushort4(float4 data) -{ - ushort4 u4Data; - - u4Data.x = (float)data.x; - u4Data.y = (float)data.y; - u4Data.z = (float)data.z; - u4Data.w = (float)data.w; - - return u4Data; -} - - __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, int srcRows,int dstRows,int srcCols,int dstCols, int srcOffset,int dstOffset,int srcStep,int dstStep) @@ -535,7 +485,7 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, srcy = abs(srcy); srcy = min(srcRows -1 ,srcy); - s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]); + s_srcPatch[get_local_id(1)][get_local_id(0)] = convert_float4(src[srcx + srcy * srcStep]); } barrier(CLK_LOCAL_MEM_FENCE); @@ -570,11 +520,11 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, if (eveny) { - sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; - sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; + sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; sum = sum + (evenFlag * co1 ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; - sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; - sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; + sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; } s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; @@ -610,7 +560,7 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, if ((x < dstCols) && (y < dstRows)) { - dst[x + y * dstStep] = convert_float4_to_ushort4(4.0f * sum); + dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum); } } @@ -681,11 +631,11 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, if (eveny) { - sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)]; - sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)]; + sum = sum + (oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)]; sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)]; - sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)]; - sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)]; + sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)]; } s_dstPatch[tidy][tidx] = sum; -- 2.34.1