modules/core/src/opencl/lut.cl

   1 //                           License Agreement
   2 //                For Open Source Computer Vision Library
   3 //
   4 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
   5 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
   6 // Third party copyrights are property of their respective owners.
   7 //
   8 // @Authors
   9 //    Niko Li, newlife20080214@gmail.com
  10 //    Rock Li, Rock.li@amd.com
  11 // Redistribution and use in source and binary forms, with or without modification,
  12 // are permitted provided that the following conditions are met:
  13 //
  14 //   * Redistribution's of source code must retain the above copyright notice,
  15 //     this list of conditions and the following disclaimer.
  16 //
  17 //   * Redistribution's in binary form must reproduce the above copyright notice,
  18 //     this list of conditions and the following disclaimer in the documentation
  19 //     and/or other materials provided with the distribution.
  20 //
  21 //   * The name of the copyright holders may not be used to endorse or promote products
  22 //     derived from this software without specific prior written permission.
  23 //
  24 // This software is provided by the copyright holders and contributors as is and
  25 // any express or implied warranties, including, but not limited to, the implied
  26 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  27 // In no event shall the Intel Corporation or contributors be liable for any direct,
  28 // indirect, incidental, special, exemplary, or consequential damages
  29 // (including, but not limited to, procurement of substitute goods or services;
  30 // loss of use, data, or profits; or business interruption) however caused
  31 // and on any theory of liability, whether in contract, strict liability,
  32 // or tort (including negligence or otherwise) arising in any way out of
  33 // the use of this software, even if advised of the possibility of such damage.
  34 //
  35 //
  36
  37 #if lcn == 1
  38     #if dcn == 4
  39         #define LUT_OP(num)\
  40             int idx = *(__global const int *)(srcptr + mad24(num, src_step, src_index));\
  41             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
  42             dst[0] = lut_l[idx & 0xff];\
  43             dst[1] = lut_l[(idx >> 8) & 0xff];\
  44             dst[2] = lut_l[(idx >> 16) & 0xff];\
  45             dst[3] = lut_l[(idx >> 24) & 0xff];
  46     #elif dcn == 3
  47         #define LUT_OP(num)\
  48             uchar3 idx = vload3(0, srcptr + mad24(num, src_step, src_index));\
  49             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
  50             dst[0] = lut_l[idx.x];\
  51             dst[1] = lut_l[idx.y];\
  52             dst[2] = lut_l[idx.z];
  53     #elif dcn == 2
  54         #define LUT_OP(num)\
  55             __global const uchar2 * idx = (__global const uchar2 *)(srcptr + mad24(num, src_step, src_index));\
  56             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
  57             dst[0] = lut_l[idx->x];\
  58             dst[1] = lut_l[idx->y];
  59     #elif dcn == 1
  60         #ifdef USE_ALIGNED
  61             #define LUT_OP(num)\
  62                 int idx = *(__global const int *)(srcptr + mad24(num, src_step, src_index));\
  63                 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
  64                 dst[0] = lut_l[idx & 0xff];\
  65                 dst[1] = lut_l[(idx >> 8) & 0xff];\
  66                 dst[2] = lut_l[(idx >> 16) & 0xff];\
  67                 dst[3] = lut_l[(idx >> 24) & 0xff];
  68         #else
  69             #define LUT_OP(num)\
  70                 uchar idx = (srcptr + mad24(num, src_step, src_index))[0];\
  71                 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
  72                 dst[0] = lut_l[idx];
  73         #endif
  74     #else
  75         #define LUT_OP(num)\
  76             src = (__global const srcT *)(srcptr + mad24(num, src_step, src_index));\
  77             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
  78             for (int cn = 0; cn < dcn; ++cn)\
  79                 dst[cn] = lut_l[src[cn]];
  80     #endif
  81 #else
  82     #if dcn == 4
  83         #define LUT_OP(num)\
  84             __global const uchar4 *src_pixel = (__global const uchar4 *)(srcptr + mad24(num, src_step, src_index));\
  85             int4 idx = convert_int4(src_pixel[0]) * lcn + (int4)(0, 1, 2, 3);\
  86             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
  87             dst[0] = lut_l[idx.x];\
  88             dst[1] = lut_l[idx.y];\
  89             dst[2] = lut_l[idx.z];\
  90             dst[3] = lut_l[idx.w];
  91     #elif dcn == 3
  92         #define LUT_OP(num)\
  93             uchar3 src_pixel = vload3(0, srcptr + mad24(num, src_step, src_index));\
  94             int3 idx = convert_int3(src_pixel) * lcn + (int3)(0, 1, 2);\
  95             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
  96             dst[0] = lut_l[idx.x];\
  97             dst[1] = lut_l[idx.y];\
  98             dst[2] = lut_l[idx.z];
  99     #elif dcn == 2
 100         #define LUT_OP(num)\
 101             __global const uchar2 *src_pixel = (__global const uchar2 *)(srcptr + mad24(num, src_step, src_index));\
 102             int2 idx = convert_int2(src_pixel[0]) * lcn + (int2)(0, 1);\
 103             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
 104             dst[0] = lut_l[idx.x];\
 105             dst[1] = lut_l[idx.y];
 106     #elif dcn == 1 //error case (1 < lcn) ==> lcn == scn == dcn
 107         #define LUT_OP(num)\
 108             uchar idx = (srcptr + mad24(num, src_step, src_index))[0];\
 109             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
 110             dst[0] = lut_l[idx];
 111     #else
 112         #define LUT_OP(num)\
 113             src = (__global const srcT *)(srcptr + mad24(num, src_step, src_index));\
 114             dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
 115             for (int cn = 0; cn < dcn; ++cn)\
 116                 dst[cn] = lut_l[mad24(src[cn], lcn, cn)];
 117     #endif
 118 #endif
 119
 120 #define LOCAL_LUT_INIT\
 121     {\
 122         __global const dstT * lut = (__global const dstT *)(lutptr + lut_offset);\
 123         int init = mad24((int)get_local_id(1), (int)get_local_size(0), (int)get_local_id(0));\
 124         int step = get_local_size(0) * get_local_size(1);\
 125         for (int i = init; i < 256 * lcn; i += step)\
 126         {\
 127             lut_l[i] = lut[i];\
 128         }\
 129         barrier(CLK_LOCAL_MEM_FENCE);\
 130     }
 131
 132 __kernel void LUT(__global const uchar * srcptr, int src_step, int src_offset,
 133                   __global const uchar * lutptr, int lut_step, int lut_offset,
 134                   __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols)
 135 {
 136     __local dstT lut_l[256 * lcn];
 137     LOCAL_LUT_INIT;
 138
 139 #ifdef USE_ALIGNED
 140     int x = 4 * get_global_id(0);
 141 #else
 142     int x = get_global_id(0);
 143 #endif
 144     int y = 4 * get_global_id(1);
 145
 146     if (x < cols && y < rows)
 147     {
 148         int src_index = mad24(y, src_step, mad24(x, (int)sizeof(srcT) * dcn, src_offset));
 149         int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(dstT) * dcn, dst_offset));
 150         __global const srcT * src; __global dstT * dst;
 151         int tmp_idx;
 152         LUT_OP(0);
 153         if (y < rows - 1)
 154         {
 155             LUT_OP(1);
 156             if (y < rows - 2)
 157             {
 158                 LUT_OP(2);
 159                 if (y < rows - 3)
 160                 {
 161                     LUT_OP(3);
 162                 }
 163             }
 164         }
 165
 166     }
 167 }