2 // For Open Source Computer Vision Library
4 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
5 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
6 // Third party copyrights are property of their respective owners.
9 // Niko Li, newlife20080214@gmail.com
10 // Rock Li, Rock.li@amd.com
11 // Redistribution and use in source and binary forms, with or without modification,
12 // are permitted provided that the following conditions are met:
14 // * Redistribution's of source code must retain the above copyright notice,
15 // this list of conditions and the following disclaimer.
17 // * Redistribution's in binary form must reproduce the above copyright notice,
18 // this list of conditions and the following disclaimer in the documentation
19 // and/or other materials provided with the distribution.
21 // * The name of the copyright holders may not be used to endorse or promote products
22 // derived from this software without specific prior written permission.
24 // This software is provided by the copyright holders and contributors as is and
25 // any express or implied warranties, including, but not limited to, the implied
26 // warranties of merchantability and fitness for a particular purpose are disclaimed.
27 // In no event shall the Intel Corporation or contributors be liable for any direct,
28 // indirect, incidental, special, exemplary, or consequential damages
29 // (including, but not limited to, procurement of substitute goods or services;
30 // loss of use, data, or profits; or business interruption) however caused
31 // and on any theory of liability, whether in contract, strict liability,
32 // or tort (including negligence or otherwise) arising in any way out of
33 // the use of this software, even if advised of the possibility of such damage.
40 int idx = *(__global const int *)(srcptr + mad24(num, src_step, src_index)); /* Four table lookups from one 32-bit load: reads 4 source bytes at byte offset num * src_step + src_index as a single int. NOTE(review): the #define that opens this continuation is not visible here, and the cast assumes the address is int-aligned - confirm. */\
41 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst (declared outside this fragment) now points at byte offset num * dst_step + dst_index, viewed as dstT */\
42 dst[0] = lut_l[idx & 0xff]; /* least-significant byte of idx selects the first output element */\
43 dst[1] = lut_l[(idx >> 8) & 0xff]; /* next byte up */\
44 dst[2] = lut_l[(idx >> 16) & 0xff];\
45 dst[3] = lut_l[(idx >> 24) & 0xff]; /* the 0xff mask keeps the index in 0..255 even though idx is a signed int */
48 uchar3 idx = vload3(0, srcptr + mad24(num, src_step, src_index)); /* Three table lookups: vload3 reads three consecutive source bytes starting at byte offset num * src_step + src_index. NOTE(review): the #define that opens this continuation is not visible here. */\
49 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst (declared outside this fragment) points at byte offset num * dst_step + dst_index, viewed as dstT */\
50 dst[0] = lut_l[idx.x]; /* each source byte is used directly as the index into lut_l */\
51 dst[1] = lut_l[idx.y];\
52 dst[2] = lut_l[idx.z];
55 __global const uchar2 * idx = (__global const uchar2 *)(srcptr + mad24(num, src_step, src_index)); /* Two table lookups: idx points at the two source bytes at byte offset num * src_step + src_index. NOTE(review): the #define that opens this continuation is not visible here; the cast assumes uchar2 alignment - confirm. */\
56 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst (declared outside this fragment) points at byte offset num * dst_step + dst_index, viewed as dstT */\
57 dst[0] = lut_l[idx->x]; /* first source byte indexes lut_l directly */\
58 dst[1] = lut_l[idx->y]; /* second source byte likewise */
62 int idx = *(__global const int *)(srcptr + mad24(num, src_step, src_index)); /* Four table lookups from one 32-bit load of the 4 source bytes at byte offset num * src_step + src_index. NOTE(review): the enclosing #define/conditional is not visible, so whether these are four channels or four adjacent single-channel elements cannot be told from here; the cast assumes int alignment - confirm. */\
63 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst (declared outside this fragment) points at byte offset num * dst_step + dst_index, viewed as dstT */\
64 dst[0] = lut_l[idx & 0xff]; /* least-significant byte of idx selects the first output element */\
65 dst[1] = lut_l[(idx >> 8) & 0xff];\
66 dst[2] = lut_l[(idx >> 16) & 0xff];\
67 dst[3] = lut_l[(idx >> 24) & 0xff]; /* masking after the shift discards any sign-extended bits */
70 uchar idx = (srcptr + mad24(num, src_step, src_index))[0]; /* reads one source byte at byte offset num * src_step + src_index. NOTE(review): no visible line consumes idx - the statement that uses it, and the boundary to the next variant, appear to be missing from this view. */\
71 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst points at byte offset num * dst_step + dst_index, viewed as dstT */\
76 src = (__global const srcT *)(srcptr + mad24(num, src_step, src_index)); /* generic path: src (declared outside this fragment) views the same source offset as srcT */\
77 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
78 for (int cn = 0; cn < dcn; ++cn) /* one lookup per element, dcn elements in total */\
79 dst[cn] = lut_l[src[cn]]; /* the source value is used directly as the lut_l index */
84 __global const uchar4 *src_pixel = (__global const uchar4 *)(srcptr + mad24(num, src_step, src_index)); /* Four lookups into a table with lcn entries per source value: src_pixel points at 4 source bytes at byte offset num * src_step + src_index. NOTE(review): the opening #define is not visible; the cast assumes uchar4 alignment - confirm. */\
85 int4 idx = convert_int4(src_pixel[0]) * lcn + (int4)(0, 1, 2, 3); /* component c gets index value_c * lcn + c */\
86 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst (declared outside this fragment) points at byte offset num * dst_step + dst_index, viewed as dstT */\
87 dst[0] = lut_l[idx.x];\
88 dst[1] = lut_l[idx.y];\
89 dst[2] = lut_l[idx.z];\
90 dst[3] = lut_l[idx.w];
93 uchar3 src_pixel = vload3(0, srcptr + mad24(num, src_step, src_index)); /* Three lookups into a table with lcn entries per source value: vload3 reads three consecutive source bytes at byte offset num * src_step + src_index. NOTE(review): the opening #define is not visible here. */\
94 int3 idx = convert_int3(src_pixel) * lcn + (int3)(0, 1, 2); /* component c gets index value_c * lcn + c */\
95 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst (declared outside this fragment) points at byte offset num * dst_step + dst_index, viewed as dstT */\
96 dst[0] = lut_l[idx.x];\
97 dst[1] = lut_l[idx.y];\
98 dst[2] = lut_l[idx.z];
101 __global const uchar2 *src_pixel = (__global const uchar2 *)(srcptr + mad24(num, src_step, src_index)); /* Two lookups into a table with lcn entries per source value: src_pixel points at 2 source bytes at byte offset num * src_step + src_index. NOTE(review): the opening #define is not visible; the cast assumes uchar2 alignment - confirm. */\
102 int2 idx = convert_int2(src_pixel[0]) * lcn + (int2)(0, 1); /* component c gets index value_c * lcn + c */\
103 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst (declared outside this fragment) points at byte offset num * dst_step + dst_index, viewed as dstT */\
104 dst[0] = lut_l[idx.x];\
105 dst[1] = lut_l[idx.y];
106 #elif dcn == 1 //error case (1 < lcn) ==> lcn == scn == dcn
108 uchar idx = (srcptr + mad24(num, src_step, src_index))[0]; /* reads one source byte at byte offset num * src_step + src_index. NOTE(review): no visible line consumes idx - the statement that uses it, and the boundary to the next variant, appear to be missing from this view. */\
109 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index)); /* dst points at byte offset num * dst_step + dst_index, viewed as dstT */\
113 src = (__global const srcT *)(srcptr + mad24(num, src_step, src_index)); /* generic path: src (declared outside this fragment) views the same source offset as srcT */\
114 dst = (__global dstT *)(dstptr + mad24(num, dst_step, dst_index));\
115 for (int cn = 0; cn < dcn; ++cn) /* one lookup per element, dcn elements in total */\
116 dst[cn] = lut_l[mad24(src[cn], lcn, cn)]; /* index is src[cn] * lcn + cn, i.e. entry cn of the lcn-wide group selected by the source value */
120 #define LOCAL_LUT_INIT /* Work-group setup step for the lut_l table. NOTE(review): the macro's braces, the loop body and its closing line are not visible in this view. */\
122 __global const dstT * lut = (__global const dstT *)(lutptr + lut_offset); /* lut: global-memory table starting lut_offset bytes into lutptr, viewed as dstT */\
123 int init = mad24((int)get_local_id(1), (int)get_local_size(0), (int)get_local_id(0)); /* this work-item's linear index inside its work-group: local_id(1) * local_size(0) + local_id(0) */\
124 int step = get_local_size(0) * get_local_size(1); /* number of work-items in the 2-D work-group, used as the loop stride */\
125 for (int i = init; i < 256 * lcn; i += step) /* each work-item visits i = init, init + step, ... below 256 * lcn. NOTE(review): the body is not visible; presumably it copies lut[i] into lut_l[i] - confirm. */\
129 barrier(CLK_LOCAL_MEM_FENCE); /* every work-item of the group must reach this point; the local-memory fence orders earlier local-memory accesses before later ones */\
132 __kernel void LUT(__global const uchar * srcptr, int src_step, int src_offset,
133 __global const uchar * lutptr, int lut_step, int lut_offset,
134 __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols)
136 __local dstT lut_l[256 * lcn];
140 int x = 4 * get_global_id(0);
142 int x = get_global_id(0);
144 int y = 4 * get_global_id(1);
146 if (x < cols && y < rows)
148 int src_index = mad24(y, src_step, mad24(x, (int)sizeof(srcT) * dcn, src_offset));
149 int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(dstT) * dcn, dst_offset));
150 __global const srcT * src; __global dstT * dst;