2 // For Open Source Computer Vision Library
4 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
5 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
6 // Third party copyrights are property of their respective owners.
9 // Niko Li, newlife20080214@gmail.com
10 // Redistribution and use in source and binary forms, with or without modification,
11 // are permitted provided that the following conditions are met:
13 // * Redistribution's of source code must retain the above copyright notice,
14 // this list of conditions and the following disclaimer.
16 // * Redistribution's in binary form must reproduce the above copyright notice,
17 // this list of conditions and the following disclaimer in the documentation
18 // and/or other materials provided with the distribution.
20 // * The name of the copyright holders may not be used to endorse or promote products
21 // derived from this software without specific prior written permission.
23 // This software is provided by the copyright holders and contributors as is and
24 // any express or implied warranties, including, but not limited to, the implied
25 // warranties of merchantability and fitness for a particular purpose are disclaimed.
26 // In no event shall the Intel Corporation or contributors be liable for any direct,
27 // indirect, incidental, special, exemplary, or consequential damages
28 // (including, but not limited to, procurement of substitute goods or services;
29 // loss of use, data, or profits; or business interruption) however caused
30 // and on any theory of liability, whether in contract, strict liability,
31 // or tort (including negligence or otherwise) arising in any way out of
32 // the use of this software, even if advised of the possibility of such damage.
// Request the double-precision extension only when DOUBLE_SUPPORT is defined
// (presumably supplied as a build option by the host code -- confirm).
// The conditional must be closed here so that the kernels below are compiled
// whether or not DOUBLE_SUPPORT is set.
#if defined (DOUBLE_SUPPORT)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
/*
 * convertC3C4: expand packed 3-component pixels into 4-component pixels.
 *
 * Each work-item loads three consecutive GENTYPE4 elements from src (twelve
 * components, i.e. four packed 3-component pixels) and stores up to four
 * GENTYPE4 pixels to dst, each with its fourth component set to 0.
 *
 *   src               input, indexed linearly (no row stride is applied)
 *   dst               output, addressed as row * dstStep_in_piexl + column
 *   cols, rows        output extent in pixels; stores outside it are skipped
 *   dstStep_in_piexl  distance between output rows, counted in GENTYPE4 elements
 *   pixel_end         highest src index that is read; load indices are clamped to it
 *
 * NOTE(review): GENTYPE4 is not defined in this file; it is assumed to be a
 * 4-component vector type provided at build time -- confirm against the host code.
 */
__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
                          int dstStep_in_piexl,int pixel_end)
{
    // Nothing can be stored for an empty image, and the division and modulo
    // by cols below would be undefined.
    if (cols <= 0 || rows <= 0)
    {
        return;
    }

    const int id = get_global_id(0);

    // Indices of the three input vectors owned by this work-item, clamped so
    // that trailing work-items never read past pixel_end.
    int3 pixelid = (int3)(mul24(id, 3), mad24(id, 3, 1), mad24(id, 3, 2));
    pixelid = clamp(pixelid, 0, pixel_end);

    const GENTYPE4 pixel0 = src[pixelid.x];
    const GENTYPE4 pixel1 = src[pixelid.y];
    const GENTYPE4 pixel2 = src[pixelid.z];

    // Regroup the twelve loaded components three at a time; the fourth
    // component of every output pixel is zero.
    const GENTYPE4 outpix0 = (GENTYPE4)(pixel0.x, pixel0.y, pixel0.z, 0);
    const GENTYPE4 outpix1 = (GENTYPE4)(pixel0.w, pixel1.x, pixel1.y, 0);
    const GENTYPE4 outpix2 = (GENTYPE4)(pixel1.z, pixel1.w, pixel2.x, 0);
    const GENTYPE4 outpix3 = (GENTYPE4)(pixel2.y, pixel2.z, pixel2.w, 0);

    // Output coordinates of the four pixels: consecutive columns starting at
    // linear pixel index id * 4.
    const int first = id << 2;
    int4 outy = (int4)(first / cols);
    int4 outx = (int4)(first % cols) + (int4)(0, 1, 2, 3);

    // A column offset of up to 3 can cross more than one row boundary when
    // cols < 4 (three boundaries when cols == 1), so wrap three times.
    for (int pass = 0; pass < 3; ++pass)
    {
        const int4 wrapped = outx >= cols;
        outy = select(outy, outy + 1, wrapped);
        outx = select(outx, outx - cols, wrapped);
    }

    const int4 addr = mad24(outy, (int4)dstStep_in_piexl, outx);

    // Store as many leading pixels as lie inside the image. Rows never
    // decrease from lane x to lane w, so testing the last candidate lane is
    // enough for each case.
    if (outx.w < cols && outy.w < rows)
    {
        dst[addr.x] = outpix0;
        dst[addr.y] = outpix1;
        dst[addr.z] = outpix2;
        dst[addr.w] = outpix3;
    }
    else if (outx.z < cols && outy.z < rows)
    {
        dst[addr.x] = outpix0;
        dst[addr.y] = outpix1;
        dst[addr.z] = outpix2;
    }
    else if (outx.y < cols && outy.y < rows)
    {
        dst[addr.x] = outpix0;
        dst[addr.y] = outpix1;
    }
    else if (outx.x < cols && outy.x < rows)
    {
        dst[addr.x] = outpix0;
    }
}
/*
 * convertC4C3: pack 4-component pixels into 3-component pixels.
 *
 * Each work-item loads four GENTYPE4 pixels from src, drops the fourth
 * component of each, and stores the remaining twelve components as up to
 * three consecutive GENTYPE4 elements of dst.
 *
 *   src               input, addressed as row * srcStep_in_pixel + column
 *   dst               output, indexed linearly (no row stride is applied)
 *   cols, rows        input extent in pixels
 *   srcStep_in_pixel  distance between input rows, counted in GENTYPE4 elements
 *   pixel_end         highest dst index that is written
 *
 * NOTE(review): GENTYPE4 is not defined in this file; it is assumed to be a
 * 4-component vector type provided at build time -- confirm against the host code.
 */
__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
                          int srcStep_in_pixel,int pixel_end)
{
    // Nothing can be read from an empty image, and the division and modulo
    // by cols below would be undefined.
    if (cols <= 0 || rows <= 0)
    {
        return;
    }

    // Linear index of the first of the four input pixels of this work-item.
    const int id = get_global_id(0) << 2;
    const int y = id / cols;
    const int x = id % cols;

    int4 x4 = (int4)(x, x + 1, x + 2, x + 3);
    int4 y4 = (int4)(y);

    // A column offset of up to 3 can cross more than one row boundary when
    // cols < 4 (three boundaries when cols == 1), so wrap three times.
    for (int pass = 0; pass < 3; ++pass)
    {
        const int4 wrapped = x4 >= (int4)cols;
        y4 = select(y4, y4 + 1, wrapped);
        x4 = select(x4, x4 - (int4)cols, wrapped);
    }

    // Keep the row index of every load inside [0, rows - 1].
    y4 = clamp(y4, (int4)0, (int4)(rows - 1));
    const int4 addr = mad24(y4, (int4)srcStep_in_pixel, x4);

    const GENTYPE4 pixel0 = src[addr.x];
    const GENTYPE4 pixel1 = src[addr.y];
    const GENTYPE4 pixel2 = src[addr.z];
    const GENTYPE4 pixel3 = src[addr.w];

    // Concatenate the first three components of each input pixel:
    //   out0 = p0.xyz p1.x   out1 = p1.yz p2.xy   out2 = p2.z p3.xyz
    GENTYPE4 outpixel0 = pixel0;
    outpixel0.w = pixel1.x;

    GENTYPE4 outpixel1;
    outpixel1.x = pixel1.y;
    outpixel1.y = pixel1.z;
    outpixel1.z = pixel2.x;
    outpixel1.w = pixel2.y;

    GENTYPE4 outpixel2;
    outpixel2.x = pixel2.z;
    outpixel2.y = pixel3.x;
    outpixel2.z = pixel3.y;
    outpixel2.w = pixel3.z;

    // Three consecutive output slots per work-item.
    const int base = mul24(id >> 2, 3);
    const int3 outaddr = (int3)(base, base + 1, base + 2);

    // Store only the slots that do not exceed pixel_end.
    if (outaddr.z <= pixel_end)
    {
        dst[outaddr.x] = outpixel0;
        dst[outaddr.y] = outpixel1;
        dst[outaddr.z] = outpixel2;
    }
    else if (outaddr.y <= pixel_end)
    {
        dst[outaddr.x] = outpixel0;
        dst[outaddr.y] = outpixel1;
    }
    else if (outaddr.x <= pixel_end)
    {
        dst[outaddr.x] = outpixel0;
    }
}