1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Zhang Ying, zhangying913@gmail.com
19 // Niko Li, newlife20080214@gmail.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
// Enable double-precision support through whichever fp64 extension is selected.
// NOTE(review): the directive that opens this conditional (and its #endif) is
// not visible in this excerpt -- confirm the guard against the full file.
48 #pragma OPENCL EXTENSION cl_amd_fp64:enable
49 #elif defined (cl_khr_fp64)
50 #pragma OPENCL EXTENSION cl_khr_fp64:enable
// Fixed-point scale for interpolation weights: 2^INTER_RESIZE_COEF_BITS.
// INTER_RESIZE_COEF_BITS is not defined in the visible lines (presumably a
// build option or a #define on a line missing from this excerpt).
54 #define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
// Twice INTER_RESIZE_COEF_BITS; used below as the right-shift applied to a sum
// of pixel values multiplied by a product of two scaled weights.
55 #define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
// Index of the neighbour after x, capped at l-1.
// NOTE(review): arguments and expansion are not parenthesized, so only simple
// operands are safe here (every visible use passes plain identifiers).
56 #define INC(x,l) min(x+1,l-1)
// Pixel access helpers. Two alternative definition sets follow; the
// #if/#else/#endif lines that choose between them are not visible in this
// excerpt (presumably keyed on the channel count cn -- TODO confirm).
//
// Variant 1: a pixel is a single value of type T, read/written by direct
// dereference of a byte address. TSIZE is the pixel size in bytes.
61 #define loadpix(addr) *(__global const T *)(addr)
62 #define storepix(val, addr) *(__global T *)(addr) = val
63 #define TSIZE (int)sizeof(T)
// Variant 2: a pixel is three consecutive T1 elements, accessed with
// vload3/vstore3 at element offset 0 from the given address.
65 #define loadpix(addr) vload3(0, (__global const T1 *)(addr))
66 #define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
// NOTE(review): this expansion is not parenthesized as a whole; it is used
// below only as a multiplicand, an addend or a mad24 argument.
67 #define TSIZE (int)sizeof(T1)*cn
// ---- Sampler-based resize path (active when USE_SAMPLER is defined) ----
70 #if defined USE_SAMPLER
// READ_IMAGE / INTERMEDIATE_TYPE: four alternative pairs that keep 1, 2, 3 or
// all 4 components of the float4 returned by read_imagef. The conditionals that
// select one pair are not visible in this excerpt (presumably keyed on cn).
73 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x
74 #define INTERMEDIATE_TYPE float
76 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy
77 #define INTERMEDIATE_TYPE float2
79 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz
80 #define INTERMEDIATE_TYPE float3
82 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z)
83 #define INTERMEDIATE_TYPE float4
// Two-level token pasting so that macro arguments are expanded before ##.
86 #define __CAT(x, y) x##y
87 #define CAT(x, y) __CAT(x, y)
// Disabled alternative that would derive INTERMEDIATE_TYPE from cn via CAT.
88 //#define INTERMEDIATE_TYPE CAT(float, cn)
// RESULT_SCALE: factor applied to the sampled float value before conversion to
// the destination type. Five alternatives; the selecting conditionals are not
// visible (the values look like the maxima of 8/16-bit unsigned and signed
// integer types plus 1.0f for float data -- TODO confirm against the full file).
92 #define RESULT_SCALE 255.0f
94 #define RESULT_SCALE 127.0f
96 #define RESULT_SCALE 65535.0f
98 #define RESULT_SCALE 32767.0f
100 #define RESULT_SCALE 1.0f
// resizeSampler: resize by sampling a source image object; one work-item per
// destination pixel.
//   srcImage            read-only 2D source image
//   dstptr              destination buffer, addressed in bytes
//   dststep, dstoffset  byte stride between destination rows / byte offset of the first pixel
//   dstrows, dstcols    destination size in pixels
//   ifx, ify            multipliers that map a destination pixel centre to source coordinates
// NOTE(review): braces, the rest of the sampler declaration and the conditional
// around the two `uval` definitions are missing from this excerpt.
103 __kernel void resizeSampler(__read_only image2d_t srcImage,
104 __global uchar* dstptr, int dststep, int dstoffset,
105 int dstrows, int dstcols,
106 float ifx, float ify)
// Unnormalized pixel coordinates, clamped to the image edge. The filter-mode
// flag is on a line not visible here.
108 const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
109 CLK_ADDRESS_CLAMP_TO_EDGE |
112 int dx = get_global_id(0);
113 int dy = get_global_id(1);
// Source coordinates of the destination pixel centre.
115 float sx = ((dx+0.5f) * ifx), sy = ((dy+0.5f) * ify);
// The image read is issued before the bounds check below, so out-of-range
// work-items sample too but never store.
117 INTERMEDIATE_TYPE intermediate = READ_IMAGE(srcImage, sampler, (float2)(sx, sy));
// Two alternative conversions: with explicit round() and without. Which one is
// compiled depends on a conditional not visible in this excerpt.
120 T uval = convertToDT(round(intermediate * RESULT_SCALE));
122 T uval = convertToDT(intermediate * RESULT_SCALE);
// Store only for work-items that map to a real destination pixel.
125 if(dx < dstcols && dy < dstrows)
// Byte address: dy*dststep + dstoffset + dx*TSIZE.
127 storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*TSIZE));
// ---- Bilinear resize using precomputed integer tables ----
131 #elif defined INTER_LINEAR_INTEGER
// resizeLN (integer variant): one work-item per destination pixel.
//   srcptr/dstptr          source / destination buffers, addressed in bytes
//   *_step, *_offset       byte stride between rows / byte offset of the first pixel
//   *_rows, *_cols         image sizes in pixels
//   buffer                 packed lookup tables, laid out as read below
// NOTE(review): braces and at least one line between the ibeta and sx0 lines
// are missing from this excerpt.
133 __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
134 __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
135 __global const uchar * buffer)
137 int dx = get_global_id(0);
138 int dy = get_global_id(1);
140 if (dx < dst_cols && dy < dst_rows)
// Table layout in `buffer`: dst_cols ints (xofs), then dst_rows ints (yofs),
// then shorts holding weight pairs.
142 __global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
143 __global const short * ialpha = (__global const short *)(yofs + dst_rows);
// ibeta points at pair number (dst_cols + dy) counted from ialpha, i.e. the
// dy-th pair after dst_cols pairs.
144 __global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
// Source column comes straight from the table; the two source rows are clamped
// to [0, src_rows - 1]. No clamp on sx0 is visible in this excerpt.
147 int sx0 = xofs[dx], sy0 = clamp(yofs[dy], 0, src_rows - 1),
148 sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);
// NOTE(review): as visible here ialpha is read at its base; presumably a
// missing line advances it to the pair for column dx -- confirm.
149 short a0 = ialpha[0], a1 = ialpha[1];
150 short b0 = ibeta[0], b1 = ibeta[1];
// Byte offsets of the left pixel on the upper and lower source rows.
152 int src_index0 = mad24(sy0, src_step, mad24(sx0, TSIZE, src_offset)),
153 src_index1 = mad24(sy1, src_step, mad24(sx0, TSIZE, src_offset));
// 2x2 neighbourhood: data0/data1 on row sy0, data2/data3 on row sy1; the right
// neighbour is one pixel (TSIZE bytes) past the left one.
154 WT data0 = convertToWT(loadpix(srcptr + src_index0));
155 WT data1 = convertToWT(loadpix(srcptr + src_index0 + TSIZE));
156 WT data2 = convertToWT(loadpix(srcptr + src_index1));
157 WT data3 = convertToWT(loadpix(srcptr + src_index1 + TSIZE));
// Horizontal blend (a0, a1) shifted right by 4, then vertical blend (b0, b1)
// shifted right by 16, summed over both rows.
159 WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
160 ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);
// Final rounding: add 2 and shift right by 2, then convert and store at
// dy*dst_step + dx*TSIZE + dst_offset.
162 storepix(convertToDT((val + 2) >> 2),
163 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
// ---- Bilinear resize with weights computed per work-item ----
167 #elif defined INTER_LINEAR
// resizeLN (direct variant): one work-item per destination pixel.
//   srcptr/dstptr          source / destination buffers, addressed in bytes
//   *_step, *_offset       byte stride between rows / byte offset of the first pixel
//   *_rows, *_cols         image sizes in pixels
//   ifx, ify               multipliers that map destination to source coordinates
// NOTE(review): this excerpt is missing braces, the conditional that separates
// the fixed-point and floating-point paths, and the declarations of U, V, u1
// and v1. Any lower-bound clamp of x / y is also not visible.
169 __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
170 __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
171 float ifx, float ify)
173 int dx = get_global_id(0);
174 int dy = get_global_id(1);
176 if (dx < dst_cols && dy < dst_rows)
// Map the destination pixel centre to source coordinates (half-pixel shift on
// both sides), then split into integer position and fractional weight.
178 float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
179 int x = floor(sx), y = floor(sy);
181 float u = sx - x, v = sy - y;
// Past the right / bottom edge: use the last column / row with zero fraction.
184 if ( x>=src_cols ) x=src_cols-1,u=0;
186 if ( y>=src_rows ) y=src_rows-1,v=0;
// Neighbouring row / column, capped at the last valid index.
188 int y_ = INC(y, src_rows);
189 int x_ = INC(x, src_cols);
// Fixed-point path: scale the fractions by INTER_RESIZE_COEF_SCALE and form the
// rounded complementary weights U1, V1.
192 u = u * INTER_RESIZE_COEF_SCALE;
193 v = v * INTER_RESIZE_COEF_SCALE;
197 int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
198 int V1 = rint(INTER_RESIZE_COEF_SCALE - v);
// 2x2 neighbourhood: data0=(x,y) data1=(x_,y) data2=(x,y_) data3=(x_,y_).
200 WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
201 WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
202 WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
203 WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));
// Weighted sum with integer weight products U1*V1, U*V1, U1*V, U*V.
205 WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
206 mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);
// Add half of 2^CAST_BITS for rounding, then shift the weight scale back out.
208 T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
// Floating-point path: same four loads, blended with float weights.
212 WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
213 WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
214 WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
215 WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));
// u1 / v1 are presumably the complements (1 - u), (1 - v) -- declared on lines
// missing from this excerpt; confirm.
217 T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
// Store at dy*dst_step + dx*TSIZE + dst_offset.
219 storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
// ---- Nearest-neighbour resize ----
223 #elif defined INTER_NEAREST
// resizeNN: copy one source pixel to each destination pixel; one work-item per
// destination pixel.
//   srcptr/dstptr          source / destination buffers, addressed in bytes
//   *_step, *_offset       byte stride between rows / byte offset of the first pixel
//   *_rows, *_cols         image sizes in pixels
//   ifx, ify               destination-to-source multipliers (not used in the visible lines)
// NOTE(review): braces and the declarations of s1 / s2 are missing from this
// excerpt; presumably they hold the source coordinates computed from dx, dy,
// ifx and ify -- confirm.
225 __kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
226 __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
227 float ifx, float ify)
229 int dx = get_global_id(0);
230 int dy = get_global_id(1);
232 if (dx < dst_cols && dy < dst_rows)
// Truncate toward zero and cap at the last valid column / row.
236 int sx = min(convert_int_rtz(s1), src_cols - 1);
237 int sy = min(convert_int_rtz(s2), src_rows - 1);
// Straight pixel copy, no type conversion.
239 storepix(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))),
240 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
// ---- Area resize ----
244 #elif defined INTER_AREA
// Fast path: the reduction factors XSCALE / YSCALE are compile-time macros.
246 #ifdef INTER_AREA_FAST
// resizeAREA_FAST: each destination pixel is built from an XSCALE x YSCALE
// block of source pixels; one work-item per destination pixel.
//   src/dst                source / destination buffers, addressed in bytes
//   *_step, *_offset       byte stride between rows / byte offset of the first pixel
//   *_rows, *_cols         image sizes in pixels
// NOTE(review): braces and the declaration of `sum` are missing from this
// excerpt (presumably a zero-initialized WTV accumulator -- confirm).
248 __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
249 __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
251 int dx = get_global_id(0);
252 int dy = get_global_id(1);
254 if (dx < dst_cols && dy < dst_rows)
// Byte offset of the start of destination row dy.
256 int dst_index = mad24(dy, dst_step, dst_offset);
// Top-left corner of the source block for this destination pixel.
258 int sx = XSCALE * dx;
259 int sy = YSCALE * dy;
// Walk the block; rows and columns past the image edge reuse the last valid
// row / column.
263 for (int py = 0; py < YSCALE; ++py)
265 int y = min(sy + py, src_rows - 1);
266 int src_index = mad24(y, src_step, src_offset);
268 for (int px = 0; px < XSCALE; ++px)
270 int x = min(sx + px, src_cols - 1);
271 sum += convertToWTV(loadpix(src + src_index + x*TSIZE));
// Multiply the accumulated sum by SCALE (defined outside this excerpt;
// presumably the reciprocal of the block area), convert, and store.
275 storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index));
// resizeAREA: area resize driven by precomputed tables; one work-item per
// destination pixel.
//   src/dst                source / destination buffers, addressed in bytes
//   *_step, *_offset       byte stride between rows / byte offset of the first pixel
//   *_rows, *_cols         image sizes in pixels
//   ifx, ify               not used in the visible lines
//   ofs_tab                x offsets, followed by y offsets (split as read below)
//   map_tab                x source indices, followed by y source indices
//   alpha_tab              x weights, followed by y weights
// NOTE(review): braces are missing from this excerpt, and so are the statements
// that reset `buf` for each row and fold `buf * beta` into `sum`; as visible
// here `buf` is used uninitialized and `sum` stays 0. Confirm against the full file.
281 __kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
282 __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
283 float ifx, float ify, __global const int * ofs_tab,
284 __global const int * map_tab, __global const float * alpha_tab)
286 int dx = get_global_id(0);
287 int dy = get_global_id(1);
289 if (dx < dst_cols && dy < dst_rows)
// Byte offset of the start of destination row dy.
291 int dst_index = mad24(dy, dst_step, dst_offset);
// Split the packed tables: the y parts of map_tab / alpha_tab start
// 2 * src_cols entries in; the y part of ofs_tab starts dst_cols + 1 entries in.
293 __global const int * xmap_tab = map_tab;
294 __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
295 __global const float * xalpha_tab = alpha_tab;
296 __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
297 __global const int * xofs_tab = ofs_tab;
298 __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
// [xk0, xk1) and [yk0, yk1) are the ranges of table entries that belong to this
// destination column / row.
300 int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
301 int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];
// First and last source row / column, taken from the first and last entries of
// those ranges.
303 int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
304 int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
306 WTV sum = (WTV)(0), buf;
307 int src_index = mad24(sy0, src_step, src_offset);
// Rows sy0..sy1 inclusive; src_index advances one row and yk one weight entry
// per iteration.
309 for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
// Vertical weight for this source row.
311 WTV beta = (WTV)(yalpha_tab[yk]);
// Columns sx0..sx1 inclusive, each scaled by its horizontal weight.
314 for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
316 WTV alpha = (WTV)(xalpha_tab[xk]);
317 buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha;
// Convert the accumulated value and store it at column dx of the destination row.
322 storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index));