1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2014, Itseez, Inc., all rights reserved.
6 // Third party copyrights are property of their respective owners.
// Enable an fp64 (double-precision) OpenCL extension: cl_amd_fp64 in the first
// branch, or cl_khr_fp64 when that macro is defined instead.
10 #pragma OPENCL EXTENSION cl_amd_fp64:enable
11 #elif defined (cl_khr_fp64)
12 #pragma OPENCL EXTENSION cl_khr_fp64:enable
// Numeric limits used to seed the reduction in the kernel: the running minimum
// starts at MAX_VAL and the running maxima start at MIN_VAL.
// Alternative pairs are given for unsigned/signed char, unsigned/signed short,
// int, float and double limits.
// NOTE(review): presumably exactly one pair is selected per source depth by the
// surrounding conditional directives -- confirm against the build options.
18 #define MAX_VAL UCHAR_MAX
20 #define MIN_VAL SCHAR_MIN
21 #define MAX_VAL SCHAR_MAX
24 #define MAX_VAL USHRT_MAX
26 #define MIN_VAL SHRT_MIN
27 #define MAX_VAL SHRT_MAX
29 #define MIN_VAL INT_MIN
30 #define MAX_VAL INT_MAX
32 #define MIN_VAL (-FLT_MAX)
33 #define MAX_VAL FLT_MAX
35 #define MIN_VAL (-DBL_MAX)
36 #define MAX_VAL DBL_MAX
// Initial value of minloc / maxloc before any location has been recorded.
40 #define INDEX_MAX UINT_MAX
// loadpix(addr) reads one source element from a global-memory address, and
// srcTSIZE is the size in bytes of that element (used to turn an element index
// into a byte offset).
// Scalar form: dereference the address as a single srcT.
43 #define loadpix(addr) *(__global const srcT *)(addr)
44 #define srcTSIZE (int)sizeof(srcT)
// Three-component form: vload3 of srcT1 values, 3 * sizeof(srcT1) bytes each.
46 #define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
47 #define srcTSIZE ((int)sizeof(srcT1) * 3)
// Form whose element size is a single srcT1.
52 #define srcTSIZE (int)sizeof(srcT1)
// CALC_MINLOC(inc) / CALC_MAXLOC(inc) store id + inc as the location of the
// current minimum / maximum. Each also has an empty definition, which makes
// the location update a no-op.
56 #define CALC_MINLOC(inc) minloc = id + inc
58 #define CALC_MINLOC(inc)
62 #define CALC_MAXLOC(inc) maxloc = id + inc
64 #define CALC_MAXLOC(inc)
// Update helpers applied to component p of temp:
//   CALC_MIN(p, inc)  acts when minval  > temp.p
//   CALC_MAX(p, inc)  acts when maxval  < temp.p
//   CALC_MAX2(p)      acts when maxval2 < temp.p
// CALC_MIN and CALC_MAX also have empty definitions that compile the update out.
// NOTE(review): CALC_P(p, inc) presumably combines these updates for one
// component -- confirm against its full definition.
68 #define CALC_MIN(p, inc) \
69 if (minval > temp.p) \
75 #define CALC_MIN(p, inc)
79 #define CALC_MAX(p, inc) \
80 if (maxval < temp.p) \
86 #define CALC_MAX(p, inc)
90 #define CALC_MAX2(p) \
91 if (maxval2 < temp.p) \
97 #define CALC_P(p, inc) \
// minmaxloc: work-group reduction that accumulates a minimum value, a maximum
// value, their locations, and a second maximum (maxval2), then writes one
// result per work-group into dstptr.
//
//   srcptr, src_step, src_offset    source bytes, row stride in bytes, start offset in bytes
//   cols                            row width in elements (used as id / cols, id % cols)
//   total                           number of elements to visit (loop bound for id)
//   groupnum                        entry count of each output section; also scales the loop stride
//                                   (presumably the number of work-groups -- confirm with the host code)
//   dstptr                          output buffer, laid out as consecutive sections of groupnum entries
//   mask, mask_step, mask_offset    mask bytes; an element is processed only when its mask byte is non-zero
//   src2ptr, src2_step, src2_offset second source, addressed like the first
102 __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_offset, int cols,
103 int total, int groupnum, __global uchar * dstptr
105 , __global const uchar * mask, int mask_step, int mask_offset
108 , __global const uchar * src2ptr, int src2_step, int src2_offset
// Work-item coordinates: index inside the group, group index, global index.
112 int lid = get_local_id(0);
113 int gid = get_group_id(0);
114 int id = get_global_id(0)
// Move the base pointers to the first element using the byte offsets.
121 srcptr += src_offset;
126 src2ptr += src2_offset;
// Local-memory scratch arrays (one slot per reducing work-item) and the
// private running results. The minimum starts at MAX_VAL and the maxima at
// MIN_VAL; locations start at INDEX_MAX.
130 __local dstT1 localmem_min[WGS2_ALIGNED];
131 dstT1 minval = MAX_VAL;
133 __local uint localmem_minloc[WGS2_ALIGNED];
134 uint minloc = INDEX_MAX;
138 dstT1 maxval = MIN_VAL;
139 __local dstT1 localmem_max[WGS2_ALIGNED];
141 __local uint localmem_maxloc[WGS2_ALIGNED];
142 uint maxloc = INDEX_MAX;
146 __local dstT1 localmem_max2[WGS2_ALIGNED];
147 dstT1 maxval2 = MIN_VAL;
// Main accumulation loop: id advances by grain (derived from groupnum * WGS)
// until it reaches total.
163 for (int grain = groupnum * WGS
167 ; id < total; id += grain)
// Mask lookup: (id / cols) * mask_step + (id % cols); the element is only
// processed when the mask byte is non-zero.
170 #ifdef HAVE_MASK_CONT
173 mask_index = mad24(id / cols, mask_step, id % cols);
175 if (mask[mask_index])
// Source byte offset, in two forms: id * srcTSIZE, and
// (id / cols) * src_step + (id % cols) * srcTSIZE.
179 src_index = mul24(id, srcTSIZE);
181 src_index = mad24(id / cols, src_step, mul24(id % cols, srcTSIZE));
// Load the element, convert it to the working type, and take its absolute value.
183 temp = convertToDT(loadpix(srcptr + src_index));
185 temp = temp >= (dstT)(0) ? temp : -temp;
// Second source: same addressing with src2_step, then the absolute difference
// |temp - temp2| and the absolute value of temp2.
189 #ifdef HAVE_SRC2_CONT
190 src2_index = mul24(id, srcTSIZE);
192 src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE));
194 temp2 = convertToDT(loadpix(src2ptr + src2_index));
195 temp = temp > temp2 ? temp - temp2 : (temp2 - temp);
197 temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2;
// Stage 1: work-items with lid < WGS2_ALIGNED publish their private results
// into their own local-memory slot, followed by a local-memory barrier.
253 if (lid < WGS2_ALIGNED)
256 localmem_min[lid] = minval;
259 localmem_max[lid] = maxval;
262 localmem_minloc[lid] = minloc;
265 localmem_maxloc[lid] = maxloc;
268 localmem_max2[lid] = maxval2;
271 barrier(CLK_LOCAL_MEM_FENCE);
// Stage 2: the remaining work-items (lid >= WGS2_ALIGNED) fold their private
// results into slot lid - WGS2_ALIGNED. When values are equal, the smaller
// location is kept.
273 if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED)
275 int lid3 = lid - WGS2_ALIGNED;
277 if (localmem_min[lid3] >= minval)
280 if (localmem_min[lid3] == minval)
281 localmem_minloc[lid3] = min(localmem_minloc[lid3], minloc);
// The trailing comma chains the location store with the value store that
// follows into a single expression statement.
283 localmem_minloc[lid3] = minloc,
285 localmem_min[lid3] = minval;
289 if (localmem_max[lid3] <= maxval)
292 if (localmem_max[lid3] == maxval)
293 localmem_maxloc[lid3] = min(localmem_maxloc[lid3], maxloc);
295 localmem_maxloc[lid3] = maxloc,
297 localmem_max[lid3] = maxval;
301 if (localmem_max2[lid3] < maxval2)
302 localmem_max2[lid3] = maxval2;
305 barrier(CLK_LOCAL_MEM_FENCE);
// Stage 3: halving reduction in local memory. lsize goes from
// WGS2_ALIGNED / 2 down to 1; slot lid2 = lsize + lid is merged into slot lid
// with the same tie rule (smaller location wins), and a barrier follows each step.
307 for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1)
311 int lid2 = lsize + lid;
314 if (localmem_min[lid] >= localmem_min[lid2])
317 if (localmem_min[lid] == localmem_min[lid2])
318 localmem_minloc[lid] = min(localmem_minloc[lid2], localmem_minloc[lid]);
320 localmem_minloc[lid] = localmem_minloc[lid2],
322 localmem_min[lid] = localmem_min[lid2];
326 if (localmem_max[lid] <= localmem_max[lid2])
329 if (localmem_max[lid] == localmem_max[lid2])
330 localmem_maxloc[lid] = min(localmem_maxloc[lid2], localmem_maxloc[lid]);
332 localmem_maxloc[lid] = localmem_maxloc[lid2],
334 localmem_max[lid] = localmem_max[lid2];
338 if (localmem_max2[lid] < localmem_max2[lid2])
339 localmem_max2[lid] = localmem_max2[lid2];
342 barrier(CLK_LOCAL_MEM_FENCE);
// Output: slot 0 holds the group's result. Each statistic is written to entry
// gid of its own section of dstptr, and pos is then advanced past that
// section (groupnum entries of the corresponding type).
349 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0];
350 pos = mad24(groupnum, (int)sizeof(dstT1), pos);
353 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0];
354 pos = mad24(groupnum, (int)sizeof(dstT1), pos);
357 *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0];
358 pos = mad24(groupnum, (int)sizeof(uint), pos);
361 *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
363 pos = mad24(groupnum, (int)sizeof(uint), pos);
367 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max2[0];