1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Peng Xiao, pengxiao@multicorewareinc.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
46 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
47 #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
50 inline float calc(int x, int y)
52 return sqrt((float)(x * x + y * y));
55 inline float calc(int x, int y)
57 return (float)abs(x) + abs(y);
61 // Smoothing perpendicular to the derivative direction with a triangle filter
62 // only support 3x3 Sobel kernel
63 // h (-1) = 1, h (0) = 2, h (1) = 1
64 // h'(-1) = -1, h'(0) = 0, h'(1) = 1
65 // thus sobel 2D operator can be calculated as:
66 // h'(x, y) = h'(x)h(y) for x direction
68 // src input 8bit single channel image data
69 // dx_buf output dx buffer
70 // dy_buf output dy buffer
73 __attribute__((reqd_work_group_size(16,16,1)))
76 __global const uchar * src,
77 __global int * dx_buf,
78 __global int * dy_buf,
89 dx_buf_step /= sizeof(*dx_buf);
90 dx_buf_offset /= sizeof(*dx_buf);
91 dy_buf_step /= sizeof(*dy_buf);
92 dy_buf_offset /= sizeof(*dy_buf);
94 int gidx = get_global_id(0);
95 int gidy = get_global_id(1);
97 int lidx = get_local_id(0);
98 int lidy = get_local_id(1);
100 __local int smem[16][18];
102 smem[lidy][lidx + 1] =
103 src[gidx + min(gidy, rows - 1) * src_step + src_offset];
107 src[max(gidx - 1, 0) + min(gidy, rows - 1) * src_step + src_offset];
109 src[min(gidx + 16, cols - 1) + min(gidy, rows - 1) * src_step + src_offset];
111 barrier(CLK_LOCAL_MEM_FENCE);
113 if(gidy < rows && gidx < cols)
115 dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset] =
116 -smem[lidy][lidx] + smem[lidy][lidx + 2];
117 dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset] =
118 smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2];
122 // calculate the magnitude of the filter pass combining both x and y directions
123 // This is the buffered version(3x3 sobel)
125 // dx_buf dx buffer, calculated from calcSobelRowPass
126 // dy_buf dy buffer, calculated from calcSobelRowPass
127 // dx direvitive in x direction output
128 // dy direvitive in y direction output
129 // mag magnitude direvitive of xy output
132 __attribute__((reqd_work_group_size(16,16,1)))
135 __global const int * dx_buf,
136 __global const int * dy_buf,
139 __global float * mag,
154 dx_buf_step /= sizeof(*dx_buf);
155 dx_buf_offset /= sizeof(*dx_buf);
156 dy_buf_step /= sizeof(*dy_buf);
157 dy_buf_offset /= sizeof(*dy_buf);
158 dx_step /= sizeof(*dx);
159 dx_offset /= sizeof(*dx);
160 dy_step /= sizeof(*dy);
161 dy_offset /= sizeof(*dy);
162 mag_step /= sizeof(*mag);
163 mag_offset /= sizeof(*mag);
165 int gidx = get_global_id(0);
166 int gidy = get_global_id(1);
168 int lidx = get_local_id(0);
169 int lidy = get_local_id(1);
171 __local int sdx[18][16];
172 __local int sdy[18][16];
174 sdx[lidy + 1][lidx] =
175 dx_buf[gidx + min(gidy, rows - 1) * dx_buf_step + dx_buf_offset];
176 sdy[lidy + 1][lidx] =
177 dy_buf[gidx + min(gidy, rows - 1) * dy_buf_step + dy_buf_offset];
181 dx_buf[gidx + min(max(gidy-1,0),rows-1) * dx_buf_step + dx_buf_offset];
183 dx_buf[gidx + min(gidy + 16, rows - 1) * dx_buf_step + dx_buf_offset];
186 dy_buf[gidx + min(max(gidy-1,0),rows-1) * dy_buf_step + dy_buf_offset];
188 dy_buf[gidx + min(gidy + 16, rows - 1) * dy_buf_step + dy_buf_offset];
190 barrier(CLK_LOCAL_MEM_FENCE);
192 if(gidx < cols && gidy < rows)
194 int x = sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx];
195 int y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx];
197 dx[gidx + gidy * dx_step + dx_offset] = x;
198 dy[gidx + gidy * dy_step + dy_offset] = y;
200 mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y);
204 // calculate the magnitude of the filter pass combining both x and y directions
205 // This is the non-buffered version(non-3x3 sobel)
207 // dx_buf dx buffer, calculated from calcSobelRowPass
208 // dy_buf dy buffer, calculated from calcSobelRowPass
209 // dx direvitive in x direction output
210 // dy direvitive in y direction output
211 // mag magnitude direvitive of xy output
215 __global const int * dx,
216 __global const int * dy,
217 __global float * mag,
228 dx_step /= sizeof(*dx);
229 dx_offset /= sizeof(*dx);
230 dy_step /= sizeof(*dy);
231 dy_offset /= sizeof(*dy);
232 mag_step /= sizeof(*mag);
233 mag_offset /= sizeof(*mag);
235 int gidx = get_global_id(0);
236 int gidy = get_global_id(1);
238 if(gidy < rows && gidx < cols)
240 mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] =
242 dx[gidx + gidy * dx_step + dx_offset],
243 dy[gidx + gidy * dy_step + dy_offset]
248 //////////////////////////////////////////////////////////////////////////////////////////
249 // 0.4142135623730950488016887242097 is tan(22.5)
250 #define CANNY_SHIFT 15
251 #define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
253 //First pass of edge detection and non-maximum suppression
254 // edgetype is set to for each pixel:
255 // 0 - below low thres, not an edge
257 // 2 - is an edge, either magnitude is greater than high thres, or
258 // Given estimates of the image gradients, a search is then carried out
259 // to determine if the gradient magnitude assumes a local maximum in the gradient direction.
260 // if the rounded gradient angle is zero degrees (i.e. the edge is in the north-south direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the west and east directions,
261 // if the rounded gradient angle is 90 degrees (i.e. the edge is in the east-west direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north and south directions,
262 // if the rounded gradient angle is 135 degrees (i.e. the edge is in the north east-south west direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north west and south east directions,
263 // if the rounded gradient angle is 45 degrees (i.e. the edge is in the north west-south east direction)the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north east and south west directions.
265 // dx, dy direvitives of x and y direction
266 // mag magnitudes calculated from calcMagnitude function
267 // map output containing raw edge types
270 __attribute__((reqd_work_group_size(16,16,1)))
273 __global const int * dx,
274 __global const int * dy,
275 __global const float * mag,
291 dx_step /= sizeof(*dx);
292 dx_offset /= sizeof(*dx);
293 dy_step /= sizeof(*dy);
294 dy_offset /= sizeof(*dy);
295 mag_step /= sizeof(*mag);
296 mag_offset /= sizeof(*mag);
297 map_step /= sizeof(*map);
298 map_offset /= sizeof(*map);
303 __local float smem[18][18];
305 int gidx = get_global_id(0);
306 int gidy = get_global_id(1);
308 int lidx = get_local_id(0);
309 int lidy = get_local_id(1);
311 int grp_idx = get_global_id(0) & 0xFFFFF0;
312 int grp_idy = get_global_id(1) & 0xFFFFF0;
314 int tid = lidx + lidy * 16;
320 mag[grp_idx + lx + min(grp_idy + ly, rows - 1) * mag_step];
322 if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
325 mag[grp_idx + lx + min(grp_idy + ly + 14, rows -1) * mag_step];
328 barrier(CLK_LOCAL_MEM_FENCE);
330 if(gidy < rows && gidx < cols)
332 int x = dx[gidx + gidy * dx_step];
333 int y = dy[gidx + gidy * dy_step];
334 const int s = (x ^ y) < 0 ? -1 : 1;
335 const float m = smem[lidy + 1][lidx + 1];
339 // 0 - the pixel can not belong to an edge
340 // 1 - the pixel might belong to an edge
341 // 2 - the pixel does belong to an edge
345 const int tg22x = x * TG22;
346 const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
350 if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
352 edge_type = 1 + (int)(m > high_thresh);
357 if(m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1])
359 edge_type = 1 + (int)(m > high_thresh);
364 if(m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s])
366 edge_type = 1 + (int)(m > high_thresh);
370 map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
377 //////////////////////////////////////////////////////////////////////////////////////////
378 // do Hysteresis for pixel whose edge type is 1
380 // If candidate pixel (edge type is 1) has a neighbour pixel (in 3x3 area) with type 2, it is believed to be part of an edge and
381 // marked as edge. Each thread will iterate for 16 times to connect local edges.
382 // Candidate pixel being identified as edge will then be tested if there is nearby potiential edge points. If there is, counter will
383 // be incremented by 1 and the point location is stored. These potiential candidates will be processed further in next kernel.
385 // map raw edge type results calculated from calcMap.
386 // st the potiential edge points found in this kernel call
387 // counter the number of potiential edge points
390 __attribute__((reqd_work_group_size(16,16,1)))
394 __global ushort2 * st,
395 __global unsigned int * counter,
402 map_step /= sizeof(*map);
403 map_offset /= sizeof(*map);
407 __local int smem[18][18];
409 int gidx = get_global_id(0);
410 int gidy = get_global_id(1);
412 int lidx = get_local_id(0);
413 int lidy = get_local_id(1);
415 int grp_idx = get_global_id(0) & 0xFFFFF0;
416 int grp_idy = get_global_id(1) & 0xFFFFF0;
418 int tid = lidx + lidy * 16;
424 map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
426 if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
429 map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
432 barrier(CLK_LOCAL_MEM_FENCE);
434 if(gidy < rows && gidx < cols)
439 for (int k = 0; k < 16; ++k)
443 if (smem[lidy + 1][lidx + 1] == 1)
445 n += smem[lidy ][lidx ] == 2;
446 n += smem[lidy ][lidx + 1] == 2;
447 n += smem[lidy ][lidx + 2] == 2;
449 n += smem[lidy + 1][lidx ] == 2;
450 n += smem[lidy + 1][lidx + 2] == 2;
452 n += smem[lidy + 2][lidx ] == 2;
453 n += smem[lidy + 2][lidx + 1] == 2;
454 n += smem[lidy + 2][lidx + 2] == 2;
458 smem[lidy + 1][lidx + 1] = 2;
461 const int e = smem[lidy + 1][lidx + 1];
462 map[gidx + 1 + (gidy + 1) * map_step] = e;
467 n += smem[lidy ][lidx ] == 1;
468 n += smem[lidy ][lidx + 1] == 1;
469 n += smem[lidy ][lidx + 2] == 1;
471 n += smem[lidy + 1][lidx ] == 1;
472 n += smem[lidy + 1][lidx + 2] == 1;
474 n += smem[lidy + 2][lidx ] == 1;
475 n += smem[lidy + 2][lidx + 1] == 1;
476 n += smem[lidy + 2][lidx + 2] == 1;
481 unsigned int ind = atomic_inc(counter);
482 st[ind] = (ushort2)(gidx + 1, gidy + 1);
487 __constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
488 __constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
491 #define stack_size 512
494 __attribute__((reqd_work_group_size(128,1,1)))
495 edgesHysteresisGlobal
498 __global ushort2 * st1,
499 __global ushort2 * st2,
500 __global int * counter,
509 map_step /= sizeof(*map);
510 map_offset /= sizeof(*map);
514 int gidx = get_global_id(0);
515 int gidy = get_global_id(1);
517 int lidx = get_local_id(0);
518 int lidy = get_local_id(1);
520 int grp_idx = get_group_id(0);
521 int grp_idy = get_group_id(1);
523 __local unsigned int s_counter;
524 __local unsigned int s_ind;
526 __local ushort2 s_st[stack_size];
532 barrier(CLK_LOCAL_MEM_FENCE);
534 int ind = mad24(grp_idy, (int)get_local_size(0), grp_idx);
538 ushort2 pos = st1[ind];
539 if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
546 if (map[pos.x + pos.y * map_step] == 1)
548 map[pos.x + pos.y * map_step] = 2;
550 ind = atomic_inc(&s_counter);
555 barrier(CLK_LOCAL_MEM_FENCE);
557 while (s_counter > 0 && s_counter <= stack_size - get_local_size(0))
559 const int subTaskIdx = lidx >> 3;
560 const int portion = min(s_counter, (uint)(get_local_size(0)>> 3));
564 if (subTaskIdx < portion)
565 pos = s_st[s_counter - 1 - subTaskIdx];
566 barrier(CLK_LOCAL_MEM_FENCE);
569 s_counter -= portion;
570 barrier(CLK_LOCAL_MEM_FENCE);
572 if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
574 pos.x += c_dx[lidx & 7];
575 pos.y += c_dy[lidx & 7];
577 if (map[pos.x + pos.y * map_step] == 1)
579 map[pos.x + pos.y * map_step] = 2;
581 ind = atomic_inc(&s_counter);
586 barrier(CLK_LOCAL_MEM_FENCE);
593 ind = atomic_add(counter, s_counter);
594 s_ind = ind - s_counter;
596 barrier(CLK_LOCAL_MEM_FENCE);
600 for (int i = lidx; i < s_counter; i += get_local_size(0))
602 st2[ind + i] = s_st[i];
610 //Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0.
611 // map edge type mappings
616 __global const int * map,
617 __global uchar * dst,
626 map_step /= sizeof(*map);
627 map_offset /= sizeof(*map);
629 int gidx = get_global_id(0);
630 int gidy = get_global_id(1);
632 if(gidy < rows && gidx < cols)
634 dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));