modules/ocl/src/opencl/imgproc_canny.cl

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // @Authors
  18 //    Peng Xiao, pengxiao@multicorewareinc.com
  19 //
  20 // Redistribution and use in source and binary forms, with or without modification,
  21 // are permitted provided that the following conditions are met:
  22 //
  23 //   * Redistribution's of source code must retain the above copyright notice,
  24 //     this list of conditions and the following disclaimer.
  25 //
  26 //   * Redistribution's in binary form must reproduce the above copyright notice,
  27 //     this list of conditions and the following disclaimer in the documentation
  28 //     and/or other materials provided with the distribution.
  29 //
  30 //   * The name of the copyright holders may not be used to endorse or promote products
  31 //     derived from this software without specific prior written permission.
  32 //
  33 // This software is provided by the copyright holders and contributors as is and
  34 // any express or implied warranties, including, but not limited to, the implied
  35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  36 // In no event shall the Intel Corporation or contributors be liable for any direct,
  37 // indirect, incidental, special, exemplary, or consequential damages
  38 // (including, but not limited to, procurement of substitute goods or services;
  39 // loss of use, data, or profits; or business interruption) however caused
  40 // and on any theory of liability, whether in contract, strict liability,
  41 // or tort (including negligence or otherwise) arising in any way out of
  42 // the use of this software, even if advised of the possibility of such damage.
  43 //
  44 //M*/
  45
  46 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
  47 #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
  48
  49 #ifdef L2GRAD
  50 inline float calc(int x, int y)
  51 {
  52     return sqrt((float)(x * x + y * y));
  53 }
  54 #else
  55 inline float calc(int x, int y)
  56 {
  57     return (float)abs(x) + abs(y);
  58 }
  59 #endif //
  60
  61 // Smoothing perpendicular to the derivative direction with a triangle filter
  62 // only support 3x3 Sobel kernel
  63 // h (-1) =  1, h (0) =  2, h (1) =  1
  64 // h'(-1) = -1, h'(0) =  0, h'(1) =  1
  65 // thus sobel 2D operator can be calculated as:
  66 // h'(x, y) = h'(x)h(y) for x direction
  67 //
  68 // src          input 8bit single channel image data
  69 // dx_buf       output dx buffer
  70 // dy_buf       output dy buffer
  71 __kernel
  72 void
  73 __attribute__((reqd_work_group_size(16,16,1)))
  74 calcSobelRowPass
  75 (
  76     __global const uchar * src,
  77     __global int * dx_buf,
  78     __global int * dy_buf,
  79     int rows,
  80     int cols,
  81     int src_step,
  82     int src_offset,
  83     int dx_buf_step,
  84     int dx_buf_offset,
  85     int dy_buf_step,
  86     int dy_buf_offset
  87 )
  88 {
  89     dx_buf_step   /= sizeof(*dx_buf);
  90     dx_buf_offset /= sizeof(*dx_buf);
  91     dy_buf_step   /= sizeof(*dy_buf);
  92     dy_buf_offset /= sizeof(*dy_buf);
  93
  94     int gidx = get_global_id(0);
  95     int gidy = get_global_id(1);
  96
  97     int lidx = get_local_id(0);
  98     int lidy = get_local_id(1);
  99
 100     __local int smem[16][18];
 101
 102     smem[lidy][lidx + 1] =
 103         src[gidx + min(gidy, rows - 1) * src_step + src_offset];
 104     if(lidx == 0)
 105     {
 106         smem[lidy][0]  =
 107             src[max(gidx - 1,  0)        + min(gidy, rows - 1) * src_step + src_offset];
 108         smem[lidy][17] =
 109             src[min(gidx + 16, cols - 1) + min(gidy, rows - 1) * src_step + src_offset];
 110     }
 111     barrier(CLK_LOCAL_MEM_FENCE);
 112
 113     if(gidy < rows && gidx < cols)
 114     {
 115         dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset] =
 116             -smem[lidy][lidx] + smem[lidy][lidx + 2];
 117         dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset] =
 118             smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2];
 119     }
 120 }
 121
 122 // calculate the magnitude of the filter pass combining both x and y directions
 123 // This is the buffered version(3x3 sobel)
 124 //
 125 // dx_buf               dx buffer, calculated from calcSobelRowPass
 126 // dy_buf               dy buffer, calculated from calcSobelRowPass
 127 // dx                   direvitive in x direction output
 128 // dy                   direvitive in y direction output
 129 // mag                  magnitude direvitive of xy output
 130 __kernel
 131 void
 132 __attribute__((reqd_work_group_size(16,16,1)))
 133 calcMagnitude_buf
 134 (
 135     __global const int * dx_buf,
 136     __global const int * dy_buf,
 137     __global int * dx,
 138     __global int * dy,
 139     __global float * mag,
 140     int rows,
 141     int cols,
 142     int dx_buf_step,
 143     int dx_buf_offset,
 144     int dy_buf_step,
 145     int dy_buf_offset,
 146     int dx_step,
 147     int dx_offset,
 148     int dy_step,
 149     int dy_offset,
 150     int mag_step,
 151     int mag_offset
 152 )
 153 {
 154     dx_buf_step    /= sizeof(*dx_buf);
 155     dx_buf_offset  /= sizeof(*dx_buf);
 156     dy_buf_step    /= sizeof(*dy_buf);
 157     dy_buf_offset  /= sizeof(*dy_buf);
 158     dx_step    /= sizeof(*dx);
 159     dx_offset  /= sizeof(*dx);
 160     dy_step    /= sizeof(*dy);
 161     dy_offset  /= sizeof(*dy);
 162     mag_step   /= sizeof(*mag);
 163     mag_offset /= sizeof(*mag);
 164
 165     int gidx = get_global_id(0);
 166     int gidy = get_global_id(1);
 167
 168     int lidx = get_local_id(0);
 169     int lidy = get_local_id(1);
 170
 171     __local int sdx[18][16];
 172     __local int sdy[18][16];
 173
 174     sdx[lidy + 1][lidx] =
 175         dx_buf[gidx + min(gidy, rows - 1) * dx_buf_step + dx_buf_offset];
 176     sdy[lidy + 1][lidx] =
 177         dy_buf[gidx + min(gidy, rows - 1) * dy_buf_step + dy_buf_offset];
 178     if(lidy == 0)
 179     {
 180         sdx[0][lidx]  =
 181             dx_buf[gidx + min(max(gidy-1,0),rows-1) * dx_buf_step + dx_buf_offset];
 182         sdx[17][lidx] =
 183             dx_buf[gidx + min(gidy + 16, rows - 1)  * dx_buf_step + dx_buf_offset];
 184
 185         sdy[0][lidx]  =
 186             dy_buf[gidx + min(max(gidy-1,0),rows-1) * dy_buf_step + dy_buf_offset];
 187         sdy[17][lidx] =
 188             dy_buf[gidx + min(gidy + 16, rows - 1)  * dy_buf_step + dy_buf_offset];
 189     }
 190     barrier(CLK_LOCAL_MEM_FENCE);
 191
 192     if(gidx < cols && gidy < rows)
 193     {
 194         int x =  sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx];
 195         int y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx];
 196
 197         dx[gidx + gidy * dx_step + dx_offset] = x;
 198         dy[gidx + gidy * dy_step + dy_offset] = y;
 199
 200         mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y);
 201     }
 202 }
 203
 204 // calculate the magnitude of the filter pass combining both x and y directions
 205 // This is the non-buffered version(non-3x3 sobel)
 206 //
 207 // dx_buf               dx buffer, calculated from calcSobelRowPass
 208 // dy_buf               dy buffer, calculated from calcSobelRowPass
 209 // dx                   direvitive in x direction output
 210 // dy                   direvitive in y direction output
 211 // mag                  magnitude direvitive of xy output
 212 __kernel
 213 void calcMagnitude
 214 (
 215     __global const int * dx,
 216     __global const int * dy,
 217     __global float * mag,
 218     int rows,
 219     int cols,
 220     int dx_step,
 221     int dx_offset,
 222     int dy_step,
 223     int dy_offset,
 224     int mag_step,
 225     int mag_offset
 226 )
 227 {
 228     dx_step    /= sizeof(*dx);
 229     dx_offset  /= sizeof(*dx);
 230     dy_step    /= sizeof(*dy);
 231     dy_offset  /= sizeof(*dy);
 232     mag_step   /= sizeof(*mag);
 233     mag_offset /= sizeof(*mag);
 234
 235     int gidx = get_global_id(0);
 236     int gidy = get_global_id(1);
 237
 238     if(gidy < rows && gidx < cols)
 239     {
 240         mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] =
 241             calc(
 242                 dx[gidx + gidy * dx_step + dx_offset],
 243                 dy[gidx + gidy * dy_step + dy_offset]
 244             );
 245     }
 246 }
 247
 248 //////////////////////////////////////////////////////////////////////////////////////////
 249 // 0.4142135623730950488016887242097 is tan(22.5)
 250 #define CANNY_SHIFT 15
 251 #define TG22        (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
 252
 253 //First pass of edge detection and non-maximum suppression
 254 // edgetype is set to for each pixel:
 255 // 0 - below low thres, not an edge
 256 // 1 - maybe an edge
 257 // 2 - is an edge, either magnitude is greater than high thres, or
 258 //     Given estimates of the image gradients, a search is then carried out
 259 //     to determine if the gradient magnitude assumes a local maximum in the gradient direction.
 260 //     if the rounded gradient angle is zero degrees (i.e. the edge is in the north-south direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the west and east directions,
 261 //     if the rounded gradient angle is 90 degrees (i.e. the edge is in the east-west direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north and south directions,
 262 //     if the rounded gradient angle is 135 degrees (i.e. the edge is in the north east-south west direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north west and south east directions,
 263 //     if the rounded gradient angle is 45 degrees (i.e. the edge is in the north west-south east direction)the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north east and south west directions.
 264 //
 265 // dx, dy               direvitives of x and y direction
 266 // mag                  magnitudes calculated from calcMagnitude function
 267 // map                  output containing raw edge types
 268 __kernel
 269 void
 270 __attribute__((reqd_work_group_size(16,16,1)))
 271 calcMap
 272 (
 273     __global const int * dx,
 274     __global const int * dy,
 275     __global const float * mag,
 276     __global int * map,
 277     int rows,
 278     int cols,
 279     float low_thresh,
 280     float high_thresh,
 281     int dx_step,
 282     int dx_offset,
 283     int dy_step,
 284     int dy_offset,
 285     int mag_step,
 286     int mag_offset,
 287     int map_step,
 288     int map_offset
 289 )
 290 {
 291     dx_step    /= sizeof(*dx);
 292     dx_offset  /= sizeof(*dx);
 293     dy_step    /= sizeof(*dy);
 294     dy_offset  /= sizeof(*dy);
 295     mag_step   /= sizeof(*mag);
 296     mag_offset /= sizeof(*mag);
 297     map_step   /= sizeof(*map);
 298     map_offset /= sizeof(*map);
 299
 300     mag += mag_offset;
 301     map += map_offset;
 302
 303     __local float smem[18][18];
 304
 305     int gidx = get_global_id(0);
 306     int gidy = get_global_id(1);
 307
 308     int lidx = get_local_id(0);
 309     int lidy = get_local_id(1);
 310
 311     int grp_idx = get_global_id(0) & 0xFFFFF0;
 312     int grp_idy = get_global_id(1) & 0xFFFFF0;
 313
 314     int tid = lidx + lidy * 16;
 315     int lx = tid % 18;
 316     int ly = tid / 18;
 317     if(ly < 14)
 318     {
 319         smem[ly][lx] =
 320             mag[grp_idx + lx + min(grp_idy + ly, rows - 1) * mag_step];
 321     }
 322     if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
 323     {
 324         smem[ly + 14][lx] =
 325             mag[grp_idx + lx + min(grp_idy + ly + 14, rows -1) * mag_step];
 326     }
 327
 328     barrier(CLK_LOCAL_MEM_FENCE);
 329
 330     if(gidy < rows && gidx < cols)
 331     {
 332         int x = dx[gidx + gidy * dx_step];
 333         int y = dy[gidx + gidy * dy_step];
 334         const int s = (x ^ y) < 0 ? -1 : 1;
 335         const float m = smem[lidy + 1][lidx + 1];
 336         x = abs(x);
 337         y = abs(y);
 338
 339         // 0 - the pixel can not belong to an edge
 340         // 1 - the pixel might belong to an edge
 341         // 2 - the pixel does belong to an edge
 342         int edge_type = 0;
 343         if(m > low_thresh)
 344         {
 345             const int tg22x = x * TG22;
 346             const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
 347             y <<= CANNY_SHIFT;
 348             if(y < tg22x)
 349             {
 350                 if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
 351                 {
 352                     edge_type = 1 + (int)(m > high_thresh);
 353                 }
 354             }
 355             else if (y > tg67x)
 356             {
 357                 if(m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1])
 358                 {
 359                     edge_type = 1 + (int)(m > high_thresh);
 360                 }
 361             }
 362             else
 363             {
 364                 if(m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s])
 365                 {
 366                     edge_type = 1 + (int)(m > high_thresh);
 367                 }
 368             }
 369         }
 370         map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
 371     }
 372 }
 373
 374 #undef CANNY_SHIFT
 375 #undef TG22
 376
 377 //////////////////////////////////////////////////////////////////////////////////////////
 378 // do Hysteresis for pixel whose edge type is 1
 379 //
 380 // If candidate pixel (edge type is 1) has a neighbour pixel (in 3x3 area) with type 2, it is believed to be part of an edge and
 381 // marked as edge. Each thread will iterate for 16 times to connect local edges.
 382 // Candidate pixel being identified as edge will then be tested if there is nearby potiential edge points. If there is, counter will
 383 // be incremented by 1 and the point location is stored. These potiential candidates will be processed further in next kernel.
 384 //
 385 // map          raw edge type results calculated from calcMap.
 386 // st           the potiential edge points found in this kernel call
 387 // counter      the number of potiential edge points
 388 __kernel
 389 void
 390 __attribute__((reqd_work_group_size(16,16,1)))
 391 edgesHysteresisLocal
 392 (
 393     __global int * map,
 394     __global ushort2 * st,
 395     __global unsigned int * counter,
 396     int rows,
 397     int cols,
 398     int map_step,
 399     int map_offset
 400 )
 401 {
 402     map_step   /= sizeof(*map);
 403     map_offset /= sizeof(*map);
 404
 405     map += map_offset;
 406
 407     __local int smem[18][18];
 408
 409     int gidx = get_global_id(0);
 410     int gidy = get_global_id(1);
 411
 412     int lidx = get_local_id(0);
 413     int lidy = get_local_id(1);
 414
 415     int grp_idx = get_global_id(0) & 0xFFFFF0;
 416     int grp_idy = get_global_id(1) & 0xFFFFF0;
 417
 418     int tid = lidx + lidy * 16;
 419     int lx = tid % 18;
 420     int ly = tid / 18;
 421     if(ly < 14)
 422     {
 423         smem[ly][lx] =
 424             map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
 425     }
 426     if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
 427     {
 428         smem[ly + 14][lx] =
 429             map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
 430     }
 431
 432     barrier(CLK_LOCAL_MEM_FENCE);
 433
 434     if(gidy < rows && gidx < cols)
 435     {
 436         int n;
 437
 438         #pragma unroll
 439         for (int k = 0; k < 16; ++k)
 440         {
 441             n = 0;
 442
 443             if (smem[lidy + 1][lidx + 1] == 1)
 444             {
 445                 n += smem[lidy    ][lidx    ] == 2;
 446                 n += smem[lidy    ][lidx + 1] == 2;
 447                 n += smem[lidy    ][lidx + 2] == 2;
 448
 449                 n += smem[lidy + 1][lidx    ] == 2;
 450                 n += smem[lidy + 1][lidx + 2] == 2;
 451
 452                 n += smem[lidy + 2][lidx    ] == 2;
 453                 n += smem[lidy + 2][lidx + 1] == 2;
 454                 n += smem[lidy + 2][lidx + 2] == 2;
 455             }
 456
 457             if (n > 0)
 458                 smem[lidy + 1][lidx + 1] = 2;
 459         }
 460
 461         const int e = smem[lidy + 1][lidx + 1];
 462         map[gidx + 1 + (gidy + 1) * map_step] = e;
 463
 464         n = 0;
 465         if(e == 2)
 466         {
 467             n += smem[lidy    ][lidx    ] == 1;
 468             n += smem[lidy    ][lidx + 1] == 1;
 469             n += smem[lidy    ][lidx + 2] == 1;
 470
 471             n += smem[lidy + 1][lidx    ] == 1;
 472             n += smem[lidy + 1][lidx + 2] == 1;
 473
 474             n += smem[lidy + 2][lidx    ] == 1;
 475             n += smem[lidy + 2][lidx + 1] == 1;
 476             n += smem[lidy + 2][lidx + 2] == 1;
 477         }
 478
 479         if(n > 0)
 480         {
 481             unsigned int ind = atomic_inc(counter);
 482             st[ind] = (ushort2)(gidx + 1, gidy + 1);
 483         }
 484     }
 485 }
 486
 487 __constant int c_dx[8] = {-1,  0,  1, -1, 1, -1, 0, 1};
 488 __constant int c_dy[8] = {-1, -1, -1,  0, 0,  1, 1, 1};
 489
 490
 491 #define stack_size 512
 492 __kernel
 493 void
 494 __attribute__((reqd_work_group_size(128,1,1)))
 495 edgesHysteresisGlobal
 496 (
 497     __global int * map,
 498     __global ushort2 * st1,
 499     __global ushort2 * st2,
 500     __global int * counter,
 501     int rows,
 502     int cols,
 503     int count,
 504     int map_step,
 505     int map_offset
 506 )
 507 {
 508
 509     map_step   /= sizeof(*map);
 510     map_offset /= sizeof(*map);
 511
 512     map += map_offset;
 513
 514     int gidx = get_global_id(0);
 515     int gidy = get_global_id(1);
 516
 517     int lidx = get_local_id(0);
 518     int lidy = get_local_id(1);
 519
 520     int grp_idx = get_group_id(0);
 521     int grp_idy = get_group_id(1);
 522
 523     __local unsigned int s_counter;
 524     __local unsigned int s_ind;
 525
 526     __local ushort2 s_st[stack_size];
 527
 528     if(lidx == 0)
 529     {
 530         s_counter = 0;
 531     }
 532     barrier(CLK_LOCAL_MEM_FENCE);
 533
 534     int ind = mad24(grp_idy, (int)get_local_size(0), grp_idx);
 535
 536     if(ind < count)
 537     {
 538         ushort2 pos = st1[ind];
 539         if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
 540         {
 541             if (lidx < 8)
 542             {
 543                 pos.x += c_dx[lidx];
 544                 pos.y += c_dy[lidx];
 545
 546                 if (map[pos.x + pos.y * map_step] == 1)
 547                 {
 548                     map[pos.x + pos.y * map_step] = 2;
 549
 550                     ind = atomic_inc(&s_counter);
 551
 552                     s_st[ind] = pos;
 553                 }
 554             }
 555             barrier(CLK_LOCAL_MEM_FENCE);
 556
 557             while (s_counter > 0 && s_counter <= stack_size - get_local_size(0))
 558             {
 559                 const int subTaskIdx = lidx >> 3;
 560                 const int portion = min(s_counter, (uint)(get_local_size(0)>> 3));
 561
 562                 pos.x = pos.y = 0;
 563
 564                 if (subTaskIdx < portion)
 565                     pos = s_st[s_counter - 1 - subTaskIdx];
 566                 barrier(CLK_LOCAL_MEM_FENCE);
 567
 568                 if (lidx == 0)
 569                     s_counter -= portion;
 570                 barrier(CLK_LOCAL_MEM_FENCE);
 571
 572                 if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
 573                 {
 574                     pos.x += c_dx[lidx & 7];
 575                     pos.y += c_dy[lidx & 7];
 576
 577                     if (map[pos.x + pos.y * map_step] == 1)
 578                     {
 579                         map[pos.x + pos.y * map_step] = 2;
 580
 581                         ind = atomic_inc(&s_counter);
 582
 583                         s_st[ind] = pos;
 584                     }
 585                 }
 586                 barrier(CLK_LOCAL_MEM_FENCE);
 587             }
 588
 589             if (s_counter > 0)
 590             {
 591                 if (lidx == 0)
 592                 {
 593                     ind = atomic_add(counter, s_counter);
 594                     s_ind = ind - s_counter;
 595                 }
 596                 barrier(CLK_LOCAL_MEM_FENCE);
 597
 598                 ind = s_ind;
 599
 600                 for (int i = lidx; i < s_counter; i += get_local_size(0))
 601                 {
 602                     st2[ind + i] = s_st[i];
 603                 }
 604             }
 605         }
 606     }
 607 }
 608 #undef stack_size
 609
 610 //Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0.
 611 // map          edge type mappings
 612 // dst          edge output
 613 __kernel
 614 void getEdges
 615 (
 616     __global const int * map,
 617     __global uchar * dst,
 618     int rows,
 619     int cols,
 620     int map_step,
 621     int map_offset,
 622     int dst_step,
 623     int dst_offset
 624 )
 625 {
 626     map_step   /= sizeof(*map);
 627     map_offset /= sizeof(*map);
 628
 629     int gidx = get_global_id(0);
 630     int gidy = get_global_id(1);
 631
 632     if(gidy < rows && gidx < cols)
 633     {
 634         dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
 635     }
 636 }