src/core/CL/cl_kernels/sobel_filter.cl

   1 /*
   2  * Copyright (c) 2016, 2017 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #include "helpers.h"
  25
  26 /***********************************************/
  27 /*   Begin implementation of Sobel3x3 filter   */
  28 /***********************************************/
  29
  30 /** This OpenCL kernel that computes a Sobel3x3 filter.
  31  *
  32  * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient
  33  * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required.
  34  *
  35  * @param[in]  src_ptr                              Pointer to the source image. Supported data types: U8
  36  * @param[in]  src_stride_x                         Stride of the source image in X dimension (in bytes)
  37  * @param[in]  src_step_x                           src_stride_x * number of elements along X processed per workitem(in bytes)
  38  * @param[in]  src_stride_y                         Stride of the source image in Y dimension (in bytes)
  39  * @param[in]  src_step_y                           src_stride_y * number of elements along Y processed per workitem(in bytes)
  40  * @param[in]  src_offset_first_element_in_bytes    The offset of the first element in the source image
  41  * @param[out] dst_gx_ptr                           Pointer to the destination image. Supported data types: S16
  42  * @param[in]  dst_gx_stride_x                      Stride of the destination image in X dimension (in bytes)
  43  * @param[in]  dst_gx_step_x                        dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
  44  * @param[in]  dst_gx_stride_y                      Stride of the destination image in Y dimension (in bytes)
  45  * @param[in]  dst_gx_step_y                        dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
  46  * @param[in]  dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image
  47  * @param[out] dst_gy_ptr                           Pointer to the destination image. Supported data types: S16
  48  * @param[in]  dst_gy_stride_x                      Stride of the destination image in X dimension (in bytes)
  49  * @param[in]  dst_gy_step_x                        dst_gy_stride_x * number of elements along X processed per workitem(in bytes)
  50  * @param[in]  dst_gy_stride_y                      Stride of the destination image in Y dimension (in bytes)
  51  * @param[in]  dst_gy_step_y                        dst_gy_stride_y * number of elements along Y processed per workitem(in bytes)
  52  * @param[in]  dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image
  53  */
  54 __kernel void sobel3x3(
  55     IMAGE_DECLARATION(src)
  56 #ifdef GRAD_X
  57     ,
  58     IMAGE_DECLARATION(dst_gx)
  59 #endif /* GRAD_X */
  60 #ifdef GRAD_Y
  61     ,
  62     IMAGE_DECLARATION(dst_gy)
  63 #endif /* GRAD_Y */
  64 )
  65 {
  66     Image src = CONVERT_TO_IMAGE_STRUCT(src);
  67 #ifdef GRAD_X
  68     Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx);
  69 #endif /* GRAD_X */
  70 #ifdef GRAD_Y
  71     Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy);
  72 #endif /* GRAD_Y */
  73
  74     // Output pixels
  75 #ifdef GRAD_X
  76     short8 gx = (short8)0;
  77 #endif /* GRAD_X */
  78 #ifdef GRAD_Y
  79     short8 gy = (short8)0;
  80 #endif /* GRAD_Y */
  81
  82     // Row0
  83     uchar16 temp   = vload16(0, offset(&src, -1, -1));
  84     short8  left   = convert_short8(temp.s01234567);
  85     short8  middle = convert_short8(temp.s12345678);
  86     short8  right  = convert_short8(temp.s23456789);
  87 #ifdef GRAD_X
  88     gx += left * (short8)(-1);
  89     gx += right * (short8)(+1);
  90 #endif /* GRAD_X */
  91 #ifdef GRAD_Y
  92     gy += left * (short8)(-1);
  93     gy += middle * (short8)(-2);
  94     gy += right * (short8)(-1);
  95 #endif /* GRAD_Y */
  96
  97     // Row1
  98     temp  = vload16(0, offset(&src, -1, 0));
  99     left  = convert_short8(temp.s01234567);
 100     right = convert_short8(temp.s23456789);
 101 #ifdef GRAD_X
 102     gx += left * (short8)(-2);
 103     gx += right * (short8)(+2);
 104 #endif /* GRAD_X */
 105
 106     // Row2
 107     temp   = vload16(0, offset(&src, -1, 1));
 108     left   = convert_short8(temp.s01234567);
 109     middle = convert_short8(temp.s12345678);
 110     right  = convert_short8(temp.s23456789);
 111 #ifdef GRAD_X
 112     gx += left * (short8)(-1);
 113     gx += right * (short8)(+1);
 114 #endif /* GRAD_X */
 115 #ifdef GRAD_Y
 116     gy += left * (short8)(+1);
 117     gy += middle * (short8)(+2);
 118     gy += right * (short8)(+1);
 119 #endif /* GRAD_Y */
 120
 121     // Store results
 122 #ifdef GRAD_X
 123     vstore8(gx, 0, ((__global short *)dst_gx.ptr));
 124 #endif /* GRAD_X */
 125 #ifdef GRAD_Y
 126     vstore8(gy, 0, ((__global short *)dst_gy.ptr));
 127 #endif /* GRAD_Y */
 128 }
 129
 130 /**********************************************/
 131 /*    End implementation of Sobel3x3 filter   */
 132 /**********************************************/
 133
 134 /***********************************************/
 135 /*   Begin implementation of Sobel5x5 filter   */
 136 /***********************************************/
 137
 138 /** Compute a 1D horizontal sobel filter 1x5 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
 139  *
 140  * @param[in] src             Pointer to source image.
 141  * @param[in] left1_coeff_gx  Weight of the most left pixel for gx
 142  * @param[in] left2_coeff_gx  Weight of the left pixel for gx
 143  * @param[in] middle_coeff_gx Weight of the middle pixel for gx
 144  * @param[in] right1_coeff_gx Weight of the right pixel for gx
 145  * @param[in] right2_coeff_gx Weight of the most right pixel for gx
 146  * @param[in] left1_coeff_gy  Weight of the most left pixel for gy
 147  * @param[in] left2_coeff_gy  Weight of the left pixel for gy
 148  * @param[in] middle_coeff_gy Weight of the middle pixel for gy
 149  * @param[in] right1_coeff_gy Weight of the right pixel for gy
 150  * @param[in] right2_coeff_gy Weight of the most right pixel for gy
 151  *
 152  * @return a short16 containing short8 gx and short8 gy values.
 153  */
 154 short16 sobel1x5(
 155     Image      *src,
 156     const short left1_coeff_gx,
 157     const short left2_coeff_gx,
 158     const short middle_coeff_gx,
 159     const short right1_coeff_gx,
 160     const short right2_coeff_gx,
 161     const short left1_coeff_gy,
 162     const short left2_coeff_gy,
 163     const short middle_coeff_gy,
 164     const short right1_coeff_gy,
 165     const short right2_coeff_gy)
 166 {
 167     uchar16 temp = vload16(0, offset(src, -2, 0));
 168     short8  gx   = 0;
 169     short8  gy   = 0;
 170     short8  val;
 171
 172     val = convert_short8(temp.s01234567);
 173     gx += val * (short8)left1_coeff_gx;
 174     gy += val * (short8)left1_coeff_gy;
 175
 176     val = convert_short8(temp.s12345678);
 177     gx += val * (short8)left2_coeff_gx;
 178     gy += val * (short8)left2_coeff_gy;
 179
 180     val = convert_short8(temp.s23456789);
 181     gx += val * (short8)middle_coeff_gx;
 182     gy += val * (short8)middle_coeff_gy;
 183
 184     val = convert_short8(temp.s3456789a);
 185     gx += val * (short8)right1_coeff_gx;
 186     gy += val * (short8)right1_coeff_gy;
 187
 188     val = convert_short8(temp.s456789ab);
 189     gx += val * (short8)right2_coeff_gx;
 190     gy += val * (short8)right2_coeff_gy;
 191
 192     return (short16)(gx, gy);
 193 }
 194
 195 /** Compute a 1D vertical sobel filter 5x1 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
 196  *
 197  * @param[in] src          Pointer to source image.
 198  * @param[in] up1_coeff    Weight of the most up pixel
 199  * @param[in] up2_coeff    Weight of the up pixel
 200  * @param[in] middle_coeff Weight of the middle pixel
 201  * @param[in] down1_coeff  Weight of the down pixel
 202  * @param[in] down2_coeff  Weight of the most down pixel
 203  *
 204  * @return a short8 containing 8 convoluted values.
 205  */
 206 short8 sobel5x1(
 207     Image      *src,
 208     const short up1_coeff,
 209     const short up2_coeff,
 210     const short middle_coeff,
 211     const short down1_coeff,
 212     const short down2_coeff)
 213 {
 214     short8 val;
 215     short8 out = (short8)0;
 216
 217     val = vload8(0, (__global short *)offset(src, 0, -2));
 218     out += val * (short8)up1_coeff;
 219
 220     val = vload8(0, (__global short *)offset(src, 0, -1));
 221     out += val * (short8)up2_coeff;
 222
 223     val = vload8(0, (__global short *)offset(src, 0, 0));
 224     out += val * (short8)middle_coeff;
 225
 226     val = vload8(0, (__global short *)offset(src, 0, 1));
 227     out += val * (short8)down1_coeff;
 228
 229     val = vload8(0, (__global short *)offset(src, 0, 2));
 230     out += val * (short8)down2_coeff;
 231
 232     return (short8)(out);
 233 }
 234
 235 /** Apply a 1x5 sobel matrix to a single channel U8 input image and output two temporary channel S16 images.
 236  *
 237  * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient
 238  * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required.
 239  *
 240  * @param[in]  src_ptr                              Pointer to the source image.. Supported data types: U8
 241  * @param[in]  src_stride_x                         Stride of the source image in X dimension (in bytes)
 242  * @param[in]  src_step_x                           src_stride_x * number of elements along X processed per workitem(in bytes)
 243  * @param[in]  src_stride_y                         Stride of the source image in Y dimension (in bytes)
 244  * @param[in]  src_step_y                           src_stride_y * number of elements along Y processed per workitem(in bytes)
 245  * @param[in]  src_offset_first_element_in_bytes    The offset of the first element in the source image
 246  * @param[out] dst_gx_ptr                           Pointer to the destination image.. Supported data types: S16
 247  * @param[in]  dst_gx_stride_x                      Stride of the destination image in X dimension (in bytes)
 248  * @param[in]  dst_gx_step_x                        dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
 249  * @param[in]  dst_gx_stride_y                      Stride of the destination image in Y dimension (in bytes)
 250  * @param[in]  dst_gx_step_y                        dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 251  * @param[in]  dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image
 252  * @param[out] dst_gy_ptr                           Pointer to the destination image. Supported data types: S16
 253  * @param[in]  dst_gy_stride_x                      Stride of the destination image in X dimension (in bytes)
 254  * @param[in]  dst_gy_step_x                        dst_gy_stride_x * number of elements along X processed per workitem(in bytes)
 255  * @param[in]  dst_gy_stride_y                      Stride of the destination image in Y dimension (in bytes)
 256  * @param[in]  dst_gy_step_y                        dst_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 257  * @param[in]  dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image
 258  */
 259 __kernel void sobel_separable1x5(
 260     IMAGE_DECLARATION(src)
 261 #ifdef GRAD_X
 262     ,
 263     IMAGE_DECLARATION(dst_gx)
 264 #endif /* GRAD_X */
 265 #ifdef GRAD_Y
 266     ,
 267     IMAGE_DECLARATION(dst_gy)
 268 #endif /* GRAD_Y */
 269 )
 270 {
 271     Image src = CONVERT_TO_IMAGE_STRUCT(src);
 272 #ifdef GRAD_X
 273     Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx);
 274 #endif /* GRAD_X */
 275 #ifdef GRAD_Y
 276     Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy);
 277 #endif /* GRAD_Y */
 278
 279     // Output pixels
 280     short16 gx_gy = sobel1x5(&src,
 281                              -1, -2, 0, 2, 1,
 282                              1, 4, 6, 4, 1);
 283
 284     // Store result in dst
 285 #ifdef GRAD_X
 286     vstore8(gx_gy.s01234567, 0, ((__global short *)dst_gx.ptr));
 287 #endif /* GRAD_X */
 288 #ifdef GRAD_Y
 289     vstore8(gx_gy.s89ABCDEF, 0, ((__global short *)dst_gy.ptr));
 290 #endif /* GRAD_Y */
 291 }
 292
 293 /** Apply a 5x1 convolution matrix to two single channel S16 input temporary images
 294  *  and output two single channel S16 images.
 295  *
 296  * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient
 297  * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required.
 298  *
 299  * @param[in]  src_x_ptr                            Pointer to the source image.. Supported data types: S16
 300  * @param[in]  src_x_stride_x                       Stride of the source image in X dimension (in bytes)
 301  * @param[in]  src_x_step_x                         src_x_stride_x * number of elements along X processed per workitem(in bytes)
 302  * @param[in]  src_x_stride_y                       Stride of the source image in Y dimension (in bytes)
 303  * @param[in]  src_x_step_y                         src_x_stride_y * number of elements along Y processed per workitem(in bytes)
 304  * @param[in]  src_x_offset_first_element_in_bytes  The offset of the first element in the source image
 305  * @param[out] dst_gx_ptr                           Pointer to the destination image. Supported data types: S16
 306  * @param[in]  dst_gx_stride_x                      Stride of the destination image in X dimension (in bytes)
 307  * @param[in]  dst_gx_step_x                        dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
 308  * @param[in]  dst_gx_stride_y                      Stride of the destination image in Y dimension (in bytes)
 309  * @param[in]  dst_gx_step_y                        dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 310  * @param[in]  dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image
 311  * @param[in]  src_y_ptr                            Pointer to the source image. Supported data types: S16
 312  * @param[in]  src_y_stride_x                       Stride of the source image in X dimension (in bytes)
 313  * @param[in]  src_y_step_x                         src_y_stride_x * number of elements along X processed per workitem(in bytes)
 314  * @param[in]  src_y_stride_y                       Stride of the source image in Y dimension (in bytes)
 315  * @param[in]  src_y_step_y                         src_y_stride_y * number of elements along Y processed per workitem(in bytes)
 316  * @param[in]  src_y_offset_first_element_in_bytes  The offset of the first element in the source image
 317  * @param[out] dst_gy_ptr                           Pointer to the destination image. Supported data types: S16
 318  * @param[in]  dst_gy_stride_x                      Stride of the destination image in X dimension (in bytes)
 319  * @param[in]  dst_gy_step_x                        dst_gy_stride_x * number of elements along X processed per workitem(in bytes)
 320  * @param[in]  dst_gy_stride_y                      Stride of the destination image in Y dimension (in bytes)
 321  * @param[in]  dst_gy_step_y                        dst_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 322  * @param[in]  dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image
 323  * @param[in]  dummy                                Dummy parameter to easy conditional inclusion
 324  */
 325 __kernel void sobel_separable5x1(
 326 #ifdef GRAD_X
 327     IMAGE_DECLARATION(src_x),
 328     IMAGE_DECLARATION(dst_gx),
 329 #endif /* GRAD_X */
 330 #ifdef GRAD_Y
 331     IMAGE_DECLARATION(src_y),
 332     IMAGE_DECLARATION(dst_gy),
 333 #endif /* GRAD_Y */
 334     int dummy)
 335 {
 336 #ifdef GRAD_X
 337     Image src_x  = CONVERT_TO_IMAGE_STRUCT(src_x);
 338     Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx);
 339 #endif /* GRAD_X */
 340 #ifdef GRAD_Y
 341     Image src_y  = CONVERT_TO_IMAGE_STRUCT(src_y);
 342     Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy);
 343 #endif /* GRAD_Y */
 344
 345 #ifdef GRAD_X
 346     short8 gx = sobel5x1(&src_x,
 347                          1, 4, 6, 4, 1);
 348     vstore8(gx, 0, ((__global short *)dst_gx.ptr));
 349 #endif /* GRAD_X */
 350 #ifdef GRAD_Y
 351     short8 gy = sobel5x1(&src_y,
 352                          -1, -2, 0, 2, 1);
 353     vstore8(gy, 0, ((__global short *)dst_gy.ptr));
 354 #endif /* GRAD_Y */
 355 }
 356
 357 /**********************************************/
 358 /*    End implementation of Sobel5x5 filter   */
 359 /**********************************************/
 360
 361 /***********************************************/
 362 /*   Begin implementation of Sobel7x7 filter   */
 363 /***********************************************/
 364
 365 /* Sobel 1x7 horizontal X / 7x1 vertical Y coefficients */
 366 #define X0 -1
 367 #define X1 -4
 368 #define X2 -5
 369 #define X3 0
 370 #define X4 5
 371 #define X5 4
 372 #define X6 1
 373
 374 /* Sobel 1x7 vertical X / 7x1 horizontal Y coefficients */
 375 #define Y0 1
 376 #define Y1 6
 377 #define Y2 15
 378 #define Y3 20
 379 #define Y4 15
 380 #define Y5 6
 381 #define Y6 1
 382
 383 /* Calculates single horizontal iteration. */
 384 #define SOBEL1x1_HOR(src, gx, gy, idx)                               \
 385     {                                                                \
 386         int8 val = convert_int8(vload8(0, offset(src, idx - 3, 0))); \
 387         gx += val * X##idx;                                          \
 388         gy += val * Y##idx;                                          \
 389     }
 390
 391 /* Calculates single vertical iteration. */
 392 #define SOBEL1x1_VERT(src, g, direction, idx)                          \
 393     {                                                                  \
 394         int8 val = vload8(0, (__global int *)offset(src, 0, idx - 3)); \
 395         g += val * (int8)direction##idx;                               \
 396     }
 397
 398 /* Calculates a 1x7 horizontal iteration. */
 399 #define SOBEL1x7(ptr, gx, gy)                        \
 400     SOBEL1x1_HOR(ptr, gx, gy, 0)                     \
 401     SOBEL1x1_HOR(ptr, gx, gy, 1)                 \
 402     SOBEL1x1_HOR(ptr, gx, gy, 2)             \
 403     SOBEL1x1_HOR(ptr, gx, gy, 3)         \
 404     SOBEL1x1_HOR(ptr, gx, gy, 4)     \
 405     SOBEL1x1_HOR(ptr, gx, gy, 5) \
 406     SOBEL1x1_HOR(ptr, gx, gy, 6)
 407
 408 /* Calculates a 7x1 vertical iteration. */
 409 #define SOBEL7x1(ptr, g, direction)                         \
 410     SOBEL1x1_VERT(ptr, g, direction, 0)                     \
 411     SOBEL1x1_VERT(ptr, g, direction, 1)                 \
 412     SOBEL1x1_VERT(ptr, g, direction, 2)             \
 413     SOBEL1x1_VERT(ptr, g, direction, 3)         \
 414     SOBEL1x1_VERT(ptr, g, direction, 4)     \
 415     SOBEL1x1_VERT(ptr, g, direction, 5) \
 416     SOBEL1x1_VERT(ptr, g, direction, 6)
 417
 418 /** Apply a 1x7 sobel matrix to a single channel U8 input image and output two temporary channel S16 images and leave the borders undefined.
 419  *
 420  * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient
 421  * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required.
 422  *
 423  * @param[in]  src_ptr                              Pointer to the source image. Supported data types: U8
 424  * @param[in]  src_stride_x                         Stride of the source image in X dimension (in bytes)
 425  * @param[in]  src_step_x                           src_stride_x * number of elements along X processed per workitem(in bytes)
 426  * @param[in]  src_stride_y                         Stride of the source image in Y dimension (in bytes)
 427  * @param[in]  src_step_y                           src_stride_y * number of elements along Y processed per workitem(in bytes)
 428  * @param[in]  src_offset_first_element_in_bytes    The offset of the first element in the source image
 429  * @param[out] dst_gx_ptr                           Pointer to the destination image. Supported data types: S32
 430  * @param[in]  dst_gx_stride_x                      Stride of the destination image in X dimension (in bytes)
 431  * @param[in]  dst_gx_step_x                        dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
 432  * @param[in]  dst_gx_stride_y                      Stride of the destination image in Y dimension (in bytes)
 433  * @param[in]  dst_gx_step_y                        dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 434  * @param[in]  dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image
 435  * @param[out] dst_gy_ptr                           Pointer to the destination image. Supported data types: S32
 436  * @param[in]  dst_gy_stride_x                      Stride of the destination image in X dimension (in bytes)
 437  * @param[in]  dst_gy_step_x                        dst_gy_stride_x * number of elements along X processed per workitem(in bytes)
 438  * @param[in]  dst_gy_stride_y                      Stride of the destination image in Y dimension (in bytes)
 439  * @param[in]  dst_gy_step_y                        dst_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 440  * @param[in]  dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image
 441  */
 442 __kernel void sobel_separable1x7(
 443     IMAGE_DECLARATION(src)
 444 #ifdef GRAD_X
 445     ,
 446     IMAGE_DECLARATION(dst_gx)
 447 #endif /* GRAD_X */
 448 #ifdef GRAD_Y
 449     ,
 450     IMAGE_DECLARATION(dst_gy)
 451 #endif /* GRAD_Y */
 452 )
 453 {
 454     Image src = CONVERT_TO_IMAGE_STRUCT(src);
 455 #ifdef GRAD_X
 456     Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx);
 457 #endif /* GRAD_X */
 458 #ifdef GRAD_Y
 459     Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy);
 460 #endif /* GRAD_Y */
 461     int8 gx = (int8)0;
 462     int8 gy = (int8)0;
 463
 464     SOBEL1x7(&src, gx, gy);
 465
 466     // Store result in dst
 467 #ifdef GRAD_X
 468     vstore8(gx, 0, ((__global int *)dst_gx.ptr));
 469 #endif /* GRAD_X */
 470 #ifdef GRAD_Y
 471     vstore8(gy, 0, ((__global int *)dst_gy.ptr));
 472 #endif /* GRAD_Y */
 473 }
 474
 475 /** Apply a 7x1 convolution matrix to two single channel S16 input temporary images and output two single channel S16 images and leave the borders undefined.
 476  *
 477  * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient
 478  * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required.
 479  *
 480  * @param[in]  src_x_ptr                            Pointer to the source image. Supported data types: S32
 481  * @param[in]  src_x_stride_x                       Stride of the source image in X dimension (in bytes)
 482  * @param[in]  src_x_step_x                         src_x_stride_x * number of elements along X processed per workitem(in bytes)
 483  * @param[in]  src_x_stride_y                       Stride of the source image in Y dimension (in bytes)
 484  * @param[in]  src_x_step_y                         src_x_stride_y * number of elements along Y processed per workitem(in bytes)
 485  * @param[in]  src_x_offset_first_element_in_bytes  The offset of the first element in the source image
 486  * @param[out] dst_gx_ptr                           Pointer to the destination image. Supported data types: S16
 487  * @param[in]  dst_gx_stride_x                      Stride of the destination image in X dimension (in bytes)
 488  * @param[in]  dst_gx_step_x                        dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
 489  * @param[in]  dst_gx_stride_y                      Stride of the destination image in Y dimension (in bytes)
 490  * @param[in]  dst_gx_step_y                        dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 491  * @param[in]  dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image
 492  * @param[in]  src_y_ptr                            Pointer to the source image. Supported data types: S32
 493  * @param[in]  src_y_stride_x                       Stride of the source image in X dimension (in bytes)
 494  * @param[in]  src_y_step_x                         src_y_stride_x * number of elements along X processed per workitem(in bytes)
 495  * @param[in]  src_y_stride_y                       Stride of the source image in Y dimension (in bytes)
 496  * @param[in]  src_y_step_y                         src_y_stride_y * number of elements along Y processed per workitem(in bytes)
 497  * @param[in]  src_y_offset_first_element_in_bytes  The offset of the first element in the source image
 498  * @param[out] dst_gy_ptr                           Pointer to the destination image. Supported data types: S16
 499  * @param[in]  dst_gy_stride_x                      Stride of the destination image in X dimension (in bytes)
 500  * @param[in]  dst_gy_step_x                        dst_gy_stride_x * number of elements along X processed per workitem(in bytes)
 501  * @param[in]  dst_gy_stride_y                      Stride of the destination image in Y dimension (in bytes)
 502  * @param[in]  dst_gy_step_y                        dst_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 503  * @param[in]  dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image
 504  * @param[in]  dummy                                Dummy parameter to easy conditional inclusion
 505  */
 506 __kernel void sobel_separable7x1(
 507 #ifdef GRAD_X
 508     IMAGE_DECLARATION(src_x),
 509     IMAGE_DECLARATION(dst_gx),
 510 #endif /* GRAD_X */
 511 #ifdef GRAD_Y
 512     IMAGE_DECLARATION(src_y),
 513     IMAGE_DECLARATION(dst_gy),
 514 #endif /* GRAD_Y */
 515     int dummy)
 516 {
 517 #ifdef GRAD_X
 518     Image src_x  = CONVERT_TO_IMAGE_STRUCT(src_x);
 519     Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx);
 520 #endif /* GRAD_X */
 521 #ifdef GRAD_Y
 522     Image src_y  = CONVERT_TO_IMAGE_STRUCT(src_y);
 523     Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy);
 524 #endif /* GRAD_Y */
 525
 526     // Output pixels
 527 #ifdef GRAD_X
 528     int8 gx = 0;
 529     SOBEL7x1(&src_x, gx, Y);
 530     vstore8(gx, 0, (__global int *)dst_gx.ptr);
 531 #endif /* GRAD_X */
 532 #ifdef GRAD_Y
 533     int8 gy = 0;
 534     SOBEL7x1(&src_y, gy, X);
 535     vstore8(gy, 0, (__global int *)dst_gy.ptr);
 536 #endif /* GRAD_Y */
 537 }
 538
 539 /**********************************************/
 540 /*    End implementation of Sobel7x7 filter   */
 541 /**********************************************/