src/core/CL/cl_kernels/harris_corners.cl

   1 /*
   2  * Copyright (c) 2016, 2017 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #include "helpers.h"
  25
  26 /** Function running harris score on 3x3 block size
  27  *
  28  * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
  29  *             e.g. -DDATA_TYPE=short.
  30  *
  31  * @param[in]  src_gx_ptr                           Pointer to the first source image. Supported data types: S16, S32
  32  * @param[in]  src_gx_stride_x                      Stride of the source image in X dimension (in bytes)
  33  * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
  34  * @param[in]  src_gx_stride_y                      Stride of the source image in Y dimension (in bytes)
  35  * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
  36  * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the source image
  37  * @param[in]  src_gy_ptr                           Pointer to the second source image. Supported data types: S16, S32
  38  * @param[in]  src_gy_stride_x                      Stride of the destination image in X dimension (in bytes)
  39  * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
  40  * @param[in]  src_gy_stride_y                      Stride of the destination image in Y dimension (in bytes)
  41  * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
  42  * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the destination image
  43  * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
  44  * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
  45  * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
  46  * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
  47  * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
  48  * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
  49  * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
  50  * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
  51  * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
  52  */
  53 __kernel void harris_score_3x3(
  54     IMAGE_DECLARATION(src_gx),
  55     IMAGE_DECLARATION(src_gy),
  56     IMAGE_DECLARATION(vc),
  57     float sensitivity,
  58     float strength_thresh,
  59     float pow4_normalization_factor)
  60 {
  61     Image src_gx = CONVERT_TO_IMAGE_STRUCT(src_gx);
  62     Image src_gy = CONVERT_TO_IMAGE_STRUCT(src_gy);
  63     Image vc     = CONVERT_TO_IMAGE_STRUCT(vc);
  64
  65     /* Gx^2, Gy^2 and Gx*Gy */
  66     float4 gx2  = (float4)0.0f;
  67     float4 gy2  = (float4)0.0f;
  68     float4 gxgy = (float4)0.0f;
  69
  70     /* Row0 */
  71     VEC_DATA_TYPE(DATA_TYPE, 8)
  72     temp_gx = vload8(0, (__global DATA_TYPE *)offset(&src_gx, -1, -1));
  73     VEC_DATA_TYPE(DATA_TYPE, 8)
  74     temp_gy = vload8(0, (__global DATA_TYPE *)offset(&src_gy, -1, -1));
  75
  76     float4 l_gx = convert_float4(temp_gx.s0123);
  77     float4 m_gx = convert_float4(temp_gx.s1234);
  78     float4 r_gx = convert_float4(temp_gx.s2345);
  79
  80     float4 l_gy = convert_float4(temp_gy.s0123);
  81     float4 m_gy = convert_float4(temp_gy.s1234);
  82     float4 r_gy = convert_float4(temp_gy.s2345);
  83
  84     gx2 += (l_gx * l_gx) + (m_gx * m_gx) + (r_gx * r_gx);
  85     gy2 += (l_gy * l_gy) + (m_gy * m_gy) + (r_gy * r_gy);
  86     gxgy += (l_gx * l_gy) + (m_gx * m_gy) + (r_gx * r_gy);
  87
  88     /* Row1 */
  89     temp_gx = vload8(0, (__global DATA_TYPE *)offset(&src_gx, -1, 0));
  90     temp_gy = vload8(0, (__global DATA_TYPE *)offset(&src_gy, -1, 0));
  91
  92     l_gx = convert_float4(temp_gx.s0123);
  93     m_gx = convert_float4(temp_gx.s1234);
  94     r_gx = convert_float4(temp_gx.s2345);
  95
  96     l_gy = convert_float4(temp_gy.s0123);
  97     m_gy = convert_float4(temp_gy.s1234);
  98     r_gy = convert_float4(temp_gy.s2345);
  99
 100     gx2 += (l_gx * l_gx) + (m_gx * m_gx) + (r_gx * r_gx);
 101     gy2 += (l_gy * l_gy) + (m_gy * m_gy) + (r_gy * r_gy);
 102     gxgy += (l_gx * l_gy) + (m_gx * m_gy) + (r_gx * r_gy);
 103
 104     /* Row2 */
 105     temp_gx = vload8(0, (__global DATA_TYPE *)offset(&src_gx, -1, 1));
 106     temp_gy = vload8(0, (__global DATA_TYPE *)offset(&src_gy, -1, 1));
 107
 108     l_gx = convert_float4(temp_gx.s0123);
 109     m_gx = convert_float4(temp_gx.s1234);
 110     r_gx = convert_float4(temp_gx.s2345);
 111
 112     l_gy = convert_float4(temp_gy.s0123);
 113     m_gy = convert_float4(temp_gy.s1234);
 114     r_gy = convert_float4(temp_gy.s2345);
 115
 116     gx2 += (l_gx * l_gx) + (m_gx * m_gx) + (r_gx * r_gx);
 117     gy2 += (l_gy * l_gy) + (m_gy * m_gy) + (r_gy * r_gy);
 118     gxgy += (l_gx * l_gy) + (m_gx * m_gy) + (r_gx * r_gy);
 119
 120     /* Compute trace and determinant */
 121     float4 trace = gx2 + gy2;
 122     float4 det   = gx2 * gy2 - (gxgy * gxgy);
 123
 124     /* Compute harris score */
 125     float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
 126
 127     mc = select(0.0f, mc, mc > (float4)strength_thresh);
 128
 129     vstore4(mc, 0, (__global float *)vc.ptr);
 130 }
 131
 132 /** Function for calculating harris score 1x5.
 133  *
 134  * @param[in] src_gx Pointer to gx gradient image.
 135  * @param[in] src_gy Pointer to gy gradient image.
 136  * @param[in] row    Relative row.
 137  */
 138 inline float16 harris_score_1x5(Image *src_gx, Image *src_gy, int row)
 139 {
 140     float4 gx2  = 0.0f;
 141     float4 gy2  = 0.0f;
 142     float4 gxgy = 0.0f;
 143
 144     /* Row */
 145     VEC_DATA_TYPE(DATA_TYPE, 8)
 146     temp_gx = vload8(0, (__global DATA_TYPE *)offset(src_gx, -2, row));
 147     VEC_DATA_TYPE(DATA_TYPE, 8)
 148     temp_gy = vload8(0, (__global DATA_TYPE *)offset(src_gy, -2, row));
 149
 150     float4 gx = convert_float4(temp_gx.s0123);
 151     float4 gy = convert_float4(temp_gy.s0123);
 152     gx2 += (gx * gx);
 153     gy2 += (gy * gy);
 154     gxgy += (gx * gy);
 155
 156     gx = convert_float4(temp_gx.s1234);
 157     gy = convert_float4(temp_gy.s1234);
 158     gx2 += (gx * gx);
 159     gy2 += (gy * gy);
 160     gxgy += (gx * gy);
 161
 162     gx = convert_float4(temp_gx.s2345);
 163     gy = convert_float4(temp_gy.s2345);
 164     gx2 += (gx * gx);
 165     gy2 += (gy * gy);
 166     gxgy += (gx * gy);
 167
 168     gx = convert_float4(temp_gx.s3456);
 169     gy = convert_float4(temp_gy.s3456);
 170     gx2 += (gx * gx);
 171     gy2 += (gy * gy);
 172     gxgy += (gx * gy);
 173
 174     gx = convert_float4(temp_gx.s4567);
 175     gy = convert_float4(temp_gy.s4567);
 176     gx2 += (gx * gx);
 177     gy2 += (gy * gy);
 178     gxgy += (gx * gy);
 179
 180     return (float16)(gx2, gy2, gxgy, (float4)0);
 181 }
 182
 183 /** Function running harris score on 5x5 block size
 184  *
 185  * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
 186  *             e.g. -DDATA_TYPE=short.
 187  *
 188  * @param[in]  src_gx_ptr                           Pointer to the first source image. Supported data types: S16, S32
 189  * @param[in]  src_gx_stride_x                      Stride of the source image in X dimension (in bytes)
 190  * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
 191  * @param[in]  src_gx_stride_y                      Stride of the source image in Y dimension (in bytes)
 192  * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 193  * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the source image
 194  * @param[in]  src_gy_ptr                           Pointer to the second source image. Supported data types: S16, S32
 195  * @param[in]  src_gy_stride_x                      Stride of the destination image in X dimension (in bytes)
 196  * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
 197  * @param[in]  src_gy_stride_y                      Stride of the destination image in Y dimension (in bytes)
 198  * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 199  * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the destination image
 200  * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
 201  * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
 202  * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
 203  * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
 204  * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
 205  * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
 206  * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
 207  * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
 208  * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
 209  */
 210 __kernel void harris_score_5x5(
 211     IMAGE_DECLARATION(src_gx),
 212     IMAGE_DECLARATION(src_gy),
 213     IMAGE_DECLARATION(vc),
 214     float sensitivity,
 215     float strength_thresh,
 216     float pow4_normalization_factor)
 217 {
 218     Image src_gx = CONVERT_TO_IMAGE_STRUCT(src_gx);
 219     Image src_gy = CONVERT_TO_IMAGE_STRUCT(src_gy);
 220     Image vc     = CONVERT_TO_IMAGE_STRUCT(vc);
 221
 222     /* Gx^2, Gy^2 and Gx*Gy */
 223     float16 res = (float16)0.0f;
 224
 225     /* Compute row */
 226     for(int i = -2; i < 3; i++)
 227     {
 228         res += harris_score_1x5(&src_gx, &src_gy, i);
 229     }
 230
 231     float4 gx2  = res.s0123;
 232     float4 gy2  = res.s4567;
 233     float4 gxgy = res.s89AB;
 234
 235     /* Compute trace and determinant */
 236     float4 trace = gx2 + gy2;
 237     float4 det   = gx2 * gy2 - (gxgy * gxgy);
 238
 239     /* Compute harris score */
 240     float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
 241
 242     mc = select(0.0f, mc, mc > (float4)strength_thresh);
 243
 244     vstore4(mc, 0, (__global float *)vc.ptr);
 245 }
 246
 247 /** Function for calculating harris score 1x7.
 248  *
 249  * @param[in] src_gx Pointer to gx gradient image.
 250  * @param[in] src_gy Pointer to gy gradient image.
 251  * @param[in] row    Relative row.
 252  */
 253 inline float16 harris_score_1x7(Image *src_gx, Image *src_gy, int row)
 254 {
 255     float4 gx2  = 0.0f;
 256     float4 gy2  = 0.0f;
 257     float4 gxgy = 0.0f;
 258
 259     /* Row */
 260     VEC_DATA_TYPE(DATA_TYPE, 8)
 261     temp_gx0 = vload8(0, (__global DATA_TYPE *)offset(src_gx, -3, row));
 262     VEC_DATA_TYPE(DATA_TYPE, 8)
 263     temp_gy0 = vload8(0, (__global DATA_TYPE *)offset(src_gy, -3, row));
 264     VEC_DATA_TYPE(DATA_TYPE, 2)
 265     temp_gx1 = vload2(0, (__global DATA_TYPE *)offset(src_gx, 5, row));
 266     VEC_DATA_TYPE(DATA_TYPE, 2)
 267     temp_gy1 = vload2(0, (__global DATA_TYPE *)offset(src_gy, 5, row));
 268
 269     float4 gx = convert_float4(temp_gx0.s0123);
 270     float4 gy = convert_float4(temp_gy0.s0123);
 271     gx2 += (gx * gx);
 272     gy2 += (gy * gy);
 273     gxgy += (gx * gy);
 274
 275     gx = convert_float4(temp_gx0.s1234);
 276     gy = convert_float4(temp_gy0.s1234);
 277     gx2 += (gx * gx);
 278     gy2 += (gy * gy);
 279     gxgy += (gx * gy);
 280
 281     gx = convert_float4(temp_gx0.s2345);
 282     gy = convert_float4(temp_gy0.s2345);
 283     gx2 += (gx * gx);
 284     gy2 += (gy * gy);
 285     gxgy += (gx * gy);
 286
 287     gx = convert_float4(temp_gx0.s3456);
 288     gy = convert_float4(temp_gy0.s3456);
 289     gx2 += (gx * gx);
 290     gy2 += (gy * gy);
 291     gxgy += (gx * gy);
 292
 293     gx = convert_float4(temp_gx0.s4567);
 294     gy = convert_float4(temp_gy0.s4567);
 295     gx2 += (gx * gx);
 296     gy2 += (gy * gy);
 297     gxgy += (gx * gy);
 298
 299     gx = convert_float4((VEC_DATA_TYPE(DATA_TYPE, 4))(temp_gx0.s567, temp_gx1.s0));
 300     gy = convert_float4((VEC_DATA_TYPE(DATA_TYPE, 4))(temp_gy0.s567, temp_gy1.s0));
 301     gx2 += (gx * gx);
 302     gy2 += (gy * gy);
 303     gxgy += (gx * gy);
 304
 305     gx = convert_float4((VEC_DATA_TYPE(DATA_TYPE, 4))(temp_gx0.s67, temp_gx1.s01));
 306     gy = convert_float4((VEC_DATA_TYPE(DATA_TYPE, 4))(temp_gy0.s67, temp_gy1.s01));
 307     gx2 += (gx * gx);
 308     gy2 += (gy * gy);
 309     gxgy += (gx * gy);
 310
 311     return (float16)(gx2, gy2, gxgy, (float4)0);
 312 }
 313
 314 /** Function running harris score on 7x7 block size
 315  *
 316  * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
 317  *             e.g. -DDATA_TYPE=short.
 318  *
 319  * @param[in]  src_gx_ptr                           Pointer to the first source image. Supported data types: S16, S32
 320  * @param[in]  src_gx_stride_x                      Stride of the source image in X dimension (in bytes)
 321  * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
 322  * @param[in]  src_gx_stride_y                      Stride of the source image in Y dimension (in bytes)
 323  * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 324  * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the source image
 325  * @param[in]  src_gy_ptr                           Pointer to the second source image. Supported data types: S16, S32
 326  * @param[in]  src_gy_stride_x                      Stride of the destination image in X dimension (in bytes)
 327  * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
 328  * @param[in]  src_gy_stride_y                      Stride of the destination image in Y dimension (in bytes)
 329  * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 330  * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the destination image
 331  * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
 332  * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
 333  * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
 334  * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
 335  * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
 336  * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
 337  * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
 338  * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
 339  * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
 340  */
 341 __kernel void harris_score_7x7(
 342     IMAGE_DECLARATION(src_gx),
 343     IMAGE_DECLARATION(src_gy),
 344     IMAGE_DECLARATION(vc),
 345     float sensitivity,
 346     float strength_thresh,
 347     float pow4_normalization_factor)
 348 {
 349     Image src_gx = CONVERT_TO_IMAGE_STRUCT(src_gx);
 350     Image src_gy = CONVERT_TO_IMAGE_STRUCT(src_gy);
 351     Image vc     = CONVERT_TO_IMAGE_STRUCT(vc);
 352
 353     /* Gx^2, Gy^2 and Gx*Gy */
 354     float16 res = (float16)0.0f;
 355
 356     /* Compute row */
 357     for(int i = -3; i < 4; i++)
 358     {
 359         res += harris_score_1x7(&src_gx, &src_gy, i);
 360     }
 361
 362     float4 gx2  = res.s0123;
 363     float4 gy2  = res.s4567;
 364     float4 gxgy = res.s89AB;
 365
 366     /* Compute trace and determinant */
 367     float4 trace = gx2 + gy2;
 368     float4 det   = gx2 * gy2 - (gxgy * gxgy);
 369
 370     /* Compute harris score */
 371     float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
 372
 373     mc = select(0.0f, mc, mc > (float4)strength_thresh);
 374
 375     vstore4(mc, 0, (__global float *)vc.ptr);
 376 }