2 * Copyright (c) 2016, 2017 ARM Limited.
4 * SPDX-License-Identifier: MIT
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
26 #include "arm_compute/core/AccessWindowAutoPadding.h"
27 #include "arm_compute/core/Coordinates.h"
28 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/Helpers.h"
30 #include "arm_compute/core/TensorInfo.h"
31 #include "arm_compute/core/Types.h"
32 #include "arm_compute/core/Utils.h"
33 #include "arm_compute/core/Validate.h"
34 #include "arm_compute/core/Window.h"
/* Make the library namespace visible for the rest of this translation unit. */
41 using namespace arm_compute;
/* The code that follows is guarded by ARM_COMPUTE_ENABLE_FP16.
 * NOTE(review): the matching #endif is not visible in this extract. */
43 #ifdef ARM_COMPUTE_ENABLE_FP16
/* Explicitly instantiate the half-precision kernel for block sizes 3, 5 and 7. */
45 template class arm_compute::NEHarrisScoreFP16Kernel<3>;
46 template class arm_compute::NEHarrisScoreFP16Kernel<5>;
47 template class arm_compute::NEHarrisScoreFP16Kernel<7>;
51 inline float16x8_t harris_score(float16x8_t gx2, float16x8_t gy2, float16x8_t gxgy, float sensitivity, float strength_thresh)
53 static const float16x8_t zero = vdupq_n_f16(0.f);
56 float16x8_t trace2 = vaddq_f16(gx2, gy2);
57 trace2 = vmulq_f16(trace2, trace2);
60 float16x8_t det = vmulq_f16(gx2, gy2);
61 det = vfmsq_f16(det, gxgy, gxgy);
63 /* Det(A) - sensitivity * trace^2 */
64 const float16x8_t mc = vfmsq_f16(det, vdupq_n_f16(sensitivity), trace2);
66 /* mc > strength_thresh */
67 const uint16x8_t mask = vcgtq_f16(mc, vdupq_n_f16(strength_thresh));
69 return vbslq_f16(mask, mc, zero);
/** Add one image row's contribution to the Gx^2, Gy^2 and Gx*Gy accumulators (FP16).
 *
 * The four gradient vectors are first scaled by the normalisation factor. Then,
 * for every i in [0, block_size), the 8-lane window starting at lane i of the
 * (low, high) pair is extracted and its products are fused-multiply-added into
 * gx2, gy2 and gxgy, which are updated in place through the references.
 *
 * NOTE(review): the end of the parameter list (where norm_factor is presumably
 * declared) and the braces are not visible in this extract.
 */
72 template <size_t block_size>
73 inline void harris_score_FLOAT_FLOAT_FLOAT(float16x8_t low_gx, float16x8_t low_gy, float16x8_t high_gx, float16x8_t high_gy, float16x8_t &gx2, float16x8_t &gy2, float16x8_t &gxgy,
/* Broadcast the scalar normalisation factor to all eight lanes. */
76 const float16x8_t norm_factor_fp16 = vdupq_n_f16(norm_factor);
/* Normalise the gradients before they are squared and accumulated. */
79 low_gx = vmulq_f16(low_gx, norm_factor_fp16);
80 low_gy = vmulq_f16(low_gy, norm_factor_fp16);
81 high_gx = vmulq_f16(high_gx, norm_factor_fp16);
82 high_gy = vmulq_f16(high_gy, norm_factor_fp16);
/* Slide an 8-lane window across the 16 loaded columns, one column per iteration. */
84 for(size_t i = 0; i < block_size; ++i)
86 const float16x8_t gx = vextq_f16(low_gx, high_gx, i);
87 const float16x8_t gy = vextq_f16(low_gy, high_gy, i);
/* acc += a * b for each of the three structure-tensor terms. */
89 gx2 = vfmaq_f16(gx2, gx, gx);
90 gy2 = vfmaq_f16(gy2, gy, gy);
91 gxgy = vfmaq_f16(gxgy, gx, gy);
/** Compute eight FP16 Harris scores from S16 gradients and store them as F32.
 *
 * @param[in]  in1_ptr         Gx data at the first output pixel (read as int16_t).
 * @param[in]  in2_ptr         Gy data at the first output pixel (read as int16_t).
 * @param[out] out_ptr         Destination for eight float scores.
 * @param[in]  in_stride       Distance between consecutive rows, in elements.
 * @param[in]  norm_factor     Factor the gradients are multiplied by before accumulation.
 * @param[in]  sensitivity     Forwarded to harris_score.
 * @param[in]  strength_thresh Forwarded to harris_score.
 *
 * NOTE(review): braces and other short lines are not visible in this extract.
 */
95 template <size_t block_size>
96 inline void harris_score_S16_S16_FLOAT(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out_ptr, int32_t in_stride, float norm_factor, float sensitivity,
97 float strength_thresh)
/* Step back block_size / 2 rows and block_size / 2 columns to the top-left corner of the window. */
99 auto gx_ptr_0 = static_cast<const int16_t *__restrict>(in1_ptr) - (block_size / 2) * (in_stride + 1);
100 auto gy_ptr_0 = static_cast<const int16_t *__restrict>(in2_ptr) - (block_size / 2) * (in_stride + 1);
/* The second pointer of each pair covers the next eight columns. */
101 const int16_t *gx_ptr_1 = gx_ptr_0 + 8;
102 const int16_t *gy_ptr_1 = gy_ptr_0 + 8;
103 const auto output = static_cast<float *__restrict>(out_ptr);
105 /* Gx^2, Gy^2 and Gx*Gy */
106 float16x8_t gx2 = vdupq_n_f16(0.0f);
107 float16x8_t gy2 = vdupq_n_f16(0.0f);
108 float16x8_t gxgy = vdupq_n_f16(0.0f);
/* One iteration per row of the block. */
110 for(size_t i = 0; i < block_size; ++i)
/* Load 16 columns of each gradient and convert them from int16 to half precision. */
112 const float16x8_t low_gx = vcvtq_f16_s16(vld1q_s16(gx_ptr_0));
113 const float16x8_t high_gx = vcvtq_f16_s16(vld1q_s16(gx_ptr_1));
114 const float16x8_t low_gy = vcvtq_f16_s16(vld1q_s16(gy_ptr_0));
115 const float16x8_t high_gy = vcvtq_f16_s16(vld1q_s16(gy_ptr_1));
116 harris_score_FLOAT_FLOAT_FLOAT<block_size>(low_gx, low_gy, high_gx, high_gy, gx2, gy2, gxgy, norm_factor);
118 /* Update gx and gy pointer */
119 gx_ptr_0 += in_stride;
120 gy_ptr_0 += in_stride;
121 gx_ptr_1 += in_stride;
122 gy_ptr_1 += in_stride;
125 /* Calculate harris score */
126 const float16x8_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh);
/* Widen the eight half-precision scores to float and write them as two 4-lane stores. */
129 vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(mc)));
130 vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(mc)));
/** Compute eight FP16 Harris scores from S32 gradients and store them as F32.
 *
 * Same scheme as the S16 variant, except that each row is read with 4-lane
 * int32 loads that are converted int32 -> float -> half and combined into
 * 8-lane half-precision vectors.
 *
 * @param[in]  in1_ptr         Gx data at the first output pixel (read as int32_t).
 * @param[in]  in2_ptr         Gy data at the first output pixel (read as int32_t).
 * @param[out] out_ptr         Destination for eight float scores.
 * @param[in]  in_stride       Distance between consecutive rows, in elements.
 * @param[in]  norm_factor     Factor the gradients are multiplied by before accumulation.
 * @param[in]  sensitivity     Forwarded to harris_score.
 * @param[in]  strength_thresh Forwarded to harris_score.
 *
 * NOTE(review): the second operand of the vcombine_f16 calls that build
 * high_gx and high_gy is not visible in this extract; since only columns
 * 0..11 are addressed here, it presumably fills the upper half with zero — confirm.
 */
133 template <size_t block_size>
134 inline void harris_score_S32_S32_FLOAT(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out_ptr, int32_t in_stride, float norm_factor, float sensitivity,
135 float strength_thresh)
137 static const float16x8_t zero = vdupq_n_f16(0.0f);
/* Step back block_size / 2 rows and columns; the _1 and _2 pointers cover columns 4..7 and 8..11. */
139 auto gx_ptr_0 = static_cast<const int32_t *__restrict>(in1_ptr) - (block_size / 2) * (in_stride + 1);
140 auto gy_ptr_0 = static_cast<const int32_t *__restrict>(in2_ptr) - (block_size / 2) * (in_stride + 1);
141 const int32_t *gx_ptr_1 = gx_ptr_0 + 4;
142 const int32_t *gy_ptr_1 = gy_ptr_0 + 4;
143 const int32_t *gx_ptr_2 = gx_ptr_0 + 8;
144 const int32_t *gy_ptr_2 = gy_ptr_0 + 8;
145 const auto output = static_cast<float *__restrict>(out_ptr);
147 /* Gx^2, Gy^2 and Gx*Gy */
148 float16x8_t gx2 = zero;
149 float16x8_t gy2 = zero;
150 float16x8_t gxgy = zero;
/* One iteration per row of the block. */
152 for(size_t i = 0; i < block_size; ++i)
154 const float16x8_t low_gx = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_0))),
155 vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_1))));
156 const float16x8_t high_gx = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_2))),
158 const float16x8_t low_gy = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_0))),
159 vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_1))));
160 const float16x8_t high_gy = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_2))),
162 harris_score_FLOAT_FLOAT_FLOAT<block_size>(low_gx, low_gy, high_gx, high_gy, gx2, gy2, gxgy, norm_factor);
164 /* Update gx and gy pointer */
165 gx_ptr_0 += in_stride;
166 gy_ptr_0 += in_stride;
167 gx_ptr_1 += in_stride;
168 gy_ptr_1 += in_stride;
169 gx_ptr_2 += in_stride;
170 gy_ptr_2 += in_stride;
173 /* Calculate harris score */
174 const float16x8_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh);
/* Widen the eight half-precision scores to float and write them as two 4-lane stores. */
177 vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(mc)));
178 vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(mc)));
182 inline void harris_score_S32_S32_FLOAT<7>(const void *__restrict in1_ptr, const void *__restrict in2_ptr, void *__restrict out_ptr, int32_t in_stride, float norm_factor, float sensitivity,
183 float strength_thresh)
185 static const float16x8_t zero = vdupq_n_f16(0.0f);
187 auto gx_ptr_0 = static_cast<const int32_t *__restrict>(in1_ptr) - 3 * (in_stride + 1);
188 auto gy_ptr_0 = static_cast<const int32_t *__restrict>(in2_ptr) - 3 * (in_stride + 1);
189 const int32_t *gx_ptr_1 = gx_ptr_0 + 4;
190 const int32_t *gy_ptr_1 = gy_ptr_0 + 4;
191 const int32_t *gx_ptr_2 = gx_ptr_0 + 8;
192 const int32_t *gy_ptr_2 = gy_ptr_0 + 8;
193 const int32_t *gx_ptr_3 = gx_ptr_0 + 12;
194 const int32_t *gy_ptr_3 = gy_ptr_0 + 12;
195 const auto output = static_cast<float *__restrict>(out_ptr);
197 /* Gx^2, Gy^2 and Gx*Gy */
198 float16x8_t gx2 = zero;
199 float16x8_t gy2 = zero;
200 float16x8_t gxgy = zero;
202 for(size_t i = 0; i < 7; ++i)
204 const float16x8_t low_gx = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_0))),
205 vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_1))));
206 const float16x8_t high_gx = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_2))),
207 vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gx_ptr_3))));
208 const float16x8_t low_gy = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_0))),
209 vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_1))));
210 const float16x8_t high_gy = vcombine_f16(vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_2))),
211 vcvt_f16_f32(vcvtq_f32_s32(vld1q_s32(gy_ptr_3))));
212 harris_score_FLOAT_FLOAT_FLOAT<7>(low_gx, low_gy, high_gx, high_gy, gx2, gy2, gxgy, norm_factor);
214 /* Update gx and gy pointer */
215 gx_ptr_0 += in_stride;
216 gy_ptr_0 += in_stride;
217 gx_ptr_1 += in_stride;
218 gy_ptr_1 += in_stride;
219 gx_ptr_2 += in_stride;
220 gy_ptr_2 += in_stride;
223 /* Calculate harris score */
224 const float16x8_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh);
227 vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(mc)));
228 vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(mc)));
/* Reports the border this kernel needs, built from block_size / 2 (integer division). */
233 template <int32_t block_size>
234 BorderSize NEHarrisScoreFP16Kernel<block_size>::border_size() const
236 return BorderSize(block_size / 2);
/* Default constructor: default-constructs the base kernel and leaves the score function unset
 * (nullptr) until configure() selects one. */
239 template <int32_t block_size>
240 NEHarrisScoreFP16Kernel<block_size>::NEHarrisScoreFP16Kernel()
241 : INEHarrisScoreKernel(), _func(nullptr)
/* Executes the configured score function at every position of the given window.
 * Requires that configure() has been called (a score function is selected) and that
 * `window` is a valid sub-window of the kernel's own window.
 * NOTE(review): braces and the lambda's closing line are not visible in this extract. */
245 template <int32_t block_size>
246 void NEHarrisScoreFP16Kernel<block_size>::run(const Window &window)
248 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
249 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
250 ARM_COMPUTE_ERROR_ON(_func == nullptr);
/* One iterator per tensor, all driven by the same window. */
252 Iterator input1(_input1, window);
253 Iterator input2(_input2, window);
254 Iterator output(_output, window);
/* Convert the dimension-1 stride from bytes to elements (presumably the row stride — confirm),
 * since the score functions do element-typed pointer arithmetic. */
256 const size_t input_stride = _input1->info()->strides_in_bytes()[1] / element_size_from_data_type(_input1->info()->data_type());
/* The coordinate argument is not used by the visible call; positions come from the iterators. */
258 execute_window_loop(window, [&](const Coordinates & id)
260 (*_func)(input1.ptr(), input2.ptr(), output.ptr(), input_stride, _norm_factor, _sensitivity, _strength_thresh);
262 input1, input2, output);
265 template <int32_t block_size>
266 void NEHarrisScoreFP16Kernel<block_size>::configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity,
267 bool border_undefined)
269 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input1);
270 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input2);
271 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
272 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::S16, DataType::S32);
273 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::S16, DataType::S32);
274 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
275 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
276 ARM_COMPUTE_ERROR_ON(0.0f == norm_factor);
281 _sensitivity = sensitivity;
282 _strength_thresh = strength_thresh;
283 _norm_factor = norm_factor;
285 if(input1->info()->data_type() == DataType::S16)
287 _func = &fp16::harris_score_S16_S16_FLOAT<block_size>;
291 _func = &fp16::harris_score_S32_S32_FLOAT<block_size>;
294 ARM_COMPUTE_ERROR_ON(nullptr == _func);
296 const unsigned int processed_elements = 8;
298 // Configure kernel window
299 Window win = calculate_max_window(*input1->info(), Steps(processed_elements), border_undefined, border_size());
300 AccessWindowAutoPadding output_access(output->info());
302 update_window_and_padding(win,
303 AccessWindowAutoPadding(input1->info()),
304 AccessWindowAutoPadding(input2->info()),
307 output_access.set_valid_region();
309 INEKernel::configure(win);
/* Explicitly instantiate the single-precision kernel for block sizes 3, 5 and 7. */
314 template class arm_compute::NEHarrisScoreKernel<3>;
315 template class arm_compute::NEHarrisScoreKernel<5>;
316 template class arm_compute::NEHarrisScoreKernel<7>;
/* Explicitly instantiate the default constructor for the same three block sizes. */
317 template arm_compute::NEHarrisScoreKernel<3>::NEHarrisScoreKernel();
318 template arm_compute::NEHarrisScoreKernel<5>::NEHarrisScoreKernel();
319 template arm_compute::NEHarrisScoreKernel<7>::NEHarrisScoreKernel();
/* Compute the thresholded Harris response for four pixels in single precision.
 *
 * Mc = det(A) - sensitivity * trace(A)^2, with det(A) = gx2 * gy2 - gxgy * gxgy
 * and trace(A) = gx2 + gy2. sensitivity and strength_thresh arrive already
 * broadcast to all four lanes.
 * Returns Mc in lanes where Mc > strength_thresh and 0 in the others. */
323 inline float32x4_t harris_score(float32x4_t gx2, float32x4_t gy2, float32x4_t gxgy, float32x4_t sensitivity, float32x4_t strength_thresh)
/* trace(A)^2 */
326 float32x4_t trace2 = vaddq_f32(gx2, gy2);
327 trace2 = vmulq_f32(trace2, trace2);
/* det(A): vmlsq_f32(a, b, c) computes a - b * c */
330 float32x4_t det = vmulq_f32(gx2, gy2);
331 det = vmlsq_f32(det, gxgy, gxgy);
333 /* Det(A) - sensitivity * trace^2 */
334 const float32x4_t mc = vmlsq_f32(det, sensitivity, trace2);
336 /* mc > strength_thresh */
337 const uint32x4_t mask = vcgtq_f32(mc, strength_thresh);
/* Keep mc where the mask is set, zero elsewhere. */
339 return vbslq_f32(mask, mc, vdupq_n_f32(0.0f));
/* Add one row of a 3-wide window to the Gx^2, Gy^2 and Gx*Gy accumulators for four pixels.
 *
 * low_* holds columns 0..3 and high_* columns 4..7 of the row; both are scaled by
 * norm_factor first. The left, middle and right taps are the 4-lane windows
 * starting at lanes 0, 1 and 2 of the (low, high) pair. gx2, gy2 and gxgy are
 * updated in place through the references. */
342 inline void harris_score1x3_FLOAT_FLOAT_FLOAT(float32x4_t low_gx, float32x4_t low_gy, float32x4_t high_gx, float32x4_t high_gy, float32x4_t &gx2, float32x4_t &gy2, float32x4_t &gxgy,
343 float32x4_t norm_factor)
/* Normalise the gradients. */
346 low_gx = vmulq_f32(low_gx, norm_factor);
347 low_gy = vmulq_f32(low_gy, norm_factor);
348 high_gx = vmulq_f32(high_gx, norm_factor);
349 high_gy = vmulq_f32(high_gy, norm_factor);
/* Left (offset 0), middle (offset 1) and right (offset 2) taps. */
351 const float32x4_t l_gx = low_gx;
352 const float32x4_t l_gy = low_gy;
353 const float32x4_t m_gx = vextq_f32(low_gx, high_gx, 1);
354 const float32x4_t m_gy = vextq_f32(low_gy, high_gy, 1);
355 const float32x4_t r_gx = vextq_f32(low_gx, high_gx, 2);
356 const float32x4_t r_gy = vextq_f32(low_gy, high_gy, 2);
/* Gx^2 += gx * gx for each tap. */
359 gx2 = vmlaq_f32(gx2, l_gx, l_gx);
360 gx2 = vmlaq_f32(gx2, m_gx, m_gx);
361 gx2 = vmlaq_f32(gx2, r_gx, r_gx);
/* Gy^2 += gy * gy for each tap. */
364 gy2 = vmlaq_f32(gy2, l_gy, l_gy);
365 gy2 = vmlaq_f32(gy2, m_gy, m_gy);
366 gy2 = vmlaq_f32(gy2, r_gy, r_gy);
/* Gx*Gy += gx * gy for each tap. */
369 gxgy = vmlaq_f32(gxgy, l_gx, l_gy);
370 gxgy = vmlaq_f32(gxgy, m_gx, m_gy);
371 gxgy = vmlaq_f32(gxgy, r_gx, r_gy);
/* Add one row of a 5-wide window to the Gx^2, Gy^2 and Gx*Gy accumulators for four pixels.
 *
 * low_* holds columns 0..3 and high_* columns 4..7 of the row; both are scaled by
 * norm_factor first. The visible taps are the 4-lane windows at offsets 0..3 of the
 * (low, high) pair, followed by one more accumulation.
 * NOTE(review): the lines just before the last accumulation are not visible in this
 * extract; presumably gx/gy are reloaded from high_gx/high_gy there (offset 4) — confirm. */
374 inline void harris_score1x5_FLOAT_FLOAT_FLOAT(float32x4_t low_gx, float32x4_t low_gy, float32x4_t high_gx, float32x4_t high_gy, float32x4_t &gx2, float32x4_t &gy2, float32x4_t &gxgy,
375 float32x4_t norm_factor)
/* Normalise the gradients. */
378 low_gx = vmulq_f32(low_gx, norm_factor);
379 low_gy = vmulq_f32(low_gy, norm_factor);
380 high_gx = vmulq_f32(high_gx, norm_factor);
381 high_gy = vmulq_f32(high_gy, norm_factor);
/* Tap at offset 0. */
384 float32x4_t gx = low_gx;
385 float32x4_t gy = low_gy;
388 gx2 = vmlaq_f32(gx2, gx, gx);
389 gy2 = vmlaq_f32(gy2, gy, gy);
390 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Tap at offset 1. */
393 gx = vextq_f32(low_gx, high_gx, 1);
394 gy = vextq_f32(low_gy, high_gy, 1);
397 gx2 = vmlaq_f32(gx2, gx, gx);
398 gy2 = vmlaq_f32(gy2, gy, gy);
399 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Tap at offset 2. */
402 gx = vextq_f32(low_gx, high_gx, 2);
403 gy = vextq_f32(low_gy, high_gy, 2);
406 gx2 = vmlaq_f32(gx2, gx, gx);
407 gy2 = vmlaq_f32(gy2, gy, gy);
408 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Tap at offset 3. */
411 gx = vextq_f32(low_gx, high_gx, 3);
412 gy = vextq_f32(low_gy, high_gy, 3);
415 gx2 = vmlaq_f32(gx2, gx, gx);
416 gy2 = vmlaq_f32(gy2, gy, gy);
417 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Final tap (see the review note in the header comment). */
424 gx2 = vmlaq_f32(gx2, gx, gx);
425 gy2 = vmlaq_f32(gy2, gy, gy);
426 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Add one row of a 7-wide window to the Gx^2, Gy^2 and Gx*Gy accumulators for four pixels.
 *
 * Takes three 4-lane vectors per gradient (low, high, high1). low and high are
 * scaled by norm_factor up front; after the first five accumulations high_gx/high_gy
 * are scaled again and two more taps (offsets 1 and 2) are accumulated.
 * NOTE(review): the lines between the "Change tmp_low and tmp_high" comment and the
 * second normalisation are not visible in this extract; presumably low_* takes the
 * old high_* and high_* takes high_*1 there, so only the new data is scaled — confirm.
 * Likewise the reload of gx/gy before the fifth accumulation is not visible. */
429 inline void harris_score1x7_FLOAT_FLOAT_FLOAT(float32x4_t low_gx, float32x4_t low_gy, float32x4_t high_gx, float32x4_t high_gy, float32x4_t high_gx1, float32x4_t high_gy1, float32x4_t &gx2,
430 float32x4_t &gy2, float32x4_t &gxgy, float32x4_t norm_factor)
/* Normalise the first two vectors of each gradient. */
433 low_gx = vmulq_f32(low_gx, norm_factor);
434 low_gy = vmulq_f32(low_gy, norm_factor);
435 high_gx = vmulq_f32(high_gx, norm_factor);
436 high_gy = vmulq_f32(high_gy, norm_factor);
/* Tap at offset 0. */
439 float32x4_t gx = low_gx;
440 float32x4_t gy = low_gy;
443 gx2 = vmlaq_f32(gx2, gx, gx);
444 gy2 = vmlaq_f32(gy2, gy, gy);
445 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Tap at offset 1. */
448 gx = vextq_f32(low_gx, high_gx, 1);
449 gy = vextq_f32(low_gy, high_gy, 1);
452 gx2 = vmlaq_f32(gx2, gx, gx);
453 gy2 = vmlaq_f32(gy2, gy, gy);
454 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Tap at offset 2. */
457 gx = vextq_f32(low_gx, high_gx, 2);
458 gy = vextq_f32(low_gy, high_gy, 2);
461 gx2 = vmlaq_f32(gx2, gx, gx);
462 gy2 = vmlaq_f32(gy2, gy, gy);
463 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Tap at offset 3. */
466 gx = vextq_f32(low_gx, high_gx, 3);
467 gy = vextq_f32(low_gy, high_gy, 3);
470 gx2 = vmlaq_f32(gx2, gx, gx);
471 gy2 = vmlaq_f32(gy2, gy, gy);
472 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Fifth tap (see the review note in the header comment). */
479 gx2 = vmlaq_f32(gx2, gx, gx);
480 gy2 = vmlaq_f32(gy2, gy, gy);
481 gxgy = vmlaq_f32(gxgy, gx, gy);
483 /* Change tmp_low and tmp_high for calculating R2 and R3 values */
/* Normalise the vector now held in high_gx / high_gy. */
490 high_gx = vmulq_f32(high_gx, norm_factor);
491 high_gy = vmulq_f32(high_gy, norm_factor);
/* Sixth tap: offset 1 of the current (low, high) pair. */
494 gx = vextq_f32(low_gx, high_gx, 1);
495 gy = vextq_f32(low_gy, high_gy, 1);
498 gx2 = vmlaq_f32(gx2, gx, gx);
499 gy2 = vmlaq_f32(gy2, gy, gy);
500 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Seventh tap: offset 2 of the current (low, high) pair. */
503 gx = vextq_f32(low_gx, high_gx, 2);
504 gy = vextq_f32(low_gy, high_gy, 2);
507 gx2 = vmlaq_f32(gx2, gx, gx);
508 gy2 = vmlaq_f32(gy2, gy, gy);
509 gxgy = vmlaq_f32(gxgy, gx, gy);
/* Compute eight 3x3 Harris scores from S16 gradients and store them as F32.
 *
 * input1_ptr / input2_ptr : Gx / Gy data at the first output pixel (read as int16_t).
 * output_ptr              : destination for eight float scores.
 * input_stride            : distance between consecutive rows, in elements.
 * in_norm_factor, in_sensitivity, in_strength_thresh : scalars broadcast to all lanes.
 *
 * The eight outputs are handled as two groups of four (.val[0] and .val[1]); each
 * of the three rows (above, centre, below) is fed to the 1x3 row helper per group.
 * NOTE(review): the declarations of gx2, gy2, gxgy, tmp_gx and tmp_gy are not visible
 * in this extract; the two vld1q_s16 pairs below are presumably their initialisers
 * for the row above. */
512 inline void harris_score3x3_S16_S16_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
513 float in_norm_factor, float in_sensitivity, float in_strength_thresh)
/* Start one column left of the centre; the second pointer is four columns further on. */
516 const auto gx_ptr_0 = static_cast<const int16_t *__restrict>(input1_ptr) - 1;
517 const auto gy_ptr_0 = static_cast<const int16_t *__restrict>(input2_ptr) - 1;
518 const int16_t *gx_ptr_1 = gx_ptr_0 + 4;
519 const int16_t *gy_ptr_1 = gy_ptr_0 + 4;
520 const auto output = static_cast<float *__restrict>(output_ptr);
522 /* Gx^2, Gy^2 and Gx*Gy */
/* Row above the centre row. */
549 vld1q_s16(gx_ptr_0 - input_stride),
550 vld1q_s16(gx_ptr_1 - input_stride)
556 vld1q_s16(gy_ptr_0 - input_stride),
557 vld1q_s16(gy_ptr_1 - input_stride)
560 float32x4_t sensitivity = vdupq_n_f32(in_sensitivity);
561 float32x4_t norm_factor = vdupq_n_f32(in_norm_factor);
562 float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh);
/* Widen int16 -> int32 -> float, then accumulate the first group of four outputs. */
564 float32x4_t low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[0])));
565 float32x4_t low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[0])));
566 float32x4_t high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[0])));
567 float32x4_t high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[0])));
568 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor);
/* Second group of four outputs. */
570 low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[1])));
571 low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[1])));
572 high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[1])));
573 high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[1])));
574 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor);
/* Centre row. */
577 tmp_gx.val[0] = vld1q_s16(gx_ptr_0);
578 tmp_gy.val[0] = vld1q_s16(gy_ptr_0);
579 tmp_gx.val[1] = vld1q_s16(gx_ptr_1);
580 tmp_gy.val[1] = vld1q_s16(gy_ptr_1);
582 low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[0])));
583 low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[0])));
584 high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[0])));
585 high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[0])));
586 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor);
588 low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[1])));
589 low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[1])));
590 high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[1])));
591 high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[1])));
592 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor);
/* Row below the centre row. */
595 tmp_gx.val[0] = vld1q_s16(gx_ptr_0 + input_stride);
596 tmp_gy.val[0] = vld1q_s16(gy_ptr_0 + input_stride);
597 tmp_gx.val[1] = vld1q_s16(gx_ptr_1 + input_stride);
598 tmp_gy.val[1] = vld1q_s16(gy_ptr_1 + input_stride);
600 low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[0])));
601 low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[0])));
602 high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[0])));
603 high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[0])));
604 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor);
606 low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[1])));
607 low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[1])));
608 high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[1])));
609 high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[1])));
610 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor);
612 /* Calculate harris score */
613 const float32x4x2_t mc =
616 harris_score(gx2.val[0], gy2.val[0], gxgy.val[0], sensitivity, strength_thresh),
617 harris_score(gx2.val[1], gy2.val[1], gxgy.val[1], sensitivity, strength_thresh)
/* Write the eight scores as two 4-lane stores. */
622 vst1q_f32(output + 0, mc.val[0]);
623 vst1q_f32(output + 4, mc.val[1]);
/* Compute eight 3x3 Harris scores from S32 gradients and store them as F32.
 *
 * input1_ptr / input2_ptr : Gx / Gy data at the first output pixel (read as int32_t).
 * output_ptr              : destination for eight float scores.
 * input_stride            : distance between consecutive rows, in elements.
 * in_norm_factor, in_sensitivity, in_strength_thresh : scalars broadcast to all lanes.
 *
 * Each row is read as three 4-lane vectors (columns 0..3, 4..7, 8..11). Outputs
 * 0..3 use the first two vectors and outputs 4..7 the last two.
 * NOTE(review): the declarations of gx2, gy2 and gxgy are not visible in this extract. */
626 inline void harris_score3x3_S32_S32_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
627 float in_norm_factor, float in_sensitivity, float in_strength_thresh)
/* Start one column left of the centre. */
629 auto gx_ptr_0 = static_cast<const int32_t *__restrict>(input1_ptr) - 1;
630 auto gy_ptr_0 = static_cast<const int32_t *__restrict>(input2_ptr) - 1;
631 const int32_t *gx_ptr_1 = gx_ptr_0 + 4;
632 const int32_t *gy_ptr_1 = gy_ptr_0 + 4;
633 const int32_t *gx_ptr_2 = gx_ptr_0 + 8;
634 const int32_t *gy_ptr_2 = gy_ptr_0 + 8;
635 const auto output = static_cast<float *__restrict>(output_ptr);
636 float32x4_t sensitivity = vdupq_n_f32(in_sensitivity);
637 float32x4_t norm_factor = vdupq_n_f32(in_norm_factor);
638 float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh);
640 /* Gx^2, Gy^2 and Gx*Gy */
/* Row above the centre row: first group of four outputs. */
664 float32x4_t low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_0 - input_stride));
665 float32x4_t low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_0 - input_stride));
666 float32x4_t high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1 - input_stride));
667 float32x4_t high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1 - input_stride));
668 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor);
/* Row above the centre row: second group of four outputs. */
670 low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1 - input_stride));
671 low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1 - input_stride));
672 high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_2 - input_stride));
673 high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_2 - input_stride));
674 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor);
/* Centre row. */
677 low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_0));
678 low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_0));
679 high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1));
680 high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1));
681 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor);
683 low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1));
684 low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1));
685 high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_2));
686 high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_2));
687 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor);
/* Row below the centre row. */
690 low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_0 + input_stride));
691 low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_0 + input_stride));
692 high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1 + input_stride));
693 high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1 + input_stride));
694 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor);
696 low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1 + input_stride));
697 low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1 + input_stride));
698 high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_2 + input_stride));
699 high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_2 + input_stride));
700 harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor);
702 /* Calculate harris score */
703 const float32x4x2_t mc =
706 harris_score(gx2.val[0], gy2.val[0], gxgy.val[0], sensitivity, strength_thresh),
707 harris_score(gx2.val[1], gy2.val[1], gxgy.val[1], sensitivity, strength_thresh)
/* Write the eight scores as two 4-lane stores. */
712 vst1q_f32(output + 0, mc.val[0]);
713 vst1q_f32(output + 4, mc.val[1]);
/* Compute eight 5x5 Harris scores from S16 gradients and store them as F32.
 *
 * input1_ptr / input2_ptr : Gx / Gy data at the first output pixel (read as int16_t).
 * output_ptr              : destination for eight float scores.
 * input_stride            : distance between consecutive rows, in elements.
 * in_norm_factor, in_sensitivity, in_strength_thresh : scalars broadcast to all lanes.
 *
 * Walks the five rows of the window top to bottom; each row is fed to the 1x5 row
 * helper once per group of four outputs.
 * NOTE(review): the declarations of gx2, gy2 and gxgy and the initialisers of
 * tmp_gx / tmp_gy are not visible in this extract. */
716 inline void harris_score5x5_S16_S16_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
717 float in_norm_factor, float in_sensitivity, float in_strength_thresh)
/* Start two columns left of and two rows above the centre. */
719 auto gx_ptr_0 = static_cast<const int16_t *__restrict>(input1_ptr) - 2 - 2 * input_stride;
720 auto gy_ptr_0 = static_cast<const int16_t *__restrict>(input2_ptr) - 2 - 2 * input_stride;
721 const int16_t *gx_ptr_1 = gx_ptr_0 + 4;
722 const int16_t *gy_ptr_1 = gy_ptr_0 + 4;
723 const auto output = static_cast<float *__restrict>(output_ptr);
725 /* Gx^2, Gy^2 and Gx*Gy */
747 float32x4_t sensitivity = vdupq_n_f32(in_sensitivity);
748 float32x4_t norm_factor = vdupq_n_f32(in_norm_factor);
749 float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh);
/* One iteration per row of the 5x5 window. */
751 for(int i = 0; i < 5; ++i)
753 const int16x8x2_t tmp_gx =
760 const int16x8x2_t tmp_gy =
/* Widen int16 -> int32 -> float, then accumulate the first group of four outputs. */
768 float32x4_t low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[0])));
769 float32x4_t low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[0])));
770 float32x4_t high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[0])));
771 float32x4_t high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[0])));
772 harris_score1x5_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor);
/* Second group of four outputs. */
774 low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[1])));
775 low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[1])));
776 high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[1])));
777 high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[1])));
778 harris_score1x5_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor);
780 /* Update gx and gy pointer */
781 gx_ptr_0 += input_stride;
782 gy_ptr_0 += input_stride;
783 gx_ptr_1 += input_stride;
784 gy_ptr_1 += input_stride;
787 /* Calculate harris score */
788 const float32x4x2_t mc =
791 harris_score(gx2.val[0], gy2.val[0], gxgy.val[0], sensitivity, strength_thresh),
792 harris_score(gx2.val[1], gy2.val[1], gxgy.val[1], sensitivity, strength_thresh)
/* Write the eight scores as two 4-lane stores. */
797 vst1q_f32(output + 0, mc.val[0]);
798 vst1q_f32(output + 4, mc.val[1]);
/* Compute eight 5x5 Harris scores from S32 gradients and store them as F32.
 *
 * input1_ptr / input2_ptr : Gx / Gy data at the first output pixel (read as int32_t).
 * output_ptr              : destination for eight float scores.
 * input_stride            : distance between consecutive rows, in elements.
 * in_norm_factor, in_sensitivity, in_strength_thresh : scalars broadcast to all lanes.
 *
 * Each row is read as three 4-lane vectors (columns 0..3, 4..7, 8..11). Outputs
 * 0..3 use the first two vectors and outputs 4..7 the last two.
 * NOTE(review): the declarations of gx2, gy2 and gxgy are not visible in this extract. */
801 inline void harris_score5x5_S32_S32_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
802 float in_norm_factor, float in_sensitivity, float in_strength_thresh)
/* Start two columns left of and two rows above the centre. */
805 auto gx_ptr_0 = static_cast<const int32_t *__restrict>(input1_ptr) - 2 - 2 * input_stride;
806 auto gy_ptr_0 = static_cast<const int32_t *__restrict>(input2_ptr) - 2 - 2 * input_stride;
807 const int32_t *gx_ptr_1 = gx_ptr_0 + 4;
808 const int32_t *gy_ptr_1 = gy_ptr_0 + 4;
809 const int32_t *gx_ptr_2 = gx_ptr_0 + 8;
810 const int32_t *gy_ptr_2 = gy_ptr_0 + 8;
811 const auto output = static_cast<float *__restrict>(output_ptr);
813 /* Gx^2, Gy^2 and Gx*Gy */
835 float32x4_t sensitivity = vdupq_n_f32(in_sensitivity);
836 float32x4_t norm_factor = vdupq_n_f32(in_norm_factor);
837 float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh);
/* One iteration per row of the 5x5 window. */
839 for(int i = 0; i < 5; ++i)
/* First group of four outputs. */
841 const float32x4_t low_gx_0 = vcvtq_f32_s32(vld1q_s32(gx_ptr_0));
842 const float32x4_t low_gy_0 = vcvtq_f32_s32(vld1q_s32(gy_ptr_0));
843 const float32x4_t high_gx_0 = vcvtq_f32_s32(vld1q_s32(gx_ptr_1));
844 const float32x4_t high_gy_0 = vcvtq_f32_s32(vld1q_s32(gy_ptr_1));
845 harris_score1x5_FLOAT_FLOAT_FLOAT(low_gx_0, low_gy_0, high_gx_0, high_gy_0, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor);
/* Second group of four outputs. */
847 const float32x4_t low_gx_1 = vcvtq_f32_s32(vld1q_s32(gx_ptr_1));
848 const float32x4_t low_gy_1 = vcvtq_f32_s32(vld1q_s32(gy_ptr_1));
849 const float32x4_t high_gx_1 = vcvtq_f32_s32(vld1q_s32(gx_ptr_2));
850 const float32x4_t high_gy_1 = vcvtq_f32_s32(vld1q_s32(gy_ptr_2));
851 harris_score1x5_FLOAT_FLOAT_FLOAT(low_gx_1, low_gy_1, high_gx_1, high_gy_1, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor);
853 /* Update gx and gy pointer */
854 gx_ptr_0 += input_stride;
855 gy_ptr_0 += input_stride;
856 gx_ptr_1 += input_stride;
857 gy_ptr_1 += input_stride;
858 gx_ptr_2 += input_stride;
859 gy_ptr_2 += input_stride;
862 /* Calculate harris score */
863 const float32x4x2_t mc =
866 harris_score(gx2.val[0], gy2.val[0], gxgy.val[0], sensitivity, strength_thresh),
867 harris_score(gx2.val[1], gy2.val[1], gxgy.val[1], sensitivity, strength_thresh)
/* Write the eight scores as two 4-lane stores. */
872 vst1q_f32(output + 0, mc.val[0]);
873 vst1q_f32(output + 4, mc.val[1]);
/* Compute four 7x7 Harris scores from S16 gradients and store them as F32.
 *
 * input1_ptr / input2_ptr : Gx / Gy data at the first output pixel (read as int16_t).
 * output_ptr              : destination for four float scores.
 * input_stride            : distance between consecutive rows, in elements.
 * in_norm_factor, in_sensitivity, in_strength_thresh : scalars broadcast to all lanes.
 *
 * Unlike the 3x3 and 5x5 variants this one produces four outputs per call: each
 * row is read as 8 + 4 columns and handed to the 1x7 row helper once. */
876 inline void harris_score7x7_S16_S16_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
877 float in_norm_factor, float in_sensitivity, float in_strength_thresh)
/* Start three columns left of and three rows above the centre. */
879 auto gx_ptr_0 = static_cast<const int16_t *__restrict>(input1_ptr) - 3 - 3 * input_stride;
880 auto gy_ptr_0 = static_cast<const int16_t *__restrict>(input2_ptr) - 3 - 3 * input_stride;
881 const int16_t *gx_ptr_1 = gx_ptr_0 + 8;
882 const int16_t *gy_ptr_1 = gy_ptr_0 + 8;
883 const auto output = static_cast<float *__restrict>(output_ptr);
885 /* Gx^2, Gy^2 and Gx*Gy */
886 float32x4_t gx2 = vdupq_n_f32(0.0f);
887 float32x4_t gy2 = vdupq_n_f32(0.0f);
888 float32x4_t gxgy = vdupq_n_f32(0.0f);
889 float32x4_t sensitivity = vdupq_n_f32(in_sensitivity);
890 float32x4_t norm_factor = vdupq_n_f32(in_norm_factor);
891 float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh);
/* One iteration per row of the 7x7 window. */
893 for(int i = 0; i < 7; ++i)
/* Columns 0..7 with a 128-bit load, columns 8..11 with a 64-bit load. */
895 const int16x8_t tmp0_gx = vld1q_s16(gx_ptr_0);
896 const int16x8_t tmp0_gy = vld1q_s16(gy_ptr_0);
897 const int16x4_t tmp1_gx = vld1_s16(gx_ptr_1);
898 const int16x4_t tmp1_gy = vld1_s16(gy_ptr_1);
/* Widen int16 -> int32 -> float. */
900 float32x4_t low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp0_gx)));
901 float32x4_t low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp0_gy)));
902 float32x4_t high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp0_gx)));
903 float32x4_t high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp0_gy)));
904 float32x4_t high_gx1 = vcvtq_f32_s32(vmovl_s16(tmp1_gx));
905 float32x4_t high_gy1 = vcvtq_f32_s32(vmovl_s16(tmp1_gy));
906 harris_score1x7_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, high_gx1, high_gy1, gx2, gy2, gxgy, norm_factor);
908 /* Update gx and gy pointer */
909 gx_ptr_0 += input_stride;
910 gy_ptr_0 += input_stride;
911 gx_ptr_1 += input_stride;
912 gy_ptr_1 += input_stride;
915 /* Calculate harris score */
916 const float32x4_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh);
/* Write the four scores. */
919 vst1q_f32(output, mc);
/* Compute four 7x7 Harris scores from S32 gradients and store them as F32.
 *
 * input1_ptr / input2_ptr : Gx / Gy data at the first output pixel (read as int32_t).
 * output_ptr              : destination for four float scores.
 * input_stride            : distance between consecutive rows, in elements.
 * in_norm_factor, in_sensitivity, in_strength_thresh : scalars broadcast to all lanes.
 *
 * Each row is read as three 4-lane vectors (columns 0..3, 4..7, 8..11) and handed
 * to the 1x7 row helper once. */
922 inline void harris_score7x7_S32_S32_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
923 float in_norm_factor, float in_sensitivity, float in_strength_thresh)
/* Start three columns left of and three rows above the centre. */
925 auto gx_ptr_0 = static_cast<const int32_t *__restrict>(input1_ptr) - 3 - 3 * input_stride;
926 auto gy_ptr_0 = static_cast<const int32_t *__restrict>(input2_ptr) - 3 - 3 * input_stride;
927 const int32_t *gx_ptr_1 = gx_ptr_0 + 4;
928 const int32_t *gy_ptr_1 = gy_ptr_0 + 4;
929 const int32_t *gx_ptr_2 = gx_ptr_1 + 4;
930 const int32_t *gy_ptr_2 = gy_ptr_1 + 4;
931 const auto output = static_cast<float *__restrict>(output_ptr);
933 /* Gx^2, Gy^2 and Gx*Gy */
934 float32x4_t gx2 = vdupq_n_f32(0.0f);
935 float32x4_t gy2 = vdupq_n_f32(0.0f);
936 float32x4_t gxgy = vdupq_n_f32(0.0f);
937 float32x4_t sensitivity = vdupq_n_f32(in_sensitivity);
938 float32x4_t norm_factor = vdupq_n_f32(in_norm_factor);
939 float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh);
/* One iteration per row of the 7x7 window. */
941 for(int i = 0; i < 7; ++i)
/* Load twelve columns per gradient and convert int32 -> float. */
943 const float32x4_t low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_0));
944 const float32x4_t low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_0));
945 const float32x4_t high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1));
946 const float32x4_t high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1));
947 const float32x4_t high_gx1 = vcvtq_f32_s32(vld1q_s32(gx_ptr_2));
948 const float32x4_t high_gy1 = vcvtq_f32_s32(vld1q_s32(gy_ptr_2));
949 harris_score1x7_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, high_gx1, high_gy1, gx2, gy2, gxgy, norm_factor);
951 /* Update gx and gy pointer */
952 gx_ptr_0 += input_stride;
953 gy_ptr_0 += input_stride;
954 gx_ptr_1 += input_stride;
955 gy_ptr_1 += input_stride;
956 gx_ptr_2 += input_stride;
957 gy_ptr_2 += input_stride;
960 /* Calculate harris score */
961 const float32x4_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh);
/* Write the four scores. */
964 vst1q_f32(output, mc);
/* Default constructor: all tensor pointers start as nullptr and all scalar parameters as 0. */
969 INEHarrisScoreKernel::INEHarrisScoreKernel()
970 : _input1(nullptr), _input2(nullptr), _output(nullptr), _sensitivity(0.0f), _strength_thresh(0.0f), _norm_factor(0.0f)
/* Default constructor: default-constructs the base kernel and leaves the score function unset
 * (nullptr) until configure() selects one. */
974 template <int32_t block_size>
975 NEHarrisScoreKernel<block_size>::NEHarrisScoreKernel()
976 : INEHarrisScoreKernel(), _func(nullptr)
/* Executes the configured score function at every position of the given window.
 * Requires that configure() has been called (a score function is selected) and that
 * `window` is a valid sub-window of the kernel's own window.
 * NOTE(review): braces and the lambda's closing line are not visible in this extract. */
980 template <int32_t block_size>
981 void NEHarrisScoreKernel<block_size>::run(const Window &window)
983 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
984 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
985 ARM_COMPUTE_ERROR_ON(_func == nullptr);
/* One iterator per tensor, all driven by the same window. */
987 Iterator input1(_input1, window);
988 Iterator input2(_input2, window);
989 Iterator output(_output, window);
/* Convert the dimension-1 stride from bytes to elements (presumably the row stride — confirm),
 * since the score functions do element-typed pointer arithmetic. */
991 const size_t input_stride = _input1->info()->strides_in_bytes()[1] / element_size_from_data_type(_input1->info()->data_type());
/* The coordinate argument is not used by the visible call; positions come from the iterators. */
993 execute_window_loop(window, [&](const Coordinates & id)
995 (*_func)(input1.ptr(), input2.ptr(), output.ptr(), input_stride, _norm_factor, _sensitivity, _strength_thresh);
997 input1, input2, output);
/* Reports the border this kernel needs, built from block_size / 2 (integer division). */
1000 template <int32_t block_size>
1001 BorderSize NEHarrisScoreKernel<block_size>::border_size() const
1003 return BorderSize(block_size / 2);
/* Validates the tensors, stores the parameters, selects the score function and sets up the window.
 *
 * input1 / input2 : single-channel S16 or S32 images with matching data types.
 * output          : single-channel F32 image.
 * norm_factor     : must be non-zero.
 * strength_thresh, sensitivity : stored for use by run().
 * border_undefined : forwarded to calculate_max_window.
 *
 * NOTE(review): many short lines are not visible in this extract (presumably the
 * assignments of _input1/_input2/_output, the switch/case/else scaffolding around the
 * _func assignments, the condition that picks processed_elements and the last argument
 * of update_window_and_padding) — confirm against the full file. */
1006 template <int32_t block_size>
1007 void NEHarrisScoreKernel<block_size>::configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity,
1008 bool border_undefined)
1010 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input1);
1011 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input2);
1012 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
1013 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::S16, DataType::S32);
1014 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::S16, DataType::S32);
1015 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
1016 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
1017 ARM_COMPUTE_ERROR_ON(0.0f == norm_factor);
1022 _sensitivity = sensitivity;
1023 _strength_thresh = strength_thresh;
1024 _norm_factor = norm_factor;
/* S16 inputs: one score function per block size (3x3, 5x5, 7x7), error otherwise. */
1026 if(input1->info()->data_type() == DataType::S16)
1031 _func = &harris_score3x3_S16_S16_FLOAT;
1034 _func = &harris_score5x5_S16_S16_FLOAT;
1037 _func = &harris_score7x7_S16_S16_FLOAT;
1040 ARM_COMPUTE_ERROR("Invalid block size");
/* Same selection for the S32 score functions (presumably the else branch). */
1049 _func = &harris_score3x3_S32_S32_FLOAT;
1052 _func = &harris_score5x5_S32_S32_FLOAT;
1055 _func = &harris_score7x7_S32_S32_FLOAT;
1058 ARM_COMPUTE_ERROR("Invalid block size");
1063 ARM_COMPUTE_ERROR_ON(nullptr == _func);
/* Window step: the 3x3 and 5x5 score functions store eight outputs per call and the
 * 7x7 ones store four, so the step is presumably chosen by block size (condition not visible). */
1065 unsigned int processed_elements = 0;
1069 processed_elements = 8;
1073 processed_elements = 4;
1076 // Configure kernel window
1077 Window win = calculate_max_window(*input1->info(), Steps(processed_elements), border_undefined, border_size());
1078 AccessWindowAutoPadding output_access(output->info());
1080 update_window_and_padding(win,
1081 AccessWindowAutoPadding(input1->info()),
1082 AccessWindowAutoPadding(input2->info()),
1085 output_access.set_valid_region();
1087 INEKernel::configure(win);