next: drop HAVE_TEGRA_OPTIMIZATION/TADP
[platform/upstream/opencv.git] / modules / features2d / src / fast.cpp
1 /* This is FAST corner detector, contributed to OpenCV by the author, Edward Rosten.
2    Below is the original copyright and the references */
3
4 /*
5 Copyright (c) 2006, 2008 Edward Rosten
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12     *Redistributions of source code must retain the above copyright
13      notice, this list of conditions and the following disclaimer.
14
15     *Redistributions in binary form must reproduce the above copyright
16      notice, this list of conditions and the following disclaimer in the
17      documentation and/or other materials provided with the distribution.
18
19     *Neither the name of the University of Cambridge nor the names of
20      its contributors may be used to endorse or promote products derived
21      from this software without specific prior written permission.
22
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
27 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 /*
37 The references are:
38  * Machine learning for high-speed corner detection,
39    E. Rosten and T. Drummond, ECCV 2006
40  * Faster and better: A machine learning approach to corner detection
41    E. Rosten, R. Porter and T. Drummond, PAMI, 2009
42 */
43
44 #include "precomp.hpp"
45 #include "fast.hpp"
46 #include "fast_score.hpp"
47 #include "opencl_kernels_features2d.hpp"
48 #include "hal_replacement.hpp"
49 #include "opencv2/core/hal/intrin.hpp"
50
51 #include "opencv2/core/openvx/ovx_defs.hpp"
52
53 namespace cv
54 {
55
56 template<int patternSize>
57 void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
58 {
59     Mat img = _img.getMat();
60     const int K = patternSize/2, N = patternSize + K + 1;
61     int i, j, k, pixel[25];
62     makeOffsets(pixel, (int)img.step, patternSize);
63
64 #if CV_SIMD128
65     const int quarterPatternSize = patternSize/4;
66     v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K);
67     bool hasSimd = hasSIMD128();
68 #if CV_TRY_AVX2
69     Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2;
70     if(CV_CPU_HAS_SUPPORT_AVX2)
71         fast_t_impl_avx2 = opt_AVX2::FAST_t_patternSize16_AVX2::getImpl(img.cols, threshold, nonmax_suppression, pixel);
72 #endif
73
74 #endif
75
76     keypoints.clear();
77
78     threshold = std::min(std::max(threshold, 0), 255);
79
80     uchar threshold_tab[512];
81     for( i = -255; i <= 255; i++ )
82         threshold_tab[i+255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0);
83
84     AutoBuffer<uchar> _buf((img.cols+16)*3*(sizeof(int) + sizeof(uchar)) + 128);
85     uchar* buf[3];
86     buf[0] = _buf; buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols;
87     int* cpbuf[3];
88     cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1;
89     cpbuf[1] = cpbuf[0] + img.cols + 1;
90     cpbuf[2] = cpbuf[1] + img.cols + 1;
91     memset(buf[0], 0, img.cols*3);
92
93     for(i = 3; i < img.rows-2; i++)
94     {
95         const uchar* ptr = img.ptr<uchar>(i) + 3;
96         uchar* curr = buf[(i - 3)%3];
97         int* cornerpos = cpbuf[(i - 3)%3];
98         memset(curr, 0, img.cols);
99         int ncorners = 0;
100
101         if( i < img.rows - 3 )
102         {
103             j = 3;
104 #if CV_SIMD128
105             if( hasSimd )
106             {
107                 if( patternSize == 16 )
108                 {
109 #if CV_TRY_AVX2
110                     if (fast_t_impl_avx2)
111                         fast_t_impl_avx2->process(j, ptr, curr, cornerpos, ncorners);
112 #endif
113                     //vz if (j <= (img.cols - 27)) //it doesn't make sense using vectors for less than 8 elements
114                     {
115                         for (; j < img.cols - 16 - 3; j += 16, ptr += 16)
116                         {
117                             v_uint8x16 v = v_load(ptr);
118                             v_int8x16 v0 = v_reinterpret_as_s8((v + t) ^ delta);
119                             v_int8x16 v1 = v_reinterpret_as_s8((v - t) ^ delta);
120
121                             v_int8x16 x0 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[0]), delta));
122                             v_int8x16 x1 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[quarterPatternSize]), delta));
123                             v_int8x16 x2 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[2*quarterPatternSize]), delta));
124                             v_int8x16 x3 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[3*quarterPatternSize]), delta));
125
126                             v_int8x16 m0, m1;
127                             m0 = (v0 < x0) & (v0 < x1);
128                             m1 = (x0 < v1) & (x1 < v1);
129                             m0 = m0 | ((v0 < x1) & (v0 < x2));
130                             m1 = m1 | ((x1 < v1) & (x2 < v1));
131                             m0 = m0 | ((v0 < x2) & (v0 < x3));
132                             m1 = m1 | ((x2 < v1) & (x3 < v1));
133                             m0 = m0 | ((v0 < x3) & (v0 < x0));
134                             m1 = m1 | ((x3 < v1) & (x0 < v1));
135                             m0 = m0 | m1;
136
137                             int mask = v_signmask(m0);
138                             if( mask == 0 )
139                                 continue;
140                             if( (mask & 255) == 0 )
141                             {
142                                 j -= 8;
143                                 ptr -= 8;
144                                 continue;
145                             }
146
147                             v_int8x16 c0 = v_setzero_s8();
148                             v_int8x16 c1 = v_setzero_s8();
149                             v_uint8x16 max0 = v_setzero_u8();
150                             v_uint8x16 max1 = v_setzero_u8();
151                             for( k = 0; k < N; k++ )
152                             {
153                                 v_int8x16 x = v_reinterpret_as_s8(v_load((ptr + pixel[k])) ^ delta);
154                                 m0 = v0 < x;
155                                 m1 = x < v1;
156
157                                 c0 = v_sub_wrap(c0, m0) & m0;
158                                 c1 = v_sub_wrap(c1, m1) & m1;
159
160                                 max0 = v_max(max0, v_reinterpret_as_u8(c0));
161                                 max1 = v_max(max1, v_reinterpret_as_u8(c1));
162                             }
163
164                             max0 = v_max(max0, max1);
165                             int m = v_signmask(K16 < max0);
166
167                             for( k = 0; m > 0 && k < 16; k++, m >>= 1 )
168                             {
169                                 if(m & 1)
170                                 {
171                                     cornerpos[ncorners++] = j+k;
172                                     if(nonmax_suppression)
173                                         curr[j+k] = (uchar)cornerScore<patternSize>(ptr+k, pixel, threshold);
174                                 }
175                             }
176                         }
177                     }
178                 }
179             }
180 #endif
181             for( ; j < img.cols - 3; j++, ptr++ )
182             {
183                 int v = ptr[0];
184                 const uchar* tab = &threshold_tab[0] - v + 255;
185                 int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]];
186
187                 if( d == 0 )
188                     continue;
189
190                 d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]];
191                 d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]];
192                 d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]];
193
194                 if( d == 0 )
195                     continue;
196
197                 d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]];
198                 d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]];
199                 d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]];
200                 d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]];
201
202                 if( d & 1 )
203                 {
204                     int vt = v - threshold, count = 0;
205
206                     for( k = 0; k < N; k++ )
207                     {
208                         int x = ptr[pixel[k]];
209                         if(x < vt)
210                         {
211                             if( ++count > K )
212                             {
213                                 cornerpos[ncorners++] = j;
214                                 if(nonmax_suppression)
215                                     curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
216                                 break;
217                             }
218                         }
219                         else
220                             count = 0;
221                     }
222                 }
223
224                 if( d & 2 )
225                 {
226                     int vt = v + threshold, count = 0;
227
228                     for( k = 0; k < N; k++ )
229                     {
230                         int x = ptr[pixel[k]];
231                         if(x > vt)
232                         {
233                             if( ++count > K )
234                             {
235                                 cornerpos[ncorners++] = j;
236                                 if(nonmax_suppression)
237                                     curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
238                                 break;
239                             }
240                         }
241                         else
242                             count = 0;
243                     }
244                 }
245             }
246         }
247
248         cornerpos[-1] = ncorners;
249
250         if( i == 3 )
251             continue;
252
253         const uchar* prev = buf[(i - 4 + 3)%3];
254         const uchar* pprev = buf[(i - 5 + 3)%3];
255         cornerpos = cpbuf[(i - 4 + 3)%3];
256         ncorners = cornerpos[-1];
257
258         for( k = 0; k < ncorners; k++ )
259         {
260             j = cornerpos[k];
261             int score = prev[j];
262             if( !nonmax_suppression ||
263                (score > prev[j+1] && score > prev[j-1] &&
264                 score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] &&
265                 score > curr[j-1] && score > curr[j] && score > curr[j+1]) )
266             {
267                 keypoints.push_back(KeyPoint((float)j, (float)(i-1), 7.f, -1, (float)score));
268             }
269         }
270     }
271 }
272
273 #ifdef HAVE_OPENCL
274 template<typename pt>
275 struct cmp_pt
276 {
277     bool operator ()(const pt& a, const pt& b) const { return a.y < b.y || (a.y == b.y && a.x < b.x); }
278 };
279
280 static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
281                      int threshold, bool nonmax_suppression, int maxKeypoints )
282 {
283     UMat img = _img.getUMat();
284     if( img.cols < 7 || img.rows < 7 )
285         return false;
286     size_t globalsize[] = { (size_t)img.cols-6, (size_t)img.rows-6 };
287
288     ocl::Kernel fastKptKernel("FAST_findKeypoints", ocl::features2d::fast_oclsrc);
289     if (fastKptKernel.empty())
290         return false;
291
292     UMat kp1(1, maxKeypoints*2+1, CV_32S);
293
294     UMat ucounter1(kp1, Rect(0,0,1,1));
295     ucounter1.setTo(Scalar::all(0));
296
297     if( !fastKptKernel.args(ocl::KernelArg::ReadOnly(img),
298                             ocl::KernelArg::PtrReadWrite(kp1),
299                             maxKeypoints, threshold).run(2, globalsize, 0, true))
300         return false;
301
302     Mat mcounter;
303     ucounter1.copyTo(mcounter);
304     int i, counter = mcounter.at<int>(0);
305     counter = std::min(counter, maxKeypoints);
306
307     keypoints.clear();
308
309     if( counter == 0 )
310         return true;
311
312     if( !nonmax_suppression )
313     {
314         Mat m;
315         kp1(Rect(0, 0, counter*2+1, 1)).copyTo(m);
316         const Point* pt = (const Point*)(m.ptr<int>() + 1);
317         for( i = 0; i < counter; i++ )
318             keypoints.push_back(KeyPoint((float)pt[i].x, (float)pt[i].y, 7.f, -1, 1.f));
319     }
320     else
321     {
322         UMat kp2(1, maxKeypoints*3+1, CV_32S);
323         UMat ucounter2 = kp2(Rect(0,0,1,1));
324         ucounter2.setTo(Scalar::all(0));
325
326         ocl::Kernel fastNMSKernel("FAST_nonmaxSupression", ocl::features2d::fast_oclsrc);
327         if (fastNMSKernel.empty())
328             return false;
329
330         size_t globalsize_nms[] = { (size_t)counter };
331         if( !fastNMSKernel.args(ocl::KernelArg::PtrReadOnly(kp1),
332                                 ocl::KernelArg::PtrReadWrite(kp2),
333                                 ocl::KernelArg::ReadOnly(img),
334                                 counter, counter).run(1, globalsize_nms, 0, true))
335             return false;
336
337         Mat m2;
338         kp2(Rect(0, 0, counter*3+1, 1)).copyTo(m2);
339         Point3i* pt2 = (Point3i*)(m2.ptr<int>() + 1);
340         int newcounter = std::min(m2.at<int>(0), counter);
341
342         std::sort(pt2, pt2 + newcounter, cmp_pt<Point3i>());
343
344         for( i = 0; i < newcounter; i++ )
345             keypoints.push_back(KeyPoint((float)pt2[i].x, (float)pt2[i].y, 7.f, -1, (float)pt2[i].z));
346     }
347
348     return true;
349 }
350 #endif
351
352
353 #ifdef HAVE_OPENVX
354 namespace ovx {
355     template <> inline bool skipSmallImages<VX_KERNEL_FAST_CORNERS>(int w, int h) { return w*h < 800 * 600; }
356 }
357 static bool openvx_FAST(InputArray _img, std::vector<KeyPoint>& keypoints,
358                         int _threshold, bool nonmaxSuppression, int type)
359 {
360     using namespace ivx;
361
362     // Nonmax suppression is done differently in OpenCV than in OpenVX
363     // 9/16 is the only supported mode in OpenVX
364     if(nonmaxSuppression || type != FastFeatureDetector::TYPE_9_16)
365         return false;
366
367     Mat imgMat = _img.getMat();
368     if(imgMat.empty() || imgMat.type() != CV_8UC1)
369         return false;
370
371     if (ovx::skipSmallImages<VX_KERNEL_FAST_CORNERS>(imgMat.cols, imgMat.rows))
372         return false;
373
374     try
375     {
376         Context context = ovx::getOpenVXContext();
377         Image img = Image::createFromHandle(context, Image::matTypeToFormat(imgMat.type()),
378                                             Image::createAddressing(imgMat), (void*)imgMat.data);
379         ivx::Scalar threshold = ivx::Scalar::create<VX_TYPE_FLOAT32>(context, _threshold);
380         vx_size capacity = imgMat.cols * imgMat.rows;
381         Array corners = Array::create(context, VX_TYPE_KEYPOINT, capacity);
382
383         ivx::Scalar numCorners = ivx::Scalar::create<VX_TYPE_SIZE>(context, 0);
384
385         IVX_CHECK_STATUS(vxuFastCorners(context, img, threshold, (vx_bool)nonmaxSuppression, corners, numCorners));
386
387         size_t nPoints = numCorners.getValue<vx_size>();
388         keypoints.clear(); keypoints.reserve(nPoints);
389         std::vector<vx_keypoint_t> vxCorners;
390         corners.copyTo(vxCorners);
391         for(size_t i = 0; i < nPoints; i++)
392         {
393             vx_keypoint_t kp = vxCorners[i];
394             //if nonmaxSuppression is false, kp.strength is undefined
395             keypoints.push_back(KeyPoint((float)kp.x, (float)kp.y, 7.f, -1, kp.strength));
396         }
397
398 #ifdef VX_VERSION_1_1
399         //we should take user memory back before release
400         //(it's not done automatically according to standard)
401         img.swapHandle();
402 #endif
403     }
404     catch (RuntimeError & e)
405     {
406         VX_DbgThrow(e.what());
407     }
408     catch (WrapperError & e)
409     {
410         VX_DbgThrow(e.what());
411     }
412
413     return true;
414 }
415
416 #endif
417
418 static inline int hal_FAST(cv::Mat& src, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, int type)
419 {
420     if (threshold > 20)
421         return CV_HAL_ERROR_NOT_IMPLEMENTED;
422
423     cv::Mat scores(src.size(), src.type());
424
425     int error = cv_hal_FAST_dense(src.data, src.step, scores.data, scores.step, src.cols, src.rows, type);
426
427     if (error != CV_HAL_ERROR_OK)
428         return error;
429
430     cv::Mat suppressedScores(src.size(), src.type());
431
432     if (nonmax_suppression)
433     {
434         error = cv_hal_FAST_NMS(scores.data, scores.step, suppressedScores.data, suppressedScores.step, scores.cols, scores.rows);
435
436         if (error != CV_HAL_ERROR_OK)
437             return error;
438     }
439     else
440     {
441         suppressedScores = scores;
442     }
443
444     if (!threshold && nonmax_suppression) threshold = 1;
445
446     cv::KeyPoint kpt(0, 0, 7.f, -1, 0);
447
448     unsigned uthreshold = (unsigned) threshold;
449
450     int ofs = 3;
451
452     int stride = (int)suppressedScores.step;
453     const unsigned char* pscore = suppressedScores.data;
454
455     keypoints.clear();
456
457     for (int y = ofs; y + ofs < suppressedScores.rows; ++y)
458     {
459         kpt.pt.y = (float)(y);
460         for (int x = ofs; x + ofs < suppressedScores.cols; ++x)
461         {
462             unsigned score = pscore[y * stride + x];
463             if (score > uthreshold)
464             {
465                 kpt.pt.x = (float)(x);
466                 kpt.response = (nonmax_suppression != 0) ? (float)((int)score - 1) : 0.f;
467                 keypoints.push_back(kpt);
468             }
469         }
470     }
471
472     return CV_HAL_ERROR_OK;
473 }
474
475 void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, int type)
476 {
477     CV_INSTRUMENT_REGION()
478
479     CV_OCL_RUN(_img.isUMat() && type == FastFeatureDetector::TYPE_9_16,
480                ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000));
481
482     cv::Mat img = _img.getMat();
483     CALL_HAL(fast_dense, hal_FAST, img, keypoints, threshold, nonmax_suppression, type);
484
485     size_t keypoints_count;
486     CALL_HAL(fast, cv_hal_FAST, img.data, img.step, img.cols, img.rows,
487              (uchar*)(keypoints.data()), &keypoints_count, threshold, nonmax_suppression, type);
488
489     CV_OVX_RUN(true,
490                openvx_FAST(_img, keypoints, threshold, nonmax_suppression, type))
491
492     switch(type) {
493     case FastFeatureDetector::TYPE_5_8:
494         FAST_t<8>(_img, keypoints, threshold, nonmax_suppression);
495         break;
496     case FastFeatureDetector::TYPE_7_12:
497         FAST_t<12>(_img, keypoints, threshold, nonmax_suppression);
498         break;
499     case FastFeatureDetector::TYPE_9_16:
500         FAST_t<16>(_img, keypoints, threshold, nonmax_suppression);
501         break;
502     }
503 }
504
505
506 void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
507 {
508     CV_INSTRUMENT_REGION()
509
510     FAST(_img, keypoints, threshold, nonmax_suppression, FastFeatureDetector::TYPE_9_16);
511 }
512
513
514 class FastFeatureDetector_Impl CV_FINAL : public FastFeatureDetector
515 {
516 public:
517     FastFeatureDetector_Impl( int _threshold, bool _nonmaxSuppression, int _type )
518     : threshold(_threshold), nonmaxSuppression(_nonmaxSuppression), type((short)_type)
519     {}
520
521     void detect( InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask ) CV_OVERRIDE
522     {
523         CV_INSTRUMENT_REGION()
524
525         Mat mask = _mask.getMat(), grayImage;
526         UMat ugrayImage;
527         _InputArray gray = _image;
528         if( _image.type() != CV_8U )
529         {
530             _OutputArray ogray = _image.isUMat() ? _OutputArray(ugrayImage) : _OutputArray(grayImage);
531             cvtColor( _image, ogray, COLOR_BGR2GRAY );
532             gray = ogray;
533         }
534         FAST( gray, keypoints, threshold, nonmaxSuppression, type );
535         KeyPointsFilter::runByPixelsMask( keypoints, mask );
536     }
537
538     void set(int prop, double value)
539     {
540         if(prop == THRESHOLD)
541             threshold = cvRound(value);
542         else if(prop == NONMAX_SUPPRESSION)
543             nonmaxSuppression = value != 0;
544         else if(prop == FAST_N)
545             type = cvRound(value);
546         else
547             CV_Error(Error::StsBadArg, "");
548     }
549
550     double get(int prop) const
551     {
552         if(prop == THRESHOLD)
553             return threshold;
554         if(prop == NONMAX_SUPPRESSION)
555             return nonmaxSuppression;
556         if(prop == FAST_N)
557             return type;
558         CV_Error(Error::StsBadArg, "");
559         return 0;
560     }
561
562     void setThreshold(int threshold_) CV_OVERRIDE { threshold = threshold_; }
563     int getThreshold() const CV_OVERRIDE { return threshold; }
564
565     void setNonmaxSuppression(bool f) CV_OVERRIDE { nonmaxSuppression = f; }
566     bool getNonmaxSuppression() const CV_OVERRIDE { return nonmaxSuppression; }
567
568     void setType(int type_) CV_OVERRIDE { type = type_; }
569     int getType() const CV_OVERRIDE { return type; }
570
571     int threshold;
572     bool nonmaxSuppression;
573     int type;
574 };
575
576 Ptr<FastFeatureDetector> FastFeatureDetector::create( int threshold, bool nonmaxSuppression, int type )
577 {
578     return makePtr<FastFeatureDetector_Impl>(threshold, nonmaxSuppression, type);
579 }
580
581 String FastFeatureDetector::getDefaultName() const
582 {
583     return (Feature2D::getDefaultName() + ".FastFeatureDetector");
584 }
585
586 }