1 /* This is FAST corner detector, contributed to OpenCV by the author, Edward Rosten.
2 Below is the original copyright and the references */
5 Copyright (c) 2006, 2008 Edward Rosten
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
12 *Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
15 *Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions and the following disclaimer in the
17 documentation and/or other materials provided with the distribution.
19 *Neither the name of the University of Cambridge nor the names of
20 its contributors may be used to endorse or promote products derived
21 from this software without specific prior written permission.
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
27 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 * Machine learning for high-speed corner detection,
39 E. Rosten and T. Drummond, ECCV 2006
40 * Faster and better: A machine learning approach to corner detection
41 E. Rosten, R. Porter and T. Drummond, PAMI, 2009
44 #include "precomp.hpp"
46 #include "fast_score.hpp"
47 #include "opencl_kernels_features2d.hpp"
48 #include "hal_replacement.hpp"
49 #include "opencv2/core/hal/intrin.hpp"
50 #include "opencv2/core/utils/buffer_area.private.hpp"
52 #include "opencv2/core/openvx/ovx_defs.hpp"
57 template<int patternSize>
58 void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
60 Mat img = _img.getMat();
61 const int K = patternSize/2, N = patternSize + K + 1;
62 int i, j, k, pixel[25];
63 makeOffsets(pixel, (int)img.step, patternSize);
66 const int quarterPatternSize = patternSize/4;
67 v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K);
69 Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2;
70 if(CV_CPU_HAS_SUPPORT_AVX2)
71 fast_t_impl_avx2 = opt_AVX2::FAST_t_patternSize16_AVX2::getImpl(img.cols, threshold, nonmax_suppression, pixel);
78 threshold = std::min(std::max(threshold, 0), 255);
80 uchar threshold_tab[512];
81 for( i = -255; i <= 255; i++ )
82 threshold_tab[i+255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0);
84 uchar* buf[3] = { 0 };
85 int* cpbuf[3] = { 0 };
86 utils::BufferArea area;
87 for (unsigned idx = 0; idx < 3; ++idx)
89 area.allocate(buf[idx], img.cols);
90 area.allocate(cpbuf[idx], img.cols + 1);
94 for (unsigned idx = 0; idx < 3; ++idx)
96 memset(buf[idx], 0, img.cols);
99 for(i = 3; i < img.rows-2; i++)
101 const uchar* ptr = img.ptr<uchar>(i) + 3;
102 uchar* curr = buf[(i - 3)%3];
103 int* cornerpos = cpbuf[(i - 3)%3] + 1; // cornerpos[-1] is used to store a value
104 memset(curr, 0, img.cols);
107 if( i < img.rows - 3 )
112 if( patternSize == 16 )
115 if (fast_t_impl_avx2)
116 fast_t_impl_avx2->process(j, ptr, curr, cornerpos, ncorners);
118 //vz if (j <= (img.cols - 27)) //it doesn't make sense using vectors for less than 8 elements
120 for (; j < img.cols - 16 - 3; j += 16, ptr += 16)
122 v_uint8x16 v = v_load(ptr);
123 v_int8x16 v0 = v_reinterpret_as_s8((v + t) ^ delta);
124 v_int8x16 v1 = v_reinterpret_as_s8((v - t) ^ delta);
126 v_int8x16 x0 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[0]), delta));
127 v_int8x16 x1 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[quarterPatternSize]), delta));
128 v_int8x16 x2 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[2*quarterPatternSize]), delta));
129 v_int8x16 x3 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[3*quarterPatternSize]), delta));
132 m0 = (v0 < x0) & (v0 < x1);
133 m1 = (x0 < v1) & (x1 < v1);
134 m0 = m0 | ((v0 < x1) & (v0 < x2));
135 m1 = m1 | ((x1 < v1) & (x2 < v1));
136 m0 = m0 | ((v0 < x2) & (v0 < x3));
137 m1 = m1 | ((x2 < v1) & (x3 < v1));
138 m0 = m0 | ((v0 < x3) & (v0 < x0));
139 m1 = m1 | ((x3 < v1) & (x0 < v1));
142 if( !v_check_any(m0) )
144 if( !v_check_any(v_combine_low(m0, m0)) )
151 v_int8x16 c0 = v_setzero_s8();
152 v_int8x16 c1 = v_setzero_s8();
153 v_uint8x16 max0 = v_setzero_u8();
154 v_uint8x16 max1 = v_setzero_u8();
155 for( k = 0; k < N; k++ )
157 v_int8x16 x = v_reinterpret_as_s8(v_load((ptr + pixel[k])) ^ delta);
161 c0 = v_sub_wrap(c0, m0) & m0;
162 c1 = v_sub_wrap(c1, m1) & m1;
164 max0 = v_max(max0, v_reinterpret_as_u8(c0));
165 max1 = v_max(max1, v_reinterpret_as_u8(c1));
168 max0 = K16 < v_max(max0, max1);
169 unsigned int m = v_signmask(v_reinterpret_as_s8(max0));
171 for( k = 0; m > 0 && k < 16; k++, m >>= 1 )
175 cornerpos[ncorners++] = j+k;
176 if(nonmax_suppression)
179 for (int _k = 0; _k < 25; _k++)
180 d[_k] = (short)(ptr[k] - ptr[k + pixel[_k]]);
182 v_int16x8 a0, b0, a1, b1;
183 a0 = b0 = a1 = b1 = v_load(d + 8);
184 for(int shift = 0; shift < 8; ++shift)
186 v_int16x8 v_nms = v_load(d + shift);
187 a0 = v_min(a0, v_nms);
188 b0 = v_max(b0, v_nms);
189 v_nms = v_load(d + 9 + shift);
190 a1 = v_min(a1, v_nms);
191 b1 = v_max(b1, v_nms);
193 curr[j + k] = (uchar)(v_reduce_max(v_max(v_max(a0, a1), v_setzero_s16() - v_min(b0, b1))) - 1);
202 for( ; j < img.cols - 3; j++, ptr++ )
205 const uchar* tab = &threshold_tab[0] - v + 255;
206 int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]];
211 d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]];
212 d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]];
213 d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]];
218 d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]];
219 d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]];
220 d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]];
221 d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]];
225 int vt = v - threshold, count = 0;
227 for( k = 0; k < N; k++ )
229 int x = ptr[pixel[k]];
234 cornerpos[ncorners++] = j;
235 if(nonmax_suppression)
236 curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
247 int vt = v + threshold, count = 0;
249 for( k = 0; k < N; k++ )
251 int x = ptr[pixel[k]];
256 cornerpos[ncorners++] = j;
257 if(nonmax_suppression)
258 curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
269 cornerpos[-1] = ncorners;
274 const uchar* prev = buf[(i - 4 + 3)%3];
275 const uchar* pprev = buf[(i - 5 + 3)%3];
276 cornerpos = cpbuf[(i - 4 + 3)%3] + 1; // cornerpos[-1] is used to store a value
277 ncorners = cornerpos[-1];
279 for( k = 0; k < ncorners; k++ )
283 if( !nonmax_suppression ||
284 (score > prev[j+1] && score > prev[j-1] &&
285 score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] &&
286 score > curr[j-1] && score > curr[j] && score > curr[j+1]) )
288 keypoints.push_back(KeyPoint((float)j, (float)(i-1), 7.f, -1, (float)score));
295 template<typename pt>
298 bool operator ()(const pt& a, const pt& b) const { return a.y < b.y || (a.y == b.y && a.x < b.x); }
301 static bool ocl_FAST( InputArray _img, std::vector<KeyPoint>& keypoints,
302 int threshold, bool nonmax_suppression, int maxKeypoints )
304 UMat img = _img.getUMat();
305 if( img.cols < 7 || img.rows < 7 )
307 size_t globalsize[] = { (size_t)img.cols-6, (size_t)img.rows-6 };
309 ocl::Kernel fastKptKernel("FAST_findKeypoints", ocl::features2d::fast_oclsrc);
310 if (fastKptKernel.empty())
313 UMat kp1(1, maxKeypoints*2+1, CV_32S);
315 UMat ucounter1(kp1, Rect(0,0,1,1));
316 ucounter1.setTo(Scalar::all(0));
318 if( !fastKptKernel.args(ocl::KernelArg::ReadOnly(img),
319 ocl::KernelArg::PtrReadWrite(kp1),
320 maxKeypoints, threshold).run(2, globalsize, 0, true))
324 ucounter1.copyTo(mcounter);
325 int i, counter = mcounter.at<int>(0);
326 counter = std::min(counter, maxKeypoints);
333 if( !nonmax_suppression )
336 kp1(Rect(0, 0, counter*2+1, 1)).copyTo(m);
337 const Point* pt = (const Point*)(m.ptr<int>() + 1);
338 for( i = 0; i < counter; i++ )
339 keypoints.push_back(KeyPoint((float)pt[i].x, (float)pt[i].y, 7.f, -1, 1.f));
343 UMat kp2(1, maxKeypoints*3+1, CV_32S);
344 UMat ucounter2 = kp2(Rect(0,0,1,1));
345 ucounter2.setTo(Scalar::all(0));
347 ocl::Kernel fastNMSKernel("FAST_nonmaxSupression", ocl::features2d::fast_oclsrc);
348 if (fastNMSKernel.empty())
351 size_t globalsize_nms[] = { (size_t)counter };
352 if( !fastNMSKernel.args(ocl::KernelArg::PtrReadOnly(kp1),
353 ocl::KernelArg::PtrReadWrite(kp2),
354 ocl::KernelArg::ReadOnly(img),
355 counter, counter).run(1, globalsize_nms, 0, true))
359 kp2(Rect(0, 0, counter*3+1, 1)).copyTo(m2);
360 Point3i* pt2 = (Point3i*)(m2.ptr<int>() + 1);
361 int newcounter = std::min(m2.at<int>(0), counter);
363 std::sort(pt2, pt2 + newcounter, cmp_pt<Point3i>());
365 for( i = 0; i < newcounter; i++ )
366 keypoints.push_back(KeyPoint((float)pt2[i].x, (float)pt2[i].y, 7.f, -1, (float)pt2[i].z));
376 template <> inline bool skipSmallImages<VX_KERNEL_FAST_CORNERS>(int w, int h) { return w*h < 800 * 600; }
378 static bool openvx_FAST(InputArray _img, std::vector<KeyPoint>& keypoints,
379 int _threshold, bool nonmaxSuppression, int type)
383 // Nonmax suppression is done differently in OpenCV than in OpenVX
384 // 9/16 is the only supported mode in OpenVX
385 if(nonmaxSuppression || type != FastFeatureDetector::TYPE_9_16)
388 Mat imgMat = _img.getMat();
389 if(imgMat.empty() || imgMat.type() != CV_8UC1)
392 if (ovx::skipSmallImages<VX_KERNEL_FAST_CORNERS>(imgMat.cols, imgMat.rows))
397 Context context = ovx::getOpenVXContext();
398 Image img = Image::createFromHandle(context, Image::matTypeToFormat(imgMat.type()),
399 Image::createAddressing(imgMat), (void*)imgMat.data);
400 ivx::Scalar threshold = ivx::Scalar::create<VX_TYPE_FLOAT32>(context, _threshold);
401 vx_size capacity = imgMat.cols * imgMat.rows;
402 Array corners = Array::create(context, VX_TYPE_KEYPOINT, capacity);
404 ivx::Scalar numCorners = ivx::Scalar::create<VX_TYPE_SIZE>(context, 0);
406 IVX_CHECK_STATUS(vxuFastCorners(context, img, threshold, (vx_bool)nonmaxSuppression, corners, numCorners));
408 size_t nPoints = numCorners.getValue<vx_size>();
409 keypoints.clear(); keypoints.reserve(nPoints);
410 std::vector<vx_keypoint_t> vxCorners;
411 corners.copyTo(vxCorners);
412 for(size_t i = 0; i < nPoints; i++)
414 vx_keypoint_t kp = vxCorners[i];
415 //if nonmaxSuppression is false, kp.strength is undefined
416 keypoints.push_back(KeyPoint((float)kp.x, (float)kp.y, 7.f, -1, kp.strength));
419 #ifdef VX_VERSION_1_1
420 //we should take user memory back before release
421 //(it's not done automatically according to standard)
425 catch (const RuntimeError & e)
427 VX_DbgThrow(e.what());
429 catch (const WrapperError & e)
431 VX_DbgThrow(e.what());
439 static inline int hal_FAST(cv::Mat& src, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, FastFeatureDetector::DetectorType type)
442 return CV_HAL_ERROR_NOT_IMPLEMENTED;
444 cv::Mat scores(src.size(), src.type());
446 int error = cv_hal_FAST_dense(src.data, src.step, scores.data, scores.step, src.cols, src.rows, type);
448 if (error != CV_HAL_ERROR_OK)
451 cv::Mat suppressedScores(src.size(), src.type());
453 if (nonmax_suppression)
455 error = cv_hal_FAST_NMS(scores.data, scores.step, suppressedScores.data, suppressedScores.step, scores.cols, scores.rows);
457 if (error != CV_HAL_ERROR_OK)
462 suppressedScores = scores;
465 if (!threshold && nonmax_suppression) threshold = 1;
467 cv::KeyPoint kpt(0, 0, 7.f, -1, 0);
469 unsigned uthreshold = (unsigned) threshold;
473 int stride = (int)suppressedScores.step;
474 const unsigned char* pscore = suppressedScores.data;
478 for (int y = ofs; y + ofs < suppressedScores.rows; ++y)
480 kpt.pt.y = (float)(y);
481 for (int x = ofs; x + ofs < suppressedScores.cols; ++x)
483 unsigned score = pscore[y * stride + x];
484 if (score > uthreshold)
486 kpt.pt.x = (float)(x);
487 kpt.response = (nonmax_suppression != 0) ? (float)((int)score - 1) : 0.f;
488 keypoints.push_back(kpt);
493 return CV_HAL_ERROR_OK;
496 void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, FastFeatureDetector::DetectorType type)
498 CV_INSTRUMENT_REGION();
500 CV_OCL_RUN(_img.isUMat() && type == FastFeatureDetector::TYPE_9_16,
501 ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000));
503 cv::Mat img = _img.getMat();
504 CALL_HAL(fast_dense, hal_FAST, img, keypoints, threshold, nonmax_suppression, type);
506 size_t keypoints_count;
507 CALL_HAL(fast, cv_hal_FAST, img.data, img.step, img.cols, img.rows,
508 (uchar*)(keypoints.data()), &keypoints_count, threshold, nonmax_suppression, type);
511 openvx_FAST(_img, keypoints, threshold, nonmax_suppression, type))
514 case FastFeatureDetector::TYPE_5_8:
515 FAST_t<8>(_img, keypoints, threshold, nonmax_suppression);
517 case FastFeatureDetector::TYPE_7_12:
518 FAST_t<12>(_img, keypoints, threshold, nonmax_suppression);
520 case FastFeatureDetector::TYPE_9_16:
521 FAST_t<16>(_img, keypoints, threshold, nonmax_suppression);
527 void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
529 CV_INSTRUMENT_REGION();
531 FAST(_img, keypoints, threshold, nonmax_suppression, FastFeatureDetector::TYPE_9_16);
535 class FastFeatureDetector_Impl CV_FINAL : public FastFeatureDetector
538 FastFeatureDetector_Impl( int _threshold, bool _nonmaxSuppression, FastFeatureDetector::DetectorType _type )
539 : threshold(_threshold), nonmaxSuppression(_nonmaxSuppression), type(_type)
542 void read( const FileNode& fn) CV_OVERRIDE
544 // if node is empty, keep previous value
545 if (!fn["threshold"].empty())
546 fn["threshold"] >> threshold;
547 if (!fn["nonmaxSuppression"].empty())
548 fn["nonmaxSuppression"] >> nonmaxSuppression;
549 if (!fn["type"].empty())
552 void write( FileStorage& fs) const CV_OVERRIDE
556 fs << "name" << getDefaultName();
557 fs << "threshold" << threshold;
558 fs << "nonmaxSuppression" << nonmaxSuppression;
559 fs << "type" << type;
563 void detect( InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask ) CV_OVERRIDE
565 CV_INSTRUMENT_REGION();
573 Mat mask = _mask.getMat(), grayImage;
575 _InputArray gray = _image;
576 if( _image.type() != CV_8U )
578 _OutputArray ogray = _image.isUMat() ? _OutputArray(ugrayImage) : _OutputArray(grayImage);
579 cvtColor( _image, ogray, COLOR_BGR2GRAY );
582 FAST( gray, keypoints, threshold, nonmaxSuppression, type );
583 KeyPointsFilter::runByPixelsMask( keypoints, mask );
586 void set(int prop, double value)
588 if(prop == THRESHOLD)
589 threshold = cvRound(value);
590 else if(prop == NONMAX_SUPPRESSION)
591 nonmaxSuppression = value != 0;
592 else if(prop == FAST_N)
593 type = static_cast<FastFeatureDetector::DetectorType>(cvRound(value));
595 CV_Error(Error::StsBadArg, "");
598 double get(int prop) const
600 if(prop == THRESHOLD)
602 if(prop == NONMAX_SUPPRESSION)
603 return nonmaxSuppression;
605 return static_cast<int>(type);
606 CV_Error(Error::StsBadArg, "");
610 void setThreshold(int threshold_) CV_OVERRIDE { threshold = threshold_; }
611 int getThreshold() const CV_OVERRIDE { return threshold; }
613 void setNonmaxSuppression(bool f) CV_OVERRIDE { nonmaxSuppression = f; }
614 bool getNonmaxSuppression() const CV_OVERRIDE { return nonmaxSuppression; }
616 void setType(FastFeatureDetector::DetectorType type_) CV_OVERRIDE{ type = type_; }
617 FastFeatureDetector::DetectorType getType() const CV_OVERRIDE{ return type; }
620 bool nonmaxSuppression;
621 FastFeatureDetector::DetectorType type;
624 Ptr<FastFeatureDetector> FastFeatureDetector::create( int threshold, bool nonmaxSuppression, FastFeatureDetector::DetectorType type )
626 return makePtr<FastFeatureDetector_Impl>(threshold, nonmaxSuppression, type);
629 String FastFeatureDetector::getDefaultName() const
631 return (Feature2D::getDefaultName() + ".FastFeatureDetector");