From efdfca7a112d118868f5ea0ac653182ccbde14d8 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Fri, 7 Mar 2014 18:55:45 +0400 Subject: [PATCH] do not use the large "score" buffer; now without non-max suppression OpenCL FAST is pretty efficient --- modules/features2d/src/fast.cpp | 20 ++++++-------------- modules/features2d/src/opencl/fast.cl | 30 ++++++++++++++++-------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/modules/features2d/src/fast.cpp b/modules/features2d/src/fast.cpp index 7020727..004ebb3 100644 --- a/modules/features2d/src/fast.cpp +++ b/modules/features2d/src/fast.cpp @@ -268,23 +268,13 @@ static bool ocl_FAST( InputArray _img, std::vector& keypoints, if (fastKptKernel.empty()) return false; - UMat kp1(1, maxKeypoints*2+1, CV_32S), score; + UMat kp1(1, maxKeypoints*2+1, CV_32S); UMat ucounter1(kp1, Rect(0,0,1,1)); ucounter1.setTo(Scalar::all(0)); - if( nonmax_suppression ) - { - score.create(img.size(), CV_8U); - score.setTo(Scalar::all(0)); - } - else - score = img; // initialize score with some non-empty value - if( !fastKptKernel.args(ocl::KernelArg::ReadOnly(img), - ocl::KernelArg::WriteOnlyNoSize(score), ocl::KernelArg::PtrReadWrite(kp1), - nonmax_suppression ? 1 : 0, maxKeypoints, threshold).run(2, globalsize, 0, true)) return false; @@ -319,7 +309,7 @@ static bool ocl_FAST( InputArray _img, std::vector& keypoints, size_t globalsize_nms[] = { counter }; if( !fastNMSKernel.args(ocl::KernelArg::PtrReadOnly(kp1), ocl::KernelArg::PtrReadWrite(kp2), - ocl::KernelArg::ReadOnlyNoSize(score), + ocl::KernelArg::ReadOnly(img), counter, counter).run(1, globalsize_nms, 0, true)) return false; @@ -340,9 +330,10 @@ static bool ocl_FAST( InputArray _img, std::vector& keypoints, void FAST(InputArray _img, std::vector& keypoints, int threshold, bool nonmax_suppression, int type) { - if( ocl::useOpenCL() && _img.isUMat() && type == FastFeatureDetector::TYPE_9_16 && + double t = (double)getTickCount(); + if( ocl::useOpenCL() && /*_img.isUMat() &&*/ type == FastFeatureDetector::TYPE_9_16 && ocl_FAST(_img, keypoints, threshold, nonmax_suppression, 10000)) - return; + ; switch(type) { case FastFeatureDetector::TYPE_5_8: @@ -359,6 +350,7 @@ void FAST(InputArray _img, std::vector& keypoints, int threshold, bool FAST_t<16>(_img, keypoints, threshold, nonmax_suppression); break; } + printf("time=%.2fms\n", ((double)getTickCount() - t)*1000./getTickFrequency()); } diff --git a/modules/features2d/src/opencl/fast.cl b/modules/features2d/src/opencl/fast.cl index 3dd0d2e..84ca39a 100644 --- a/modules/features2d/src/opencl/fast.cl +++ b/modules/features2d/src/opencl/fast.cl @@ -1,9 +1,9 @@ // OpenCL port of the FAST corner detector. // Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org -inline int cornerScore(__global const uchar* img, int step, int threshold) +inline int cornerScore(__global const uchar* img, int step) { - int k, tofs, v = img[0], a0 = threshold, b0; + int k, tofs, v = img[0], a0 = 0, b0; int d[16]; #define LOAD2(idx, ofs) \ tofs = ofs; d[idx] = (short)(v - img[tofs]); d[idx+8] = (short)(v - img[-tofs]) @@ -53,8 +53,7 @@ __kernel void FAST_findKeypoints( __global const uchar * _img, int step, int img_offset, int img_rows, int img_cols, - __global uchar * score, int score_step, int score_offset, - volatile __global int* kp_loc, int calc_score, + volatile __global int* kp_loc, int max_keypoints, int threshold ) { int j = get_global_id(0) + 3; @@ -118,8 +117,6 @@ void FAST_findKeypoints( kp_loc[1 + 2*idx] = j; kp_loc[2 + 2*idx] = i; } - if(calc_score) - score[mad24(i, score_step, score_offset+j)] = cornerScore(img, step, threshold); } } } @@ -130,8 +127,8 @@ void FAST_findKeypoints( __kernel void FAST_nonmaxSupression( __global const int* kp_in, volatile __global int* kp_out, - __global const uchar * _score, int step, int score_offset, - int counter, int max_keypoints) + __global const uchar * _img, int step, int img_offset, + int rows, int cols, int counter, int max_keypoints) { const int idx = get_global_id(0); @@ -139,14 +136,19 @@ void FAST_nonmaxSupression( { int x = kp_in[1 + 2*idx]; int y = kp_in[2 + 2*idx]; + __global const uchar* img = _img + mad24(y, step, x + img_offset); - __global const uchar* score = _score + mad24(y, step, x + score_offset); - int s = score[0]; + int s = cornerScore(img, step); - if( (s > (int)score[1]) + (s > (int)score[-1]) + - (s > (int)score[-step]) + (s > (int)score[step]) + - (s > (int)score[-step-1]) + (s > (int)score[-step+1]) + - (s > (int)score[step-1]) + (s > (int)score[step+1]) == 8 ) + if( (x < 4 || s > cornerScore(img-1, step)) + + (y < 4 || s > cornerScore(img-step, step)) != 2 ) + return; + if( (x >= cols - 4 || s > cornerScore(img+1, step)) + + (y >= rows - 4 || s > cornerScore(img+step, step)) + + (x < 4 || y < 4 || s > cornerScore(img-step-1, step)) + + (x >= cols - 4 || y < 4 || s > cornerScore(img-step+1, step)) + + (x < 4 || y >= rows - 4 || s > cornerScore(img+step-1, step)) + + (x >= cols - 4 || y >= rows - 4 || s > cornerScore(img+step+1, step)) == 6) { int new_idx = atomic_inc(kp_out); if( new_idx < max_keypoints ) -- 2.7.4