Refactor CUDA FAST feature detector algorithm: use the new abstract interface (Feature2DAsync) with a hidden implementation (FAST_Impl)

Author:    Vladislav Vinogradov <vlad.vinogradov@itseez.com>, Mon, 12 Jan 2015 15:11:09 +0000 (18:11 +0300)
Committer: Vladislav Vinogradov <vlad.vinogradov@itseez.com>, Tue, 13 Jan 2015 15:03:56 +0000 (18:03 +0300)
diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp

index f61d2df..f6f674d 100644 (file)
--- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
+++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
@@ -48,6 +48,7 @@
  #endif
  
  #include "opencv2/core/cuda.hpp"
+#include "opencv2/features2d.hpp"
  #include "opencv2/cudafilters.hpp"
  
  /**
@@ -228,91 +229,49 @@ private:
      std::vector<GpuMat> trainDescCollection;
  };
  
-/** @brief Class used for corner detection using the FAST algorithm. :
+//
+// Feature2DAsync
+//
+
+/** @brief Abstract base class for CUDA asynchronous 2D image feature detectors and descriptor extractors.
   */
-class CV_EXPORTS FAST_CUDA
+class CV_EXPORTS Feature2DAsync
  {
  public:
-    enum
-    {
-        LOCATION_ROW = 0,
-        RESPONSE_ROW,
-        ROWS_COUNT
-    };
-
-    //! all features have same size
-    static const int FEATURE_SIZE = 7;
-
-    /** @brief Constructor.
-
-    @param threshold Threshold on difference between intensity of the central pixel and pixels on a
-    circle around this pixel.
-    @param nonmaxSuppression If it is true, non-maximum suppression is applied to detected corners
-    (keypoints).
-    @param keypointsRatio Inner buffer size for keypoints store is determined as (keypointsRatio \*
-    image_width \* image_height).
-     */
-    explicit FAST_CUDA(int threshold, bool nonmaxSuppression = true, double keypointsRatio = 0.05);
-
-    /** @brief Finds the keypoints using FAST detector.
-
-    @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
-    supported.
-    @param mask Optional input mask that marks the regions where we should detect features.
-    @param keypoints The output vector of keypoints. Can be stored both in CPU and GPU memory. For GPU
-    memory:
-    -   keypoints.ptr\<Vec2s\>(LOCATION_ROW)[i] will contain location of i'th point
-    -   keypoints.ptr\<float\>(RESPONSE_ROW)[i] will contain response of i'th point (if non-maximum
-    suppression is applied)
-     */
-    void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-    /** @overload */
-    void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Download keypoints from GPU to CPU memory.
-    */
-    static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
-    */
-    static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
+    virtual ~Feature2DAsync() {}
  
-    /** @brief Releases inner buffer memory.
-    */
-    void release();
-
-    bool nonmaxSuppression;
-
-    int threshold;
-
-    //! max keypoints = keypointsRatio * img.size().area()
-    double keypointsRatio;
-
-    /** @brief Find keypoints and compute it's response if nonmaxSuppression is true.
+    virtual void detectAsync(InputArray image, OutputArray keypoints,
+                             InputArray mask = noArray(),
+                             Stream& stream = Stream::Null()) = 0;
  
-    @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
-    supported.
-    @param mask Optional input mask that marks the regions where we should detect features.
-
-    The function returns count of detected keypoints.
-     */
-    int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);
-
-    /** @brief Gets final array of keypoints.
+    virtual void convert(InputArray gpu_keypoints, std::vector<KeyPoint>& keypoints) = 0;
+};
  
-    @param keypoints The output vector of keypoints.
+//
+// FastFeatureDetector
+//
  
-    The function performs non-max suppression if needed and returns final count of keypoints.
-     */
-    int getKeyPoints(GpuMat& keypoints);
+/** @brief Wrapping class for feature detection using the FAST method.
+ */
+class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync
+{
+public:
+    enum
+    {
+        LOCATION_ROW = 0,
+        RESPONSE_ROW,
+        ROWS_COUNT,
  
-private:
-    GpuMat kpLoc_;
-    int count_;
+        FEATURE_SIZE = 7
+    };
  
-    GpuMat score_;
+    static Ptr<FastFeatureDetector> create(int threshold=10,
+                                           bool nonmaxSuppression=true,
+                                           int type=FastFeatureDetector::TYPE_9_16,
+                                           int max_npoints = 5000);
  
-    GpuMat d_keypoints_;
+    virtual void setMaxNumPoints(int max_npoints) = 0;
+    virtual int getMaxNumPoints() const = 0;
  };
  
  /** @brief Class for extracting ORB features and descriptors from an image. :
@@ -388,8 +347,8 @@ public:
  
      inline void setFastParams(int threshold, bool nonmaxSuppression = true)
      {
-        fastDetector_.threshold = threshold;
-        fastDetector_.nonmaxSuppression = nonmaxSuppression;
+        fastDetector_->setThreshold(threshold);
+        fastDetector_->setNonmaxSuppression(nonmaxSuppression);
      }
  
      /** @brief Releases inner buffer memory.
@@ -433,7 +392,7 @@ private:
      std::vector<GpuMat> keyPointsPyr_;
      std::vector<int> keyPointsCount_;
  
-    FAST_CUDA fastDetector_;
+    Ptr<cv::cuda::FastFeatureDetector> fastDetector_;
  
      Ptr<cuda::Filter> blurFilter;
  
diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp

index 26eb434..da3cd77 100644 (file)
--- a/modules/cudafeatures2d/perf/perf_features2d.cpp
+++ b/modules/cudafeatures2d/perf/perf_features2d.cpp
@@ -64,15 +64,18 @@ PERF_TEST_P(Image_Threshold_NonMaxSuppression, FAST,
  
      if (PERF_RUN_CUDA())
      {
-        cv::cuda::FAST_CUDA d_fast(threshold, nonMaxSuppersion, 0.5);
+        cv::Ptr<cv::cuda::FastFeatureDetector> d_fast =
+                cv::cuda::FastFeatureDetector::create(threshold, nonMaxSuppersion,
+                                                      cv::FastFeatureDetector::TYPE_9_16,
+                                                      0.5 * img.size().area());
  
          const cv::cuda::GpuMat d_img(img);
          cv::cuda::GpuMat d_keypoints;
  
-        TEST_CYCLE() d_fast(d_img, cv::cuda::GpuMat(), d_keypoints);
+        TEST_CYCLE() d_fast->detectAsync(d_img, d_keypoints);
  
          std::vector<cv::KeyPoint> gpu_keypoints;
-        d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
+        d_fast->convert(d_keypoints, gpu_keypoints);
  
          sortKeyPoints(gpu_keypoints);
  
diff --git a/modules/cudafeatures2d/src/cuda/fast.cu b/modules/cudafeatures2d/src/cuda/fast.cu

index 7aa888a..72235d4 100644 (file)
--- a/modules/cudafeatures2d/src/cuda/fast.cu
+++ b/modules/cudafeatures2d/src/cuda/fast.cu
@@ -279,7 +279,7 @@ namespace cv { namespace cuda { namespace device
              #endif
          }
  
-        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
+        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream)
          {
              void* counter_ptr;
              cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
@@ -290,29 +290,29 @@ namespace cv { namespace cuda { namespace device
              grid.x = divUp(img.cols - 6, block.x);
              grid.y = divUp(img.rows - 6, block.y);
  
-            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+            cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );
  
              if (score.data)
              {
                  if (mask.data)
-                    calcKeypoints<true><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
+                    calcKeypoints<true><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
                  else
-                    calcKeypoints<true><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
+                    calcKeypoints<true><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
              }
              else
              {
                  if (mask.data)
-                    calcKeypoints<false><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
+                    calcKeypoints<false><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
                  else
-                    calcKeypoints<false><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
+                    calcKeypoints<false><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
              }
  
              cudaSafeCall( cudaGetLastError() );
  
-            cudaSafeCall( cudaDeviceSynchronize() );
-
              unsigned int count;
-            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
+
+            cudaSafeCall( cudaStreamSynchronize(stream) );
  
              return count;
          }
@@ -356,7 +356,7 @@ namespace cv { namespace cuda { namespace device
              #endif
          }
  
-        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
+        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream)
          {
              void* counter_ptr;
              cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
@@ -366,15 +366,15 @@ namespace cv { namespace cuda { namespace device
              dim3 grid;
              grid.x = divUp(count, block.x);
  
-            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+            cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );
  
-            nonmaxSuppression<<<grid, block>>>(kpLoc, count, score, loc, response);
+            nonmaxSuppression<<<grid, block, 0, stream>>>(kpLoc, count, score, loc, response);
              cudaSafeCall( cudaGetLastError() );
  
-            cudaSafeCall( cudaDeviceSynchronize() );
-
              unsigned int new_count;
-            cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaMemcpyAsync(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
+
+            cudaSafeCall( cudaStreamSynchronize(stream) );
  
              return new_count;
          }
diff --git a/modules/cudafeatures2d/src/fast.cpp b/modules/cudafeatures2d/src/fast.cpp

index aa77aa8..cb22ea5 100644 (file)
--- a/modules/cudafeatures2d/src/fast.cpp
+++ b/modules/cudafeatures2d/src/fast.cpp
@@ -47,124 +47,162 @@ using namespace cv::cuda;
  
  #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
  
-cv::cuda::FAST_CUDA::FAST_CUDA(int, bool, double) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::release() { throw_no_cuda(); }
-int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_no_cuda(); return 0; }
-int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat&) { throw_no_cuda(); return 0; }
+Ptr<FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr<FastFeatureDetector>(); }
  
  #else /* !defined (HAVE_CUDA) */
  
-cv::cuda::FAST_CUDA::FAST_CUDA(int _threshold, bool _nonmaxSuppression, double _keypointsRatio) :
-    nonmaxSuppression(_nonmaxSuppression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
+namespace cv { namespace cuda { namespace device
  {
-}
+    namespace fast
+    {
+        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream);
+        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream);
+    }
+}}}
  
-void cv::cuda::FAST_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
+namespace
  {
-    if (image.empty())
-        return;
+    class FAST_Impl : public cv::cuda::FastFeatureDetector
+    {
+    public:
+        FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints);
  
-    (*this)(image, mask, d_keypoints_);
-    downloadKeypoints(d_keypoints_, keypoints);
-}
+        virtual void detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask);
+        virtual void detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream);
  
-void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (d_keypoints.empty())
-        return;
+        virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);
  
-    Mat h_keypoints(d_keypoints);
-    convertKeypoints(h_keypoints, keypoints);
-}
+        virtual void setThreshold(int threshold) { threshold_ = threshold; }
+        virtual int getThreshold() const { return threshold_; }
  
-void cv::cuda::FAST_CUDA::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (h_keypoints.empty())
-        return;
-
-    CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);
+        virtual void setNonmaxSuppression(bool f) { nonmaxSuppression_ = f; }
+        virtual bool getNonmaxSuppression() const { return nonmaxSuppression_; }
  
-    int npoints = h_keypoints.cols;
+        virtual void setMaxNumPoints(int max_npoints) { max_npoints_ = max_npoints; }
+        virtual int getMaxNumPoints() const { return max_npoints_; }
  
-    keypoints.resize(npoints);
+        virtual void setType(int type) { CV_Assert( type == TYPE_9_16 ); }
+        virtual int getType() const { return TYPE_9_16; }
  
-    const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
-    const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
+    private:
+        int threshold_;
+        bool nonmaxSuppression_;
+        int max_npoints_;
+    };
  
-    for (int i = 0; i < npoints; ++i)
+    FAST_Impl::FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints) :
+        threshold_(threshold), nonmaxSuppression_(nonmaxSuppression), max_npoints_(max_npoints)
      {
-        KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
-        keypoints[i] = kp;
      }
-}
  
-void cv::cuda::FAST_CUDA::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
-{
-    calcKeyPointsLocation(img, mask);
-    keypoints.cols = getKeyPoints(keypoints);
-}
-
-namespace cv { namespace cuda { namespace device
-{
-    namespace fast
+    void FAST_Impl::detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask)
      {
-        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold);
-        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response);
-    }
-}}}
-
-int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
-{
-    using namespace cv::cuda::device::fast;
-
-    CV_Assert(img.type() == CV_8UC1);
-    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
+        if (_image.empty())
+        {
+            keypoints.clear();
+            return;
+        }
  
-    int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
+        BufferPool pool(Stream::Null());
+        GpuMat d_keypoints = pool.getBuffer(ROWS_COUNT, max_npoints_, CV_16SC2);
  
-    ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_);
+        detectAsync(_image, d_keypoints, _mask, Stream::Null());
+        convert(d_keypoints, keypoints);
+    }
  
-    if (nonmaxSuppression)
+    void FAST_Impl::detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream)
      {
-        ensureSizeIsEnough(img.size(), CV_32SC1, score_);
-        score_.setTo(Scalar::all(0));
+        using namespace cv::cuda::device::fast;
+
+        const GpuMat img = _image.getGpuMat();
+        const GpuMat mask = _mask.getGpuMat();
+
+        CV_Assert( img.type() == CV_8UC1 );
+        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()) );
+
+        BufferPool pool(stream);
+
+        GpuMat kpLoc = pool.getBuffer(1, max_npoints_, CV_16SC2);
+
+        GpuMat score;
+        if (nonmaxSuppression_)
+        {
+            score = pool.getBuffer(img.size(), CV_32SC1);
+            score.setTo(Scalar::all(0), stream);
+        }
+
+        int count = calcKeypoints_gpu(img, mask, kpLoc.ptr<short2>(), max_npoints_, score, threshold_, StreamAccessor::getStream(stream));
+        count = std::min(count, max_npoints_);
+
+        if (count == 0)
+        {
+            _keypoints.release();
+            return;
+        }
+
+        ensureSizeIsEnough(ROWS_COUNT, count, CV_32FC1, _keypoints);
+        GpuMat& keypoints = _keypoints.getGpuMatRef();
+
+        if (nonmaxSuppression_)
+        {
+            count = nonmaxSuppression_gpu(kpLoc.ptr<short2>(), count, score, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW), StreamAccessor::getStream(stream));
+            if (count == 0)
+            {
+                keypoints.release();
+            }
+            else
+            {
+                keypoints.cols = count;
+            }
+        }
+        else
+        {
+            GpuMat locRow(1, count, kpLoc.type(), keypoints.ptr(0));
+            kpLoc.colRange(0, count).copyTo(locRow, stream);
+            keypoints.row(1).setTo(Scalar::all(0), stream);
+        }
      }
  
-    count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSuppression ? score_ : PtrStepSzi(), threshold);
-    count_ = std::min(count_, maxKeypoints);
-
-    return count_;
-}
-
-int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat& keypoints)
-{
-    using namespace cv::cuda::device::fast;
-
-    if (count_ == 0)
-        return 0;
-
-    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);
-
-    if (nonmaxSuppression)
-        return nonmaxSuppression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));
-
-    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
-    kpLoc_.colRange(0, count_).copyTo(locRow);
-    keypoints.row(1).setTo(Scalar::all(0));
-
-    return count_;
+    void FAST_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
+    {
+        if (_gpu_keypoints.empty())
+        {
+            keypoints.clear();
+            return;
+        }
+
+        Mat h_keypoints;
+        if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_keypoints.getGpuMat().download(h_keypoints);
+        }
+        else
+        {
+            h_keypoints = _gpu_keypoints.getMat();
+        }
+
+        CV_Assert( h_keypoints.rows == ROWS_COUNT );
+        CV_Assert( h_keypoints.elemSize() == 4 );
+
+        const int npoints = h_keypoints.cols;
+
+        keypoints.resize(npoints);
+
+        const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
+        const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
+
+        for (int i = 0; i < npoints; ++i)
+        {
+            KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
+            keypoints[i] = kp;
+        }
+    }
  }
  
-void cv::cuda::FAST_CUDA::release()
+Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int threshold, bool nonmaxSuppression, int type, int max_npoints)
  {
-    kpLoc_.release();
-    score_.release();
-
-    d_keypoints_.release();
+    CV_Assert( type == TYPE_9_16 );
+    return makePtr<FAST_Impl>(threshold, nonmaxSuppression, max_npoints);
  }
  
  #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/cudafeatures2d/src/orb.cpp b/modules/cudafeatures2d/src/orb.cpp

index 8d8afe8..c04649b 100644 (file)
--- a/modules/cudafeatures2d/src/orb.cpp
+++ b/modules/cudafeatures2d/src/orb.cpp
@@ -398,7 +398,7 @@ namespace
  cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
      nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
      scoreType_(scoreType), patchSize_(patchSize),
-    fastDetector_(DEFAULT_FAST_THRESHOLD)
+    fastDetector_(cuda::FastFeatureDetector::create(DEFAULT_FAST_THRESHOLD))
  {
      CV_Assert(patchSize_ >= 2);
  
@@ -554,7 +554,7 @@ namespace
                  return;
              }
  
-            count = cull_gpu(keypoints.ptr<int>(FAST_CUDA::LOCATION_ROW), keypoints.ptr<float>(FAST_CUDA::RESPONSE_ROW), count, n_points);
+            count = cull_gpu(keypoints.ptr<int>(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr<float>(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points);
          }
      }
  }
@@ -570,20 +570,20 @@ void cv::cuda::ORB_CUDA::computeKeyPointsPyramid()
  
      for (int level = 0; level < nLevels_; ++level)
      {
-        keyPointsCount_[level] = fastDetector_.calcKeyPointsLocation(imagePyr_[level], maskPyr_[level]);
+        fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());
  
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        ensureSizeIsEnough(3, keyPointsCount_[level], CV_32FC1, keyPointsPyr_[level]);
+        GpuMat fastKpRange;
+        fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());
  
-        GpuMat fastKpRange = keyPointsPyr_[level].rowRange(0, 2);
-        keyPointsCount_[level] = fastDetector_.getKeyPoints(fastKpRange);
+        keyPointsCount_[level] = fastKpRange.cols;
  
          if (keyPointsCount_[level] == 0)
              continue;
  
-        int n_features = static_cast<int>(n_features_per_level_[level]);
+        ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
+        fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));
+
+        const int n_features = static_cast<int>(n_features_per_level_[level]);
  
          if (scoreType_ == ORB::HARRIS_SCORE)
          {
@@ -767,8 +767,6 @@ void cv::cuda::ORB_CUDA::release()
  
      keyPointsPyr_.clear();
  
-    fastDetector_.release();
-
      d_keypoints_.release();
  }
  
diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp

index 468024a..9a8d76c 100644 (file)
--- a/modules/cudafeatures2d/test/test_features2d.cpp
+++ b/modules/cudafeatures2d/test/test_features2d.cpp
@@ -76,15 +76,14 @@ CUDA_TEST_P(FAST, Accuracy)
      cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
      ASSERT_FALSE(image.empty());
  
-    cv::cuda::FAST_CUDA fast(threshold);
-    fast.nonmaxSuppression = nonmaxSuppression;
+    cv::Ptr<cv::cuda::FastFeatureDetector> fast = cv::cuda::FastFeatureDetector::create(threshold, nonmaxSuppression);
  
      if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
      {
          try
          {
              std::vector<cv::KeyPoint> keypoints;
-            fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
+            fast->detect(loadMat(image), keypoints);
          }
          catch (const cv::Exception& e)
          {
@@ -94,7 +93,7 @@ CUDA_TEST_P(FAST, Accuracy)
      else
      {
          std::vector<cv::KeyPoint> keypoints;
-        fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
+        fast->detect(loadMat(image), keypoints);
  
          std::vector<cv::KeyPoint> keypoints_gold;
          cv::FAST(image, keypoints_gold, threshold, nonmaxSuppression);
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp

index 2e7faa3..8869a1b 100644 (file)
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -322,14 +322,14 @@ TEST(FAST)
      FAST(src, keypoints, 20);
      CPU_OFF;
  
-    cuda::FAST_CUDA d_FAST(20);
+    cv::Ptr<cv::cuda::FastFeatureDetector> d_FAST = cv::cuda::FastFeatureDetector::create(20);
      cuda::GpuMat d_src(src);
      cuda::GpuMat d_keypoints;
  
-    d_FAST(d_src, cuda::GpuMat(), d_keypoints);
+    d_FAST->detectAsync(d_src, d_keypoints);
  
      CUDA_ON;
-    d_FAST(d_src, cuda::GpuMat(), d_keypoints);
+    d_FAST->detectAsync(d_src, d_keypoints);
      CUDA_OFF;
  }
author	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Mon, 12 Jan 2015 15:11:09 +0000 (18:11 +0300)
committer	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Tue, 13 Jan 2015 15:03:56 +0000 (18:03 +0300)
modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp		patch | blob | history
modules/cudafeatures2d/perf/perf_features2d.cpp		patch | blob | history
modules/cudafeatures2d/src/cuda/fast.cu		patch | blob | history
modules/cudafeatures2d/src/fast.cpp		patch | blob | history
modules/cudafeatures2d/src/orb.cpp		patch | blob | history
modules/cudafeatures2d/test/test_features2d.cpp		patch | blob | history
samples/gpu/performance/tests.cpp		patch | blob | history