From: Vladislav Vinogradov Date: Tue, 13 Jan 2015 14:57:30 +0000 (+0300) Subject: refactor CUDA BFMatcher algorithm: X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~2733^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8a178da1a42d6ec2a26eed1c998889377d1576ae;p=platform%2Fupstream%2Fopencv.git refactor CUDA BFMatcher algorithm: use new abstract interface and hidden implementation --- diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp index c7ab6e3..9757269 100644 --- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp +++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp @@ -63,170 +63,315 @@ namespace cv { namespace cuda { //! @addtogroup cudafeatures2d //! @{ -/** @brief Brute-force descriptor matcher. - -For each descriptor in the first set, this matcher finds the closest descriptor in the second set -by trying each one. This descriptor matcher supports masking permissible matches between descriptor -sets. +// +// DescriptorMatcher +// -The class BFMatcher_CUDA has an interface similar to the class DescriptorMatcher. It has two groups -of match methods: for matching descriptors of one image with another image or with an image set. -Also, all functions have an alternative to save results either to the GPU memory or to the CPU -memory. +/** @brief Abstract base class for matching keypoint descriptors. -@sa DescriptorMatcher, BFMatcher +It has two groups of match methods: for matching descriptors of an image with another image or with +an image set. */ -class CV_EXPORTS BFMatcher_CUDA +class CV_EXPORTS DescriptorMatcher : public cv::Algorithm { public: - explicit BFMatcher_CUDA(int norm = cv::NORM_L2); - - //! Add descriptors to train descriptor collection - void add(const std::vector& descCollection); - - //! Get train descriptors collection - const std::vector& getTrainDescriptors() const; - - //! Clear train descriptors collection - void clear(); - - //! Return true if there are not train descriptors in collection - bool empty() const; - - //! Return true if the matcher supports mask in match methods - bool isMaskSupported() const; - - //! Find one best match for each query descriptor - void matchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, - const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); - - //! Download trainIdx and distance and convert it to CPU vector with DMatch - static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector& matches); - //! Convert trainIdx and distance to vector with DMatch - static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector& matches); - - //! Find one best match for each query descriptor - void match(const GpuMat& query, const GpuMat& train, std::vector& matches, const GpuMat& mask = GpuMat()); - - //! Make gpu collection of trains and masks in suitable format for matchCollection function - void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector& masks = std::vector()); - - //! Find one best match from train collection for each query descriptor - void matchCollection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, - const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null()); - - //! 
Download trainIdx, imgIdx and distance and convert it to vector with DMatch - static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector& matches); - //! Convert trainIdx, imgIdx and distance to vector with DMatch - static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector& matches); - - //! Find one best match from train collection for each query descriptor. - void match(const GpuMat& query, std::vector& matches, const std::vector& masks = std::vector()); - - //! Find k best matches for each query descriptor (in increasing order of distances) - void knnMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, - const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); - - //! Download trainIdx and distance and convert it to vector with DMatch - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - //! Convert trainIdx and distance to vector with DMatch - static void knnMatchConvert(const Mat& trainIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - - //! Find k best matches for each query descriptor (in increasing order of distances). - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - void knnMatch(const GpuMat& query, const GpuMat& train, - std::vector< std::vector >& matches, int k, const GpuMat& mask = GpuMat(), - bool compactResult = false); - - //! Find k best matches from train collection for each query descriptor (in increasing order of distances) - void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, - const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null()); - - //! Download trainIdx and distance and convert it to vector with DMatch - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - //! @see BFMatcher_CUDA::knnMatchDownload - static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - //! Convert trainIdx and distance to vector with DMatch - //! @see BFMatcher_CUDA::knnMatchConvert - static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - - //! Find k best matches for each query descriptor (in increasing order of distances). - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. 
- void knnMatch(const GpuMat& query, std::vector< std::vector >& matches, int k, - const std::vector& masks = std::vector(), bool compactResult = false); - - //! Find best matches for each query descriptor which have distance less than maxDistance. - //! nMatches.at(0, queryIdx) will contain matches count for queryIdx. - //! carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches, - //! because it didn't have enough memory. - //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), - //! otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - //! Matches doesn't sorted. - void radiusMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, - const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); - - //! Download trainIdx, nMatches and distance and convert it to vector with DMatch. - //! matches will be sorted in increasing order of distances. - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - //! Convert trainIdx, nMatches and distance to vector with DMatch. - static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - - //! Find best matches for each query descriptor which have distance less than maxDistance - //! in increasing order of distances). - void radiusMatch(const GpuMat& query, const GpuMat& train, - std::vector< std::vector >& matches, float maxDistance, - const GpuMat& mask = GpuMat(), bool compactResult = false); - - //! Find best matches for each query descriptor which have distance less than maxDistance. - //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), - //! otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - //! Matches doesn't sorted. - void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, - const std::vector& masks = std::vector(), Stream& stream = Stream::Null()); - - //! Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. - //! matches will be sorted in increasing order of distances. - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - //! Convert trainIdx, nMatches and distance to vector with DMatch. - static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - - //! 
Find best matches from train collection for each query descriptor which have distance less than - //! maxDistance (in increasing order of distances). - void radiusMatch(const GpuMat& query, std::vector< std::vector >& matches, float maxDistance, - const std::vector& masks = std::vector(), bool compactResult = false); - - int norm; - -private: - std::vector trainDescCollection; + // + // Factories + // + + /** @brief Brute-force descriptor matcher. + + For each descriptor in the first set, this matcher finds the closest descriptor in the second set + by trying each one. This descriptor matcher supports masking permissible matches of descriptor + sets. + + @param normType One of NORM_L1, NORM_L2, NORM_HAMMING. L1 and L2 norms are + preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and + BRIEF). + */ + static Ptr createBFMatcher(int norm = cv::NORM_L2); + + // + // Utility + // + + /** @brief Returns true if the descriptor matcher supports masking permissible matches. + */ + virtual bool isMaskSupported() const = 0; + + // + // Descriptor collection + // + + /** @brief Adds descriptors to train a descriptor collection. + + If the collection is not empty, the new descriptors are added to existing train descriptors. + + @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same + train image. + */ + virtual void add(const std::vector& descriptors) = 0; + + /** @brief Returns a constant link to the train descriptor collection. + */ + virtual const std::vector& getTrainDescriptors() const = 0; + + /** @brief Clears the train descriptor collection. + */ + virtual void clear() = 0; + + /** @brief Returns true if there are no train descriptors in the collection. + */ + virtual bool empty() const = 0; + + /** @brief Trains a descriptor matcher. + + Trains a descriptor matcher (for example, the flann index). In all methods to match, the method + train() is run every time before matching. + */ + virtual void train() = 0; + + // + // 1 to 1 match + // + + /** @brief Finds the best match for each descriptor from a query set (blocking version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches. If a query descriptor is masked out in mask , no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + + In the first variant of this method, the train descriptors are passed as an input argument. In the + second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is + used. Optional mask (or masks) can be passed to specify which query and training descriptors can be + matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if + mask.at\(i,j) is non-zero. + */ + virtual void match(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector& matches, + InputArray mask = noArray()) = 0; + + /** @overload + */ + virtual void match(InputArray queryDescriptors, + std::vector& matches, + const std::vector& masks = std::vector()) = 0; + + /** @brief Finds the best match for each descriptor from a query set (asynchronous version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. 
This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches array stored in GPU memory. Internal representation is not defined. + Use DescriptorMatcher::matchConvert method to retrieve results in standard representation. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param stream CUDA stream. + + In the first variant of this method, the train descriptors are passed as an input argument. In the + second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is + used. Optional mask (or masks) can be passed to specify which query and training descriptors can be + matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if + mask.at\(i,j) is non-zero. + */ + virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + InputArray mask = noArray(), + Stream& stream = Stream::Null()) = 0; + + /** @overload + */ + virtual void matchAsync(InputArray queryDescriptors, + OutputArray matches, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()) = 0; + + /** @brief Converts matches array from internal representation to standard matches vector. + + The method is supposed to be used with DescriptorMatcher::matchAsync to get final result. + Call this method only after DescriptorMatcher::matchAsync is completed (ie. after synchronization). + + @param gpu_matches Matches, returned from DescriptorMatcher::matchAsync. + @param matches Vector of DMatch objects. + */ + virtual void matchConvert(InputArray gpu_matches, + std::vector& matches) = 0; + + // + // knn match + // + + /** @brief Finds the k best matches for each descriptor from a query set (blocking version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + + These extended variants of DescriptorMatcher::match methods find several best matches for each query + descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match + for the details about query and train descriptors. + */ + virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector >& matches, + int k, + InputArray mask = noArray(), + bool compactResult = false) = 0; + + /** @overload + */ + virtual void knnMatch(InputArray queryDescriptors, + std::vector >& matches, + int k, + const std::vector& masks = std::vector(), + bool compactResult = false) = 0; + + /** @brief Finds the k best matches for each descriptor from a query set (asynchronous version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. 
This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches array stored in GPU memory. Internal representation is not defined. + Use DescriptorMatcher::knnMatchConvert method to retrieve results in standard representation. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + @param stream CUDA stream. + + These extended variants of DescriptorMatcher::matchAsync methods find several best matches for each query + descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::matchAsync + for the details about query and train descriptors. + */ + virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + int k, + InputArray mask = noArray(), + Stream& stream = Stream::Null()) = 0; + + /** @overload + */ + virtual void knnMatchAsync(InputArray queryDescriptors, + OutputArray matches, + int k, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()) = 0; + + /** @brief Converts matches array from internal representation to standard matches vector. + + The method is supposed to be used with DescriptorMatcher::knnMatchAsync to get final result. + Call this method only after DescriptorMatcher::knnMatchAsync is completed (ie. after synchronization). + + @param gpu_matches Matches, returned from DescriptorMatcher::knnMatchAsync. + @param matches Vector of DMatch objects. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ + virtual void knnMatchConvert(InputArray gpu_matches, + std::vector< std::vector >& matches, + bool compactResult = false) = 0; + + // + // radius match + // + + /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (blocking version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Found matches. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + + For each query descriptor, the methods find such training descriptors that the distance between the + query descriptor and the training descriptor is equal or smaller than maxDistance. 
Found matches are + returned in the distance increasing order. + */ + virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector >& matches, + float maxDistance, + InputArray mask = noArray(), + bool compactResult = false) = 0; + + /** @overload + */ + virtual void radiusMatch(InputArray queryDescriptors, + std::vector >& matches, + float maxDistance, + const std::vector& masks = std::vector(), + bool compactResult = false) = 0; + + /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (asynchronous version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches array stored in GPU memory. Internal representation is not defined. + Use DescriptorMatcher::radiusMatchConvert method to retrieve results in standard representation. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + @param stream CUDA stream. + + For each query descriptor, the methods find such training descriptors that the distance between the + query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are + returned in the distance increasing order. + */ + virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + float maxDistance, + InputArray mask = noArray(), + Stream& stream = Stream::Null()) = 0; + + /** @overload + */ + virtual void radiusMatchAsync(InputArray queryDescriptors, + OutputArray matches, + float maxDistance, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()) = 0; + + /** @brief Converts matches array from internal representation to standard matches vector. + + The method is supposed to be used with DescriptorMatcher::radiusMatchAsync to get final result. + Call this method only after DescriptorMatcher::radiusMatchAsync is completed (ie. after synchronization). + + @param gpu_matches Matches, returned from DescriptorMatcher::radiusMatchAsync. + @param matches Vector of DMatch objects. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. 
+ */ + virtual void radiusMatchConvert(InputArray gpu_matches, + std::vector< std::vector >& matches, + bool compactResult = false) = 0; }; // diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp index 0dcb043..9d81348 100644 --- a/modules/cudafeatures2d/perf/perf_features2d.cpp +++ b/modules/cudafeatures2d/perf/perf_features2d.cpp @@ -167,16 +167,16 @@ PERF_TEST_P(DescSize_Norm, BFMatch, if (PERF_RUN_CUDA()) { - cv::cuda::BFMatcher_CUDA d_matcher(normType); + cv::Ptr d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType); const cv::cuda::GpuMat d_query(query); const cv::cuda::GpuMat d_train(train); - cv::cuda::GpuMat d_trainIdx, d_distance; + cv::cuda::GpuMat d_matches; - TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + TEST_CYCLE() d_matcher->matchAsync(d_query, d_train, d_matches); std::vector gpu_matches; - d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches); + d_matcher->matchConvert(d_matches, gpu_matches); SANITY_CHECK_MATCHES(gpu_matches); } @@ -226,16 +226,16 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch, if (PERF_RUN_CUDA()) { - cv::cuda::BFMatcher_CUDA d_matcher(normType); + cv::Ptr d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType); const cv::cuda::GpuMat d_query(query); const cv::cuda::GpuMat d_train(train); - cv::cuda::GpuMat d_trainIdx, d_distance, d_allDist; + cv::cuda::GpuMat d_matches; - TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k); + TEST_CYCLE() d_matcher->knnMatchAsync(d_query, d_train, d_matches, k); std::vector< std::vector > matchesTbl; - d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl); + d_matcher->knnMatchConvert(d_matches, matchesTbl); std::vector gpu_matches; toOneRowMatches(matchesTbl, gpu_matches); @@ -280,16 +280,16 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch, if (PERF_RUN_CUDA()) { - cv::cuda::BFMatcher_CUDA d_matcher(normType); + cv::Ptr d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType); const cv::cuda::GpuMat d_query(query); const cv::cuda::GpuMat d_train(train); - cv::cuda::GpuMat d_trainIdx, d_nMatches, d_distance; + cv::cuda::GpuMat d_matches; - TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance); + TEST_CYCLE() d_matcher->radiusMatchAsync(d_query, d_train, d_matches, maxDistance); std::vector< std::vector > matchesTbl; - d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl); + d_matcher->radiusMatchConvert(d_matches, matchesTbl); std::vector gpu_matches; toOneRowMatches(matchesTbl, gpu_matches); diff --git a/modules/cudafeatures2d/src/brute_force_matcher.cpp b/modules/cudafeatures2d/src/brute_force_matcher.cpp index 5de0b06..a00537c 100644 --- a/modules/cudafeatures2d/src/brute_force_matcher.cpp +++ b/modules/cudafeatures2d/src/brute_force_matcher.cpp @@ -47,37 +47,7 @@ using namespace cv::cuda; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -cv::cuda::BFMatcher_CUDA::BFMatcher_CUDA(int) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::add(const std::vector&) { throw_no_cuda(); } -const std::vector& cv::cuda::BFMatcher_CUDA::getTrainDescriptors() const { throw_no_cuda(); return trainDescCollection; } -void cv::cuda::BFMatcher_CUDA::clear() { throw_no_cuda(); } -bool cv::cuda::BFMatcher_CUDA::empty() const { throw_no_cuda(); return true; } -bool cv::cuda::BFMatcher_CUDA::isMaskSupported() const { throw_no_cuda(); return true; } -void 
cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat&, const GpuMat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat&, const Mat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::match(const GpuMat&, const GpuMat&, std::vector&, const GpuMat&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::makeGpuCollection(GpuMat&, GpuMat&, const std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat&, const Mat&, const Mat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::match(const GpuMat&, std::vector&, const std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatchConvert(const Mat&, const Mat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector >&, int, const GpuMat&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch2Download(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch2Convert(const Mat&, const Mat&, const Mat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat&, std::vector< std::vector >&, int, const std::vector&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat&, const Mat&, const Mat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector >&, float, const GpuMat&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const std::vector&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat&, std::vector< std::vector >&, float, const std::vector&, bool) { throw_no_cuda(); } +Ptr cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr(); } #else /* 
!defined (HAVE_CUDA) */ @@ -155,857 +125,953 @@ namespace cv { namespace cuda { namespace device } }}} -//////////////////////////////////////////////////////////////////// -// Train collection - -cv::cuda::BFMatcher_CUDA::BFMatcher_CUDA(int norm_) : norm(norm_) +namespace { -} + static void makeGpuCollection(const std::vector& trainDescCollection, + const std::vector& masks, + GpuMat& trainCollection, + GpuMat& maskCollection) + { + if (trainDescCollection.empty()) + return; -void cv::cuda::BFMatcher_CUDA::add(const std::vector& descCollection) -{ - trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end()); -} + if (masks.empty()) + { + Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); -const std::vector& cv::cuda::BFMatcher_CUDA::getTrainDescriptors() const -{ - return trainDescCollection; -} + PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); -void cv::cuda::BFMatcher_CUDA::clear() -{ - trainDescCollection.clear(); -} + for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr) + *trainCollectionCPU_ptr = trainDescCollection[i]; -bool cv::cuda::BFMatcher_CUDA::empty() const -{ - return trainDescCollection.empty(); -} + trainCollection.upload(trainCollectionCPU); + maskCollection.release(); + } + else + { + CV_Assert( masks.size() == trainDescCollection.size() ); -bool cv::cuda::BFMatcher_CUDA::isMaskSupported() const -{ - return true; -} + Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); + Mat maskCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb))); -//////////////////////////////////////////////////////////////////// -// Match + PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); + PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr(); -void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, - const GpuMat& mask, Stream& stream) -{ - if (query.empty() || train.empty()) - return; + for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr) + { + const GpuMat& train = trainDescCollection[i]; + const GpuMat& mask = masks[i]; - using namespace cv::cuda::device::bf_match; + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) ); - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, - const PtrStepSzi& trainIdx, const PtrStepSzf& distance, - cudaStream_t stream); + *trainCollectionCPU_ptr = train; + *maskCollectionCPU_ptr = mask; + } - static const caller_t callersL1[] = - { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; + trainCollection.upload(trainCollectionCPU); + maskCollection.upload(maskCollectionCPU); + } + } - static const caller_t callersHamming[] = + class BFMatcher_Impl : public cv::cuda::DescriptorMatcher { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; - - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(train.cols == query.cols && train.type() == query.type()); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); - - const 
caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming; - - const int nQuery = query.rows; - - ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx); - ensureSizeIsEnough(1, nQuery, CV_32F, distance); - - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); + public: + explicit BFMatcher_Impl(int norm) : norm_(norm) + { + CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING ); + } - func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream)); -} + virtual bool isMaskSupported() const { return true; } -void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector& matches) -{ - if (trainIdx.empty() || distance.empty()) - return; + virtual void add(const std::vector& descriptors) + { + trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end()); + } - Mat trainIdxCPU(trainIdx); - Mat distanceCPU(distance); + virtual const std::vector& getTrainDescriptors() const + { + return trainDescCollection_; + } - matchConvert(trainIdxCPU, distanceCPU, matches); -} + virtual void clear() + { + trainDescCollection_.clear(); + } -void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat& trainIdx, const Mat& distance, std::vector& matches) -{ - if (trainIdx.empty() || distance.empty()) - return; + virtual bool empty() const + { + return trainDescCollection_.empty(); + } - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols); + virtual void train() + { + } - const int nQuery = trainIdx.cols; + virtual void match(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector& matches, + InputArray mask = noArray()); + + virtual void match(InputArray queryDescriptors, + std::vector& matches, + const std::vector& masks = std::vector()); + + virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + InputArray mask = noArray(), + Stream& stream = Stream::Null()); + + virtual void matchAsync(InputArray queryDescriptors, + OutputArray matches, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()); + + virtual void matchConvert(InputArray gpu_matches, + std::vector& matches); + + virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector >& matches, + int k, + InputArray mask = noArray(), + bool compactResult = false); + + virtual void knnMatch(InputArray queryDescriptors, + std::vector >& matches, + int k, + const std::vector& masks = std::vector(), + bool compactResult = false); + + virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + int k, + InputArray mask = noArray(), + Stream& stream = Stream::Null()); + + virtual void knnMatchAsync(InputArray queryDescriptors, + OutputArray matches, + int k, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()); + + virtual void knnMatchConvert(InputArray gpu_matches, + std::vector< std::vector >& matches, + bool compactResult = false); + + virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector >& matches, + float maxDistance, + InputArray mask = noArray(), + bool compactResult = false); + + virtual void radiusMatch(InputArray queryDescriptors, + std::vector >& matches, + float maxDistance, + const std::vector& masks = std::vector(), + bool compactResult = false); + + virtual void radiusMatchAsync(InputArray 
queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + float maxDistance, + InputArray mask = noArray(), + Stream& stream = Stream::Null()); + + virtual void radiusMatchAsync(InputArray queryDescriptors, + OutputArray matches, + float maxDistance, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()); + + virtual void radiusMatchConvert(InputArray gpu_matches, + std::vector< std::vector >& matches, + bool compactResult = false); + + private: + int norm_; + std::vector trainDescCollection_; + }; - matches.clear(); - matches.reserve(nQuery); + // + // 1 to 1 match + // - const int* trainIdx_ptr = trainIdx.ptr(); - const float* distance_ptr = distance.ptr(); - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr) + void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors, + std::vector& matches, + InputArray _mask) { - int train_idx = *trainIdx_ptr; - - if (train_idx == -1) - continue; - - float distance_local = *distance_ptr; - - DMatch m(queryIdx, train_idx, 0, distance_local); - - matches.push_back(m); + GpuMat d_matches; + matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask); + matchConvert(d_matches, matches); } -} - -void cv::cuda::BFMatcher_CUDA::match(const GpuMat& query, const GpuMat& train, - std::vector& matches, const GpuMat& mask) -{ - GpuMat trainIdx, distance; - matchSingle(query, train, trainIdx, distance, mask); - matchDownload(trainIdx, distance, matches); -} - -void cv::cuda::BFMatcher_CUDA::makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, - const std::vector& masks) -{ - if (empty()) - return; - if (masks.empty()) + void BFMatcher_Impl::match(InputArray _queryDescriptors, + std::vector& matches, + const std::vector& masks) { - Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); + GpuMat d_matches; + matchAsync(_queryDescriptors, d_matches, masks); + matchConvert(d_matches, matches); + } - PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); + void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors, + OutputArray _matches, + InputArray _mask, + Stream& stream) + { + using namespace cv::cuda::device::bf_match; - for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr) - *trainCollectionCPU_ptr = trainDescCollection[i]; + const GpuMat query = _queryDescriptors.getGpuMat(); + const GpuMat train = _trainDescriptors.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); - trainCollection.upload(trainCollectionCPU); - maskCollection.release(); - } - else - { - CV_Assert(masks.size() == trainDescCollection.size()); + if (query.empty() || train.empty()) + { + _matches.release(); + return; + } - Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); - Mat maskCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb))); + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + CV_Assert( train.cols == query.cols && train.type() == query.type() ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) ); - PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); - PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr(); + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, + cudaStream_t 
stream); - for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr) + static const caller_t callersL1[] = { - const GpuMat& train = trainDescCollection[i]; - const GpuMat& mask = masks[i]; + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; - CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows)); + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming; - *trainCollectionCPU_ptr = train; - *maskCollectionCPU_ptr = mask; + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); } - trainCollection.upload(trainCollectionCPU); - maskCollection.upload(maskCollectionCPU); - } -} + const int nQuery = query.rows; -void cv::cuda::BFMatcher_CUDA::matchCollection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, - const GpuMat& masks, Stream& stream) -{ - if (query.empty() || trainCollection.empty()) - return; + _matches.create(2, nQuery, CV_32SC1); + GpuMat matches = _matches.getGpuMat(); - using namespace cv::cuda::device::bf_match; + GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0)); + GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1)); - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, - const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, - cudaStream_t stream); + func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream)); + } - static const caller_t callersL1[] = - { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = + void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, + OutputArray _matches, + const std::vector& masks, + Stream& stream) { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - static const caller_t callersHamming[] = - { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; + using namespace cv::cuda::device::bf_match; - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); + const GpuMat query = _queryDescriptors.getGpuMat(); - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? 
callersL2 : callersHamming; - - const int nQuery = query.rows; + if (query.empty() || trainDescCollection_.empty()) + { + _matches.release(); + return; + } - ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx); - ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx); - ensureSizeIsEnough(1, nQuery, CV_32F, distance); + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); + GpuMat trainCollection, maskCollection; + makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection); - func(query, trainCollection, masks, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); -} + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, + cudaStream_t stream); -void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector& matches) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) - return; + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; - Mat trainIdxCPU(trainIdx); - Mat imgIdxCPU(imgIdx); - Mat distanceCPU(distance); + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming; - matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches); -} + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } -void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector& matches) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) - return; + const int nQuery = query.rows; - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.cols == trainIdx.cols); - CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols); + _matches.create(3, nQuery, CV_32SC1); + GpuMat matches = _matches.getGpuMat(); - const int nQuery = trainIdx.cols; + GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0)); + GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1)); + GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2)); - matches.clear(); - matches.reserve(nQuery); + func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); + } - const int* trainIdx_ptr = trainIdx.ptr(); - const int* imgIdx_ptr = imgIdx.ptr(); - const float* distance_ptr = distance.ptr(); - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) + void BFMatcher_Impl::matchConvert(InputArray _gpu_matches, + std::vector& matches) { - int _trainIdx = *trainIdx_ptr; - - if (_trainIdx == -1) - continue; - - int _imgIdx = *imgIdx_ptr; + Mat gpu_matches; + if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT) + { + _gpu_matches.getGpuMat().download(gpu_matches); + } + else + { + gpu_matches = _gpu_matches.getMat(); + } - float _distance = *distance_ptr; + if (gpu_matches.empty()) + { + matches.clear(); + return; + 
} - DMatch m(queryIdx, _trainIdx, _imgIdx, _distance); + CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) ); - matches.push_back(m); - } -} + const int nQuery = gpu_matches.cols; -void cv::cuda::BFMatcher_CUDA::match(const GpuMat& query, std::vector& matches, const std::vector& masks) -{ - GpuMat trainCollection; - GpuMat maskCollection; + matches.clear(); + matches.reserve(nQuery); - makeGpuCollection(trainCollection, maskCollection, masks); + const int* trainIdxPtr = NULL; + const int* imgIdxPtr = NULL; + const float* distancePtr = NULL; - GpuMat trainIdx, imgIdx, distance; + if (gpu_matches.rows == 2) + { + trainIdxPtr = gpu_matches.ptr(0); + distancePtr = gpu_matches.ptr(1); + } + else + { + trainIdxPtr = gpu_matches.ptr(0); + imgIdxPtr = gpu_matches.ptr(1); + distancePtr = gpu_matches.ptr(2); + } - matchCollection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection); - matchDownload(trainIdx, imgIdx, distance, matches); -} + for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) + { + const int trainIdx = trainIdxPtr[queryIdx]; + if (trainIdx == -1) + continue; -//////////////////////////////////////////////////////////////////// -// KnnMatch + const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0; + const float distance = distancePtr[queryIdx]; -void cv::cuda::BFMatcher_CUDA::knnMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, - const GpuMat& mask, Stream& stream) -{ - if (query.empty() || train.empty()) - return; + DMatch m(queryIdx, trainIdx, imgIdx, distance); - using namespace cv::cuda::device::bf_knnmatch; + matches.push_back(m); + } + } - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, - const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, - cudaStream_t stream); + // + // knn match + // - static const caller_t callersL1[] = + void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors, + std::vector >& matches, + int k, + InputArray _mask, + bool compactResult) { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - static const caller_t callersHamming[] = + GpuMat d_matches; + knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask); + knnMatchConvert(d_matches, matches, compactResult); + } + + void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, + std::vector >& matches, + int k, + const std::vector& masks, + bool compactResult) { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; + if (k == 2) + { + GpuMat d_matches; + knnMatchAsync(_queryDescriptors, d_matches, k, masks); + knnMatchConvert(d_matches, matches, compactResult); + } + else + { + const GpuMat query = _queryDescriptors.getGpuMat(); - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(train.type() == query.type() && train.cols == query.cols); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); + if (query.empty() || trainDescCollection_.empty()) + { + matches.clear(); + return; + } - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? 
callersL2 : callersHamming; + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); - const int nQuery = query.rows; - const int nTrain = train.rows; + std::vector< std::vector > curMatches; + std::vector temp; + temp.reserve(2 * k); - if (k == 2) - { - ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx); - ensureSizeIsEnough(1, nQuery, CV_32FC2, distance); - } - else - { - ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx); - ensureSizeIsEnough(nQuery, k, CV_32F, distance); - ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist); - } + matches.resize(query.rows); + for (size_t i = 0; i < matches.size(); ++i) + matches[i].reserve(k); - trainIdx.setTo(Scalar::all(-1), stream); + for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx) + { + knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]); - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); + for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx) + { + std::vector& localMatch = curMatches[queryIdx]; + std::vector& globalMatch = matches[queryIdx]; - func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream)); -} + for (size_t i = 0; i < localMatch.size(); ++i) + localMatch[i].imgIdx = imgIdx; -void cv::cuda::BFMatcher_CUDA::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty()) - return; + temp.clear(); + std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp)); - Mat trainIdxCPU(trainIdx); - Mat distanceCPU(distance); + globalMatch.clear(); + const size_t count = std::min(static_cast(k), temp.size()); + std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch)); + } + } - knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult); -} + if (compactResult) + { + std::vector< std::vector >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector::empty)); + matches.erase(new_end, matches.end()); + } + } + } -void cv::cuda::BFMatcher_CUDA::knnMatchConvert(const Mat& trainIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty()) - return; + void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors, + OutputArray _matches, + int k, + InputArray _mask, + Stream& stream) + { + using namespace cv::cuda::device::bf_knnmatch; - CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1); - CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1); - CV_Assert(distance.size() == trainIdx.size()); - CV_Assert(trainIdx.isContinuous() && distance.isContinuous()); + const GpuMat query = _queryDescriptors.getGpuMat(); + const GpuMat train = _trainDescriptors.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); + + if (query.empty() || train.empty()) + { + _matches.release(); + return; + } - const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows; - const int k = trainIdx.type() == CV_32SC2 ? 
2 :trainIdx.cols; + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + CV_Assert( train.cols == query.cols && train.type() == query.type() ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) ); - matches.clear(); - matches.reserve(nQuery); + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, + cudaStream_t stream); - const int* trainIdx_ptr = trainIdx.ptr(); - const float* distance_ptr = distance.ptr(); + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) - { - matches.push_back(std::vector()); - std::vector& curMatches = matches.back(); - curMatches.reserve(k); + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming; - for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr) + const caller_t func = callers[query.depth()]; + if (func == 0) { - int _trainIdx = *trainIdx_ptr; + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } - if (_trainIdx != -1) - { - float _distance = *distance_ptr; + const int nQuery = query.rows; + const int nTrain = train.rows; - DMatch m(queryIdx, _trainIdx, 0, _distance); + GpuMat trainIdx, distance, allDist; + if (k == 2) + { + _matches.create(2, nQuery, CV_32SC2); + GpuMat matches = _matches.getGpuMat(); - curMatches.push_back(m); - } + trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0)); + distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1)); } + else + { + _matches.create(2 * nQuery, k, CV_32SC1); + GpuMat matches = _matches.getGpuMat(); - if (compactResult && curMatches.empty()) - matches.pop_back(); - } -} - -void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat& query, const GpuMat& train, - std::vector< std::vector >& matches, int k, const GpuMat& mask, bool compactResult) -{ - GpuMat trainIdx, distance, allDist; - knnMatchSingle(query, train, trainIdx, distance, allDist, k, mask); - knnMatchDownload(trainIdx, distance, matches, compactResult); -} + trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step); + distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step); -void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, - const GpuMat& maskCollection, Stream& stream) -{ - if (query.empty() || trainCollection.empty()) - return; + BufferPool pool(stream); + allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1); + } - using namespace cv::cuda::device::bf_knnmatch; + trainIdx.setTo(Scalar::all(-1), stream); - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, - const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, - cudaStream_t stream); + func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream)); + } - static const caller_t callersL1[] = - { - 
match2L1_gpu, 0/*match2L1_gpu*/, - match2L1_gpu, match2L1_gpu, - match2L1_gpu, match2L1_gpu - }; - static const caller_t callersL2[] = - { - 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, - 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, - 0/*match2L2_gpu*/, match2L2_gpu - }; - static const caller_t callersHamming[] = + void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, + OutputArray _matches, + int k, + const std::vector& masks, + Stream& stream) { - match2Hamming_gpu, 0/*match2Hamming_gpu*/, - match2Hamming_gpu, 0/*match2Hamming_gpu*/, - match2Hamming_gpu, 0/*match2Hamming_gpu*/ - }; - - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); + using namespace cv::cuda::device::bf_knnmatch; - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming; + if (k != 2) + { + CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now"); + } - const int nQuery = query.rows; + const GpuMat query = _queryDescriptors.getGpuMat(); - ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx); - ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx); - ensureSizeIsEnough(1, nQuery, CV_32FC2, distance); + if (query.empty() || trainDescCollection_.empty()) + { + _matches.release(); + return; + } - trainIdx.setTo(Scalar::all(-1), stream); + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); + GpuMat trainCollection, maskCollection; + makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection); - func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); -} + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, + cudaStream_t stream); -void cv::cuda::BFMatcher_CUDA::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) - return; + static const caller_t callersL1[] = + { + match2L1_gpu, 0/*match2L1_gpu*/, + match2L1_gpu, match2L1_gpu, + match2L1_gpu, match2L1_gpu + }; + static const caller_t callersL2[] = + { + 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, + 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, + 0/*match2L2_gpu*/, match2L2_gpu + }; + static const caller_t callersHamming[] = + { + match2Hamming_gpu, 0/*match2Hamming_gpu*/, + match2Hamming_gpu, 0/*match2Hamming_gpu*/, + match2Hamming_gpu, 0/*match2Hamming_gpu*/ + }; - Mat trainIdxCPU(trainIdx); - Mat imgIdxCPU(imgIdx); - Mat distanceCPU(distance); + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? 
callersL2 : callersHamming; - knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult); -} + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } -void cv::cuda::BFMatcher_CUDA::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) - return; + const int nQuery = query.rows; - CV_Assert(trainIdx.type() == CV_32SC2); - CV_Assert(imgIdx.type() == CV_32SC2 && imgIdx.cols == trainIdx.cols); - CV_Assert(distance.type() == CV_32FC2 && distance.cols == trainIdx.cols); + _matches.create(3, nQuery, CV_32SC2); + GpuMat matches = _matches.getGpuMat(); - const int nQuery = trainIdx.cols; + GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0)); + GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1)); + GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2)); - matches.clear(); - matches.reserve(nQuery); + trainIdx.setTo(Scalar::all(-1), stream); - const int* trainIdx_ptr = trainIdx.ptr(); - const int* imgIdx_ptr = imgIdx.ptr(); - const float* distance_ptr = distance.ptr(); + func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); + } - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) + void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches, + std::vector< std::vector >& matches, + bool compactResult) { - matches.push_back(std::vector()); - std::vector& curMatches = matches.back(); - curMatches.reserve(2); - - for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) + Mat gpu_matches; + if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT) { - int _trainIdx = *trainIdx_ptr; - - if (_trainIdx != -1) - { - int _imgIdx = *imgIdx_ptr; - - float _distance = *distance_ptr; - - DMatch m(queryIdx, _trainIdx, _imgIdx, _distance); - - curMatches.push_back(m); - } + _gpu_matches.getGpuMat().download(gpu_matches); + } + else + { + gpu_matches = _gpu_matches.getMat(); } - if (compactResult && curMatches.empty()) - matches.pop_back(); - } -} + if (gpu_matches.empty()) + { + matches.clear(); + return; + } -namespace -{ - struct ImgIdxSetter - { - explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {} - inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;} - int imgIdx; - }; -} + CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) || + (gpu_matches.type() == CV_32SC1) ); -void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat& query, std::vector< std::vector >& matches, int k, - const std::vector& masks, bool compactResult) -{ - if (k == 2) - { - GpuMat trainCollection; - GpuMat maskCollection; + int nQuery = -1, k = -1; - makeGpuCollection(trainCollection, maskCollection, masks); + const int* trainIdxPtr = NULL; + const int* imgIdxPtr = NULL; + const float* distancePtr = NULL; - GpuMat trainIdx, imgIdx, distance; + if (gpu_matches.type() == CV_32SC2) + { + nQuery = gpu_matches.cols; + k = 2; - knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection); - knnMatch2Download(trainIdx, imgIdx, distance, matches); - } - else - { - if (query.empty() || empty()) - return; + if (gpu_matches.rows == 2) + { + trainIdxPtr = gpu_matches.ptr(0); + distancePtr = gpu_matches.ptr(1); + } + else + { + trainIdxPtr = gpu_matches.ptr(0); + imgIdxPtr = gpu_matches.ptr(1); + distancePtr = 
gpu_matches.ptr(2); + } + } + else + { + nQuery = gpu_matches.rows / 2; + k = gpu_matches.cols; - std::vector< std::vector > curMatches; - std::vector temp; - temp.reserve(2 * k); + trainIdxPtr = gpu_matches.ptr(0); + distancePtr = gpu_matches.ptr(nQuery); + } - matches.resize(query.rows); - for_each(matches.begin(), matches.end(), bind2nd(mem_fun_ref(&std::vector::reserve), k)); + matches.clear(); + matches.reserve(nQuery); - for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx) + for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) { - knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]); + matches.push_back(std::vector()); + std::vector& curMatches = matches.back(); + curMatches.reserve(k); - for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx) + for (int i = 0; i < k; ++i) { - std::vector& localMatch = curMatches[queryIdx]; - std::vector& globalMatch = matches[queryIdx]; + const int trainIdx = *trainIdxPtr; + if (trainIdx == -1) + continue; - for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast(imgIdx))); + const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0; + const float distance = *distancePtr; - temp.clear(); - merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp)); + DMatch m(queryIdx, trainIdx, imgIdx, distance); + + curMatches.push_back(m); - globalMatch.clear(); - const size_t count = std::min((size_t)k, temp.size()); - copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch)); + ++trainIdxPtr; + ++distancePtr; + if (imgIdxPtr) + ++imgIdxPtr; } - } - if (compactResult) - { - std::vector< std::vector >::iterator new_end = remove_if(matches.begin(), matches.end(), mem_fun_ref(&std::vector::empty)); - matches.erase(new_end, matches.end()); + if (compactResult && curMatches.empty()) + { + matches.pop_back(); + } } } -} - -//////////////////////////////////////////////////////////////////// -// RadiusMatch - -void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, - const GpuMat& mask, Stream& stream) -{ - if (query.empty() || train.empty()) - return; - - using namespace cv::cuda::device::bf_radius_match; - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, - const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, - cudaStream_t stream); + // + // radius match + // - static const caller_t callersL1[] = + void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors, + std::vector >& matches, + float maxDistance, + InputArray _mask, + bool compactResult) { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - static const caller_t callersHamming[] = - { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; + GpuMat d_matches; + radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask); + radiusMatchConvert(d_matches, matches, compactResult); + } - const int nQuery = query.rows; - const int nTrain = train.rows; + void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, + std::vector >& matches, + 
float maxDistance, + const std::vector& masks, + bool compactResult) + { + GpuMat d_matches; + radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks); + radiusMatchConvert(d_matches, matches, compactResult); + } - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(train.type() == query.type() && train.cols == query.cols); - CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size())); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); + void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors, + OutputArray _matches, + float maxDistance, + InputArray _mask, + Stream& stream) + { + using namespace cv::cuda::device::bf_radius_match; - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming; + const GpuMat query = _queryDescriptors.getGpuMat(); + const GpuMat train = _trainDescriptors.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); - ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches); - if (trainIdx.empty()) - { - ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx); - ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance); - } + if (query.empty() || train.empty()) + { + _matches.release(); + return; + } - nMatches.setTo(Scalar::all(0), stream); + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + CV_Assert( train.cols == query.cols && train.type() == query.type() ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) ); - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, + cudaStream_t stream); - func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream)); -} + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; -void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty() || nMatches.empty()) - return; + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? 
callersL2 : callersHamming; - Mat trainIdxCPU(trainIdx); - Mat distanceCPU(distance); - Mat nMatchesCPU(nMatches); + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } - radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); -} + const int nQuery = query.rows; + const int nTrain = train.rows; -void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty() || nMatches.empty()) - return; + const int cols = std::max((nTrain / 100), nQuery); - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size()); - CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows); + _matches.create(2 * nQuery + 1, cols, CV_32SC1); + GpuMat matches = _matches.getGpuMat(); - const int nQuery = trainIdx.rows; + GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step); + GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step); + GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery)); - matches.clear(); - matches.reserve(nQuery); + nMatches.setTo(Scalar::all(0), stream); - const int* nMatches_ptr = nMatches.ptr(); + func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream)); + } - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) + void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, + OutputArray _matches, + float maxDistance, + const std::vector& masks, + Stream& stream) { - const int* trainIdx_ptr = trainIdx.ptr(queryIdx); - const float* distance_ptr = distance.ptr(queryIdx); + using namespace cv::cuda::device::bf_radius_match; - const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols); + const GpuMat query = _queryDescriptors.getGpuMat(); - if (nMatched == 0) + if (query.empty() || trainDescCollection_.empty()) { - if (!compactResult) - matches.push_back(std::vector()); - continue; + _matches.release(); + return; } - matches.push_back(std::vector(nMatched)); - std::vector& curMatches = matches.back(); - - for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++distance_ptr) - { - int _trainIdx = *trainIdx_ptr; + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); - float _distance = *distance_ptr; + GpuMat trainCollection, maskCollection; + makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection); - DMatch m(queryIdx, _trainIdx, 0, _distance); - - curMatches[i] = m; - } + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, + cudaStream_t stream); - sort(curMatches.begin(), curMatches.end()); - } -} - -void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat& query, const GpuMat& train, - std::vector< std::vector >& matches, float maxDistance, const GpuMat& mask, bool compactResult) -{ - GpuMat trainIdx, distance, nMatches; - radiusMatchSingle(query, train, trainIdx, distance, nMatches, maxDistance, mask); - radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult); -} + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static 
const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; -void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, - float maxDistance, const std::vector& masks, Stream& stream) -{ - if (query.empty() || empty()) - return; + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming; - using namespace cv::cuda::device::bf_radius_match; + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, - const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, - cudaStream_t stream); + const int nQuery = query.rows; - static const caller_t callersL1[] = - { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - static const caller_t callersHamming[] = - { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; + _matches.create(3 * nQuery + 1, nQuery, CV_32FC1); + GpuMat matches = _matches.getGpuMat(); - const int nQuery = query.rows; + GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step); + GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step); + GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step); + GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery)); - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size())); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); + nMatches.setTo(Scalar::all(0), stream); - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming; + std::vector trains_(trainDescCollection_.begin(), trainDescCollection_.end()); + std::vector masks_(masks.begin(), masks.end()); - ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches); - if (trainIdx.empty()) - { - ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, trainIdx); - ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, imgIdx); - ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32FC1, distance); + func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 
0 : &masks_[0], + trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream)); } - nMatches.setTo(Scalar::all(0), stream); - - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); - - std::vector trains_(trainDescCollection.begin(), trainDescCollection.end()); - std::vector masks_(masks.begin(), masks.end()); + void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches, + std::vector< std::vector >& matches, + bool compactResult) + { + Mat gpu_matches; + if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT) + { + _gpu_matches.getGpuMat().download(gpu_matches); + } + else + { + gpu_matches = _gpu_matches.getMat(); + } - func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0], - trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream)); -} + if (gpu_matches.empty()) + { + matches.clear(); + return; + } -void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) - return; + CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 ); - Mat trainIdxCPU(trainIdx); - Mat imgIdxCPU(imgIdx); - Mat distanceCPU(distance); - Mat nMatchesCPU(nMatches); + int nQuery = -1; - radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); -} + const int* trainIdxPtr = NULL; + const int* imgIdxPtr = NULL; + const float* distancePtr = NULL; + const int* nMatchesPtr = NULL; -void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) - return; + if (gpu_matches.type() == CV_32SC1) + { + nQuery = (gpu_matches.rows - 1) / 2; - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.size() == trainIdx.size()); - CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size()); - CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows); + trainIdxPtr = gpu_matches.ptr(0); + distancePtr = gpu_matches.ptr(nQuery); + nMatchesPtr = gpu_matches.ptr(2 * nQuery); + } + else + { + nQuery = (gpu_matches.rows - 1) / 3; - const int nQuery = trainIdx.rows; + trainIdxPtr = gpu_matches.ptr(0); + imgIdxPtr = gpu_matches.ptr(nQuery); + distancePtr = gpu_matches.ptr(2 * nQuery); + nMatchesPtr = gpu_matches.ptr(3 * nQuery); + } - matches.clear(); - matches.reserve(nQuery); + matches.clear(); + matches.reserve(nQuery); - const int* nMatches_ptr = nMatches.ptr(); + for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) + { + const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols); - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) - { - const int* trainIdx_ptr = trainIdx.ptr(queryIdx); - const int* imgIdx_ptr = imgIdx.ptr(queryIdx); - const float* distance_ptr = distance.ptr(queryIdx); + if (nMatched == 0) + { + if (!compactResult) + { + matches.push_back(std::vector()); + } + } + else + { + matches.push_back(std::vector(nMatched)); + std::vector& curMatches = matches.back(); - const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols); + for (int i = 0; i < nMatched; ++i) + { + const int trainIdx = trainIdxPtr[i]; - if (nMatched == 0) - { - if (!compactResult) - 
matches.push_back(std::vector()); - continue; - } + const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0; + const float distance = distancePtr[i]; - matches.push_back(std::vector()); - std::vector& curMatches = matches.back(); - curMatches.reserve(nMatched); + DMatch m(queryIdx, trainIdx, imgIdx, distance); - for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) - { - int _trainIdx = *trainIdx_ptr; - int _imgIdx = *imgIdx_ptr; - float _distance = *distance_ptr; + curMatches[i] = m; + } - DMatch m(queryIdx, _trainIdx, _imgIdx, _distance); + std::sort(curMatches.begin(), curMatches.end()); + } - curMatches.push_back(m); + trainIdxPtr += gpu_matches.cols; + distancePtr += gpu_matches.cols; + if (imgIdxPtr) + imgIdxPtr += gpu_matches.cols; } - - sort(curMatches.begin(), curMatches.end()); } } -void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat& query, std::vector< std::vector >& matches, - float maxDistance, const std::vector& masks, bool compactResult) +Ptr cv::cuda::DescriptorMatcher::createBFMatcher(int norm) { - GpuMat trainIdx, imgIdx, distance, nMatches; - radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks); - radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult); + return makePtr(norm); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp index 25ba48f..3046a60 100644 --- a/modules/cudafeatures2d/test/test_features2d.cpp +++ b/modules/cudafeatures2d/test/test_features2d.cpp @@ -285,7 +285,8 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::cuda::DeviceInfo, NormCode, DescriptorSiz CUDA_TEST_P(BruteForceMatcher, Match_Single) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); cv::cuda::GpuMat mask; if (useMask) @@ -295,7 +296,7 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single) } std::vector matches; - matcher.match(loadMat(query), loadMat(train), matches, mask); + matcher->match(loadMat(query), loadMat(train), matches, mask); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -312,13 +313,14 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single) CUDA_TEST_P(BruteForceMatcher, Match_Collection) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); cv::cuda::GpuMat d_train(train); // make add() twice to test such case - matcher.add(std::vector(1, d_train.rowRange(0, train.rows / 2))); - matcher.add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); + matcher->add(std::vector(1, d_train.rowRange(0, train.rows / 2))); + matcher->add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); // prepare masks (make first nearest match illegal) std::vector masks(2); @@ -331,9 +333,9 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection) std::vector matches; if (useMask) - matcher.match(cv::cuda::GpuMat(query), matches, masks); + matcher->match(cv::cuda::GpuMat(query), matches, masks); else - matcher.match(cv::cuda::GpuMat(query), matches); + matcher->match(cv::cuda::GpuMat(query), matches); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -366,7 +368,8 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection) CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int knn = 2; @@ -378,7 +381,7 @@ CUDA_TEST_P(BruteForceMatcher, 
KnnMatch_2_Single) } std::vector< std::vector > matches; - matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask); + matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -405,7 +408,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single) CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int knn = 3; @@ -417,7 +421,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single) } std::vector< std::vector > matches; - matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask); + matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -444,15 +448,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single) CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int knn = 2; cv::cuda::GpuMat d_train(train); // make add() twice to test such case - matcher.add(std::vector(1, d_train.rowRange(0, train.rows / 2))); - matcher.add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); + matcher->add(std::vector(1, d_train.rowRange(0, train.rows / 2))); + matcher->add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); // prepare masks (make first nearest match illegal) std::vector masks(2); @@ -466,9 +471,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection) std::vector< std::vector > matches; if (useMask) - matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks); + matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks); else - matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn); + matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -506,15 +511,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection) CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int knn = 3; cv::cuda::GpuMat d_train(train); // make add() twice to test such case - matcher.add(std::vector(1, d_train.rowRange(0, train.rows / 2))); - matcher.add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); + matcher->add(std::vector(1, d_train.rowRange(0, train.rows / 2))); + matcher->add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); // prepare masks (make first nearest match illegal) std::vector masks(2); @@ -528,9 +534,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection) std::vector< std::vector > matches; if (useMask) - matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks); + matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks); else - matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn); + matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -568,7 +574,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection) CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const float radius = 1.f / countFactor; @@ -577,7 +584,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single) try { std::vector< std::vector > matches; - 
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius); + matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius); } catch (const cv::Exception& e) { @@ -594,7 +601,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single) } std::vector< std::vector > matches; - matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius, mask); + matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius, mask); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -617,7 +624,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single) CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int n = 3; const float radius = 1.f / countFactor * n; @@ -625,8 +633,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection) cv::cuda::GpuMat d_train(train); // make add() twice to test such case - matcher.add(std::vector(1, d_train.rowRange(0, train.rows / 2))); - matcher.add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); + matcher->add(std::vector(1, d_train.rowRange(0, train.rows / 2))); + matcher->add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); // prepare masks (make first nearest match illegal) std::vector masks(2); @@ -642,7 +650,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection) try { std::vector< std::vector > matches; - matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks); + matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks); } catch (const cv::Exception& e) { @@ -654,9 +662,9 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection) std::vector< std::vector > matches; if (useMask) - matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks); + matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks); else - matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius); + matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius); ASSERT_EQ(static_cast(queryDescCount), matches.size()); diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 49ee0f4..ee05268 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -154,7 +154,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat matches_info.matches.clear(); - Ptr matcher; + Ptr matcher; #if 0 // TODO check this if (ocl::useOpenCL()) { @@ -220,13 +220,13 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat descriptors1_.upload(features1.descriptors); descriptors2_.upload(features2.descriptors); - BFMatcher_CUDA matcher(NORM_L2); + Ptr matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2); + MatchesSet matches; // Find 1->2 matches pair_matches.clear(); - matcher.knnMatchSingle(descriptors1_, descriptors2_, train_idx_, distance_, all_dist_, 2); - matcher.knnMatchDownload(train_idx_, distance_, pair_matches); + matcher->knnMatch(descriptors1_, descriptors2_, pair_matches, 2); for (size_t i = 0; i < pair_matches.size(); ++i) { if (pair_matches[i].size() < 2) @@ -242,8 +242,7 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat // Find 2->1 matches pair_matches.clear(); - matcher.knnMatchSingle(descriptors2_, descriptors1_, train_idx_, distance_, all_dist_, 2); - matcher.knnMatchDownload(train_idx_, distance_, pair_matches); + matcher->knnMatch(descriptors2_, descriptors1_, pair_matches, 2); for (size_t i = 0; i < 
pair_matches.size(); ++i) { if (pair_matches[i].size() < 2) diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 0d083e5..0909428 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -379,14 +379,14 @@ TEST(BruteForceMatcher) // Init CUDA matcher - cuda::BFMatcher_CUDA d_matcher(NORM_L2); + Ptr d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2); cuda::GpuMat d_query(query); cuda::GpuMat d_train(train); // Output vector< vector > matches(2); - cuda::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches; + cuda::GpuMat d_matches; SUBTEST << "match"; @@ -396,10 +396,10 @@ TEST(BruteForceMatcher) matcher.match(query, train, matches[0]); CPU_OFF; - d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + d_matcher->matchAsync(d_query, d_train, d_matches); CUDA_ON; - d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + d_matcher->matchAsync(d_query, d_train, d_matches); CUDA_OFF; SUBTEST << "knnMatch"; @@ -410,10 +410,10 @@ TEST(BruteForceMatcher) matcher.knnMatch(query, train, matches, 2); CPU_OFF; - d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); + d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2); CUDA_ON; - d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); + d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2); CUDA_OFF; SUBTEST << "radiusMatch"; @@ -426,12 +426,10 @@ TEST(BruteForceMatcher) matcher.radiusMatch(query, train, matches, max_distance); CPU_OFF; - d_trainIdx.release(); - - d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); + d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance); CUDA_ON; - d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); + d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance); CUDA_OFF; }
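
A minimal usage sketch of the refactored synchronous interface introduced by this patch (not part of the patch itself; the descriptor sizes, the NORM_HAMMING choice and the 0.8 ratio-test threshold are illustrative assumptions):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>

#include <vector>

int main()
{
    // Illustrative binary descriptors (ORB-like): 500 rows of 32 bytes each.
    cv::Mat queryCpu(500, 32, CV_8U), trainCpu(500, 32, CV_8U);
    cv::randu(queryCpu, cv::Scalar::all(0), cv::Scalar::all(255));
    cv::randu(trainCpu, cv::Scalar::all(0), cv::Scalar::all(255));

    cv::cuda::GpuMat query(queryCpu), train(trainCpu);

    // The factory replaces direct construction of BFMatcher_CUDA.
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);

    // One best match per query descriptor.
    std::vector<cv::DMatch> matches;
    matcher->match(query, train, matches);

    // k-NN matching (k = 2) followed by a ratio test.
    std::vector< std::vector<cv::DMatch> > knn;
    matcher->knnMatch(query, train, knn, 2);

    std::vector<cv::DMatch> good;
    for (size_t i = 0; i < knn.size(); ++i)
        if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
            good.push_back(knn[i][0]);

    // Radius matching against a train collection built with add().
    matcher->add(std::vector<cv::cuda::GpuMat>(1, train));
    std::vector< std::vector<cv::DMatch> > radiusMatches;
    matcher->radiusMatch(query, radiusMatches, 40.0f);

    return 0;
}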
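
The asynchronous path splits matching into an *Async call, which leaves packed results in a single GpuMat, and a *Convert call, which unpacks them on the host. A sketch, assuming query and train hold single-channel CV_32F descriptors (knnMatchOnStream is a hypothetical helper, not part of the patch):

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudafeatures2d.hpp>

#include <vector>

void knnMatchOnStream(const cv::cuda::GpuMat& query,
                      const cv::cuda::GpuMat& train,
                      std::vector< std::vector<cv::DMatch> >& matches)
{
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_L2);

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_matches;   // packed trainIdx/distance result written on the GPU

    // Enqueue the k-NN match on the stream; the host is not blocked here.
    matcher->knnMatchAsync(query, train, d_matches, 2, cv::noArray(), stream);

    // ... other device or host work can proceed while the match runs ...

    stream.waitForCompletion();

    // Download the packed matrix and convert it to the usual vector-of-DMatch form.
    matcher->knnMatchConvert(d_matches, matches);
}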