From: Vladislav Vinogradov Date: Tue, 27 Mar 2012 07:33:39 +0000 (+0000) Subject: added assertion on device features (global atomics) into gpu tests X-Git-Tag: accepted/2.0/20130307.220821~999 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bd13e9479b223f234f5e74c668778a3a685a46db;p=profile%2Fivi%2Fopencv.git added assertion on device features (global atomics) into gpu tests --- diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp index 7f11282..8270dfe 100644 --- a/modules/gpu/src/brute_force_matcher.cpp +++ b/modules/gpu/src/brute_force_matcher.cpp @@ -82,76 +82,76 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec #else /* !defined (HAVE_CUDA) */ -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { namespace bf_match { - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); } namespace bf_knnmatch { - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, + const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, + const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, + const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); } - namespace bf_radius_match + namespace bf_radius_match { - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); } }}} @@ -192,7 +192,7 @@ bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const // Match void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, + GpuMat& trainIdx, GpuMat& distance, const GpuMat& mask, Stream& stream) { if (query.empty() || train.empty()) @@ -200,25 +200,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const using namespace ::cv::gpu::device::bf_match; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, + typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); static const caller_t callers[3][6] = { { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, matchL1_gpu, matchL1_gpu }, { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, matchL2_gpu }, { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, matchHamming_gpu, 0/*matchHamming_gpu*/ } }; @@ -334,7 +334,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollect } void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, + GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& masks, Stream& stream) { if (query.empty() || trainCollection.empty()) @@ -342,8 +342,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c using namespace ::cv::gpu::device::bf_match; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); static const caller_t callers[3][6] = @@ -453,25 +453,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co using namespace ::cv::gpu::device::bf_knnmatch; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, + const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); static const caller_t callers[3][6] = { { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, matchL1_gpu, matchL1_gpu }, { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, matchL2_gpu }, { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, matchHamming_gpu, 0/*matchHamming_gpu*/ } }; @@ -501,7 +501,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co caller_t func = callers[distType][query.depth()]; CV_Assert(func != 0); - + DeviceInfo info; int cc = info.majorVersion() * 10 + info.minorVersion(); @@ -520,7 +520,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult); } -void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance, +void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance, vector< vector >& matches, bool compactResult) { if (trainIdx.empty() || distance.empty()) @@ -536,7 +536,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c matches.clear(); matches.reserve(nQuery); - + const int* trainIdx_ptr = trainIdx.ptr(); const float* distance_ptr = distance.ptr(); @@ -582,25 +582,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer using namespace ::cv::gpu::device::bf_knnmatch; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); static const caller_t callers[3][6] = { { - match2L1_gpu, 0/*match2L1_gpu*/, - match2L1_gpu, match2L1_gpu, + match2L1_gpu, 0/*match2L1_gpu*/, + match2L1_gpu, match2L1_gpu, match2L1_gpu, match2L1_gpu }, { - 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, - 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, + 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, + 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, match2L2_gpu }, { - match2Hamming_gpu, 0/*match2Hamming_gpu*/, - match2Hamming_gpu, 0/*match2Hamming_gpu*/, + match2Hamming_gpu, 0/*match2Hamming_gpu*/, + match2Hamming_gpu, 0/*match2Hamming_gpu*/, match2Hamming_gpu, 0/*match2Hamming_gpu*/ } }; @@ -620,7 +620,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer caller_t func = callers[distType][query.depth()]; CV_Assert(func != 0); - + DeviceInfo info; int cc = info.majorVersion() * 10 + info.minorVersion(); @@ -654,7 +654,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Convert(const Mat& trainIdx, matches.clear(); matches.reserve(nQuery); - + const int* trainIdx_ptr = trainIdx.ptr(); const int* imgIdx_ptr = imgIdx.ptr(); const float* distance_ptr = distance.ptr(); @@ -755,33 +755,33 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& query, vector< // RadiusMatch void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, + GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, const GpuMat& mask, Stream& stream) { if (query.empty() || train.empty()) return; - using namespace ::cv::gpu::device::bf_radius_match; + using namespace cv::gpu::device::bf_radius_match; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); static const caller_t callers[3][6] = { { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, matchL1_gpu, matchL1_gpu }, { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, matchL2_gpu }, { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, matchHamming_gpu, 0/*matchHamming_gpu*/ } }; @@ -789,7 +789,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, DeviceInfo info; int cc = info.majorVersion() * 10 + info.minorVersion(); - CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && info.supports(GLOBAL_ATOMICS)); + if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS)) + CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics"); const int nQuery = query.rows; const int nTrain = train.rows; @@ -804,19 +805,19 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx); ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance); } - + if (stream) stream.enqueueMemSet(nMatches, Scalar::all(0)); else nMatches.setTo(Scalar::all(0)); caller_t func = callers[distType][query.depth()]; - CV_Assert(func != 0); + CV_Assert(func != 0); func(query, train, maxDistance, mask, trainIdx, distance, nMatches, cc, StreamAccessor::getStream(stream)); } -void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches, +void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches, vector< vector >& matches, bool compactResult) { if (trainIdx.empty() || distance.empty() || nMatches.empty()) @@ -886,33 +887,33 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& query, const radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult); } -void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, +void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, const vector& masks, Stream& stream) { if (query.empty() || empty()) return; - using namespace ::cv::gpu::device::bf_radius_match; + using namespace cv::gpu::device::bf_radius_match; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); static const caller_t callers[3][6] = { { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, matchL1_gpu, matchL1_gpu }, { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, matchL2_gpu }, { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, matchHamming_gpu, 0/*matchHamming_gpu*/ } }; @@ -920,7 +921,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu DeviceInfo info; int cc = info.majorVersion() * 10 + info.minorVersion(); - CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && info.supports(GLOBAL_ATOMICS)); + if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS)) + CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics"); const int nQuery = query.rows; @@ -934,7 +936,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, imgIdx); ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32FC1, distance); } - + if (stream) stream.enqueueMemSet(nMatches, Scalar::all(0)); else @@ -946,7 +948,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu vector trains_(trainDescCollection.begin(), trainDescCollection.end()); vector masks_(masks.begin(), masks.end()); - func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0], + func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0], trainIdx, imgIdx, distance, nMatches, cc, StreamAccessor::getStream(stream)); } diff --git a/modules/gpu/src/cuda/surf.cu b/modules/gpu/src/cuda/surf.cu index 76181bc..ac7b6c2 100644 --- a/modules/gpu/src/cuda/surf.cu +++ b/modules/gpu/src/cuda/surf.cu @@ -40,7 +40,7 @@ // // Copyright (c) 2010, Paul Furgale, Chi Hay Tong // -// The original code was written by Paul Furgale and Chi Hay Tong +// The original code was written by Paul Furgale and Chi Hay Tong // and later optimized and prepared for integration into OpenCV by Itseez. // //M*/ @@ -52,9 +52,9 @@ #include "opencv2/gpu/device/functional.hpp" #include "opencv2/gpu/device/filters.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace surf + namespace surf { //////////////////////////////////////////////////////////////////////// // Global parameters @@ -123,7 +123,7 @@ namespace cv { namespace gpu { namespace device #endif float ratio = (float)newSize / oldSize; - + real_t d = 0; #pragma unroll @@ -225,7 +225,7 @@ namespace cv { namespace gpu { namespace device static __device__ bool check(int sum_i, int sum_j, int size) { float ratio = (float)size / 9.0f; - + float d = 0; int dx1 = __float2int_rn(ratio * c_DM[0]); @@ -423,12 +423,12 @@ namespace cv { namespace gpu { namespace device if (::fabs(x[0]) <= 1.f && ::fabs(x[1]) <= 1.f && ::fabs(x[2]) <= 1.f) { // if the step is within the interpolation region, perform it - + const int size = calcSize(c_octave, maxPos.z); const int sum_i = (maxPos.y - ((size >> 1) >> c_octave)) << c_octave; const int sum_j = (maxPos.x - ((size >> 1) >> c_octave)) << c_octave; - + const float center_i = sum_i + (float)(size - 1) / 2; const float center_j = sum_j + (float)(size - 1) / 2; @@ -471,8 +471,8 @@ namespace cv { namespace gpu { namespace device #endif } - void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter, - float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian, + void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter, + float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian, unsigned int* featureCounter) { dim3 threads; @@ -509,7 +509,8 @@ namespace cv { namespace gpu { namespace device __shared__ float s_Y[128]; __shared__ float s_angle[128]; - __shared__ float s_sum[32 * 4]; + __shared__ float s_sumx[32 * 4]; + __shared__ float s_sumy[32 * 4]; /* The sampling intervals and wavelet sized for selecting an orientation and building the keypoint descriptor are defined relative to 's' */ @@ -522,126 +523,109 @@ namespace cv { namespace gpu { namespace device const int grad_wav_size = 2 * __float2int_rn(2.0f * s); // check when grad_wav_size is too big - if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size) - { - // Calc X, Y, angle and store it to shared memory - const int tid = threadIdx.y * blockDim.x + threadIdx.x; - - float X = 0.0f, Y = 0.0f, angle = 0.0f; + if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size) + return; - if (tid < ORI_SAMPLES) - { - const float margin = (float)(grad_wav_size - 1) / 2.0f; - const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin); - const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin); + // Calc X, Y, angle and store it to shared memory + const int tid = threadIdx.y * blockDim.x + threadIdx.x; - if ((unsigned)y < (unsigned)((c_img_rows + 1) - grad_wav_size) && (unsigned)x < (unsigned)((c_img_cols + 1) - grad_wav_size)) - { - X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x); - Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x); - - angle = atan2f(Y, X); - if (angle < 0) - angle += 2.0f * CV_PI_F; - angle *= 180.0f / CV_PI_F; - } - } - s_X[tid] = X; - s_Y[tid] = Y; - s_angle[tid] = angle; - __syncthreads(); + float X = 0.0f, Y = 0.0f, angle = 0.0f; - float bestx = 0, besty = 0, best_mod = 0; + if (tid < ORI_SAMPLES) + { + const float margin = (float)(grad_wav_size - 1) / 2.0f; + const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin); + const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin); - #pragma unroll - for (int i = 0; i < 18; ++i) + if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size && + x >= 0 && x < (c_img_cols + 1) - grad_wav_size) { - const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC; + X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x); + Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x); - float sumx = 0.0f, sumy = 0.0f; - int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir); - if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) - { - sumx = s_X[threadIdx.x]; - sumy = s_Y[threadIdx.x]; - } - d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir); - if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) - { - sumx += s_X[threadIdx.x + 32]; - sumy += s_Y[threadIdx.x + 32]; - } - d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir); - if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) - { - sumx += s_X[threadIdx.x + 64]; - sumy += s_Y[threadIdx.x + 64]; - } - d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir); - if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) - { - sumx += s_X[threadIdx.x + 96]; - sumy += s_Y[threadIdx.x + 96]; - } - - float* s_sum_row = s_sum + threadIdx.y * 32; + angle = atan2f(Y, X); + if (angle < 0) + angle += 2.0f * CV_PI_F; + angle *= 180.0f / CV_PI_F; + } + } + s_X[tid] = X; + s_Y[tid] = Y; + s_angle[tid] = angle; + __syncthreads(); - device::reduce<32>(s_sum_row, sumx, threadIdx.x, plus()); - device::reduce<32>(s_sum_row, sumy, threadIdx.x, plus()); + float bestx = 0, besty = 0, best_mod = 0; - const float temp_mod = sumx * sumx + sumy * sumy; - if (temp_mod > best_mod) - { - best_mod = temp_mod; - bestx = sumx; - besty = sumy; - } + #pragma unroll + for (int i = 0; i < 18; ++i) + { + const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC; - __syncthreads(); + float sumx = 0.0f, sumy = 0.0f; + int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx = s_X[threadIdx.x]; + sumy = s_Y[threadIdx.x]; + } + d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[threadIdx.x + 32]; + sumy += s_Y[threadIdx.x + 32]; + } + d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[threadIdx.x + 64]; + sumy += s_Y[threadIdx.x + 64]; + } + d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[threadIdx.x + 96]; + sumy += s_Y[threadIdx.x + 96]; } - if (threadIdx.x == 0) + device::reduce<32>(s_sumx + threadIdx.y * 32, sumx, threadIdx.x, plus()); + device::reduce<32>(s_sumy + threadIdx.y * 32, sumy, threadIdx.x, plus()); + + const float temp_mod = sumx * sumx + sumy * sumy; + if (temp_mod > best_mod) { - s_X[threadIdx.y] = bestx; - s_Y[threadIdx.y] = besty; - s_angle[threadIdx.y] = best_mod; + best_mod = temp_mod; + bestx = sumx; + besty = sumy; } + __syncthreads(); + } - if (threadIdx.x < 2 && threadIdx.y == 0) - { - volatile float* v_x = s_X; - volatile float* v_y = s_Y; - volatile float* v_mod = s_angle; + if (threadIdx.x == 0) + { + s_X[threadIdx.y] = bestx; + s_Y[threadIdx.y] = besty; + s_angle[threadIdx.y] = best_mod; + } + __syncthreads(); - bestx = v_x[threadIdx.x]; - besty = v_y[threadIdx.x]; - best_mod = v_mod[threadIdx.x]; + if (threadIdx.x == 0 && threadIdx.y == 0) + { + int bestIdx = 0; - float temp_mod = v_mod[threadIdx.x + 2]; - if (temp_mod > best_mod) - { - v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 2]; - v_y[threadIdx.x] = besty = v_y[threadIdx.x + 2]; - v_mod[threadIdx.x] = best_mod = temp_mod; - } - temp_mod = v_mod[threadIdx.x + 1]; - if (temp_mod > best_mod) - { - v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 1]; - v_y[threadIdx.x] = besty = v_y[threadIdx.x + 1]; - } - } + if (s_angle[1] > s_angle[bestIdx]) + bestIdx = 1; + if (s_angle[2] > s_angle[bestIdx]) + bestIdx = 2; + if (s_angle[3] > s_angle[bestIdx]) + bestIdx = 3; - if (threadIdx.x == 0 && threadIdx.y == 0 && best_mod != 0) - { - float kp_dir = atan2f(besty, bestx); - if (kp_dir < 0) - kp_dir += 2.0f * CV_PI_F; - kp_dir *= 180.0f / CV_PI_F; + float kp_dir = atan2f(s_Y[bestIdx], s_X[bestIdx]); + if (kp_dir < 0) + kp_dir += 2.0f * CV_PI_F; + kp_dir *= 180.0f / CV_PI_F; - featureDir[blockIdx.x] = kp_dir; - } + featureDir[blockIdx.x] = kp_dir; } } @@ -649,7 +633,7 @@ namespace cv { namespace gpu { namespace device #undef ORI_WIN #undef ORI_SAMPLES - void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures) + void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures) { dim3 threads; threads.x = 32; @@ -669,27 +653,27 @@ namespace cv { namespace gpu { namespace device #define PATCH_SZ 20 - __constant__ float c_DW[PATCH_SZ * PATCH_SZ] = + __constant__ float c_DW[PATCH_SZ * PATCH_SZ] = { - 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f, - 8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, - 1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, - 3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, - 5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, - 9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, - 0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, - 0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, - 0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, - 0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, - 0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, - 0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, - 0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, - 0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, - 9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, - 5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, - 3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, - 1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, - 8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, + 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f, + 8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, + 1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, + 3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, + 5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, + 9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, + 0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, + 0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, + 0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, + 0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, + 0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, + 0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, + 0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, + 0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, + 9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, + 5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, + 3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, + 1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, + 8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f }; @@ -697,7 +681,7 @@ namespace cv { namespace gpu { namespace device { typedef uchar elem_type; - __device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) : + __device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) : centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_) { } @@ -710,14 +694,14 @@ namespace cv { namespace gpu { namespace device return tex2D(imgTex, pixel_x, pixel_y); } - float centerX; + float centerX; float centerY; - float win_offset; - float cos_dir; + float win_offset; + float cos_dir; float sin_dir; }; - __device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25], + __device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25], const float* featureX, const float* featureY, const float* featureSize, const float* featureDir) { __shared__ float s_PATCH[6][6]; @@ -739,7 +723,7 @@ namespace cv { namespace gpu { namespace device sincosf(descriptor_dir, &sin_dir, &cos_dir); /* Nearest neighbour version (faster) */ - const float win_offset = -(float)(win_size - 1) / 2; + const float win_offset = -(float)(win_size - 1) / 2; // Compute sampling points // since grids are 2D, need to compute xBlock and yBlock indices @@ -966,11 +950,11 @@ namespace cv { namespace gpu { namespace device descriptor_base[threadIdx.x] = lookup / len; } - void compute_descriptors_gpu(const DevMem2Df& descriptors, + void compute_descriptors_gpu(const DevMem2Df& descriptors, const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures) { // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D - + if (descriptors.cols == 64) { compute_descriptors64<<>>(descriptors, featureX, featureY, featureSize, featureDir); @@ -985,12 +969,12 @@ namespace cv { namespace gpu { namespace device } else { - compute_descriptors128<<>>(descriptors, featureX, featureY, featureSize, featureDir); + compute_descriptors128<<>>(descriptors, featureX, featureY, featureSize, featureDir); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); - normalize_descriptors<128><<>>(descriptors); + normalize_descriptors<128><<>>(descriptors); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); diff --git a/modules/gpu/src/fast.cpp b/modules/gpu/src/fast.cpp index 5a7c73b..270548d 100644 --- a/modules/gpu/src/fast.cpp +++ b/modules/gpu/src/fast.cpp @@ -59,7 +59,7 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat&) { throw_nogpu(); return 0; } #else /* !defined (HAVE_CUDA) */ -cv::gpu::FAST_GPU::FAST_GPU(int _threshold, bool _nonmaxSupression, double _keypointsRatio) : +cv::gpu::FAST_GPU::FAST_GPU(int _threshold, bool _nonmaxSupression, double _keypointsRatio) : nonmaxSupression(_nonmaxSupression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0) { } @@ -109,9 +109,9 @@ void cv::gpu::FAST_GPU::operator ()(const GpuMat& img, const GpuMat& mask, GpuMa keypoints.cols = getKeyPoints(keypoints); } -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace fast + namespace fast { int calcKeypoints_gpu(DevMem2Db img, DevMem2Db mask, short2* kpLoc, int maxKeypoints, DevMem2Di score, int threshold); int nonmaxSupression_gpu(const short2* kpLoc, int count, DevMem2Di score, short2* loc, float* response); @@ -124,7 +124,9 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma CV_Assert(img.type() == CV_8UC1); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size())); - CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS)); + + if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS)) + CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics"); int maxKeypoints = static_cast(keypointsRatio * img.size().area()); @@ -146,7 +148,8 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints) { using namespace cv::gpu::device::fast; - CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS)); + if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS)) + CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics"); if (count_ == 0) return 0; @@ -160,7 +163,7 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints) kpLoc_.colRange(0, count_).copyTo(locRow); keypoints.row(1).setTo(Scalar::all(0)); - return count_; + return count_; } void cv::gpu::FAST_GPU::release() diff --git a/modules/gpu/src/surf.cpp b/modules/gpu/src/surf.cpp index dea51a9..9a9efc4 100644 --- a/modules/gpu/src/surf.cpp +++ b/modules/gpu/src/surf.cpp @@ -120,7 +120,9 @@ namespace CV_Assert(!img.empty() && img.type() == CV_8UC1); CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1)); CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0); - CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS)); + + if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS)) + CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics"); const int min_size = calcSize(surf_.nOctaves - 1, 0); CV_Assert(img_rows - min_size >= 0); @@ -184,8 +186,8 @@ namespace { icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer.ptr(), maxCounter, keypoints.ptr(SURF_GPU::X_ROW), keypoints.ptr(SURF_GPU::Y_ROW), - keypoints.ptr(SURF_GPU::LAPLACIAN_ROW), keypoints.ptr(SURF_GPU::OCTAVE_ROW), - keypoints.ptr(SURF_GPU::SIZE_ROW), keypoints.ptr(SURF_GPU::HESSIAN_ROW), + keypoints.ptr(SURF_GPU::LAPLACIAN_ROW), keypoints.ptr(SURF_GPU::OCTAVE_ROW), + keypoints.ptr(SURF_GPU::SIZE_ROW), keypoints.ptr(SURF_GPU::HESSIAN_ROW), counters.ptr()); } } @@ -306,7 +308,7 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector(SURF_GPU::X_ROW); float* kp_y = keypointsCPU.ptr(SURF_GPU::Y_ROW); int* kp_laplacian = keypointsCPU.ptr(SURF_GPU::LAPLACIAN_ROW); diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpu/test/test_features2d.cpp index 2121820..9af8ad9 100644 --- a/modules/gpu/test/test_features2d.cpp +++ b/modules/gpu/test/test_features2d.cpp @@ -108,6 +108,25 @@ testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char #define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual); +int getMatchedPointsCount(std::vector& gold, std::vector& actual) +{ + std::sort(actual.begin(), actual.end(), KeyPointLess()); + std::sort(gold.begin(), gold.end(), KeyPointLess()); + + int validCount = 0; + + for (size_t i = 0; i < gold.size(); ++i) + { + const cv::KeyPoint& p1 = gold[i]; + const cv::KeyPoint& p2 = actual[i]; + + if (keyPointsEquals(p1, p2)) + ++validCount; + } + + return validCount; +} + int getMatchedPointsCount(const std::vector& keypoints1, const std::vector& keypoints2, const std::vector& matches) { int validCount = 0; @@ -170,20 +189,39 @@ TEST_P(SURF, Detector) surf.upright = upright; surf.keypointsRatio = 0.05f; - std::vector keypoints; - surf(loadMat(image), cv::gpu::GpuMat(), keypoints); + if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) + { + try + { + std::vector keypoints; + surf(loadMat(image), cv::gpu::GpuMat(), keypoints); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsNotImplemented, e.code); + } + } + else + { + std::vector keypoints; + surf(loadMat(image), cv::gpu::GpuMat(), keypoints); - cv::SURF surf_gold; - surf_gold.hessianThreshold = hessianThreshold; - surf_gold.nOctaves = nOctaves; - surf_gold.nOctaveLayers = nOctaveLayers; - surf_gold.extended = extended; - surf_gold.upright = upright; + cv::SURF surf_gold; + surf_gold.hessianThreshold = hessianThreshold; + surf_gold.nOctaves = nOctaves; + surf_gold.nOctaveLayers = nOctaveLayers; + surf_gold.extended = extended; + surf_gold.upright = upright; - std::vector keypoints_gold; - surf_gold(image, cv::noArray(), keypoints_gold); + std::vector keypoints_gold; + surf_gold(image, cv::noArray(), keypoints_gold); - ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints); + ASSERT_EQ(keypoints_gold.size(), keypoints.size()); + int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints); + double matchedRatio = static_cast(matchedCount) / keypoints_gold.size(); + + EXPECT_GT(matchedRatio, 0.95); + } } TEST_P(SURF, Detector_Masked) @@ -202,20 +240,39 @@ TEST_P(SURF, Detector_Masked) surf.upright = upright; surf.keypointsRatio = 0.05f; - std::vector keypoints; - surf(loadMat(image), loadMat(mask), keypoints); + if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) + { + try + { + std::vector keypoints; + surf(loadMat(image), loadMat(mask), keypoints); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsNotImplemented, e.code); + } + } + else + { + std::vector keypoints; + surf(loadMat(image), loadMat(mask), keypoints); + + cv::SURF surf_gold; + surf_gold.hessianThreshold = hessianThreshold; + surf_gold.nOctaves = nOctaves; + surf_gold.nOctaveLayers = nOctaveLayers; + surf_gold.extended = extended; + surf_gold.upright = upright; - cv::SURF surf_gold; - surf_gold.hessianThreshold = hessianThreshold; - surf_gold.nOctaves = nOctaves; - surf_gold.nOctaveLayers = nOctaveLayers; - surf_gold.extended = extended; - surf_gold.upright = upright; + std::vector keypoints_gold; + surf_gold(image, mask, keypoints_gold); - std::vector keypoints_gold; - surf_gold(image, mask, keypoints_gold); + ASSERT_EQ(keypoints_gold.size(), keypoints.size()); + int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints); + double matchedRatio = static_cast(matchedCount) / keypoints_gold.size(); - ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints); + EXPECT_GT(matchedRatio, 0.95); + } } TEST_P(SURF, Descriptor) @@ -238,23 +295,39 @@ TEST_P(SURF, Descriptor) surf_gold.extended = extended; surf_gold.upright = upright; - std::vector keypoints; - surf_gold(image, cv::noArray(), keypoints); + if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) + { + try + { + std::vector keypoints; + cv::gpu::GpuMat descriptors; + surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsNotImplemented, e.code); + } + } + else + { + std::vector keypoints; + surf_gold(image, cv::noArray(), keypoints); - cv::gpu::GpuMat descriptors; - surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true); + cv::gpu::GpuMat descriptors; + surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true); - cv::Mat descriptors_gold; - surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true); + cv::Mat descriptors_gold; + surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true); - cv::BFMatcher matcher(cv::NORM_L2); - std::vector matches; - matcher.match(descriptors_gold, cv::Mat(descriptors), matches); + cv::BFMatcher matcher(cv::NORM_L2); + std::vector matches; + matcher.match(descriptors_gold, cv::Mat(descriptors), matches); - int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches); - double matchedRatio = static_cast(matchedCount) / keypoints.size(); + int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches); + double matchedRatio = static_cast(matchedCount) / keypoints.size(); - EXPECT_GT(matchedRatio, 0.35); + EXPECT_GT(matchedRatio, 0.35); + } } INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine( @@ -295,13 +368,28 @@ TEST_P(FAST, Accuracy) cv::gpu::FAST_GPU fast(threshold); fast.nonmaxSupression = nonmaxSupression; - std::vector keypoints; - fast(loadMat(image), cv::gpu::GpuMat(), keypoints); + if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) + { + try + { + std::vector keypoints; + fast(loadMat(image), cv::gpu::GpuMat(), keypoints); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsNotImplemented, e.code); + } + } + else + { + std::vector keypoints; + fast(loadMat(image), cv::gpu::GpuMat(), keypoints); - std::vector keypoints_gold; - cv::FAST(image, keypoints_gold, threshold, nonmaxSupression); + std::vector keypoints_gold; + cv::FAST(image, keypoints_gold, threshold, nonmaxSupression); - ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints); + ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints); + } } INSTANTIATE_TEST_CASE_P(GPU_Features2D, FAST, testing::Combine( @@ -364,24 +452,40 @@ TEST_P(ORB, Accuracy) cv::gpu::ORB_GPU orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize); orb.blurForDescriptor = blurForDescriptor; - std::vector keypoints; - cv::gpu::GpuMat descriptors; - orb(loadMat(image), loadMat(mask), keypoints, descriptors); + if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) + { + try + { + std::vector keypoints; + cv::gpu::GpuMat descriptors; + orb(loadMat(image), loadMat(mask), keypoints, descriptors); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsNotImplemented, e.code); + } + } + else + { + std::vector keypoints; + cv::gpu::GpuMat descriptors; + orb(loadMat(image), loadMat(mask), keypoints, descriptors); - cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize); + cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize); - std::vector keypoints_gold; - cv::Mat descriptors_gold; - orb_gold(image, mask, keypoints_gold, descriptors_gold); + std::vector keypoints_gold; + cv::Mat descriptors_gold; + orb_gold(image, mask, keypoints_gold, descriptors_gold); - cv::BFMatcher matcher(cv::NORM_HAMMING); - std::vector matches; - matcher.match(descriptors_gold, cv::Mat(descriptors), matches); + cv::BFMatcher matcher(cv::NORM_HAMMING); + std::vector matches; + matcher.match(descriptors_gold, cv::Mat(descriptors), matches); - int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches); - double matchedRatio = static_cast(matchedCount) / keypoints.size(); + int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches); + double matchedRatio = static_cast(matchedCount) / keypoints.size(); - EXPECT_GT(matchedRatio, 0.35); + EXPECT_GT(matchedRatio, 0.35); + } } INSTANTIATE_TEST_CASE_P(GPU_Features2D, ORB, testing::Combine( @@ -713,25 +817,40 @@ TEST_P(BruteForceMatcher, RadiusMatch) cv::gpu::BruteForceMatcher_GPU_base matcher(distType); - std::vector< std::vector > matches; - matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius); + if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) + { + try + { + std::vector< std::vector > matches; + matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsNotImplemented, e.code); + } + } + else + { + std::vector< std::vector > matches; + matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius); - ASSERT_EQ(static_cast(queryDescCount), matches.size()); + ASSERT_EQ(static_cast(queryDescCount), matches.size()); - int badCount = 0; - for (size_t i = 0; i < matches.size(); i++) - { - if ((int)matches[i].size() != 1) - badCount++; - else + int badCount = 0; + for (size_t i = 0; i < matches.size(); i++) { - cv::DMatch match = matches[i][0]; - if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0)) + if ((int)matches[i].size() != 1) badCount++; + else + { + cv::DMatch match = matches[i][0]; + if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0)) + badCount++; + } } - } - ASSERT_EQ(0, badCount); + ASSERT_EQ(0, badCount); + } } TEST_P(BruteForceMatcher, RadiusMatchAdd) @@ -756,42 +875,57 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd) masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0)); } - std::vector< std::vector > matches; - matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks); + if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) + { + try + { + std::vector< std::vector > matches; + matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsNotImplemented, e.code); + } + } + else + { + std::vector< std::vector > matches; + matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks); - ASSERT_EQ(static_cast(queryDescCount), matches.size()); + ASSERT_EQ(static_cast(queryDescCount), matches.size()); - int badCount = 0; - int shift = matcher.isMaskSupported() ? 1 : 0; - int needMatchCount = matcher.isMaskSupported() ? n-1 : n; - for (size_t i = 0; i < matches.size(); i++) - { - if ((int)matches[i].size() != needMatchCount) - badCount++; - else + int badCount = 0; + int shift = matcher.isMaskSupported() ? 1 : 0; + int needMatchCount = matcher.isMaskSupported() ? n-1 : n; + for (size_t i = 0; i < matches.size(); i++) { - int localBadCount = 0; - for (int k = 0; k < needMatchCount; k++) + if ((int)matches[i].size() != needMatchCount) + badCount++; + else { - cv::DMatch match = matches[i][k]; + int localBadCount = 0; + for (int k = 0; k < needMatchCount; k++) { - if ((int)i < queryDescCount / 2) + cv::DMatch match = matches[i][k]; { - if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) ) - localBadCount++; - } - else - { - if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) ) - localBadCount++; + if ((int)i < queryDescCount / 2) + { + if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) ) + localBadCount++; + } + else + { + if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) ) + localBadCount++; + } } } + badCount += localBadCount > 0 ? 1 : 0; } - badCount += localBadCount > 0 ? 1 : 0; } - } - ASSERT_EQ(0, badCount); + ASSERT_EQ(0, badCount); + } } INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine( diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp index 6b9f23c..66325c3 100644 --- a/modules/gpu/test/test_filters.cpp +++ b/modules/gpu/test/test_filters.cpp @@ -258,13 +258,28 @@ TEST_P(GaussianBlur, Accuracy) double sigma1 = randomDouble(0.1, 1.0); double sigma2 = randomDouble(0.1, 1.0); - cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::GaussianBlur(loadMat(src, useRoi), dst, ksize, sigma1, sigma2, borderType); + if (ksize.height > 16 && !supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::GaussianBlur(loadMat(src), dst, ksize, sigma1, sigma2, borderType); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsNotImplemented, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, type, useRoi); + cv::gpu::GaussianBlur(loadMat(src, useRoi), dst, ksize, sigma1, sigma2, borderType); - cv::Mat dst_gold; - cv::GaussianBlur(src, dst_gold, ksize, sigma1, sigma2, borderType); + cv::Mat dst_gold; + cv::GaussianBlur(src, dst_gold, ksize, sigma1, sigma2, borderType); - EXPECT_MAT_NEAR(dst_gold, dst, 4.0); + EXPECT_MAT_NEAR(dst_gold, dst, 4.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine(