// No-CUDA build stub: unconditionally reports missing GPU support.
void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
// No-CUDA build stub: unconditionally reports missing GPU support.
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, int, const GpuMat&, bool) { throw_nogpu(); }
// No-CUDA build stub (multi-image overload): unconditionally reports missing GPU support.
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, int, const std::vector<GpuMat>&, bool) { throw_nogpu(); }
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_nogpu(); }\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_nogpu(); }\r
// No-CUDA build stub: unconditionally reports missing GPU support.
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
// No-CUDA build stub: unconditionally reports missing GPU support.
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_nogpu(); }
// No-CUDA build stub: unconditionally reports missing GPU support.
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, float, const GpuMat&, bool) { throw_nogpu(); }
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_nogpu(); }\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }\r
// No-CUDA build stub (multi-image overload): unconditionally reports missing GPU support.
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, float, const std::vector<GpuMat>&, bool) { throw_nogpu(); }
\r
#else /* !defined (HAVE_CUDA) */\r
\r
namespace cv { namespace gpu { namespace bf_radius_match \r
{\r
- template <typename T> void radiusMatchL1_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
- const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, \r
+ template <typename T> void radiusMatchSingleL1_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
+ const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
cudaStream_t stream);\r
- template <typename T> void radiusMatchL2_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
- const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, \r
+ template <typename T> void radiusMatchSingleL2_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
+ const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
cudaStream_t stream);\r
- template <typename T> void radiusMatchHamming_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
- const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, \r
+ template <typename T> void radiusMatchSingleHamming_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
+ const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
cudaStream_t stream);\r
-}}}\r
\r
-namespace\r
-{\r
- struct ImgIdxSetter\r
- {\r
- explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}\r
- inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;}\r
- int imgIdx;\r
- };\r
-}\r
+ template <typename T> void radiusMatchCollectionL1_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, \r
+ const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
+ cudaStream_t stream);\r
+ template <typename T> void radiusMatchCollectionL2_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, \r
+ const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
+ cudaStream_t stream);\r
+ template <typename T> void radiusMatchCollectionHamming_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, \r
+ const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
+ cudaStream_t stream);\r
+}}}\r
\r
cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)\r
{\r
knnMatchDownload(trainIdx, distance, matches, compactResult);\r
}\r
\r
+namespace
+{
+    // Functor that stamps a fixed image index onto a DMatch; applied over a
+    // per-image match list (see the for_each over localMatch below) so matches
+    // from the i-th training image carry imgIdx == i.
+    struct ImgIdxSetter
+    {
+        explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}
+        inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;}
+        int imgIdx;
+    };
+}
+\r
void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs,\r
vector< vector<DMatch> >& matches, int knn, const vector<GpuMat>& masks, bool compactResult)\r
{\r
////////////////////////////////////////////////////////////////////\r
// RadiusMatch\r
\r
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,\r
- GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance, float maxDistance, const GpuMat& mask, Stream& stream)\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& queryDescs, const GpuMat& trainDescs,\r
+ GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, const GpuMat& mask, Stream& stream)\r
{\r
if (queryDescs.empty() || trainDescs.empty())\r
return;\r
using namespace cv::gpu::bf_radius_match;\r
\r
typedef void (*radiusMatch_caller_t)(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
- const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, \r
+ const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
cudaStream_t stream);\r
\r
static const radiusMatch_caller_t radiusMatch_callers[3][8] =\r
{\r
{\r
- radiusMatchL1_gpu<unsigned char>, 0/*radiusMatchL1_gpu<signed char>*/, radiusMatchL1_gpu<unsigned short>,\r
- radiusMatchL1_gpu<short>, radiusMatchL1_gpu<int>, radiusMatchL1_gpu<float>, 0, 0\r
+ radiusMatchSingleL1_gpu<unsigned char>, 0/*radiusMatchSingleL1_gpu<signed char>*/, radiusMatchSingleL1_gpu<unsigned short>,\r
+ radiusMatchSingleL1_gpu<short>, radiusMatchSingleL1_gpu<int>, radiusMatchSingleL1_gpu<float>, 0, 0\r
},\r
{\r
- 0/*radiusMatchL2_gpu<unsigned char>*/, 0/*radiusMatchL2_gpu<signed char>*/, 0/*radiusMatchL2_gpu<unsigned short>*/,\r
- 0/*radiusMatchL2_gpu<short>*/, 0/*radiusMatchL2_gpu<int>*/, radiusMatchL2_gpu<float>, 0, 0\r
+ 0/*radiusMatchSingleL2_gpu<unsigned char>*/, 0/*radiusMatchSingleL2_gpu<signed char>*/, 0/*radiusMatchSingleL2_gpu<unsigned short>*/,\r
+ 0/*radiusMatchSingleL2_gpu<short>*/, 0/*radiusMatchSingleL2_gpu<int>*/, radiusMatchSingleL2_gpu<float>, 0, 0\r
},\r
{\r
- radiusMatchHamming_gpu<unsigned char>, 0/*radiusMatchHamming_gpu<signed char>*/, radiusMatchHamming_gpu<unsigned short>,\r
- 0/*radiusMatchHamming_gpu<short>*/, radiusMatchHamming_gpu<int>, 0, 0, 0\r
+ radiusMatchSingleHamming_gpu<unsigned char>, 0/*radiusMatchSingleHamming_gpu<signed char>*/, radiusMatchSingleHamming_gpu<unsigned short>,\r
+ 0/*radiusMatchSingleHamming_gpu<short>*/, radiusMatchSingleHamming_gpu<int>, 0, 0, 0\r
}\r
};\r
\r
- CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));\r
+ CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));\r
\r
const int nQuery = queryDescs.rows;\r
const int nTrain = trainDescs.rows;\r
ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);\r
if (trainIdx.empty())\r
{\r
- ensureSizeIsEnough(nQuery, nTrain, CV_32SC1, trainIdx);\r
- ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, distance);\r
+ ensureSizeIsEnough(nQuery, nTrain / 2, CV_32SC1, trainIdx);\r
+ ensureSizeIsEnough(nQuery, nTrain / 2, CV_32FC1, distance);\r
}\r
\r
- if (stream)\r
- stream.enqueueMemSet(nMatches, Scalar::all(0));\r
- else\r
- nMatches.setTo(Scalar::all(0));\r
-\r
radiusMatch_caller_t func = radiusMatch_callers[distType][queryDescs.depth()];\r
CV_Assert(func != 0);\r
\r
- func(queryDescs, trainDescs, maxDistance, mask, trainIdx, nMatches, distance, StreamAccessor::getStream(stream));\r
+ func(queryDescs, trainDescs, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));\r
}\r
\r
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& nMatches,\r
- const GpuMat& distance, std::vector< std::vector<DMatch> >& matches, bool compactResult)\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches, \r
+ vector< vector<DMatch> >& matches, bool compactResult)\r
{\r
- if (trainIdx.empty() || nMatches.empty() || distance.empty())\r
+ if (trainIdx.empty() || distance.empty() || nMatches.empty())\r
return;\r
\r
Mat trainIdxCPU = trainIdx;\r
- Mat nMatchesCPU = nMatches;\r
Mat distanceCPU = distance;\r
+ Mat nMatchesCPU = nMatches;\r
\r
- radiusMatchConvert(trainIdxCPU, nMatchesCPU, distanceCPU, matches, compactResult);\r
+ radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);\r
}\r
\r
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& nMatches, const Mat& distance,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult)\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,\r
+ vector< vector<DMatch> >& matches, bool compactResult)\r
{\r
- if (trainIdx.empty() || nMatches.empty() || distance.empty())\r
+ if (trainIdx.empty() || distance.empty() || nMatches.empty())\r
return;\r
\r
CV_Assert(trainIdx.type() == CV_32SC1);\r
matches.clear();\r
matches.reserve(nQuery);\r
\r
- const unsigned int* nMatches_ptr = nMatches.ptr<unsigned int>();\r
+ const int* nMatches_ptr = nMatches.ptr<int>();\r
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)\r
{\r
const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);\r
const float* distance_ptr = distance.ptr<float>(queryIdx);\r
\r
- const int nMatches = std::min(static_cast<int>(nMatches_ptr[queryIdx]), trainIdx.cols);\r
+ const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);\r
\r
if (nMatches == 0)\r
{\r
continue;\r
}\r
\r
- matches.push_back(vector<DMatch>());\r
+ matches.push_back(vector<DMatch>(nMatches));\r
vector<DMatch>& curMatches = matches.back();\r
- curMatches.reserve(nMatches);\r
\r
for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr)\r
{\r
\r
DMatch m(queryIdx, trainIdx, 0, distance);\r
\r
- curMatches.push_back(m);\r
+ curMatches[i] = m;\r
}\r
+\r
sort(curMatches.begin(), curMatches.end());\r
}\r
}\r
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,\r
vector< vector<DMatch> >& matches, float maxDistance, const GpuMat& mask, bool compactResult)\r
{\r
- GpuMat trainIdx, nMatches, distance;\r
- radiusMatch(queryDescs, trainDescs, trainIdx, nMatches, distance, maxDistance, mask);\r
- radiusMatchDownload(trainIdx, nMatches, distance, matches, compactResult);\r
+ GpuMat trainIdx, distance, nMatches;\r
+ radiusMatchSingle(queryDescs, trainDescs, trainIdx, distance, nMatches, maxDistance, mask);\r
+ radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);\r
}\r
\r
-void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, vector< vector<DMatch> >& matches,\r
- float maxDistance, const vector<GpuMat>& masks, bool compactResult)\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& queryDescs, const GpuMat& trainCollection,\r
+ GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,\r
+ const GpuMat& maskCollection, Stream& stream)\r
{\r
- if (queryDescs.empty() || empty())\r
+ if (queryDescs.empty() || trainCollection.empty())\r
return;\r
\r
- matches.resize(queryDescs.rows);\r
+ using namespace cv::gpu::bf_radius_match;\r
\r
- vector< vector<DMatch> > curMatches;\r
+ typedef void (*radiusMatch_caller_t)(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, \r
+ const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
+ cudaStream_t stream);\r
\r
- for (size_t imgIdx = 0; imgIdx < trainDescCollection.size(); ++imgIdx)\r
+ static const radiusMatch_caller_t radiusMatch_callers[3][8] =\r
{\r
- radiusMatch(queryDescs, trainDescCollection[imgIdx], curMatches, maxDistance,\r
- masks.empty() ? GpuMat() : masks[imgIdx]);\r
-\r
- for (int queryIdx = 0; queryIdx < queryDescs.rows; ++queryIdx)\r
{\r
- vector<DMatch>& localMatch = curMatches[queryIdx];\r
- vector<DMatch>& globalMatch = matches[queryIdx];\r
+ radiusMatchCollectionL1_gpu<unsigned char>, 0/*radiusMatchCollectionL1_gpu<signed char>*/, radiusMatchCollectionL1_gpu<unsigned short>,\r
+ radiusMatchCollectionL1_gpu<short>, radiusMatchCollectionL1_gpu<int>, radiusMatchCollectionL1_gpu<float>, 0, 0\r
+ },\r
+ {\r
+ 0/*radiusMatchCollectionL2_gpu<unsigned char>*/, 0/*radiusMatchCollectionL2_gpu<signed char>*/, 0/*radiusMatchCollectionL2_gpu<unsigned short>*/,\r
+ 0/*radiusMatchCollectionL2_gpu<short>*/, 0/*radiusMatchCollectionL2_gpu<int>*/, radiusMatchCollectionL2_gpu<float>, 0, 0\r
+ },\r
+ {\r
+ radiusMatchCollectionHamming_gpu<unsigned char>, 0/*radiusMatchCollectionHamming_gpu<signed char>*/, radiusMatchCollectionHamming_gpu<unsigned short>,\r
+ 0/*radiusMatchCollectionHamming_gpu<short>*/, radiusMatchCollectionHamming_gpu<int>, 0, 0, 0\r
+ }\r
+ };\r
\r
- for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));\r
+ CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));\r
\r
- const size_t oldSize = globalMatch.size();\r
+ const int nQuery = queryDescs.rows;\r
\r
- copy(localMatch.begin(), localMatch.end(), back_inserter(globalMatch));\r
- inplace_merge(globalMatch.begin(), globalMatch.begin() + oldSize, globalMatch.end());\r
- }\r
+ CV_Assert(queryDescs.channels() == 1 && queryDescs.depth() < CV_64F);\r
+ CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));\r
+\r
+ ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);\r
+ if (trainIdx.empty())\r
+ {\r
+ ensureSizeIsEnough(nQuery, nQuery / 2, CV_32SC1, trainIdx);\r
+ ensureSizeIsEnough(nQuery, nQuery / 2, CV_32SC1, imgIdx);\r
+ ensureSizeIsEnough(nQuery, nQuery / 2, CV_32FC1, distance);\r
}\r
\r
- if (compactResult)\r
+ radiusMatch_caller_t func = radiusMatch_callers[distType][queryDescs.depth()];\r
+ CV_Assert(func != 0);\r
+\r
+ func(queryDescs, trainCollection, maxDistance, maskCollection, trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));\r
+}\r
+\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,\r
+ vector< vector<DMatch> >& matches, bool compactResult)\r
+{\r
+ if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())\r
+ return;\r
+\r
+ Mat trainIdxCPU = trainIdx;\r
+ Mat imgIdxCPU = imgIdx;\r
+ Mat distanceCPU = distance;\r
+ Mat nMatchesCPU = nMatches;\r
+\r
+ radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);\r
+}\r
+\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,\r
+ vector< vector<DMatch> >& matches, bool compactResult)\r
+{\r
+ if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())\r
+ return;\r
+\r
+ CV_Assert(trainIdx.type() == CV_32SC1);\r
+ CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.size() == trainIdx.size());\r
+ CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());\r
+ CV_Assert(nMatches.type() == CV_32SC1 && nMatches.isContinuous() && nMatches.cols >= trainIdx.rows);\r
+\r
+ const int nQuery = trainIdx.rows;\r
+\r
+ matches.clear();\r
+ matches.reserve(nQuery);\r
+\r
+ const int* nMatches_ptr = nMatches.ptr<int>();\r
+ for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)\r
{\r
- vector< vector<DMatch> >::iterator new_end = remove_if(matches.begin(), matches.end(),\r
- mem_fun_ref(&vector<DMatch>::empty));\r
- matches.erase(new_end, matches.end());\r
+ const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);\r
+ const int* imgIdx_ptr = imgIdx.ptr<int>(queryIdx);\r
+ const float* distance_ptr = distance.ptr<float>(queryIdx);\r
+\r
+ const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);\r
+\r
+ if (nMatches == 0)\r
+ {\r
+ if (!compactResult)\r
+ matches.push_back(vector<DMatch>());\r
+ continue;\r
+ }\r
+\r
+ matches.push_back(vector<DMatch>());\r
+ vector<DMatch>& curMatches = matches.back();\r
+ curMatches.reserve(nMatches);\r
+\r
+ for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)\r
+ {\r
+ int trainIdx = *trainIdx_ptr;\r
+ int imgIdx = *imgIdx_ptr;\r
+ float distance = *distance_ptr;\r
+\r
+ DMatch m(queryIdx, trainIdx, imgIdx, distance);\r
+\r
+ curMatches.push_back(m);\r
+ }\r
+\r
+ sort(curMatches.begin(), curMatches.end());\r
}\r
}\r
\r
+void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, vector< vector<DMatch> >& matches,\r
+ float maxDistance, const vector<GpuMat>& masks, bool compactResult)\r
+{\r
+ GpuMat trainCollection;\r
+ GpuMat maskCollection;\r
+\r
+ makeGpuCollection(trainCollection, maskCollection, masks);\r
+\r
+ GpuMat trainIdx, imgIdx, distance, nMatches;\r
+\r
+ radiusMatchCollection(queryDescs, trainCollection, trainIdx, imgIdx, distance, nMatches, maxDistance, maskCollection);\r
+\r
+ radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);\r
+}\r
+\r
#endif /* !defined (HAVE_CUDA) */\r
\r
namespace cv { namespace gpu { namespace bf_radius_match\r
{\r
- __device__ __forceinline__ void store(const int* sidx, const float* sdist, const unsigned int scount, int* trainIdx, float* distance, int& sglob_ind, const int tid)\r
+ template <typename T> struct SingleTrain\r
{\r
- if (tid < scount)\r
+ enum {USE_IMG_IDX = 0};\r
+\r
+ explicit SingleTrain(const DevMem2D_<T>& train_) : train(train_)\r
{\r
- trainIdx[sglob_ind + tid] = sidx[tid];\r
- distance[sglob_ind + tid] = sdist[tid];\r
}\r
\r
- if (tid == 0)\r
- sglob_ind += scount;\r
- }\r
+ static __device__ __forceinline__ void store(const int* s_trainIdx, const int* s_imgIdx, const float* s_dist, unsigned int& s_count, int& s_globInd, \r
+ int* trainIdx, int* imgIdx, float* distance, int maxCount)\r
+ {\r
+ const int tid = threadIdx.y * blockDim.x + threadIdx.x;\r
\r
- template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, typename VecDiff, typename Dist, typename T, typename Mask>\r
- __global__ void radiusMatch(const PtrStep_<T> query, const DevMem2D_<T> train, const float maxDistance, const Mask mask, \r
- DevMem2Di trainIdx_, PtrStepf distance, unsigned int* nMatches)\r
- {\r
- #if __CUDA_ARCH__ >= 120\r
+ if (tid < s_count && s_globInd + tid < maxCount)\r
+ {\r
+ trainIdx[s_globInd + tid] = s_trainIdx[tid];\r
+ distance[s_globInd + tid] = s_dist[tid];\r
+ }\r
\r
- typedef typename Dist::result_type result_type;\r
- typedef typename Dist::value_type value_type;\r
+ if (tid == 0)\r
+ {\r
+ s_globInd += s_count;\r
+ s_count = 0;\r
+ }\r
+ }\r
\r
- __shared__ result_type smem[BLOCK_DIM_X * BLOCK_DIM_Y];\r
- __shared__ int sidx[BLOCK_STACK];\r
- __shared__ float sdist[BLOCK_STACK];\r
- __shared__ unsigned int scount;\r
- __shared__ int sglob_ind;\r
+ template <int BLOCK_STACK, typename Dist, typename VecDiff, typename Mask>\r
+ __device__ __forceinline__ void loop(float maxDistance, Mask& mask, const VecDiff& vecDiff, \r
+ int* s_trainIdx, int* s_imgIdx, float* s_dist, unsigned int& s_count, int& s_globInd, \r
+ int* trainIdxRow, int* imgIdxRow, float* distanceRow, int maxCount, \r
+ typename Dist::result_type* s_diffRow) const\r
+ {\r
+ #if __CUDA_ARCH__ >= 120\r
\r
- const int queryIdx = blockIdx.x;\r
- const int tid = threadIdx.y * BLOCK_DIM_X + threadIdx.x;\r
+ for (int i = 0; i < train.rows; i += blockDim.y)\r
+ {\r
+ int trainIdx = i + threadIdx.y;\r
\r
- if (tid == 0)\r
+ if (trainIdx < train.rows && mask(blockIdx.x, trainIdx))\r
+ {\r
+ Dist dist;\r
+ \r
+ vecDiff.calc(train.ptr(trainIdx), train.cols, dist, s_diffRow, threadIdx.x);\r
+\r
+ const typename Dist::result_type val = dist;\r
+\r
+ if (threadIdx.x == 0 && val < maxDistance)\r
+ {\r
+ unsigned int ind = atomicInc(&s_count, (unsigned int) -1);\r
+ s_trainIdx[ind] = trainIdx;\r
+ s_dist[ind] = val;\r
+ }\r
+ }\r
+\r
+ __syncthreads();\r
+\r
+ if (s_count >= BLOCK_STACK - blockDim.y)\r
+ store(s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, trainIdxRow, imgIdxRow, distanceRow, maxCount);\r
+\r
+ __syncthreads();\r
+ }\r
+\r
+ store(s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, trainIdxRow, imgIdxRow, distanceRow, maxCount);\r
+\r
+ #endif\r
+ }\r
+\r
+ __device__ __forceinline__ int descLen() const\r
{\r
- scount = 0;\r
- sglob_ind = 0;\r
+ return train.cols;\r
}\r
- __syncthreads();\r
\r
- int* trainIdx_row = trainIdx_.ptr(queryIdx);\r
- float* distance_row = distance.ptr(queryIdx);\r
+ const DevMem2D_<T> train;\r
+ };\r
\r
- const VecDiff vecDiff(query.ptr(queryIdx), train.cols, (typename Dist::value_type*)smem, tid, threadIdx.x);\r
- \r
- typename Dist::result_type* sdiffRow = smem + BLOCK_DIM_X * threadIdx.y;\r
+ template <typename T> struct TrainCollection\r
+ {\r
+ enum {USE_IMG_IDX = 1};\r
+\r
+ TrainCollection(const DevMem2D_<T>* trainCollection_, int nImg_, int desclen_) : \r
+ trainCollection(trainCollection_), nImg(nImg_), desclen(desclen_)\r
+ {\r
+ }\r
\r
- for (int trainIdx = threadIdx.y; trainIdx < train.rows; trainIdx += BLOCK_DIM_Y)\r
+ static __device__ __forceinline__ void store(const int* s_trainIdx, const int* s_imgIdx, const float* s_dist, unsigned int& s_count, int& s_globInd, \r
+ int* trainIdx, int* imgIdx, float* distance, int maxCount)\r
{\r
- if (mask(queryIdx, trainIdx))\r
+ const int tid = threadIdx.y * blockDim.x + threadIdx.x;\r
+\r
+ if (tid < s_count && s_globInd + tid < maxCount)\r
{\r
- Dist dist;\r
+ trainIdx[s_globInd + tid] = s_trainIdx[tid];\r
+ imgIdx[s_globInd + tid] = s_imgIdx[tid];\r
+ distance[s_globInd + tid] = s_dist[tid];\r
+ }\r
+\r
+ if (tid == 0)\r
+ {\r
+ s_globInd += s_count;\r
+ s_count = 0;\r
+ }\r
+ }\r
\r
- const T* trainRow = train.ptr(trainIdx);\r
- \r
- vecDiff.calc(trainRow, train.cols, dist, sdiffRow, threadIdx.x);\r
+ template <int BLOCK_STACK, typename Dist, typename VecDiff, typename Mask>\r
+ __device__ void loop(float maxDistance, Mask& mask, const VecDiff& vecDiff, \r
+ int* s_trainIdx, int* s_imgIdx, float* s_dist, unsigned int& s_count, int& s_globInd, \r
+ int* trainIdxRow, int* imgIdxRow, float* distanceRow, int maxCount, \r
+ typename Dist::result_type* s_diffRow) const\r
+ {\r
+ #if __CUDA_ARCH__ >= 120\r
+\r
+ for (int imgIdx = 0; imgIdx < nImg; ++imgIdx)\r
+ {\r
+ const DevMem2D_<T> train = trainCollection[imgIdx];\r
\r
- const typename Dist::result_type val = dist;\r
+ mask.next();\r
\r
- if (threadIdx.x == 0 && val < maxDistance)\r
+ for (int i = 0; i < train.rows; i += blockDim.y)\r
{\r
- unsigned int i = atomicInc(&scount, (unsigned int) -1);\r
- sidx[i] = trainIdx;\r
- sdist[i] = val;\r
+ int trainIdx = i + threadIdx.y;\r
+\r
+ if (trainIdx < train.rows && mask(blockIdx.x, trainIdx))\r
+ {\r
+ Dist dist;\r
+ \r
+ vecDiff.calc(train.ptr(trainIdx), desclen, dist, s_diffRow, threadIdx.x);\r
+\r
+ const typename Dist::result_type val = dist;\r
+\r
+ if (threadIdx.x == 0 && val < maxDistance)\r
+ {\r
+ unsigned int ind = atomicInc(&s_count, (unsigned int) -1);\r
+ s_trainIdx[ind] = trainIdx;\r
+ s_imgIdx[ind] = imgIdx;\r
+ s_dist[ind] = val;\r
+ }\r
+ }\r
+\r
+ __syncthreads();\r
+\r
+ if (s_count >= BLOCK_STACK - blockDim.y)\r
+ store(s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, trainIdxRow, imgIdxRow, distanceRow, maxCount);\r
+\r
+ __syncthreads();\r
}\r
}\r
- __syncthreads();\r
\r
- if (scount > BLOCK_STACK - BLOCK_DIM_Y)\r
- {\r
- store(sidx, sdist, scount, trainIdx_row, distance_row, sglob_ind, tid);\r
- if (tid == 0)\r
- scount = 0;\r
- }\r
- __syncthreads();\r
+ store(s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, trainIdxRow, imgIdxRow, distanceRow, maxCount);\r
+\r
+ #endif\r
+ }\r
+\r
+ __device__ __forceinline__ int descLen() const\r
+ {\r
+ return desclen;\r
+ }\r
+\r
+ const DevMem2D_<T>* trainCollection;\r
+ const int nImg;\r
+ const int desclen;\r
+ };\r
+\r
+ template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, typename VecDiff, typename Dist, typename T, typename Train, typename Mask>\r
+ __global__ void radiusMatch(const PtrStep_<T> query, const Train train, float maxDistance, const Mask mask, \r
+ PtrStepi trainIdx, PtrStepi imgIdx, PtrStepf distance, int* nMatches, int maxCount)\r
+ {\r
+ typedef typename Dist::result_type result_type;\r
+ typedef typename Dist::value_type value_type;\r
+\r
+ __shared__ result_type s_mem[BLOCK_DIM_X * BLOCK_DIM_Y];\r
+\r
+ __shared__ int s_trainIdx[BLOCK_STACK];\r
+ __shared__ int s_imgIdx[Train::USE_IMG_IDX ? BLOCK_STACK : 1];\r
+ __shared__ float s_dist[BLOCK_STACK];\r
+ __shared__ unsigned int s_count;\r
+\r
+ __shared__ int s_globInd;\r
+\r
+ if (threadIdx.x == 0 && threadIdx.y == 0)\r
+ {\r
+ s_count = 0;\r
+ s_globInd = 0;\r
}\r
+ __syncthreads();\r
\r
- store(sidx, sdist, scount, trainIdx_row, distance_row, sglob_ind, tid);\r
+ const VecDiff vecDiff(query.ptr(blockIdx.x), train.descLen(), (typename Dist::value_type*)s_mem, threadIdx.y * BLOCK_DIM_X + threadIdx.x, threadIdx.x);\r
\r
- if (tid == 0)\r
- nMatches[queryIdx] = sglob_ind;\r
+ Mask m = mask;\r
\r
- #endif\r
+ train.template loop<BLOCK_STACK, Dist>(maxDistance, m, vecDiff, \r
+ s_trainIdx, s_imgIdx, s_dist, s_count, s_globInd, \r
+ trainIdx.ptr(blockIdx.x), imgIdx.ptr(blockIdx.x), distance.ptr(blockIdx.x), maxCount, \r
+ s_mem + BLOCK_DIM_X * threadIdx.y);\r
+\r
+ if (threadIdx.x == 0 && threadIdx.y == 0)\r
+ nMatches[blockIdx.x] = s_globInd;\r
}\r
\r
///////////////////////////////////////////////////////////////////////////////\r
// Radius Match kernel caller\r
\r
- template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, typename Dist, typename T, typename Mask>\r
- void radiusMatchSimple_caller(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask, \r
- const DevMem2Di& trainIdx, const DevMem2Df& distance, unsigned int* nMatches,\r
+ template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, typename Dist, typename T, typename Train, typename Mask>\r
+ void radiusMatchSimple_caller(const DevMem2D_<T>& query, const Train& train, float maxDistance, const Mask& mask, \r
+ const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int* nMatches,\r
cudaStream_t stream)\r
{\r
StaticAssert<BLOCK_STACK >= BLOCK_DIM_Y>::check();\r
const dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);\r
\r
radiusMatch<BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_STACK, VecDiffGlobal<BLOCK_DIM_X, T>, Dist, T>\r
- <<<grid, threads, 0, stream>>>(query, train, maxDistance, mask, trainIdx, distance, nMatches);\r
+ <<<grid, threads, 0, stream>>>(query, train, maxDistance, mask, trainIdx, imgIdx, distance, nMatches, trainIdx.cols);\r
cudaSafeCall( cudaGetLastError() );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
- template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T, typename Mask>\r
- void radiusMatchCached_caller(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask, \r
- const DevMem2Di& trainIdx, const DevMem2Df& distance, unsigned int* nMatches, \r
+ template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_STACK, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T, typename Train, typename Mask>\r
+ void radiusMatchCached_caller(const DevMem2D_<T>& query, const Train& train, float maxDistance, const Mask& mask, \r
+ const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int* nMatches, \r
cudaStream_t stream)\r
{\r
StaticAssert<BLOCK_STACK >= BLOCK_DIM_Y>::check();\r
const dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);\r
\r
radiusMatch<BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_STACK, VecDiffCachedRegister<BLOCK_DIM_X, MAX_LEN, LEN_EQ_MAX_LEN, typename Dist::value_type>, Dist, T>\r
- <<<grid, threads, 0, stream>>>(query, train, maxDistance, mask, trainIdx, distance, nMatches);\r
+ <<<grid, threads, 0, stream>>>(query, train, maxDistance, mask, trainIdx, imgIdx, distance, nMatches, trainIdx.cols);\r
cudaSafeCall( cudaGetLastError() );\r
\r
if (stream == 0)\r
///////////////////////////////////////////////////////////////////////////////\r
// Radius Match Dispatcher\r
\r
- template <typename Dist, typename T, typename Mask>\r
- void radiusMatchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask, \r
- const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
+ template <typename Dist, typename T, typename Train, typename Mask>\r
+ void radiusMatchDispatcher(const DevMem2D_<T>& query, const Train& train, float maxDistance, const Mask& mask, \r
+ const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
cudaStream_t stream)\r
{\r
if (query.cols < 64)\r
{\r
radiusMatchCached_caller<16, 16, 64, 64, false, Dist>(\r
query, train, maxDistance, mask, \r
- static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), (unsigned int*)nMatches.data,\r
+ static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Di>(imgIdx), static_cast<DevMem2Df>(distance), (int*)nMatches.data,\r
stream);\r
}\r
else if (query.cols == 64)\r
{\r
radiusMatchCached_caller<16, 16, 64, 64, true, Dist>(\r
query, train, maxDistance, mask, \r
- static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), (unsigned int*)nMatches.data,\r
+ static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Di>(imgIdx), static_cast<DevMem2Df>(distance), (int*)nMatches.data,\r
stream);\r
}\r
else if (query.cols < 128)\r
{\r
radiusMatchCached_caller<16, 16, 64, 128, false, Dist>(\r
query, train, maxDistance, mask, \r
- static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), (unsigned int*)nMatches.data,\r
+ static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Di>(imgIdx), static_cast<DevMem2Df>(distance), (int*)nMatches.data,\r
stream);\r
}\r
else if (query.cols == 128)\r
{\r
radiusMatchCached_caller<16, 16, 64, 128, true, Dist>(\r
query, train, maxDistance, mask, \r
- static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), (unsigned int*)nMatches.data,\r
+ static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Di>(imgIdx), static_cast<DevMem2Df>(distance), (int*)nMatches.data,\r
stream);\r
}\r
else if (query.cols < 256)\r
{\r
radiusMatchCached_caller<16, 16, 64, 256, false, Dist>(\r
query, train, maxDistance, mask, \r
- static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), (unsigned int*)nMatches.data,\r
+ static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Di>(imgIdx), static_cast<DevMem2Df>(distance), (int*)nMatches.data,\r
stream);\r
}\r
else if (query.cols == 256)\r
{\r
radiusMatchCached_caller<16, 16, 64, 256, true, Dist>(\r
query, train, maxDistance, mask, \r
- static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), (unsigned int*)nMatches.data, \r
+ static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Di>(imgIdx), static_cast<DevMem2Df>(distance), (int*)nMatches.data, \r
stream);\r
}\r
else\r
{\r
radiusMatchSimple_caller<16, 16, 64, Dist>(\r
query, train, maxDistance, mask, \r
- static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), (unsigned int*)nMatches.data,\r
+ static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Di>(imgIdx), static_cast<DevMem2Df>(distance), (int*)nMatches.data,\r
stream);\r
}\r
} \r
///////////////////////////////////////////////////////////////////////////////\r
// Radius Match caller\r
\r
- template <typename T> void radiusMatchL1_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
- const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, \r
+ template <typename T> void radiusMatchSingleL1_gpu(const DevMem2D& query, const DevMem2D& train_, float maxDistance, const DevMem2D& mask, \r
+ const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
cudaStream_t stream)\r
{\r
+ SingleTrain<T> train(static_cast< DevMem2D_<T> >(train_));\r
+\r
if (mask.data)\r
{\r
- radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask), \r
- trainIdx, distance, nMatches, \r
+ radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), train, maxDistance, SingleMask(mask), \r
+ trainIdx, DevMem2D(), distance, nMatches, \r
stream);\r
}\r
else\r
{\r
- radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(), \r
- trainIdx, distance, nMatches, \r
+ radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(), \r
+ trainIdx, DevMem2D(), distance, nMatches, \r
stream);\r
}\r
}\r
\r
- template void radiusMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- //template void radiusMatchL1_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- template void radiusMatchL1_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- template void radiusMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- template void radiusMatchL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- template void radiusMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
+ template void radiusMatchSingleL1_gpu<uchar >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchSingleL1_gpu<schar >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchSingleL1_gpu<ushort>(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchSingleL1_gpu<short >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchSingleL1_gpu<int   >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchSingleL1_gpu<float >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
\r
- template <typename T> void radiusMatchL2_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
- const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, \r
+ template <typename T> void radiusMatchSingleL2_gpu(const DevMem2D& query, const DevMem2D& train_, float maxDistance, const DevMem2D& mask, \r
+ const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
cudaStream_t stream)\r
{\r
+ SingleTrain<T> train(static_cast< DevMem2D_<T> >(train_));\r
+\r
if (mask.data)\r
{\r
- radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask), \r
- trainIdx, distance, nMatches, \r
+ radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, SingleMask(mask), \r
+ trainIdx, DevMem2D(), distance, nMatches, \r
stream);\r
}\r
else\r
{\r
- radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(), \r
- trainIdx, distance, nMatches, \r
+ radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(), \r
+ trainIdx, DevMem2D(), distance, nMatches, \r
stream);\r
}\r
}\r
\r
- //template void radiusMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- //template void radiusMatchL2_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- //template void radiusMatchL2_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- //template void radiusMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- //template void radiusMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- template void radiusMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
+ //template void radiusMatchSingleL2_gpu<uchar >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchSingleL2_gpu<schar >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchSingleL2_gpu<ushort>(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchSingleL2_gpu<short >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchSingleL2_gpu<int   >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchSingleL2_gpu<float >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
\r
- template <typename T> void radiusMatchHamming_gpu(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, \r
- const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, \r
+ template <typename T> void radiusMatchSingleHamming_gpu(const DevMem2D& query, const DevMem2D& train_, float maxDistance, const DevMem2D& mask, \r
+ const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
cudaStream_t stream)\r
{\r
+ SingleTrain<T> train(static_cast< DevMem2D_<T> >(train_));\r
+\r
if (mask.data)\r
{\r
- radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask), \r
- trainIdx, distance, nMatches, \r
+ radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, SingleMask(mask), \r
+ trainIdx, DevMem2D(), distance, nMatches, \r
+ stream);\r
+ }\r
+ else\r
+ {\r
+ radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(), \r
+ trainIdx, DevMem2D(), distance, nMatches, \r
+ stream);\r
+ }\r
+ }\r
+\r
+ template void radiusMatchSingleHamming_gpu<uchar >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchSingleHamming_gpu<schar >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchSingleHamming_gpu<ushort>(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchSingleHamming_gpu<short >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchSingleHamming_gpu<int   >(const DevMem2D& query, const DevMem2D& train, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+\r
+ template <typename T> void radiusMatchCollectionL1_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, \r
+ const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
+ cudaStream_t stream)\r
+ {\r
+ TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, query.cols);\r
+\r
+ if (maskCollection.data)\r
+ {\r
+ radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), train, maxDistance, MaskCollection(maskCollection.data), \r
+ trainIdx, imgIdx, distance, nMatches, \r
+ stream);\r
+ }\r
+ else\r
+ {\r
+ radiusMatchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(), \r
+ trainIdx, imgIdx, distance, nMatches, \r
+ stream);\r
+ }\r
+ }\r
+\r
+ template void radiusMatchCollectionL1_gpu<uchar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchCollectionL1_gpu<schar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchCollectionL1_gpu<ushort>(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchCollectionL1_gpu<short >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchCollectionL1_gpu<int >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchCollectionL1_gpu<float >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+\r
+ template <typename T> void radiusMatchCollectionL2_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, \r
+ const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
+ cudaStream_t stream)\r
+ {\r
+ TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, query.cols);\r
+\r
+ if (maskCollection.data)\r
+ {\r
+ radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, MaskCollection(maskCollection.data), \r
+ trainIdx, imgIdx, distance, nMatches, \r
+ stream);\r
+ }\r
+ else\r
+ {\r
+ radiusMatchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(), \r
+ trainIdx, imgIdx, distance, nMatches, \r
+ stream);\r
+ }\r
+ }\r
+\r
+ //template void radiusMatchCollectionL2_gpu<uchar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchCollectionL2_gpu<schar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchCollectionL2_gpu<ushort>(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchCollectionL2_gpu<short >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchCollectionL2_gpu<int >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchCollectionL2_gpu<float >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+\r
+ template <typename T> void radiusMatchCollectionHamming_gpu(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, \r
+ const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, \r
+ cudaStream_t stream)\r
+ {\r
+ TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, query.cols);\r
+\r
+ if (maskCollection.data)\r
+ {\r
+ radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, MaskCollection(maskCollection.data), \r
+ trainIdx, imgIdx, distance, nMatches, \r
stream);\r
}\r
else\r
{\r
- radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(), \r
- trainIdx, distance, nMatches, \r
+ radiusMatchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), train, maxDistance, WithOutMask(), \r
+ trainIdx, imgIdx, distance, nMatches, \r
stream);\r
}\r
}\r
\r
- template void radiusMatchHamming_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- //template void radiusMatchHamming_gpu<schar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- template void radiusMatchHamming_gpu<ushort>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- //template void radiusMatchHamming_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
- template void radiusMatchHamming_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2D& trainIdx, const DevMem2D& nMatches, const DevMem2D& distance, cudaStream_t stream);\r
+ template void radiusMatchCollectionHamming_gpu<uchar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchCollectionHamming_gpu<schar >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchCollectionHamming_gpu<ushort>(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ //template void radiusMatchCollectionHamming_gpu<short >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
+ template void radiusMatchCollectionHamming_gpu<int >(const DevMem2D& query, const DevMem2D& trainCollection, float maxDistance, const DevMem2D_<PtrStep>& maskCollection, const DevMem2D& trainIdx, const DevMem2D& imgIdx, const DevMem2D& distance, const DevMem2D& nMatches, cudaStream_t stream);\r
}}}\r