const int nQuery = queryDescs.rows;\r
const int nTrain = trainDescs.rows;\r
\r
- ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);\r
- ensureSizeIsEnough(nQuery, k, CV_32F, distance);\r
- if (k != 2)\r
+ if (k == 2)\r
+ {\r
+ ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);\r
+ ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);\r
+ }\r
+ else\r
+ {\r
+ ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);\r
+ ensureSizeIsEnough(nQuery, k, CV_32F, distance);\r
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);\r
+ }\r
\r
if (stream)\r
{\r
if (trainIdx.empty() || distance.empty())\r
return;\r
\r
- CV_Assert(trainIdx.type() == CV_32SC1);\r
- CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());\r
+ CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1);\r
+ CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1);\r
+ CV_Assert(distance.size() == trainIdx.size());\r
+ CV_Assert(trainIdx.isContinuous() && distance.isContinuous());\r
\r
- const int nQuery = distance.rows;\r
- const int k = trainIdx.cols;\r
+ const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows;\r
+ const int k = trainIdx.type() == CV_32SC2 ? 2 :trainIdx.cols;\r
\r
matches.clear();\r
matches.reserve(nQuery);\r
+ \r
+ const int* trainIdx_ptr = trainIdx.ptr<int>();\r
+ const float* distance_ptr = distance.ptr<float>();\r
\r
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)\r
{\r
vector<DMatch>& curMatches = matches.back();\r
curMatches.reserve(k);\r
\r
- const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);\r
- const float* distance_ptr = distance.ptr<float>(queryIdx);\r
for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)\r
{\r
int trainIdx = *trainIdx_ptr;\r
}\r
\r
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename VecDiff, typename Dist, typename T, typename Mask>\r
- __global__ void knnMatch2(const PtrStep_<T> query, const DevMem2D_<T> train, const Mask m, PtrStep_<int2> trainIdx, PtrStep_<float2> distance)\r
+ __global__ void knnMatch2(const PtrStep_<T> query, const DevMem2D_<T> train, const Mask m, int2* trainIdx, float2* distance)\r
{\r
typedef typename Dist::result_type result_type;\r
typedef typename Dist::value_type value_type;\r
}\r
}\r
\r
- trainIdx.ptr(queryIdx)[0] = make_int2(bestTrainIdx1, bestTrainIdx2);\r
- distance.ptr(queryIdx)[0] = make_float2(distMin1, distMin2);\r
+ trainIdx[queryIdx] = make_int2(bestTrainIdx1, bestTrainIdx2);\r
+ distance[queryIdx] = make_float2(distMin1, distMin2);\r
}\r
}\r
\r
const dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);\r
\r
knnMatch2<BLOCK_DIM_X, BLOCK_DIM_Y, VecDiffCachedRegister<BLOCK_DIM_X, MAX_LEN, LEN_EQ_MAX_LEN, typename Dist::value_type>, Dist, T>\r
- <<<grid, threads, 0, stream>>>(query, train, mask, trainIdx, distance);\r
+ <<<grid, threads, 0, stream>>>(query, train, mask, trainIdx.data, distance.data);\r
cudaSafeCall( cudaGetLastError() );\r
\r
if (stream == 0)\r