//
// @Authors
// Nathan, liujun@multicorewareinc.com
+// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
}
}
+static const int OPT_SIZE = 100; // Capacity of the char buffers that receive the "-D ..." option strings built with sprintf and passed to openCLExecuteKernel; sprintf is unbounded, so this must stay larger than the longest formatted option string.
+
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, int distType)
int m_size = MAX_DESC_LEN;
vector< pair<size_t, const void *> > args;
- static const int OPT_SIZE = 40;
char opt [OPT_SIZE] = "";
- sprintf(opt, "-D block_size=%d -D max_desc_len=%d", block_size, m_size);
+ sprintf(opt, "-D distType=%d -D block_size=%d -D max_desc_len=%d", distType, block_size, m_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_UnrollMatch";
int block_size = BLOCK_SIZE;
vector< pair<size_t, const void *> > args;
- static const int OPT_SIZE = 40;
char opt [OPT_SIZE] = "";
- sprintf(opt, "-D block_size=%d", block_size);
+ sprintf(opt, "-D distType=%d -D block_size=%d", distType, block_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_Match";
int m_size = MAX_DESC_LEN;
vector< pair<size_t, const void *> > args;
- static const int OPT_SIZE = 40;
char opt [OPT_SIZE] = "";
- sprintf(opt, "-D block_size=%d -D max_desc_len=%d", block_size, m_size);
+ sprintf(opt, "-D distType=%d -D block_size=%d -D max_desc_len=%d", distType, block_size, m_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_RadiusUnrollMatch";
int block_size = BLOCK_SIZE;
vector< pair<size_t, const void *> > args;
- static const int OPT_SIZE = 40;
char opt [OPT_SIZE] = "";
- sprintf(opt, "-D block_size=%d", block_size);
+ sprintf(opt, "-D distType=%d -D block_size=%d", distType, block_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_RadiusMatch";
int m_size = MAX_DESC_LEN;
vector< pair<size_t, const void *> > args;
- static const int OPT_SIZE = 40;
char opt [OPT_SIZE] = "";
- sprintf(opt, "-D block_size=%d -D max_desc_len=%d", block_size, m_size);
+ sprintf(opt, "-D distType=%d -D block_size=%d -D max_desc_len=%d", distType, block_size, m_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_knnUnrollMatch";
int block_size = BLOCK_SIZE;
vector< pair<size_t, const void *> > args;
- static const int OPT_SIZE = 40;
char opt [OPT_SIZE] = "";
- sprintf(opt, "-D block_size=%d", block_size);
+ sprintf(opt, "-D distType=%d -D block_size=%d", distType, block_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_knnMatch";
int m_size = MAX_DESC_LEN;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt, "-D distType=%d", distType);
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_calcDistanceUnrolled";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth(), opt);
}
}
int block_size = BLOCK_SIZE;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt, "-D distType=%d", distType);
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_calcDistance";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth(), opt);
}
}
}
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
+
const int nQuery = query.rows;
ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
ensureSizeIsEnough(1, nQuery, CV_32F, distance);
+
matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
exit:
return;
const int nQuery = query.rows;
const int nTrain = train.rows;
+
if (k == 2)
{
ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
const int nQuery = query.rows;
const int nTrain = train.rows;
+
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
CV_Assert(train.type() == query.type() && train.cols == query.cols);
CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size()));
return (float)c;
}
+#ifndef distType
+#define distType 0
+#endif
+
+#if (distType == 0)
+#define DIST(x, y) fabs((x) - (y))
+#elif (distType == 1)
+#define DIST(x, y) (((x) - (y)) * ((x) - (y)))
+#elif (distType == 2)
+#define DIST(x, y) bit1Count((uint)(x) ^ (uint)(y))
+#endif
+
+
// Accumulates the distance between one query row and one train column of a
// block_size x block_size tile held in local memory.
//
// s_query : local buffer, read at s_query[lidy * block_size + j]
// s_train : local buffer, read at s_train[j * block_size + lidx]
// lidx    : index selecting the train element within the tile
// lidy    : index selecting the query row within the tile
// returns : sum over j in [0, block_size) of the per-element distance
//
// The removed ('-') lines chose the metric at run time with a switch on a
// distType argument; the added ('+') lines use the DIST macro instead, so the
// metric is fixed when the program is built and the argument is dropped.
float reduce_block(__local float *s_query,
__local float *s_train,
int lidx,
- int lidy,
- int distType
+ int lidy
)
{
- /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
- sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
float result = 0;
- switch(distType)
+ #pragma unroll
+ for (int j = 0 ; j < block_size ; j++)
{
- case 0:
- for (int j = 0 ; j < block_size ; j++)
- {
- result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
- }
- break;
- case 1:
- for (int j = 0 ; j < block_size ; j++)
- {
- float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
- result += qr * qr;
- }
- break;
- case 2:
- for (int j = 0 ; j < block_size ; j++)
- {
- result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
- }
- break;
+ result += DIST(s_query[lidy * block_size + j], s_train[j * block_size + lidx]);
}
return result;
}
__local float *s_train,
int block_index,
int lidx,
- int lidy,
- int distType
+ int lidy
)
{
- /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
- sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
float result = 0;
- switch(distType)
+ #pragma unroll
+ for (int j = 0 ; j < block_size ; j++)
{
- case 0:
- for (int j = 0 ; j < block_size ; j++)
- {
- result += fabs(s_query[lidy * max_desc_len + block_index * block_size + j] - s_train[j * block_size + lidx]);
- }
- break;
- case 1:
- for (int j = 0 ; j < block_size ; j++)
- {
- float qr = s_query[lidy * max_desc_len + block_index * block_size + j] - s_train[j * block_size + lidx];
- result += qr * qr;
- }
- break;
- case 2:
- for (int j = 0 ; j < block_size ; j++)
- {
- //result += popcount((uint)s_query[lidy * max_desc_len + block_index * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
- result += bit1Count((uint)s_query[lidy * max_desc_len + block_index * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
- }
- break;
+ result += DIST(s_query[lidy * max_desc_len + block_index * block_size + j], s_train[j * block_size + lidx]);
}
return result;
}
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType
+ int step
)
{
// Synchronize to make sure every element needed by reduceIteration has already been written to shared memory.
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_multi_block(s_query, s_train, i, lidx, lidy, distType);
+ result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType
+ int step
)
{
const int lidx = get_local_id(0);
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_block(s_query, s_train, lidx, lidy, distType);
+ result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
int train_cols,
int bestTrainIdx_cols,
int step,
- int ostep,
- int distType
+ int ostep
)
{
const int lidx = get_local_id(0);
// Synchronize to make sure every element needed by reduceIteration has already been written to shared memory.
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_block(s_query, s_train, lidx, lidy, distType);
+ result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
int train_cols,
int bestTrainIdx_cols,
int step,
- int ostep,
- int distType
+ int ostep
)
{
const int lidx = get_local_id(0);
// Synchronize to make sure every element needed by reduceIteration has already been written to shared memory.
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_block(s_query, s_train, lidx, lidy, distType);
+ result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType
+ int step
)
{
const int lidx = get_local_id(0);
// Synchronize to make sure every element needed by reduceIteration has already been written to shared memory.
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_multi_block(s_query, s_train, i, lidx, lidy, distType);
+ result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType
+ int step
)
{
const int lidx = get_local_id(0);
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_block(s_query, s_train, lidx, lidy, distType);
+ result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType)
+ int step)
{
/* Todo */
}
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType)
+ int step)
{
/* Todo */
}