using namespace cvtest;
using namespace testing;
-int main(int argc, char** argv)
+int main(int argc, char **argv)
{
try
{
TS::ptr()->init("cv");
InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
- }
+ return RUN_ALL_TESTS();
+}
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
protected:
Context();
friend class auto_ptr<Context>;
- static auto_ptr<Context> clCxt;
+ private:
+ static auto_ptr<Context> clCxt;
+ static int val;
public:
~Context();
- static int val;
- static Context *getContext();
+ void release();
+ Info::Impl* impl;
+
+ static Context* getContext();
static void setContext(Info &oclinfo);
- struct Impl;
- Impl *impl;
+
+ enum {CL_DOUBLE, CL_UNIFIED_MEM};
+ bool supportsFeature(int ftype);
+ size_t computeUnits();
+ void* oclContext();
+ void* oclCommandQueue();
};
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
template<typename T>
void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
{
- if((src1.clCxt -> impl -> double_support != 0) && (src1.depth() == CV_64F))
+ if(src1.clCxt->supportsFeature(Context::CL_DOUBLE) && (src1.depth() == CV_64F))
arithmetic_run<double>(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
else
arithmetic_run<float>(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
{
- if(src1.clCxt -> impl -> double_support != 0)
+ if(src1.clCxt->supportsFeature(Context::CL_DOUBLE))
arithmetic_run<double>(src1, src2, dst, "arithm_div", &arithm_div, (void *)(&scalar));
else
arithmetic_run<float>(src1, src2, dst, "arithm_div", &arithm_div, (void *)(&scalar));
template <typename WT , typename CL_WT>
void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString, double scalar)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
args.push_back( make_pair( sizeof(cl_double), (void *)&scalar ));
else
{
}
void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst)
{
- if(src.clCxt -> impl -> double_support == 0)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
template <typename T>
Scalar arithmetic_sum(const oclMat &src, int type = 0)
{
- size_t groupnum = src.clCxt->impl->maxComputeUnits;
+ size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0);
int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen;
Context *clCxt = src.clCxt;
typedef Scalar (*sumFunc)(const oclMat &src, int type);
Scalar cv::ocl::sum(const oclMat &src)
{
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
};
sumFunc func;
- func = functab[src.clCxt->impl->double_support];
+ func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
return func(src, 0);
}
Scalar cv::ocl::absSum(const oclMat &src)
{
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
};
sumFunc func;
- func = functab[src.clCxt->impl->double_support];
+ func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
return func(src, 1);
}
Scalar cv::ocl::sqrSum(const oclMat &src)
{
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
};
sumFunc func;
- func = functab[src.clCxt->impl->double_support];
+ func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
return func(src, 2);
}
//////////////////////////////////////////////////////////////////////////////
template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{
- size_t groupnum = src.clCxt->impl->maxComputeUnits;
+ size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0);
groupnum = groupnum * 2;
int vlen = 8;
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{
CV_Assert(src.oclchannels() == 1);
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
//////////////////////////////////////////////////////////////////////////////
static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
CV_Assert( src.type() == CV_32F || src.type() == CV_64F);
Context *clCxt = src.clCxt;
- if(clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
//////////////////////////////////////////////////////////////////////////////
static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
string kernelName, bool angleInDegrees)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
string kernelName)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
Point *minLoc, Point *maxLoc, const oclMat &mask)
{
CV_Assert(src.oclchannels() == 1);
- size_t groupnum = src.clCxt->impl->maxComputeUnits;
+ size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0);
int minloc = -1 , maxloc = -1;
int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ;
void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask)
{
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
};
minMaxLocFunc func;
- func = functab[src.clCxt->impl->double_support];
+ func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
func(src, minVal, maxVal, minLoc, maxLoc, mask);
}
int cv::ocl::countNonZero(const oclMat &src)
{
- size_t groupnum = src.clCxt->impl->maxComputeUnits;
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ size_t groupnum = src.clCxt->computeUnits();
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
// dst.create(src1.size(),src1.type());
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
// dst.create(src1.size(),src1.type());
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
#define BLOCK_ROWS (256/TILE_DIM)
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset));
- if(src1.clCxt -> impl -> double_support != 0)
+ if(src1.clCxt->supportsFeature(Context::CL_DOUBLE))
{
args.push_back( make_pair( sizeof(cl_double), (void *)&alpha ));
args.push_back( make_pair( sizeof(cl_double), (void *)&beta ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- if(src1.clCxt -> impl -> double_support == 0)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
{
float pf = p;
args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
}
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
{
- if(x.clCxt -> impl -> double_support == 0 && x.type() == CV_64F)
+ if(!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
{
openCLFree(counter);
}
- counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
+ counter = clCreateBuffer( (cl_context)getoclContext(), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
openCLSafeCall(err);
}
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
{
unsigned int count;
- openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
Context *clCxt = map.clCxt;
string kernelName = "edgesHysteresisGlobal";
vector< pair<size_t, const void *> > args;
int count_i[1] = {0};
while(count > 0)
{
- openCLSafeCall(clEnqueueWriteBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
args.clear();
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
- openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
#undef DIVUP
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
- openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) );
+ openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, (cl_context)getoclContext(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
//ready to bake
- openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
+ cl_command_queue clq = (cl_command_queue)getoclCommandQueue();
+ openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &clq, NULL, NULL ) );
}
cv::ocl::FftPlan::~FftPlan()
{
if (buffersize)
{
cl_int medstatus;
- clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
+ clMedBuffer = clCreateBuffer ( (cl_context)src.clCxt->oclContext(), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
openCLSafeCall( medstatus );
}
+ cl_command_queue clq = (cl_command_queue)src.clCxt->oclCommandQueue();
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1,
- &src.clCxt->impl->clCmdQueue,
+ &clq,
0, NULL, NULL,
(cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
- openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
+ openCLSafeCall( clFinish(clq) );
if(clMedBuffer)
{
openCLFree(clMedBuffer);
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
{
- if (src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
int offb = src2.offset;
int offc = dst.offset;
-
+ cl_command_queue clq = (cl_command_queue)src1.clCxt->oclCommandQueue();
switch(src1.type())
{
case CV_32FC1:
offa /= sizeof(float);
offb /= sizeof(float);
offc /= sizeof(float);
+
openCLSafeCall
(
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
- beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+ beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
);
break;
case CV_64FC1:
(
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
- beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+ beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
);
break;
case CV_32FC2:
(
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
- beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+ beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
);
}
break;
(
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
- beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+ beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
);
}
break;
size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 };
- size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->computeUnits()) *localThreads[0],
localThreads[1], 1
};
int outputsz = 256 * globalThreads[0] / localThreads[0];
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
//openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
//classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status);
//status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL);
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
//openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL));
candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz);
//openCLVerifyCall(status);
scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
//openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
//flag = 1;
//}
int grp_per_CU = 12;
size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 };
- size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->impl->maxComputeUnits *localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0],
localThreads[1], 1
};
int outputsz = 256 * globalThreads[0] / localThreads[0];
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
nodenum * sizeof(GpuHidHaarTreeNode));
//openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL));
cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
int splitnode = stage[0].count + stage[1].count + stage[2].count;
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
//openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz);
//openCLVerifyCall(status);
scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
//openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
//int argcount = 0;
vector<pair<size_t, const void *> > args;
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
- candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
+ candidate = (int *)clEnqueueMapBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
for(int i = 0; i < outputsz; i++)
{
free(scaleinfo);
free(p);
free(correction);
- clEnqueueUnmapMemObject(gsum.clCxt->impl->clCmdQueue, candidatebuffer, candidate, 0, 0, 0);
+ clEnqueueUnmapMemObject((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, candidate, 0, 0, 0);
openCLSafeCall(clReleaseMemObject(stagebuffer));
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
openCLSafeCall(clReleaseMemObject(nodebuffer));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
-
- if(src.clCxt -> impl -> double_support != 0)
+
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
}
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
}
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
string kernelName = "warpAffine" + s[interpolation];
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
cl_int st;
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
+ coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
}
else
{
{
float_coeffs[m][n] = coeffs[m][n];
}
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
+ coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
}
//TODO: improve this kernel
string s[3] = {"NN", "Linear", "Cubic"};
string kernelName = "warpPerspective" + s[interpolation];
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
cl_int st;
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
+ coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
}
else
{
for(int n = 0; n < 3; n++)
float_coeffs[m][n] = coeffs[m][n];
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
+ coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
}
//TODO: improve this kernel
size_t blkSizeX = 16, blkSizeY = 16;
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
{
CV_Assert(src.type() == CV_8UC1);
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
double k, int borderType)
{
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
{
- if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
- // if(src.clCxt->impl->double_support == 0)
+ // if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
// {
// CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
// }
if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
- // if(src.clCxt->impl->double_support == 0)
+ // if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
// {
// CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
// }
ProgramCache *programCache = NULL;
DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
- int gDevMemTypeValueMap[5] = {0,
+ int gDevMemTypeValueMap[5] = {0,
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_USE_HOST_PTR,
CL_MEM_COPY_HOST_PTR,
cacheSize = 0;
}
- ////////////////////////Common OpenCL specific calls///////////////
- int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type)
- {
- rw_type = gDeviceMemRW;
- mem_type = gDeviceMemType;
- return Context::getContext()->impl->unified_memory;
- }
- int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
- {
- if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) ||
- mem_type == DEVICE_MEM_UHP ||
- mem_type == DEVICE_MEM_CHP )
- return -1;
- gDeviceMemRW = rw_type;
- gDeviceMemType = mem_type;
- return 0;
- }
-
- struct Info::Impl
+ struct Info::Impl
{
cl_platform_id oclplatform;
std::vector<cl_device_id> devices;
cl_context oclcontext;
cl_command_queue clCmdQueue;
int devnum;
- cl_uint maxDimensions;
size_t maxWorkGroupSize;
- size_t *maxWorkItemSizes;
+ cl_uint maxDimensions; // == maxWorkItemSizes.size()
+ std::vector<size_t> maxWorkItemSizes;
cl_uint maxComputeUnits;
char extra_options[512];
int double_support;
+ int unified_memory; // 1 if the device reports CL_DEVICE_HOST_UNIFIED_MEMORY (integrated GPU), otherwise 0
+ string binpath;
+ int refcounter;
+
Impl()
{
+ refcounter = 1;
+ oclplatform = 0;
+ oclcontext = 0;
+ clCmdQueue = 0;
+ devnum = -1;
+ maxComputeUnits = 0;
+ maxWorkGroupSize = 0;
memset(extra_options, 0, 512);
+ double_support = 0;
+ unified_memory = 0;
+ }
+
+ void setDevice(void *ctx, void *q, int devnum);
+
+ void release()
+ {
+ if(1 == CV_XADD(&refcounter, -1))
+ {
+ releaseResources();
+ delete this;
+ }
+ }
+
+ Impl* copy()
+ {
+ CV_XADD(&refcounter, 1);
+ return this;
}
+
+ private:
+ Impl(const Impl&);
+ Impl& operator=(const Impl&);
+ void releaseResources();
};
+ void Info::Impl::releaseResources()
+ {
+ devnum = -1;
+
+ if(clCmdQueue)
+ {
+ openCLSafeCall(clReleaseCommandQueue(clCmdQueue));
+ clCmdQueue = 0;
+ }
+
+ if(oclcontext)
+ {
+ openCLSafeCall(clReleaseContext(oclcontext));
+ oclcontext = 0;
+ }
+ }
+
+ void Info::Impl::setDevice(void *ctx, void *q, int dnum)
+ {
+ if((ctx && q) || devnum != dnum)
+ releaseResources();
+
+ CV_Assert(dnum >= 0 && dnum < (int)devices.size());
+ devnum = dnum;
+ if(ctx && q)
+ {
+ oclcontext = (cl_context)ctx;
+ clCmdQueue = (cl_command_queue)q;
+ clRetainContext(oclcontext);
+ clRetainCommandQueue(clCmdQueue);
+ }
+ else
+ {
+ cl_int status = 0;
+ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(oclplatform), 0 };
+ oclcontext = clCreateContext(cps, 1, &devices[devnum], 0, 0, &status);
+ openCLVerifyCall(status);
+ clCmdQueue = clCreateCommandQueue(oclcontext, devices[devnum], CL_QUEUE_PROFILING_ENABLE, &status);
+ openCLVerifyCall(status);
+ }
+
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&maxWorkGroupSize, 0));
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void *)&maxDimensions, 0));
+ maxWorkItemSizes.resize(maxDimensions);
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxDimensions, (void *)&maxWorkItemSizes[0], 0));
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), (void *)&maxComputeUnits, 0));
+
+ cl_bool unfymem = false;
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool), (void *)&unfymem, 0));
+ unified_memory = unfymem ? 1 : 0;
+
+ //initialize extra options for compilation. Currently only fp64 is included.
+ //Assume 4KB is enough to store all possible extensions.
+ const int EXT_LEN = 4096 + 1 ;
+ char extends_set[EXT_LEN];
+ size_t extends_size;
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size));
+ extends_set[EXT_LEN - 1] = 0;
+ size_t fp64_khr = std::string(extends_set).find("cl_khr_fp64");
+
+ if(fp64_khr != std::string::npos)
+ {
+ sprintf(extra_options, "-D DOUBLE_SUPPORT");
+ double_support = 1;
+ }
+ else
+ {
+ memset(extra_options, 0, 512);
+ double_support = 0;
+ }
+ }
+
+ ////////////////////////Common OpenCL specific calls///////////////
+ int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type)
+ {
+ rw_type = gDeviceMemRW;
+ mem_type = gDeviceMemType;
+ return Context::getContext()->impl->unified_memory;
+ }
+
+ int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
+ {
+ if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) ||
+ mem_type == DEVICE_MEM_UHP ||
+ mem_type == DEVICE_MEM_CHP )
+ return -1;
+ gDeviceMemRW = rw_type;
+ gDeviceMemType = mem_type;
+ return 0;
+ }
+
inline int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
int getDevice(std::vector<Info> &oclinfo, int devicetype)
{
+ //TODO: cache oclinfo vector
+ oclinfo.clear();
+
switch(devicetype)
{
case CVCL_DEVICE_TYPE_DEFAULT:
case CVCL_DEVICE_TYPE_ALL:
break;
default:
- CV_Error(CV_GpuApiCallError, "Unkown device type");
+ return 0;
}
- int devcienums = 0;
+
// Platform info
- cl_int status = 0;
cl_uint numPlatforms;
- Info ocltmpinfo;
- openCLSafeCall(clGetPlatformIDs(0, NULL, &numPlatforms));
- CV_Assert(numPlatforms > 0);
- cl_platform_id *platforms = new cl_platform_id[numPlatforms];
+ openCLSafeCall(clGetPlatformIDs(0, 0, &numPlatforms));
+ if(numPlatforms < 1) return 0;
+
+ std::vector<cl_platform_id> platforms(numPlatforms);
+ openCLSafeCall(clGetPlatformIDs(numPlatforms, &platforms[0], 0));
- openCLSafeCall(clGetPlatformIDs(numPlatforms, platforms, NULL));
char deviceName[256];
+ int devcienums = 0;
for (unsigned i = 0; i < numPlatforms; ++i)
{
cl_uint numsdev;
- status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev);
+ cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev);
if(status != CL_DEVICE_NOT_FOUND)
- {
openCLVerifyCall(status);
- }
+
if(numsdev > 0)
{
devcienums += numsdev;
- cl_device_id *devices = new cl_device_id[numsdev];
- openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, devices, NULL));
+ std::vector<cl_device_id> devices(numsdev);
+ openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, &devices[0], 0));
+
+ Info ocltmpinfo;
ocltmpinfo.impl->oclplatform = platforms[i];
- for(unsigned j = 0; j < numsdev; j++)
+ for(unsigned j = 0; j < numsdev; ++j)
{
ocltmpinfo.impl->devices.push_back(devices[j]);
- openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 256, deviceName, NULL));
- ocltmpinfo.impl->devName.push_back(std::string(deviceName));
- ocltmpinfo.DeviceName.push_back(std::string(deviceName));
+ openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(deviceName), deviceName, 0));
+ ocltmpinfo.impl->devName.push_back(deviceName);
+ ocltmpinfo.DeviceName.push_back(deviceName);
}
- delete[] devices;
oclinfo.push_back(ocltmpinfo);
- ocltmpinfo.release();
}
}
- delete[] platforms;
- if(devcienums > 0)
- {
- setDevice(oclinfo[0]);
- }
return devcienums;
}
- static void fillClcontext(Info &oclinfo)
- {
- //get device information
- size_t devnum = oclinfo.impl->devnum;
-
- openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE,
- sizeof(size_t), (void *)&oclinfo.impl->maxWorkGroupSize, NULL));
- openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
- sizeof(cl_uint), (void *)&oclinfo.impl->maxDimensions, NULL));
- oclinfo.impl->maxWorkItemSizes = new size_t[oclinfo.impl->maxDimensions];
- openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES,
- sizeof(size_t)*oclinfo.impl->maxDimensions, (void *)oclinfo.impl->maxWorkItemSizes, NULL));
- openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS,
- sizeof(cl_uint), (void *)&oclinfo.impl->maxComputeUnits, NULL));
- //initialize extra options for compilation. Currently only fp64 is included.
- //Assume 4KB is enough to store all possible extensions.
-
- const int EXT_LEN = 4096 + 1 ;
- char extends_set[EXT_LEN];
- size_t extends_size;
- openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_EXTENSIONS,
- EXT_LEN, (void *)extends_set, &extends_size));
- CV_Assert(extends_size < (size_t)EXT_LEN);
- extends_set[EXT_LEN - 1] = 0;
- memset(oclinfo.impl->extra_options, 0, 512);
- oclinfo.impl->double_support = 0;
- int fp64_khr = string(extends_set).find("cl_khr_fp64");
-
- if(fp64_khr >= 0 && fp64_khr < EXT_LEN)
- {
- sprintf(oclinfo.impl->extra_options , "-D DOUBLE_SUPPORT");
- oclinfo.impl -> double_support = 1;
- }
- Context::setContext(oclinfo);
-
- }
-
void setDevice(Info &oclinfo, int devnum)
{
- CV_Assert(devnum >= 0);
- cl_int status = 0;
- cl_context_properties cps[3] =
- {
- CL_CONTEXT_PLATFORM, (cl_context_properties)(oclinfo.impl->oclplatform), 0
- };
- oclinfo.impl->devnum = devnum;
- oclinfo.impl->oclcontext = clCreateContext(cps, 1, &oclinfo.impl->devices[devnum], NULL, NULL, &status);
- openCLVerifyCall(status);
- //create the command queue using the first device of the list
- oclinfo.impl->clCmdQueue = clCreateCommandQueue(oclinfo.impl->oclcontext, oclinfo.impl->devices[devnum],
- CL_QUEUE_PROFILING_ENABLE, &status);
- openCLVerifyCall(status);
- fillClcontext(oclinfo);
+ oclinfo.impl->setDevice(0, 0, devnum);
+ Context::setContext(oclinfo);
}
void setDeviceEx(Info &oclinfo, void *ctx, void *q, int devnum)
{
- CV_Assert(devnum >= 0);
- oclinfo.impl->devnum = devnum;
- if(ctx && q)
- {
- oclinfo.impl->oclcontext = (cl_context)ctx;
- oclinfo.impl->clCmdQueue = (cl_command_queue)q;
- clRetainContext((cl_context)ctx);
- clRetainCommandQueue((cl_command_queue)q);
- fillClcontext(oclinfo);
- }
+ oclinfo.impl->setDevice(ctx, q, devnum);
+ Context::setContext(oclinfo);
}
void *getoclContext()
{
- return &(Context::getContext()->impl->clContext);
+ return &(Context::getContext()->impl->oclcontext);
}
void *getoclCommandQueue()
cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size)
{
cl_int status;
- cl_mem buffer = clCreateBuffer(clCxt->impl->clContext, (cl_mem_flags)flag, size, NULL, &status);
+ cl_mem buffer = clCreateBuffer(clCxt->impl->oclcontext, (cl_mem_flags)flag, size, NULL, &status);
openCLVerifyCall(status);
return buffer;
}
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
{
cl_int status;
- *dev_ptr = clCreateBuffer(clCxt->impl->clContext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+ *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
widthInBytes * height, 0, &status);
openCLVerifyCall(status);
*pitch = widthInBytes;
void setBinpath(const char *path)
{
Context *clcxt = Context::getContext();
- clcxt->impl->Binpath = path;
+ clcxt->impl->binpath = path;
}
int savetofile(const Context*, cl_program &program, const char *fileName)
if(NULL != build_options)
{
- src_sign << (int64)(*source) << clCxt->impl->clContext << "_" << build_options;
+ src_sign << (int64)(*source) << clCxt->impl->oclcontext << "_" << build_options;
}
else
{
- src_sign << (int64)(*source) << clCxt->impl->clContext;
+ src_sign << (int64)(*source) << clCxt->impl->oclcontext;
}
srcsign = src_sign.str();
strcat(all_build_options, build_options);
if(all_build_options != NULL)
{
- filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + all_build_options + ".clb";
+ filename = clCxt->impl->binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + all_build_options + ".clb";
}
else
{
- filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + ".clb";
+ filename = clCxt->impl->binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb";
}
FILE *fp = fopen(filename.c_str(), "rb");
- if(fp == NULL || clCxt->impl->Binpath.size() == 0) //we should generate a binary file for the first time.
+ if(fp == NULL || clCxt->impl->binpath.size() == 0) //we should generate a binary file for the first time.
{
if(fp != NULL)
fclose(fp);
program = clCreateProgramWithSource(
- clCxt->impl->clContext, 1, source, NULL, &status);
+ clCxt->impl->oclcontext, 1, source, NULL, &status);
openCLVerifyCall(status);
- status = clBuildProgram(program, 1, &(clCxt->impl->devices), all_build_options, NULL, NULL);
- if(status == CL_SUCCESS && clCxt->impl->Binpath.size())
+ status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL);
+ if(status == CL_SUCCESS && clCxt->impl->binpath.size())
savetofile(clCxt, program, filename.c_str());
}
else
CV_Assert(1 == fread(binary, binarySize, 1, fp));
fclose(fp);
cl_int status = 0;
- program = clCreateProgramWithBinary(clCxt->impl->clContext,
+ program = clCreateProgramWithBinary(clCxt->impl->oclcontext,
1,
- &(clCxt->impl->devices),
+ &(clCxt->impl->devices[clCxt->impl->devnum]),
(const size_t *)&binarySize,
(const unsigned char **)&binary,
NULL,
&status);
openCLVerifyCall(status);
- status = clBuildProgram(program, 1, &(clCxt->impl->devices), all_build_options, NULL, NULL);
+ status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL);
delete[] binary;
}
char *buildLog = NULL;
size_t buildLogSize = 0;
logStatus = clGetProgramBuildInfo(program,
- clCxt->impl->devices, CL_PROGRAM_BUILD_LOG, buildLogSize,
+ clCxt->impl->devices[clCxt->impl->devnum], CL_PROGRAM_BUILD_LOG, buildLogSize,
buildLog, &buildLogSize);
if(logStatus != CL_SUCCESS)
cout << "Failed to build the program and get the build info." << endl;
buildLog = new char[buildLogSize];
CV_DbgAssert(!!buildLog);
memset(buildLog, 0, buildLogSize);
- openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices,
+ openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[clCxt->impl->devnum],
CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
cout << "\n\t\t\tBUILD LOG\n";
cout << buildLog << endl;
void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads)
{
size_t kernelWorkGroupSize;
- openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices,
+ openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[clCxt->impl->devnum],
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
CV_Assert( (localThreads[0] <= clCxt->impl->maxWorkItemSizes[0]) &&
(localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) &&
cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
#endif
}
-
+
double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+ vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
bool finish, bool measureKernelTime, bool cleanUp)
{
f.read(str, fileSize);
f.close();
str[size] = '\0';
-
+
s = str;
delete[] str;
return 0;
double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+ vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
bool finish, bool measureKernelTime, bool cleanUp)
{
delete []source;
return kernelTime;
}
-
- cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
+
+ cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
const size_t size)
{
int status;
/////////////////////////////OpenCL initialization/////////////////
auto_ptr<Context> Context::clCxt;
int Context::val = 0;
- Mutex cs;
- Context *Context::getContext()
+ static Mutex cs;
+ Context* Context::getContext()
{
- if(val == 0)
+ if(*((volatile int*)&val) != 1)
{
AutoLock al(cs);
- if( NULL == clCxt.get())
+ if(*((volatile int*)&val) != 1)
+ {
+ if( 0 == clCxt.get())
+ clCxt.reset(new Context);
+
+ std::vector<Info> oclinfo;
+ CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0);
+ oclinfo[0].impl->setDevice(0, 0, 0);
+ clCxt.get()->impl = oclinfo[0].impl->copy();
+
+ *((volatile int*)&val) = 1;
+ }
+ }
+ return clCxt.get();
+ }
+
+ void Context::setContext(Info &oclinfo)
+ {
+ AutoLock guard(cs);
+ if(*((volatile int*)&val) != 1)
+ {
+ if( 0 == clCxt.get())
clCxt.reset(new Context);
- val = 1;
- return clCxt.get();
+ clCxt.get()->impl = oclinfo.impl->copy();
+
+ *((volatile int*)&val) = 1;
}
else
{
- return clCxt.get();
+ clCxt.get()->impl->release();
+ clCxt.get()->impl = oclinfo.impl->copy();
}
}
- void Context::setContext(Info &oclinfo)
- {
- Context *clcxt = getContext();
- clcxt->impl->clContext = oclinfo.impl->oclcontext;
- clcxt->impl->clCmdQueue = oclinfo.impl->clCmdQueue;
- clcxt->impl->devices = oclinfo.impl->devices[oclinfo.impl->devnum];
- clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum];
- clcxt->impl->maxDimensions = oclinfo.impl->maxDimensions;
- clcxt->impl->maxWorkGroupSize = oclinfo.impl->maxWorkGroupSize;
- for(size_t i=0; i<clcxt->impl->maxDimensions && i<4; i++)
- clcxt->impl->maxWorkItemSizes[i] = oclinfo.impl->maxWorkItemSizes[i];
- clcxt->impl->maxComputeUnits = oclinfo.impl->maxComputeUnits;
- clcxt->impl->double_support = oclinfo.impl->double_support;
- //extra options to recognize compiler options
- memcpy(clcxt->impl->extra_options, oclinfo.impl->extra_options, 512);
- cl_bool unfymem = false;
- openCLSafeCall(clGetDeviceInfo(clcxt->impl->devices, CL_DEVICE_HOST_UNIFIED_MEMORY,
- sizeof(cl_bool), (void *)&unfymem, NULL));
- if(unfymem)
- clcxt->impl->unified_memory = 1;
- }
+
Context::Context()
{
- impl = new Impl;
- //Information of the OpenCL context
- impl->clContext = NULL;
- impl->clCmdQueue = NULL;
- impl->devices = NULL;
- impl->maxDimensions = 0;
- impl->maxWorkGroupSize = 0;
- for(int i=0; i<4; i++)
- impl->maxWorkItemSizes[i] = 0;
- impl->maxComputeUnits = 0;
- impl->double_support = 0;
- //extra options to recognize vendor specific fp64 extensions
- memset(impl->extra_options, 0, 512);
- impl->unified_memory = 0;
+ impl = 0;
programCache = ProgramCache::getProgramCache();
}
Context::~Context()
{
- delete impl;
+ release();
+ }
+
+ void Context::release()
+ {
+ if (impl)
+ impl->release();
programCache->releaseProgram();
}
+
+ bool Context::supportsFeature(int ftype)
+ {
+ switch(ftype)
+ {
+ case CL_DOUBLE:
+ return impl->double_support == 1;
+ case CL_UNIFIED_MEM:
+ return impl->unified_memory == 1;
+ default:
+ return false;
+ }
+ }
+
+ size_t Context::computeUnits()
+ {
+ return impl->maxComputeUnits;
+ }
+
+ void* Context::oclContext()
+ {
+ return impl->oclcontext;
+ }
+
+ void* Context::oclCommandQueue()
+ {
+ return impl->clCmdQueue;
+ }
+
Info::Info()
{
impl = new Impl;
- impl->oclplatform = 0;
- impl->oclcontext = 0;
- impl->clCmdQueue = 0;
- impl->devnum = 0;
- impl->maxDimensions = 0;
- impl->maxWorkGroupSize = 0;
- impl->maxWorkItemSizes = 0;
- impl->maxComputeUnits = 0;
- impl->double_support = 0;
- //extra_options = 0;
}
+
void Info::release()
{
fft_teardown();
- if(impl->oclplatform)
- {
- impl->oclplatform = 0;
- }
- if(impl->clCmdQueue)
- {
- openCLSafeCall(clReleaseCommandQueue(impl->clCmdQueue));
- }
- ProgramCache::getProgramCache()->releaseProgram();
- if(impl->oclcontext)
- {
- openCLSafeCall(clReleaseContext(impl->oclcontext));
- }
- if(impl->maxWorkItemSizes)
- {
- delete[] impl->maxWorkItemSizes;
- impl->maxWorkItemSizes = 0;
- }
- //if(extra_options)
- //{
- // delete[] extra_options;
- // extra_options = 0;
- //}
- impl->devices.clear();
- impl->devName.clear();
+ impl->release();
+ impl = new Impl;
DeviceName.clear();
}
+
Info::~Info()
{
- release();
- delete impl;
+ fft_teardown();
+ impl->release();
}
+
Info &Info::operator = (const Info &m)
{
- impl->oclplatform = m.impl->oclplatform;
- impl->oclcontext = m.impl->oclcontext;
- impl->clCmdQueue = m.impl->clCmdQueue;
- impl->devnum = m.impl->devnum;
- impl->maxDimensions = m.impl->maxDimensions;
- impl->maxWorkGroupSize = m.impl->maxWorkGroupSize;
- impl->maxWorkItemSizes = m.impl->maxWorkItemSizes;
- impl->maxComputeUnits = m.impl->maxComputeUnits;
- impl->double_support = m.impl->double_support;
- memcpy(impl->extra_options, m.impl->extra_options, 512);
- for(size_t i = 0; i < m.impl->devices.size(); i++)
- {
- impl->devices.push_back(m.impl->devices[i]);
- impl->devName.push_back(m.impl->devName[i]);
- DeviceName.push_back(m.DeviceName[i]);
- }
+ impl->release();
+ impl = m.impl->copy();
+ DeviceName = m.DeviceName;
return *this;
}
+
Info::Info(const Info &m)
{
- impl = new Impl;
- *this = m;
+ impl = m.impl->copy();
+ DeviceName = m.DeviceName;
}
}//namespace ocl
}//namespace cv
+
+#if defined BUILD_SHARED_LIBS && defined CVAPI_EXPORTS && defined WIN32 && !defined WINCE
+#include <windows.h>
+BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID );
+
+BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID )
+{
+ if( fdwReason == DLL_PROCESS_DETACH )
+ {
+ // the application hangs if we call clReleaseCommandQueue here, so release only the context
+ // (without the context release the application hangs as well)
+ cl_context ctx = (cl_context)getoclContext();
+ if(ctx)
+ openCLSafeCall(clReleaseContext(ctx));
+ }
+ return TRUE;
+}
+#endif
int pitch = wholeSize.width * 3 * m.elemSize1();
int tail_padding = m.elemSize1() * 3072;
int err;
- cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
+ cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
(pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
openCLVerifyCall(err);
int pitch = wholecols * 3 * m.elemSize1();
int tail_padding = m.elemSize1() * 3072;
int err;
- cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
+ cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
(pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
openCLVerifyCall(err);
#ifdef CL_VERSION_1_2
if(dst.offset == 0 && dst.cols == dst.wholecols)
{
- clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
+ clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
}
else
{
for(size_t i = 0; i < args.size(); i ++)
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
- openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
+ openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, NULL));
switch(finish_mode)
{
case CLFINISH:
- clFinish(clCxt->impl->clCmdQueue);
+ clFinish((cl_command_queue)clCxt->oclCommandQueue());
case CLFLUSH:
- clFlush(clCxt->impl->clCmdQueue);
+ clFlush((cl_command_queue)clCxt->oclCommandQueue());
break;
case DISABLE:
default:
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
build_options, finish_mode);
}
-
+
cl_mem bindTexture(const oclMat &mat)
{
cl_mem texture;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+ texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
texture = clCreateImage2D(
mat.clCxt->impl->clContext,
cl_mem devData;
if (mat.cols * mat.elemSize() != mat.step)
{
- devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows
+ devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows
* mat.elemSize(), NULL, NULL);
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
- clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
+ clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
}
else
devData = (cl_mem)mat.data;
}
- clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
+ clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0);
if ((mat.cols * mat.elemSize() != mat.step))
{
- clFinish(mat.clCxt->impl->clCmdQueue);
+ clFinish((cl_command_queue)mat.clCxt->oclCommandQueue());
clReleaseMemObject(devData);
}
}
bool support_image2d(Context *clCxt)
- {return false;
+ {
static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}";
static bool _isTested = false;
static bool _support = false;
try
{
cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
- _support = true;
+ //_support = true;
}
catch (const cv::Exception& e)
{
bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
- if (!cv::ocl::Context::getContext()->impl->double_support && is_float)
+ if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE) && is_float)
{
CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
}
cv::Mat dst(dst_a);
a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
- if (!cv::ocl::Context::getContext()->impl->double_support)
+ if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE))
{
for (int i = 0; i < contour->total; ++i)
{
#include "opencv2/ocl/private/util.hpp"
#include "safe_call.hpp"
-using namespace std;
-
-namespace cv
-{
- namespace ocl
- {
- struct Context::Impl
- {
- //Information of the OpenCL context
- cl_context clContext;
- cl_command_queue clCmdQueue;
- cl_device_id devices;
- string devName;
- cl_uint maxDimensions;
- size_t maxWorkGroupSize;
- size_t maxWorkItemSizes[4];
- cl_uint maxComputeUnits;
- int double_support;
- //extra options to recognize vendor specific fp64 extensions
- char extra_options[512];
- string Binpath;
- int unified_memory; //1 means integrated GPU, otherwise this value is 0
- };
- }
-}
-
-
#else /* defined(HAVE_OPENCL) */
static inline void throw_nogpu()
#endif /* defined(HAVE_OPENCL) */
+using namespace std;
+
#endif /* __OPENCV_PRECOMP_H__ */
#ifdef CL_VERSION_1_2
if(dst.offset == 0 && dst.cols == dst.wholecols)
{
- clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
+ clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
}
else
{
static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
level, /*block, */patch, winSize, iters);
}
- clFinish(prevImg.clCxt->impl->clCmdQueue);
+ clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue());
if(errMat)
delete err;
copyTo(uPyr_[idx], u);
copyTo(vPyr_[idx], v);
- clFinish(prevImg.clCxt->impl->clCmdQueue);
+ clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue());
}
static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
{
- if(mat_dst.clCxt -> impl -> double_support == 0 && mat_dst.type() == CV_64F)
+ if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
{
- if(mat_src.clCxt -> impl -> double_support == 0 && mat_src.type() == CV_64F)
+ if(!mat_src.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols));
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap));
- openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(clCxt->impl->clCmdQueue);
+ clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel));
}
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2));
openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL));
- openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(clCxt->impl->clCmdQueue);
+ clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel));
}
////////////////////////////////////////////////////////////////////////////
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize));
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold));
openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL));
- openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(clCxt->impl->clCmdQueue);
+ clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel));
}
//////////////////////////////////////////////////////////////////////////////