From 7b8ad4cb041f9908ce8de24f4ba96e5019e7e637 Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Mon, 18 Mar 2013 01:59:24 +0400 Subject: [PATCH] Refactor OpenCL initialization and allow to use ocl module witout explicit setup --- modules/nonfree/test/test_main.cpp | 6 +- modules/ocl/include/opencv2/ocl/ocl.hpp | 18 +- modules/ocl/src/arithm.cpp | 82 +++--- modules/ocl/src/canny.cpp | 8 +- modules/ocl/src/fft.cpp | 12 +- modules/ocl/src/filtering.cpp | 2 +- modules/ocl/src/gemm.cpp | 11 +- modules/ocl/src/haar.cpp | 24 +- modules/ocl/src/imgproc.cpp | 38 +-- modules/ocl/src/initialization.cpp | 507 +++++++++++++++++--------------- modules/ocl/src/matrix_operations.cpp | 6 +- modules/ocl/src/mcwutil.cpp | 22 +- modules/ocl/src/moments.cpp | 4 +- modules/ocl/src/precomp.hpp | 29 +- modules/ocl/src/pyrlk.cpp | 8 +- modules/ocl/src/split_merge.cpp | 4 +- modules/ocl/src/stereobm.cpp | 12 +- 17 files changed, 414 insertions(+), 379 deletions(-) diff --git a/modules/nonfree/test/test_main.cpp b/modules/nonfree/test/test_main.cpp index f43d833..4f6cfd3 100644 --- a/modules/nonfree/test/test_main.cpp +++ b/modules/nonfree/test/test_main.cpp @@ -7,7 +7,7 @@ using namespace cv::gpu; using namespace cvtest; using namespace testing; -int main(int argc, char** argv) +int main(int argc, char **argv) { try { @@ -50,8 +50,8 @@ int main(int argc, char** argv) TS::ptr()->init("cv"); InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); - } + return RUN_ALL_TESTS(); +} catch (const std::exception& e) { std::cerr << e.what() << std::endl; diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 400e2d3..c321633 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -140,15 +140,23 @@ namespace cv protected: Context(); friend class auto_ptr; - static auto_ptr clCxt; + private: + static auto_ptr clCxt; + static int val; public: ~Context(); - static int val; - static Context *getContext(); + void release(); + Info::Impl* impl; + + static Context* getContext(); static void setContext(Info &oclinfo); - struct Impl; - Impl *impl; + + enum {CL_DOUBLE, CL_UNIFIED_MEM}; + bool supportsFeature(int ftype); + size_t computeUnits(); + void* oclContext(); + void* oclCommandQueue(); }; //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 4e2c819..410e460 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -132,7 +132,7 @@ inline int divUp(int total, int grain) template void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString, void *_scalar) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -195,7 +195,7 @@ static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, } static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -272,7 +272,7 @@ typedef void (*MulDivFunc)(const oclMat &src1, const oclMat &src2, oclMat &dst, void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar) { - if((src1.clCxt -> impl -> double_support != 0) && (src1.depth() == CV_64F)) + if(src1.clCxt->supportsFeature(Context::CL_DOUBLE) && (src1.depth() == CV_64F)) arithmetic_run(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar)); else arithmetic_run(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar)); @@ -280,7 +280,7 @@ void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, doub void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar) { - if(src1.clCxt -> impl -> double_support != 0) + if(src1.clCxt->supportsFeature(Context::CL_DOUBLE)) arithmetic_run(src1, src2, dst, "arithm_div", &arithm_div, (void *)(&scalar)); else arithmetic_run(src1, src2, dst, "arithm_div", &arithm_div, (void *)(&scalar)); @@ -289,7 +289,7 @@ void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double template void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -361,7 +361,7 @@ void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString, double scalar) { - if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -405,7 +405,7 @@ static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelN args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - if(src.clCxt -> impl -> double_support != 0) + if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) args.push_back( make_pair( sizeof(cl_double), (void *)&scalar )); else { @@ -464,7 +464,7 @@ void cv::ocl::subtract(const Scalar &src2, const oclMat &src1, oclMat &dst, cons } void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst) { - if(src.clCxt -> impl -> double_support == 0) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE)) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -524,7 +524,7 @@ static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, str void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -599,7 +599,7 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , template Scalar arithmetic_sum(const oclMat &src, int type = 0) { - size_t groupnum = src.clCxt->impl->maxComputeUnits; + size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen; Context *clCxt = src.clCxt; @@ -627,7 +627,7 @@ Scalar arithmetic_sum(const oclMat &src, int type = 0) typedef Scalar (*sumFunc)(const oclMat &src, int type); Scalar cv::ocl::sum(const oclMat &src) { - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -638,13 +638,13 @@ Scalar cv::ocl::sum(const oclMat &src) }; sumFunc func; - func = functab[src.clCxt->impl->double_support]; + func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; return func(src, 0); } Scalar cv::ocl::absSum(const oclMat &src) { - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -655,13 +655,13 @@ Scalar cv::ocl::absSum(const oclMat &src) }; sumFunc func; - func = functab[src.clCxt->impl->double_support]; + func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; return func(src, 1); } Scalar cv::ocl::sqrSum(const oclMat &src) { - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -672,7 +672,7 @@ Scalar cv::ocl::sqrSum(const oclMat &src) }; sumFunc func; - func = functab[src.clCxt->impl->double_support]; + func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; return func(src, 2); } ////////////////////////////////////////////////////////////////////////////// @@ -771,7 +771,7 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { - size_t groupnum = src.clCxt->impl->maxComputeUnits; + size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); groupnum = groupnum * 2; int vlen = 8; @@ -810,7 +810,7 @@ typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, co void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { CV_Assert(src.oclchannels() == 1); - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -894,7 +894,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) ////////////////////////////////////////////////////////////////////////////// static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) { - if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -943,7 +943,7 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern } static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical) { - if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -1123,7 +1123,7 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernel CV_Assert( src.type() == CV_32F || src.type() == CV_64F); Context *clCxt = src.clCxt; - if(clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + if(!clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -1164,7 +1164,7 @@ void cv::ocl::log(const oclMat &src, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -1212,7 +1212,7 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -1276,7 +1276,7 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angle static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart, string kernelName, bool angleInDegrees) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -1331,7 +1331,7 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, string kernelName) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -1452,7 +1452,7 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, Point *minLoc, Point *maxLoc, const oclMat &mask) { CV_Assert(src.oclchannels() == 1); - size_t groupnum = src.clCxt->impl->maxComputeUnits; + size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); int minloc = -1 , maxloc = -1; int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ; @@ -1513,7 +1513,7 @@ typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal, void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, Point *minLoc, Point *maxLoc, const oclMat &mask) { - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1524,7 +1524,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, }; minMaxLocFunc func; - func = functab[src.clCxt->impl->double_support]; + func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; func(src, minVal, maxVal, minLoc, maxLoc, mask); } @@ -1559,8 +1559,8 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen int cv::ocl::countNonZero(const oclMat &src) { - size_t groupnum = src.clCxt->impl->maxComputeUnits; - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + size_t groupnum = src.clCxt->computeUnits(); + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1845,7 +1845,7 @@ static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst) { - if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -1858,7 +1858,7 @@ void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst) void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask) { // dst.create(src1.size(),src1.type()); - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -1874,7 +1874,7 @@ void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, co void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -1889,7 +1889,7 @@ void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, co void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask) { // dst.create(src1.size(),src1.type()); - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -1906,7 +1906,7 @@ void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, c void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -1920,7 +1920,7 @@ void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, c void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -1939,7 +1939,7 @@ void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, c void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -2036,7 +2036,7 @@ oclMatExpr::operator oclMat() const #define BLOCK_ROWS (256/TILE_DIM) static void transpose_run(const oclMat &src, oclMat &dst, string kernelName) { - if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -2135,7 +2135,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset)); - if(src1.clCxt -> impl -> double_support != 0) + if(src1.clCxt->supportsFeature(Context::CL_DOUBLE)) { args.push_back( make_pair( sizeof(cl_double), (void *)&alpha )); args.push_back( make_pair( sizeof(cl_double), (void *)&beta )); @@ -2282,7 +2282,7 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - if(src1.clCxt -> impl -> double_support == 0) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE)) { float pf = p; args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); @@ -2294,7 +2294,7 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string } void cv::ocl::pow(const oclMat &x, double p, oclMat &y) { - if(x.clCxt -> impl -> double_support == 0 && x.type() == CV_64F) + if(!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index 23720a2..ae92bc7 100644 --- a/modules/ocl/src/canny.cpp +++ b/modules/ocl/src/canny.cpp @@ -98,7 +98,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size) { openCLFree(counter); } - counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err ); + counter = clCreateBuffer( (cl_context)getoclContext(), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err ); openCLSafeCall(err); } @@ -354,7 +354,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols) { unsigned int count; - openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); + openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); Context *clCxt = map.clCxt; string kernelName = "edgesHysteresisGlobal"; vector< pair > args; @@ -364,7 +364,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi int count_i[1] = {0}; while(count > 0) { - openCLSafeCall(clEnqueueWriteBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL)); args.clear(); size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1}; @@ -379,7 +379,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE); - openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); + openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); std::swap(st1, st2); } #undef DIVUP diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index aab2a04..36c6358 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -206,7 +206,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1]; clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1]; - openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) ); + openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, (cl_context)getoclContext(), dim, clLengthsIn ) ); openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); @@ -220,7 +220,8 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) ); //ready to bake - openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) ); + cl_command_queue clq = (cl_command_queue)getoclCommandQueue(); + openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &clq, NULL, NULL ) ); } cv::ocl::FftPlan::~FftPlan() { @@ -338,16 +339,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) if (buffersize) { cl_int medstatus; - clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus); + clMedBuffer = clCreateBuffer ( (cl_context)src.clCxt->oclContext(), CL_MEM_READ_WRITE, buffersize, 0, &medstatus); openCLSafeCall( medstatus ); } + cl_command_queue clq = (cl_command_queue)src.clCxt->oclCommandQueue(); openCLSafeCall( clAmdFftEnqueueTransform( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, 1, - &src.clCxt->impl->clCmdQueue, + &clq, 0, NULL, NULL, (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) ); - openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) ); + openCLSafeCall( clFinish(clq) ); if(clMedBuffer) { openCLFree(clMedBuffer); diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 6dbb492..2f4a494 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -1478,7 +1478,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale) { - if (src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index be7e79c..840f628 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -87,7 +87,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, int offb = src2.offset; int offc = dst.offset; - + cl_command_queue clq = (cl_command_queue)src1.clCxt->oclCommandQueue(); switch(src1.type()) { case CV_32FC1: @@ -97,11 +97,12 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, offa /= sizeof(float); offb /= sizeof(float); offc /= sizeof(float); + openCLSafeCall ( clAmdBlasSgemmEx(order, transA, transB, M, N, K, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, - beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); break; case CV_64FC1: @@ -115,7 +116,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ( clAmdBlasDgemmEx(order, transA, transB, M, N, K, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, - beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); break; case CV_32FC2: @@ -132,7 +133,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ( clAmdBlasCgemmEx(order, transA, transB, M, N, K, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, - beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); } break; @@ -150,7 +151,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ( clAmdBlasZgemmEx(order, transA, transB, M, N, K, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, - beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); } break; diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 506dc6b..4e0f5b8 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -971,7 +971,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->computeUnits()) *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -1047,21 +1047,21 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count); //openCLVerifyCall(status); - openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); //classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status); //status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL); nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode)); //openCLVerifyCall(status); - openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0, + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0, nodenum * sizeof(GpuHidHaarTreeNode), node, 0, NULL, NULL)); candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz); //openCLVerifyCall(status); scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount); //openCLVerifyCall(status); - openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL)); //flag = 1; //} @@ -1186,7 +1186,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS int grp_per_CU = 12; size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->impl->maxComputeUnits *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -1195,7 +1195,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode)); //openCLVerifyCall(status); - openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0, + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0, nodenum * sizeof(GpuHidHaarTreeNode), node, 0, NULL, NULL)); cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE, @@ -1252,16 +1252,16 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS int splitnode = stage[0].count + stage[1].count + stage[2].count; stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count); //openCLVerifyCall(status); - openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz); //openCLVerifyCall(status); scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount); //openCLVerifyCall(status); - openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL)); pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount); - openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL)); correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount); - openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL)); //int argcount = 0; vector > args; @@ -1286,7 +1286,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL)); - candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status); + candidate = (int *)clEnqueueMapBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status); for(int i = 0; i < outputsz; i++) { @@ -1297,7 +1297,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS free(scaleinfo); free(p); free(correction); - clEnqueueUnmapMemObject(gsum.clCxt->impl->clCmdQueue, candidatebuffer, candidate, 0, 0, 0); + clEnqueueUnmapMemObject((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, candidate, 0, 0, 0); openCLSafeCall(clReleaseMemObject(stagebuffer)); openCLSafeCall(clReleaseMemObject(scaleinfobuffer)); openCLSafeCall(clReleaseMemObject(nodebuffer)); diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 9b6cf74..04f732f 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -290,8 +290,8 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; - - if(src.clCxt -> impl -> double_support != 0) + + if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } @@ -319,7 +319,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); - if(src.clCxt -> impl -> double_support != 0) + if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } @@ -383,7 +383,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); - if(src.clCxt -> impl -> double_support != 0) + if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d)); args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d)); @@ -824,12 +824,12 @@ namespace cv string kernelName = "warpAffine" + s[interpolation]; - if(src.clCxt -> impl -> double_support != 0) + if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) { cl_int st; - coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); } else { @@ -839,8 +839,8 @@ namespace cv { float_coeffs[m][n] = coeffs[m][n]; } - coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); + coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); } //TODO: improve this kernel @@ -894,12 +894,12 @@ namespace cv string s[3] = {"NN", "Linear", "Cubic"}; string kernelName = "warpPerspective" + s[interpolation]; - if(src.clCxt -> impl -> double_support != 0) + if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) { cl_int st; - coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); } else { @@ -908,9 +908,9 @@ namespace cv for(int n = 0; n < 3; n++) float_coeffs[m][n] = coeffs[m][n]; - coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); } //TODO: improve this kernel size_t blkSizeX = 16, blkSizeY = 16; @@ -1018,7 +1018,7 @@ namespace cv void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) { CV_Assert(src.type() == CV_8UC1); - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1192,7 +1192,7 @@ namespace cv void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int borderType) { - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1206,7 +1206,7 @@ namespace cv void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) { - if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1260,7 +1260,7 @@ namespace cv if( src.depth() != CV_8U || src.oclchannels() != 4 ) CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); - // if(src.clCxt->impl->double_support == 0) + // if(!src.clCxt->supportsFeature(Context::CL_DOUBLE)) // { // CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n"); // } @@ -1328,7 +1328,7 @@ namespace cv if( src.depth() != CV_8U || src.oclchannels() != 4 ) CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); - // if(src.clCxt->impl->double_support == 0) + // if(!src.clCxt->supportsFeature(Context::CL_DOUBLE)) // { // CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n"); // } diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index 7782046..3f4c316 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -77,7 +77,7 @@ namespace cv ProgramCache *programCache = NULL; DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; - int gDevMemTypeValueMap[5] = {0, + int gDevMemTypeValueMap[5] = {0, CL_MEM_ALLOC_HOST_PTR, CL_MEM_USE_HOST_PTR, CL_MEM_COPY_HOST_PTR, @@ -124,26 +124,8 @@ namespace cv cacheSize = 0; } - ////////////////////////Common OpenCL specific calls/////////////// - int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type) - { - rw_type = gDeviceMemRW; - mem_type = gDeviceMemType; - return Context::getContext()->impl->unified_memory; - } - int setDevMemType(DevMemRW rw_type, DevMemType mem_type) - { - if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) || - mem_type == DEVICE_MEM_UHP || - mem_type == DEVICE_MEM_CHP ) - return -1; - gDeviceMemRW = rw_type; - gDeviceMemType = mem_type; - return 0; - } - - struct Info::Impl + struct Info::Impl { cl_platform_id oclplatform; std::vector devices; @@ -152,18 +134,144 @@ namespace cv cl_context oclcontext; cl_command_queue clCmdQueue; int devnum; - cl_uint maxDimensions; size_t maxWorkGroupSize; - size_t *maxWorkItemSizes; + cl_uint maxDimensions; // == maxWorkItemSizes.size() + std::vector maxWorkItemSizes; cl_uint maxComputeUnits; char extra_options[512]; int double_support; + int unified_memory; //1 means integrated GPU, otherwise this value is 0 + string binpath; + int refcounter; + Impl() { + refcounter = 1; + oclplatform = 0; + oclcontext = 0; + clCmdQueue = 0; + devnum = -1; + maxComputeUnits = 0; + maxWorkGroupSize = 0; memset(extra_options, 0, 512); + double_support = 0; + unified_memory = 0; + } + + void setDevice(void *ctx, void *q, int devnum); + + void release() + { + if(1 == CV_XADD(&refcounter, -1)) + { + releaseResources(); + delete this; + } + } + + Impl* copy() + { + CV_XADD(&refcounter, 1); + return this; } + + private: + Impl(const Impl&); + Impl& operator=(const Impl&); + void releaseResources(); }; + void Info::Impl::releaseResources() + { + devnum = -1; + + if(clCmdQueue) + { + openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); + clCmdQueue = 0; + } + + if(oclcontext) + { + openCLSafeCall(clReleaseContext(oclcontext)); + oclcontext = 0; + } + } + + void Info::Impl::setDevice(void *ctx, void *q, int dnum) + { + if((ctx && q) || devnum != dnum) + releaseResources(); + + CV_Assert(dnum >= 0 && dnum < (int)devices.size()); + devnum = dnum; + if(ctx && q) + { + oclcontext = (cl_context)ctx; + clCmdQueue = (cl_command_queue)q; + clRetainContext(oclcontext); + clRetainCommandQueue(clCmdQueue); + } + else + { + cl_int status = 0; + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(oclplatform), 0 }; + oclcontext = clCreateContext(cps, 1, &devices[devnum], 0, 0, &status); + openCLVerifyCall(status); + clCmdQueue = clCreateCommandQueue(oclcontext, devices[devnum], CL_QUEUE_PROFILING_ENABLE, &status); + openCLVerifyCall(status); + } + + openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&maxWorkGroupSize, 0)); + openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void *)&maxDimensions, 0)); + maxWorkItemSizes.resize(maxDimensions); + openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxDimensions, (void *)&maxWorkItemSizes[0], 0)); + openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), (void *)&maxComputeUnits, 0)); + + cl_bool unfymem = false; + openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool), (void *)&unfymem, 0)); + unified_memory = unfymem ? 1 : 0; + + //initialize extra options for compilation. Currently only fp64 is included. + //Assume 4KB is enough to store all possible extensions. + const int EXT_LEN = 4096 + 1 ; + char extends_set[EXT_LEN]; + size_t extends_size; + openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size)); + extends_set[EXT_LEN - 1] = 0; + size_t fp64_khr = std::string(extends_set).find("cl_khr_fp64"); + + if(fp64_khr != std::string::npos) + { + sprintf(extra_options, "-D DOUBLE_SUPPORT"); + double_support = 1; + } + else + { + memset(extra_options, 0, 512); + double_support = 0; + } + } + + ////////////////////////Common OpenCL specific calls/////////////// + int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type) + { + rw_type = gDeviceMemRW; + mem_type = gDeviceMemType; + return Context::getContext()->impl->unified_memory; + } + + int setDevMemType(DevMemRW rw_type, DevMemType mem_type) + { + if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) || + mem_type == DEVICE_MEM_UHP || + mem_type == DEVICE_MEM_CHP ) + return -1; + gDeviceMemRW = rw_type; + gDeviceMemType = mem_type; + return 0; + } + inline int divUp(int total, int grain) { return (total + grain - 1) / grain; @@ -171,6 +279,9 @@ namespace cv int getDevice(std::vector &oclinfo, int devicetype) { + //TODO: cache oclinfo vector + oclinfo.clear(); + switch(devicetype) { case CVCL_DEVICE_TYPE_DEFAULT: @@ -180,125 +291,62 @@ namespace cv case CVCL_DEVICE_TYPE_ALL: break; default: - CV_Error(CV_GpuApiCallError, "Unkown device type"); + return 0; } - int devcienums = 0; + // Platform info - cl_int status = 0; cl_uint numPlatforms; - Info ocltmpinfo; - openCLSafeCall(clGetPlatformIDs(0, NULL, &numPlatforms)); - CV_Assert(numPlatforms > 0); - cl_platform_id *platforms = new cl_platform_id[numPlatforms]; + openCLSafeCall(clGetPlatformIDs(0, 0, &numPlatforms)); + if(numPlatforms < 1) return 0; + + std::vector platforms(numPlatforms); + openCLSafeCall(clGetPlatformIDs(numPlatforms, &platforms[0], 0)); - openCLSafeCall(clGetPlatformIDs(numPlatforms, platforms, NULL)); char deviceName[256]; + int devcienums = 0; for (unsigned i = 0; i < numPlatforms; ++i) { cl_uint numsdev; - status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev); + cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev); if(status != CL_DEVICE_NOT_FOUND) - { openCLVerifyCall(status); - } + if(numsdev > 0) { devcienums += numsdev; - cl_device_id *devices = new cl_device_id[numsdev]; - openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, devices, NULL)); + std::vector devices(numsdev); + openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, &devices[0], 0)); + + Info ocltmpinfo; ocltmpinfo.impl->oclplatform = platforms[i]; - for(unsigned j = 0; j < numsdev; j++) + for(unsigned j = 0; j < numsdev; ++j) { ocltmpinfo.impl->devices.push_back(devices[j]); - openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 256, deviceName, NULL)); - ocltmpinfo.impl->devName.push_back(std::string(deviceName)); - ocltmpinfo.DeviceName.push_back(std::string(deviceName)); + openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(deviceName), deviceName, 0)); + ocltmpinfo.impl->devName.push_back(deviceName); + ocltmpinfo.DeviceName.push_back(deviceName); } - delete[] devices; oclinfo.push_back(ocltmpinfo); - ocltmpinfo.release(); } } - delete[] platforms; - if(devcienums > 0) - { - setDevice(oclinfo[0]); - } return devcienums; } - static void fillClcontext(Info &oclinfo) - { - //get device information - size_t devnum = oclinfo.impl->devnum; - - openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE, - sizeof(size_t), (void *)&oclinfo.impl->maxWorkGroupSize, NULL)); - openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - sizeof(cl_uint), (void *)&oclinfo.impl->maxDimensions, NULL)); - oclinfo.impl->maxWorkItemSizes = new size_t[oclinfo.impl->maxDimensions]; - openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES, - sizeof(size_t)*oclinfo.impl->maxDimensions, (void *)oclinfo.impl->maxWorkItemSizes, NULL)); - openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS, - sizeof(cl_uint), (void *)&oclinfo.impl->maxComputeUnits, NULL)); - //initialize extra options for compilation. Currently only fp64 is included. - //Assume 4KB is enough to store all possible extensions. - - const int EXT_LEN = 4096 + 1 ; - char extends_set[EXT_LEN]; - size_t extends_size; - openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_EXTENSIONS, - EXT_LEN, (void *)extends_set, &extends_size)); - CV_Assert(extends_size < (size_t)EXT_LEN); - extends_set[EXT_LEN - 1] = 0; - memset(oclinfo.impl->extra_options, 0, 512); - oclinfo.impl->double_support = 0; - int fp64_khr = string(extends_set).find("cl_khr_fp64"); - - if(fp64_khr >= 0 && fp64_khr < EXT_LEN) - { - sprintf(oclinfo.impl->extra_options , "-D DOUBLE_SUPPORT"); - oclinfo.impl -> double_support = 1; - } - Context::setContext(oclinfo); - - } - void setDevice(Info &oclinfo, int devnum) { - CV_Assert(devnum >= 0); - cl_int status = 0; - cl_context_properties cps[3] = - { - CL_CONTEXT_PLATFORM, (cl_context_properties)(oclinfo.impl->oclplatform), 0 - }; - oclinfo.impl->devnum = devnum; - oclinfo.impl->oclcontext = clCreateContext(cps, 1, &oclinfo.impl->devices[devnum], NULL, NULL, &status); - openCLVerifyCall(status); - //create the command queue using the first device of the list - oclinfo.impl->clCmdQueue = clCreateCommandQueue(oclinfo.impl->oclcontext, oclinfo.impl->devices[devnum], - CL_QUEUE_PROFILING_ENABLE, &status); - openCLVerifyCall(status); - fillClcontext(oclinfo); + oclinfo.impl->setDevice(0, 0, devnum); + Context::setContext(oclinfo); } void setDeviceEx(Info &oclinfo, void *ctx, void *q, int devnum) { - CV_Assert(devnum >= 0); - oclinfo.impl->devnum = devnum; - if(ctx && q) - { - oclinfo.impl->oclcontext = (cl_context)ctx; - oclinfo.impl->clCmdQueue = (cl_command_queue)q; - clRetainContext((cl_context)ctx); - clRetainCommandQueue((cl_command_queue)q); - fillClcontext(oclinfo); - } + oclinfo.impl->setDevice(ctx, q, devnum); + Context::setContext(oclinfo); } void *getoclContext() { - return &(Context::getContext()->impl->clContext); + return &(Context::getContext()->impl->oclcontext); } void *getoclCommandQueue() @@ -316,7 +364,7 @@ namespace cv cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size) { cl_int status; - cl_mem buffer = clCreateBuffer(clCxt->impl->clContext, (cl_mem_flags)flag, size, NULL, &status); + cl_mem buffer = clCreateBuffer(clCxt->impl->oclcontext, (cl_mem_flags)flag, size, NULL, &status); openCLVerifyCall(status); return buffer; } @@ -331,7 +379,7 @@ namespace cv size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type) { cl_int status; - *dev_ptr = clCreateBuffer(clCxt->impl->clContext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type], + *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type], widthInBytes * height, 0, &status); openCLVerifyCall(status); *pitch = widthInBytes; @@ -397,7 +445,7 @@ namespace cv void setBinpath(const char *path) { Context *clcxt = Context::getContext(); - clcxt->impl->Binpath = path; + clcxt->impl->binpath = path; } int savetofile(const Context*, cl_program &program, const char *fileName) @@ -441,11 +489,11 @@ namespace cv if(NULL != build_options) { - src_sign << (int64)(*source) << clCxt->impl->clContext << "_" << build_options; + src_sign << (int64)(*source) << clCxt->impl->oclcontext << "_" << build_options; } else { - src_sign << (int64)(*source) << clCxt->impl->clContext; + src_sign << (int64)(*source) << clCxt->impl->oclcontext; } srcsign = src_sign.str(); @@ -465,24 +513,24 @@ namespace cv strcat(all_build_options, build_options); if(all_build_options != NULL) { - filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + all_build_options + ".clb"; + filename = clCxt->impl->binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + all_build_options + ".clb"; } else { - filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + ".clb"; + filename = clCxt->impl->binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb"; } FILE *fp = fopen(filename.c_str(), "rb"); - if(fp == NULL || clCxt->impl->Binpath.size() == 0) //we should generate a binary file for the first time. + if(fp == NULL || clCxt->impl->binpath.size() == 0) //we should generate a binary file for the first time. { if(fp != NULL) fclose(fp); program = clCreateProgramWithSource( - clCxt->impl->clContext, 1, source, NULL, &status); + clCxt->impl->oclcontext, 1, source, NULL, &status); openCLVerifyCall(status); - status = clBuildProgram(program, 1, &(clCxt->impl->devices), all_build_options, NULL, NULL); - if(status == CL_SUCCESS && clCxt->impl->Binpath.size()) + status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL); + if(status == CL_SUCCESS && clCxt->impl->binpath.size()) savetofile(clCxt, program, filename.c_str()); } else @@ -494,15 +542,15 @@ namespace cv CV_Assert(1 == fread(binary, binarySize, 1, fp)); fclose(fp); cl_int status = 0; - program = clCreateProgramWithBinary(clCxt->impl->clContext, + program = clCreateProgramWithBinary(clCxt->impl->oclcontext, 1, - &(clCxt->impl->devices), + &(clCxt->impl->devices[clCxt->impl->devnum]), (const size_t *)&binarySize, (const unsigned char **)&binary, NULL, &status); openCLVerifyCall(status); - status = clBuildProgram(program, 1, &(clCxt->impl->devices), all_build_options, NULL, NULL); + status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL); delete[] binary; } @@ -514,14 +562,14 @@ namespace cv char *buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo(program, - clCxt->impl->devices, CL_PROGRAM_BUILD_LOG, buildLogSize, + clCxt->impl->devices[clCxt->impl->devnum], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize); if(logStatus != CL_SUCCESS) cout << "Failed to build the program and get the build info." << endl; buildLog = new char[buildLogSize]; CV_DbgAssert(!!buildLog); memset(buildLog, 0, buildLogSize); - openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices, + openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[clCxt->impl->devnum], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); cout << "\n\t\t\tBUILD LOG\n"; cout << buildLog << endl; @@ -543,7 +591,7 @@ namespace cv void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads) { size_t kernelWorkGroupSize; - openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices, + openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[clCxt->impl->devnum], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); CV_Assert( (localThreads[0] <= clCxt->impl->maxWorkItemSizes[0]) && (localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) && @@ -663,10 +711,10 @@ namespace cv cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; #endif } - + double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options, + vector< pair > &args, int channels, int depth, const char *build_options, bool finish, bool measureKernelTime, bool cleanUp) { @@ -763,7 +811,7 @@ namespace cv f.read(str, fileSize); f.close(); str[size] = '\0'; - + s = str; delete[] str; return 0; @@ -774,7 +822,7 @@ namespace cv double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName, size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options, + vector< pair > &args, int channels, int depth, const char *build_options, bool finish, bool measureKernelTime, bool cleanUp) { @@ -794,8 +842,8 @@ namespace cv delete []source; return kernelTime; } - - cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, + + cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, const size_t size) { int status; @@ -814,142 +862,143 @@ namespace cv /////////////////////////////OpenCL initialization///////////////// auto_ptr Context::clCxt; int Context::val = 0; - Mutex cs; - Context *Context::getContext() + static Mutex cs; + Context* Context::getContext() { - if(val == 0) + if(*((volatile int*)&val) != 1) { AutoLock al(cs); - if( NULL == clCxt.get()) + if(*((volatile int*)&val) != 1) + { + if( 0 == clCxt.get()) + clCxt.reset(new Context); + + std::vector oclinfo; + CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0); + oclinfo[0].impl->setDevice(0, 0, 0); + clCxt.get()->impl = oclinfo[0].impl->copy(); + + *((volatile int*)&val) = 1; + } + } + return clCxt.get(); + } + + void Context::setContext(Info &oclinfo) + { + AutoLock guard(cs); + if(*((volatile int*)&val) != 1) + { + if( 0 == clCxt.get()) clCxt.reset(new Context); - val = 1; - return clCxt.get(); + clCxt.get()->impl = oclinfo.impl->copy(); + + *((volatile int*)&val) = 1; } else { - return clCxt.get(); + clCxt.get()->impl->release(); + clCxt.get()->impl = oclinfo.impl->copy(); } } - void Context::setContext(Info &oclinfo) - { - Context *clcxt = getContext(); - clcxt->impl->clContext = oclinfo.impl->oclcontext; - clcxt->impl->clCmdQueue = oclinfo.impl->clCmdQueue; - clcxt->impl->devices = oclinfo.impl->devices[oclinfo.impl->devnum]; - clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum]; - clcxt->impl->maxDimensions = oclinfo.impl->maxDimensions; - clcxt->impl->maxWorkGroupSize = oclinfo.impl->maxWorkGroupSize; - for(size_t i=0; iimpl->maxDimensions && i<4; i++) - clcxt->impl->maxWorkItemSizes[i] = oclinfo.impl->maxWorkItemSizes[i]; - clcxt->impl->maxComputeUnits = oclinfo.impl->maxComputeUnits; - clcxt->impl->double_support = oclinfo.impl->double_support; - //extra options to recognize compiler options - memcpy(clcxt->impl->extra_options, oclinfo.impl->extra_options, 512); - cl_bool unfymem = false; - openCLSafeCall(clGetDeviceInfo(clcxt->impl->devices, CL_DEVICE_HOST_UNIFIED_MEMORY, - sizeof(cl_bool), (void *)&unfymem, NULL)); - if(unfymem) - clcxt->impl->unified_memory = 1; - } + Context::Context() { - impl = new Impl; - //Information of the OpenCL context - impl->clContext = NULL; - impl->clCmdQueue = NULL; - impl->devices = NULL; - impl->maxDimensions = 0; - impl->maxWorkGroupSize = 0; - for(int i=0; i<4; i++) - impl->maxWorkItemSizes[i] = 0; - impl->maxComputeUnits = 0; - impl->double_support = 0; - //extra options to recognize vendor specific fp64 extensions - memset(impl->extra_options, 0, 512); - impl->unified_memory = 0; + impl = 0; programCache = ProgramCache::getProgramCache(); } Context::~Context() { - delete impl; + release(); + } + + void Context::release() + { + if (impl) + impl->release(); programCache->releaseProgram(); } + + bool Context::supportsFeature(int ftype) + { + switch(ftype) + { + case CL_DOUBLE: + return impl->double_support == 1; + case CL_UNIFIED_MEM: + return impl->unified_memory == 1; + default: + return false; + } + } + + size_t Context::computeUnits() + { + return impl->maxComputeUnits; + } + + void* Context::oclContext() + { + return impl->oclcontext; + } + + void* Context::oclCommandQueue() + { + return impl->clCmdQueue; + } + Info::Info() { impl = new Impl; - impl->oclplatform = 0; - impl->oclcontext = 0; - impl->clCmdQueue = 0; - impl->devnum = 0; - impl->maxDimensions = 0; - impl->maxWorkGroupSize = 0; - impl->maxWorkItemSizes = 0; - impl->maxComputeUnits = 0; - impl->double_support = 0; - //extra_options = 0; } + void Info::release() { fft_teardown(); - if(impl->oclplatform) - { - impl->oclplatform = 0; - } - if(impl->clCmdQueue) - { - openCLSafeCall(clReleaseCommandQueue(impl->clCmdQueue)); - } - ProgramCache::getProgramCache()->releaseProgram(); - if(impl->oclcontext) - { - openCLSafeCall(clReleaseContext(impl->oclcontext)); - } - if(impl->maxWorkItemSizes) - { - delete[] impl->maxWorkItemSizes; - impl->maxWorkItemSizes = 0; - } - //if(extra_options) - //{ - // delete[] extra_options; - // extra_options = 0; - //} - impl->devices.clear(); - impl->devName.clear(); + impl->release(); + impl = new Impl; DeviceName.clear(); } + Info::~Info() { - release(); - delete impl; + fft_teardown(); + impl->release(); } + Info &Info::operator = (const Info &m) { - impl->oclplatform = m.impl->oclplatform; - impl->oclcontext = m.impl->oclcontext; - impl->clCmdQueue = m.impl->clCmdQueue; - impl->devnum = m.impl->devnum; - impl->maxDimensions = m.impl->maxDimensions; - impl->maxWorkGroupSize = m.impl->maxWorkGroupSize; - impl->maxWorkItemSizes = m.impl->maxWorkItemSizes; - impl->maxComputeUnits = m.impl->maxComputeUnits; - impl->double_support = m.impl->double_support; - memcpy(impl->extra_options, m.impl->extra_options, 512); - for(size_t i = 0; i < m.impl->devices.size(); i++) - { - impl->devices.push_back(m.impl->devices[i]); - impl->devName.push_back(m.impl->devName[i]); - DeviceName.push_back(m.DeviceName[i]); - } + impl->release(); + impl = m.impl->copy(); + DeviceName = m.DeviceName; return *this; } + Info::Info(const Info &m) { - impl = new Impl; - *this = m; + impl = m.impl->copy(); + DeviceName = m.DeviceName; } }//namespace ocl }//namespace cv + +#if defined BUILD_SHARED_LIBS && defined CVAPI_EXPORTS && defined WIN32 && !defined WINCE +#include +BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID ); + +BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID ) +{ + if( fdwReason == DLL_PROCESS_DETACH ) + { + // application hangs if call clReleaseCommandQueue here, so release context only + // without context release application hangs as well + cl_context ctx = (cl_context)getoclContext(); + if(ctx) + openCLSafeCall(clReleaseContext(ctx)); + } + return TRUE; +} +#endif diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index f859193..ce96e3a 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -190,7 +190,7 @@ void cv::ocl::oclMat::upload(const Mat &m) int pitch = wholeSize.width * 3 * m.elemSize1(); int tail_padding = m.elemSize1() * 3072; int err; - cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE, + cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err); openCLVerifyCall(err); @@ -242,7 +242,7 @@ void cv::ocl::oclMat::download(cv::Mat &m) const int pitch = wholecols * 3 * m.elemSize1(); int tail_padding = m.elemSize1() * 3072; int err; - cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE, + cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, (pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err); openCLVerifyCall(err); @@ -595,7 +595,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri #ifdef CL_VERSION_1_2 if(dst.offset == 0 && dst.cols == dst.wholecols) { - clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); + clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); } else { diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index ffa8095..bc64fa2 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -94,15 +94,15 @@ namespace cv for(size_t i = 0; i < args.size(); i ++) openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, + openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads, localThreads, 0, NULL, NULL)); switch(finish_mode) { case CLFINISH: - clFinish(clCxt->impl->clCmdQueue); + clFinish((cl_command_queue)clCxt->oclCommandQueue()); case CLFLUSH: - clFlush(clCxt->impl->clCmdQueue); + clFlush((cl_command_queue)clCxt->oclCommandQueue()); break; case DISABLE: default: @@ -126,7 +126,7 @@ namespace cv openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, build_options, finish_mode); } - + cl_mem bindTexture(const oclMat &mat) { cl_mem texture; @@ -177,7 +177,7 @@ namespace cv desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; - texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); + texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err); #else texture = clCreateImage2D( mat.clCxt->impl->clContext, @@ -195,10 +195,10 @@ namespace cv cl_mem devData; if (mat.cols * mat.elemSize() != mat.step) { - devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows + devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows * mat.elemSize(), NULL, NULL); const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; - clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin, + clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin, regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); } else @@ -206,10 +206,10 @@ namespace cv devData = (cl_mem)mat.data; } - clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0); + clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0); if ((mat.cols * mat.elemSize() != mat.step)) { - clFinish(mat.clCxt->impl->clCmdQueue); + clFinish((cl_command_queue)mat.clCxt->oclCommandQueue()); clReleaseMemObject(devData); } @@ -223,7 +223,7 @@ namespace cv } bool support_image2d(Context *clCxt) - {return false; + { static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}"; static bool _isTested = false; static bool _support = false; @@ -234,7 +234,7 @@ namespace cv try { cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func"); - _support = true; + //_support = true; } catch (const cv::Exception& e) { diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp index 4abca03..285041d 100644 --- a/modules/ocl/src/moments.cpp +++ b/modules/ocl/src/moments.cpp @@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2; - if (!cv::ocl::Context::getContext()->impl->double_support && is_float) + if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE) && is_float) { CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!"); } @@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) cv::Mat dst(dst_a); a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0; - if (!cv::ocl::Context::getContext()->impl->double_support) + if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE)) { for (int i = 0; i < contour->total; ++i) { diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index 2c84e5a..b2a3e41 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -81,33 +81,6 @@ #include "opencv2/ocl/private/util.hpp" #include "safe_call.hpp" -using namespace std; - -namespace cv -{ - namespace ocl - { - struct Context::Impl - { - //Information of the OpenCL context - cl_context clContext; - cl_command_queue clCmdQueue; - cl_device_id devices; - string devName; - cl_uint maxDimensions; - size_t maxWorkGroupSize; - size_t maxWorkItemSizes[4]; - cl_uint maxComputeUnits; - int double_support; - //extra options to recognize vendor specific fp64 extensions - char extra_options[512]; - string Binpath; - int unified_memory; //1 means integrated GPU, otherwise this value is 0 - }; - } -} - - #else /* defined(HAVE_OPENCL) */ static inline void throw_nogpu() @@ -117,4 +90,6 @@ static inline void throw_nogpu() #endif /* defined(HAVE_OPENCL) */ +using namespace std; + #endif /* __OPENCV_PRECOMP_H__ */ diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index 2fac42a..c8d4b52 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -357,7 +357,7 @@ static void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, #ifdef CL_VERSION_1_2 if(dst.offset == 0 && dst.cols == dst.wholecols) { - clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); + clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); } else { @@ -464,7 +464,7 @@ static void copyTo(const oclMat &src, oclMat &m ) static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar) { - if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) + if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -712,7 +712,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next level, /*block, */patch, winSize, iters); } - clFinish(prevImg.clCxt->impl->clCmdQueue); + clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue()); if(errMat) delete err; @@ -851,5 +851,5 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI copyTo(uPyr_[idx], u); copyTo(vPyr_[idx], v); - clFinish(prevImg.clCxt->impl->clCmdQueue); + clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue()); } diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index e7aad43..de3d270 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -130,7 +130,7 @@ namespace cv static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst) { - if(mat_dst.clCxt -> impl -> double_support == 0 && mat_dst.type() == CV_64F) + if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -279,7 +279,7 @@ namespace cv static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst) { - if(mat_src.clCxt -> impl -> double_support == 0 && mat_src.type() == CV_64F) + if(!mat_src.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; diff --git a/modules/ocl/src/stereobm.cpp b/modules/ocl/src/stereobm.cpp index 57e14f9..fe3b255 100644 --- a/modules/ocl/src/stereobm.cpp +++ b/modules/ocl/src/stereobm.cpp @@ -90,10 +90,10 @@ static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterC openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols)); openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap)); - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, + openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(clCxt->impl->clCmdQueue); + clFinish((cl_command_queue)clCxt->oclCommandQueue()); openCLSafeCall(clReleaseKernel(kernel)); } @@ -150,11 +150,11 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp, openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2)); openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL)); - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(clCxt->impl->clCmdQueue); + clFinish((cl_command_queue)clCxt->oclCommandQueue()); openCLSafeCall(clReleaseKernel(kernel)); } //////////////////////////////////////////////////////////////////////////// @@ -188,10 +188,10 @@ static void postfilter_textureness(oclMat &left, int winSize, openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize)); openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold)); openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL)); - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(clCxt->impl->clCmdQueue); + clFinish((cl_command_queue)clCxt->oclCommandQueue()); openCLSafeCall(clReleaseKernel(kernel)); } ////////////////////////////////////////////////////////////////////////////// -- 2.7.4