}
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
- size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
+ size_t wave_size = queryWaveFrontSize(kernel);
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
{
namespace ocl
{
- using std::auto_ptr;
- enum
+ enum DeviceType
{
CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
CVCL_DEVICE_TYPE_CPU = (1 << 1),
//return -1 if the target type is unsupported, otherwise return 0
CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
- //this class contains ocl runtime information
- class CV_EXPORTS Info
+ // these classes contain OpenCL runtime information
+
+ struct PlatformInfo;
+
+ // Describes one OpenCL device as reported by the runtime.
+ // Instances are obtained through getOpenCLDevices() and passed to setDevice().
+ struct DeviceInfo
{
- public:
- struct Impl;
- Impl *impl;
+ int _id; // reserved, don't use it
- Info();
- Info(const Info &m);
- ~Info();
- void release();
- Info &operator = (const Info &m);
- std::vector<string> DeviceName;
+ DeviceType deviceType;
+ std::string deviceProfile;
+ std::string deviceVersion;
+ std::string deviceName;
+ std::string deviceVendor;
+ int deviceVendorId;
+ std::string deviceDriverVersion;
+ std::string deviceExtensions;
+
+ size_t maxWorkGroupSize;
+ std::vector<size_t> maxWorkItemSizes;
+ int maxComputeUnits;
+ size_t localMemorySize;
+
+ // OpenCL version implemented by the device, split into numeric components.
+ int deviceVersionMajor;
+ int deviceVersionMinor;
+
+ bool haveDoubleSupport;
+ bool isUnifiedMemory; // true for integrated GPUs (device memory unified with host), false otherwise
+
+ // presumably appended to the build options when compiling kernels for this device — TODO confirm at use site
+ std::string compilationExtraOptions;
+
+ const PlatformInfo* platform; // platform this device belongs to
+
+ DeviceInfo();
+ };
+
+ // Describes a single OpenCL platform and the devices it exposes.
+ // Instances are obtained through getOpenCLPlatforms().
+ struct PlatformInfo
+ {
+ int _id; // reserved, don't use it
+
+ std::string platformProfile;
+ std::string platformVersion;
+ std::string platformName;
+ std::string platformVendor;
+ std::string platformExtensons; // NOTE(review): identifier misspells "Extensions" — public API, confirm before renaming
+
+ // Platform OpenCL version, split into numeric components.
+ int platformVersionMajor;
+ int platformVersionMinor;
+
+ // Devices belonging to this platform.
+ std::vector<const DeviceInfo*> devices;
+
+ PlatformInfo();
};
- //////////////////////////////// Initialization & Info ////////////////////////
- //this function may be obsoleted
- //CV_EXPORTS cl_device_id getDevice();
- //the function must be called before any other cv::ocl::functions, it initialize ocl runtime
- //each Info relates to an OpenCL platform
- //there is one or more devices in each platform, each one has a separate name
- CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
- //set device you want to use, optional function after getDevice be called
- //the devnum is the index of the selected device in DeviceName vector of INfo
- CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
+ //////////////////////////////// Initialization & Info ////////////////////////
+ typedef std::vector<const PlatformInfo*> PlatformsInfo;
- //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
- //returns cl_context *
- CV_EXPORTS void* getoclContext();
- //returns cl_command_queue *
- CV_EXPORTS void* getoclCommandQueue();
+ CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
- //explicit call clFinish. The global command queue will be used.
- CV_EXPORTS void finish();
+ typedef std::vector<const DeviceInfo*> DevicesInfo;
- //this function enable ocl module to use customized cl_context and cl_command_queue
- //getDevice also need to be called before this function
- CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
+ CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
+ const PlatformInfo* platform = NULL);
- //returns true when global OpenCL context is initialized
- CV_EXPORTS bool initialized();
+ // set device you want to use
+ CV_EXPORTS void setDevice(const DeviceInfo* info);
//////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
- //////////////////////////////// OpenCL context ////////////////////////
- //This is a global singleton class used to represent a OpenCL context.
+ // Device capability flags queried via Context::supportsFeature() /
+ // cv::ocl::supportsFeature().
+ enum FEATURE_TYPE
+ {
+ FEATURE_CL_DOUBLE = 1, // double-precision floating point support
+ FEATURE_CL_UNIFIED_MEM, // device memory unified with host memory (integrated GPU)
+ FEATURE_CL_VER_1_2 // device implements OpenCL 1.2 or newer
+ };
+
+ // Represents OpenCL context, interface
class CV_EXPORTS Context
{
protected:
- Context();
- friend class auto_ptr<Context>;
- friend bool initialized();
- private:
- static auto_ptr<Context> clCxt;
- static int val;
+ Context() { }
+ ~Context() { }
public:
- ~Context();
- void release();
- Info::Impl* impl;
-
static Context* getContext();
- static void setContext(Info &oclinfo);
- enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
- bool supportsFeature(int ftype) const;
- size_t computeUnits() const;
- void* oclContext();
- void* oclCommandQueue();
+ bool supportsFeature(FEATURE_TYPE featureType) const;
+ const DeviceInfo& getDeviceInfo() const;
+
+ const void* getOpenCLContextPtr() const;
+ const void* getOpenCLCommandQueuePtr() const;
+ const void* getOpenCLDeviceIDPtr() const;
};
+ // Shorthand for Context::getContext()->getOpenCLContextPtr():
+ // opaque pointer to the global context's underlying cl_context.
+ inline const void *getClContextPtr()
+ {
+ return Context::getContext()->getOpenCLContextPtr();
+ }
+
+ // Shorthand for Context::getContext()->getOpenCLCommandQueuePtr():
+ // opaque pointer to the global context's underlying cl_command_queue.
+ inline const void *getClCommandQueuePtr()
+ {
+ return Context::getContext()->getOpenCLCommandQueuePtr();
+ }
+
+ bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType);
+
+ void CV_EXPORTS finish();
+
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
const char **source, string kernelName,
uchar *dataend;
//! OpenCL context associated with the oclMat object.
- Context *clCxt;
+ Context *clCxt; // TODO clCtx
//add offset for handle ROI, calculated in byte
int offset;
//add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
oclMat temp5;
};
- static inline size_t divUp(size_t total, size_t grain)
- {
- return (total + grain - 1) / grain;
- }
-
/*!***************K Nearest Neighbour*************!*/
class CV_EXPORTS KNearestNeighbour: public CvKNearest
{
namespace cv
{
- namespace ocl
+namespace ocl
+{
+
+// Dereferences the Context's opaque device-ID pointer into a cl_device_id.
+inline cl_device_id getClDeviceID(const Context *ctx)
+{
+ return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
+}
+
+// Dereferences the Context's opaque context pointer into a cl_context.
+inline cl_context getClContext(const Context *ctx)
+{
+ return *(cl_context*)(ctx->getOpenCLContextPtr());
+}
+
+// Dereferences the Context's opaque queue pointer into a cl_command_queue.
+inline cl_command_queue getClCommandQueue(const Context *ctx)
+{
+ return *(cl_command_queue*)(ctx->getOpenCLCommandQueuePtr());
+}
+
+// Transfer direction for openCLMemcpy2D().
+enum openCLMemcpyKind
+{
+ clMemcpyHostToDevice = 0,
+ clMemcpyDeviceToHost,
+ clMemcpyDeviceToDevice
+};
+///////////////////////////OpenCL call wrappers////////////////////////////
+void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
+ size_t widthInBytes, size_t height);
+void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
+ size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
+void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
+ const void *src, size_t spitch,
+ size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
+void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
+ const void *src, size_t spitch,
+ size_t width, size_t height, int src_offset);
+void CV_EXPORTS openCLFree(void *devPtr);
+cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
+void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
+cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
+ const char **source, std::string kernelName);
+cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
+ const char **source, std::string kernelName, const char *build_options);
+void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
+void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
+ int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
+void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
+void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
+void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
+ int depth, const char *build_options);
+
+cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
+ const size_t size);
+
+cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
+
+int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
+
+// Synchronization applied after kernel enqueue in openCLExecuteKernel2()
+// (the finish_mode parameter): clFinish, clFlush, or no synchronization.
+enum FLUSH_MODE
+{
+ CLFINISH = 0,
+ CLFLUSH,
+ DISABLE
+};
+
+void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
+void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
+ int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
+// bind oclMat to OpenCL image textures
+// note:
+// 1. there is no memory management. Users need to explicitly release the resource
+// 2. for faster clamping, there is no buffer padding for the constructed texture
+cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
+void CV_EXPORTS releaseTexture(cl_mem& texture);
+
+//Represents an image texture object
+class CV_EXPORTS TextureCL
+{
+public:
+ TextureCL(cl_mem tex, int r, int c, int t)
+ : tex_(tex), rows(r), cols(c), type(t) {}
+ ~TextureCL()
+ {
+ openCLFree(tex_);
+ }
+ operator cl_mem()
{
- enum openCLMemcpyKind
- {
- clMemcpyHostToDevice = 0,
- clMemcpyDeviceToHost,
- clMemcpyDeviceToDevice
- };
- ///////////////////////////OpenCL call wrappers////////////////////////////
- void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
- size_t widthInBytes, size_t height);
- void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
- size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
- void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
- const void *src, size_t spitch,
- size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
- void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
- const void *src, size_t spitch,
- size_t width, size_t height, int src_offset);
- void CV_EXPORTS openCLFree(void *devPtr);
- cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
- void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
- cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
- const char **source, std::string kernelName);
- cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
- const char **source, std::string kernelName, const char *build_options);
- void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
- void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
- int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
- void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
- void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
- size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
- void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
- size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
- int depth, const char *build_options);
-
- cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
- const size_t size);
-
- cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
-
- int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
-
- enum FLUSH_MODE
- {
- CLFINISH = 0,
- CLFLUSH,
- DISABLE
- };
-
- void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
- size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
- void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
- size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
- int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
- // bind oclMat to OpenCL image textures
- // note:
- // 1. there is no memory management. User need to explicitly release the resource
- // 2. for faster clamping, there is no buffer padding for the constructed texture
- cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
- void CV_EXPORTS releaseTexture(cl_mem& texture);
-
- //Represents an image texture object
- class CV_EXPORTS TextureCL
- {
- public:
- TextureCL(cl_mem tex, int r, int c, int t)
- : tex_(tex), rows(r), cols(c), type(t) {}
- ~TextureCL()
- {
- openCLFree(tex_);
- }
- operator cl_mem()
- {
- return tex_;
- }
- cl_mem const tex_;
- const int rows;
- const int cols;
- const int type;
- private:
- //disable assignment
- void operator=(const TextureCL&);
- };
- // bind oclMat to OpenCL image textures and retunrs an TextureCL object
- // note:
- // for faster clamping, there is no buffer padding for the constructed texture
- Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
-
- // returns whether the current context supports image2d_t format or not
- bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
-
- // the enums are used to query device information
- // currently only support wavefront size queries
- enum DEVICE_INFO
- {
- WAVEFRONT_SIZE, //in AMD speak
- IS_CPU_DEVICE //check if the device is CPU
- };
- template<DEVICE_INFO _it, typename _ty>
- _ty queryDeviceInfo(cl_kernel kernel = NULL);
-
- template<>
- int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
- template<>
- size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
- template<>
- bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
-
- unsigned long CV_EXPORTS queryLocalMemInfo();
- }//namespace ocl
+ return tex_;
+ }
+ cl_mem const tex_;
+ const int rows;
+ const int cols;
+ const int type;
+private:
+ //disable assignment
+ void operator=(const TextureCL&);
+};
+// bind oclMat to OpenCL image textures and returns a TextureCL object
+// note:
+// for faster clamping, there is no buffer padding for the constructed texture
+Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
+
+// returns whether the current context supports image2d_t format or not
+bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
+
+bool CV_EXPORTS isCpuDevice();
+
+size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel);
+
+
+// Ceiling integer division: smallest k such that k * grain >= total (grain must be non-zero).
+inline size_t divUp(size_t total, size_t grain)
+{
+ return (total + grain - 1) / grain;
+}
+
+// Rounds sz up to the nearest multiple of n (n must be non-zero).
+inline size_t roundUp(size_t sz, size_t n)
+{
+ // we don't assume that n is a power of 2 (see alignSize)
+ // equal to divUp(sz, n) * n
+ size_t t = sz + n - 1;
+ size_t rem = t % n;
+ size_t result = t - rem;
+ return result;
+}
+
+}//namespace ocl
}//namespace cv
#endif //__OPENCV_OCL_PRIVATE_UTIL__
const char * keys =
"{ h | help | false | print help message }"
"{ t | type | gpu | set device type:cpu or gpu}"
- "{ p | platform | 0 | set platform id }"
+ "{ p | platform | -1 | set platform id }"
"{ d | device | 0 | set device id }";
CommandLineParser cmd(argc, argv, keys);
}
string type = cmd.get<string>("type");
- unsigned int pid = cmd.get<unsigned int>("platform");
+ int pid = cmd.get<int>("platform");
int device = cmd.get<int>("device");
int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU :
cv::ocl::CVCL_DEVICE_TYPE_GPU;
- std::vector<cv::ocl::Info> oclinfo;
- int devnums = cv::ocl::getDevice(oclinfo, flag);
- if (devnums <= device || device < 0)
+ cv::ocl::PlatformsInfo platformsInfo;
+ cv::ocl::getOpenCLPlatforms(platformsInfo);
+ if (pid >= (int)platformsInfo.size())
{
- std::cout << "device invalid\n";
- return -1;
+ std::cout << "platform is invalid\n";
+ return 1;
}
- if (pid >= oclinfo.size())
+ cv::ocl::DevicesInfo devicesInfo;
+ int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]);
+ if (device < 0 || device >= devnums)
{
- std::cout << "platform invalid\n";
- return -1;
+ std::cout << "device/platform invalid\n";
+ return 1;
}
- cv::ocl::setDevice(oclinfo[pid], device);
+ cv::ocl::setDevice(devicesInfo[device]);
cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE);
+ cout << "Device type:" << type << endl
+ << "Platform name:" << devicesInfo[device]->platform->platformName << endl
+ << "Device name:" << devicesInfo[device]->deviceName << endl;
+
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls)
}
//M*/
#include "precomp.hpp"
-#include <iomanip>
-
+#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
-using namespace std;
-
-namespace cv
-{
- namespace ocl
- {
- //////////////////////////////// OpenCL kernel strings /////////////////////
-
- extern const char *arithm_absdiff_nonsaturate;
- extern const char *arithm_nonzero;
- extern const char *arithm_sum;
- extern const char *arithm_minMax;
- extern const char *arithm_minMaxLoc;
- extern const char *arithm_minMaxLoc_mask;
- extern const char *arithm_LUT;
- extern const char *arithm_add;
- extern const char *arithm_add_mask;
- extern const char *arithm_add_scalar;
- extern const char *arithm_add_scalar_mask;
- extern const char *arithm_bitwise_binary;
- extern const char *arithm_bitwise_binary_mask;
- extern const char *arithm_bitwise_binary_scalar;
- extern const char *arithm_bitwise_binary_scalar_mask;
- extern const char *arithm_bitwise_not;
- extern const char *arithm_compare;
- extern const char *arithm_transpose;
- extern const char *arithm_flip;
- extern const char *arithm_flip_rc;
- extern const char *arithm_magnitude;
- extern const char *arithm_cartToPolar;
- extern const char *arithm_polarToCart;
- extern const char *arithm_exp;
- extern const char *arithm_log;
- extern const char *arithm_addWeighted;
- extern const char *arithm_phase;
- extern const char *arithm_pow;
- extern const char *arithm_setidentity;
- }
-}
//////////////////////////////////////////////////////////////////////////////
/////////////////////// add subtract multiply divide /////////////////////////
oclMat &dst, int op_type, bool use_scalar = false)
{
Context *clCxt = src1.clCxt;
- bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE);
+ bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F))
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
//////////////////////////////////////////////////////////////////////////////
static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp,
- string kernelName, const char **kernelString)
+ string kernelName, const cv::ocl::ProgramEntry* source)
{
CV_Assert(src1.type() == src2.type());
dst.create(src1.size(), CV_8UC1);
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads,
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
}
void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp)
{
- if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
+ if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
{
CV_Assert(src.step % src.elemSize() == 0);
- size_t groupnum = src.clCxt->computeUnits();
+ size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int dbsize = groupnum * src.oclchannels();
Scalar cv::ocl::sum(const oclMat &src)
{
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
arithmetic_sum<double>
};
- bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
+ bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
int ddepth = std::max(src.depth(), CV_32S);
if (!hasDouble && ddepth == CV_64F)
ddepth = CV_32F;
Scalar cv::ocl::absSum(const oclMat &src)
{
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
arithmetic_sum<double>
};
- bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
+ bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
int ddepth = std::max(src.depth(), CV_32S);
if (!hasDouble && ddepth == CV_64F)
ddepth = CV_32F;
Scalar cv::ocl::sqrSum(const oclMat &src)
{
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
arithmetic_sum<double>
};
- bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
+ bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F);
sumFunc func = functab[ddepth - CV_32S];
void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
const oclMat &mask, oclMat &buf)
{
- size_t groupnum = src.clCxt->computeUnits();
+ size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int dbsize = groupnum * 2 * src.elemSize();
if (minVal == NULL && maxVal == NULL)
return;
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
{
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
{
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- const char **kernelString = isVertical ? &arithm_flip_rc : &arithm_flip;
+ const cv::ocl::ProgramEntry* source = isVertical ? &arithm_flip_rc : &arithm_flip;
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
}
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
//////////////////////////////// exp log /////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
-static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString)
+static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
Context *clCxt = src.clCxt;
- if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
args.push_back( make_pair( sizeof(cl_int), (void *)&srcstep1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads,
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads,
args, src.oclchannels(), -1, buildOptions.c_str());
}
static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
{
- if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
+ if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude");
}
-static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
+static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
- if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
+ if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
string kernelName, bool angleInDegrees)
{
- if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
+ if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
string kernelName)
{
- if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
+ if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
Point *minLoc, Point *maxLoc, const oclMat &mask)
{
CV_Assert(src.oclchannels() == 1);
- size_t groupnum = src.clCxt->computeUnits();
+ size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int minloc = -1 , maxloc = -1;
int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ;
void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask)
{
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
return;
};
minMaxLocFunc func;
- func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
+ func = functab[(int)src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)];
func(src, minVal, maxVal, minLoc, maxLoc, mask);
}
CV_Assert(src.channels() == 1);
Context *clCxt = src.clCxt;
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
}
////////////////////////////////bitwise_op////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
-static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString)
+static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
dst.create(src1.size(), src1.type());
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
enum { AND = 0, OR, XOR };
oclMat &dst, int operationType)
{
Context *clCxt = src1.clCxt;
- if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
+ if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
{
cout << "Selected device does not support double" << endl;
return;
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
{
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
cout << "Selected device does not support double" << endl;
return;
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
{
Context *clCxt = src.clCxt;
- if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst)
{
Context *clCxt = src1.clCxt;
- bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE);
+ bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!hasDouble && src1.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
/////////////////////////////////// Pow //////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
-static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const char **kernelString)
+static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{
CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows);
CV_Assert(src1.type() == dst.type());
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
float pf = static_cast<float>(p);
- if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
+ if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
else
args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
{
- if (!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F)
+ if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
constants->c_tau = tau;
constants->c_shadowVal = shadowVal;
- cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()),
+ cl_constants = load_constant(*((cl_context*)getClContextPtr()), *((cl_command_queue*)getClCommandQueuePtr()),
(void *)constants, sizeof(_contant_struct));
}
mean_.release();
bgmodelUsedModes_.release();
-}
\ No newline at end of file
+}
using std::cout;
using std::endl;
-namespace cv
+namespace cv { namespace ocl {
+
+// Process-wide cache of built OpenCL programs (cl_program handles), keyed by a
+// source-signature string, so each kernel source is compiled at most once per
+// context/build-option combination.
+class ProgramCache
{
- namespace ocl
- {
- class ProgramCache
- {
- protected:
- ProgramCache();
- friend class auto_ptr<ProgramCache>;
- static auto_ptr<ProgramCache> programCache;
+protected:
+ ProgramCache();
+ ~ProgramCache();
+ friend class std::auto_ptr<ProgramCache>;
+public:
+ // Returns the process-wide singleton instance (never NULL).
+ static ProgramCache *getProgramCache();
- public:
- ~ProgramCache();
- static ProgramCache *getProgramCache()
- {
- if( NULL == programCache.get())
- programCache.reset(new ProgramCache());
- return programCache.get();
- }
+ // Returns a built program for the given source/kernel/build options,
+ // reusing a cached entry when one exists.
+ cl_program getProgram(const Context *ctx, const char **source, string kernelName,
+ const char *build_options);
- //lookup the binary given the file name
- cl_program progLookup(string srcsign);
+ // Releases every cached program.
+ void releaseProgram();
+protected:
+ //lookup the binary given the file name
+ cl_program progLookup(string srcsign);
- //add program to the cache
- void addProgram(string srcsign, cl_program program);
- void releaseProgram();
+ //add program to the cache
+ void addProgram(string srcsign, cl_program program);
- map <string, cl_program> codeCache;
- unsigned int cacheSize;
- //The presumed watermark for the cache volume (256MB). Is it enough?
- //We may need more delicate algorithms when necessary later.
- //Right now, let's just leave it along.
- static const unsigned MAX_PROG_CACHE_SIZE = 1024;
- };
+ map <string, cl_program> codeCache;
+ unsigned int cacheSize;
- }//namespace ocl
+ //The presumed watermark for the cache volume (256MB). Is it enough?
+ //We may need more delicate algorithms when necessary later.
+ //Right now, let's just leave it alone.
+ static const unsigned MAX_PROG_CACHE_SIZE = 1024;
+};
+}//namespace ocl
}//namespace cv
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
- bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+ bool is_cpu = isCpuDevice();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
- bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+ bool is_cpu = isCpuDevice();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
- bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+ bool is_cpu = isCpuDevice();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
- bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+ bool is_cpu = isCpuDevice();
if (query.cols <= 64)
{
knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
{
openCLFree(counter);
}
- counter = clCreateBuffer( *((cl_context*)getoclContext()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
+ counter = clCreateBuffer( *((cl_context*)getClContextPtr()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
openCLSafeCall(err);
}
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
{
unsigned int count;
- openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
Context *clCxt = map.clCxt;
string kernelName = "edgesHysteresisGlobal";
vector< pair<size_t, const void *> > args;
int count_i[1] = {0};
while(count > 0)
{
- openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
args.clear();
size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1};
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
- openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Guoping Long, longguoping@gmail.com
+// Niko Li, newlife20080214@gmail.com
+// Yao Wang, bitwangyaoyao@gmail.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other oclMaterials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+#include <fstream>
+#include "binarycaching.hpp"
+
+#undef __CL_ENABLE_EXCEPTIONS
+#include <CL/cl.hpp>
+
+namespace cv { namespace ocl {
+
+extern void fft_teardown();
+extern void clBlasTeardown();
+
+// Internal pairing of a cl_platform_id with its public PlatformInfo and the
+// indices (into global_devices) of the devices this platform exposes.
+struct PlatformInfoImpl
+{
+ cl_platform_id platform_id;
+
+ std::vector<int> deviceIDs;
+
+ PlatformInfo info;
+
+ PlatformInfoImpl()
+ : platform_id(NULL)
+ {
+ }
+};
+
+// Internal pairing of the raw OpenCL platform/device handles with the public
+// DeviceInfo that is handed out to callers.
+struct DeviceInfoImpl
+{
+ cl_platform_id platform_id;
+ cl_device_id device_id;
+
+ DeviceInfo info;
+
+ DeviceInfoImpl()
+ : platform_id(NULL), device_id(NULL)
+ {
+ }
+};
+
+// Populated once by initializeOpenCLDevices(); PlatformInfoImpl::deviceIDs and
+// DeviceInfo::_id are indices into these vectors, so they must not be resized later.
+static std::vector<PlatformInfoImpl> global_platforms;
+static std::vector<DeviceInfoImpl> global_devices;
+
+// Parses a version string of the form "OpenCL <major>.<minor> <vendor text>"
+// into its numeric major/minor parts.
+// Returns false (and sets major = minor = 0) when the string doesn't match.
+// NOTE(review): requires a space after the minor number (p2 below), so a bare
+// "OpenCL 1.2" with no trailing text parses as a failure — confirm intended.
+static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor)
+{
+ // advance p0 to the space immediately preceding the first digit
+ size_t p0 = versionStr.find(' ');
+ while (true)
+ {
+ if (p0 == std::string::npos)
+ break;
+ if (p0 + 1 >= versionStr.length())
+ break;
+ char c = versionStr[p0 + 1];
+ if (isdigit(c))
+ break;
+ p0 = versionStr.find(' ', p0 + 1);
+ }
+ size_t p1 = versionStr.find('.', p0);
+ size_t p2 = versionStr.find(' ', p1);
+ if (p0 == std::string::npos || p1 == std::string::npos || p2 == std::string::npos)
+ {
+ major = 0;
+ minor = 0;
+ return false;
+ }
+ // major is between the space and '.', minor between '.' and the next space
+ std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1);
+ std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1);
+ major = atoi(majorStr.c_str());
+ minor = atoi(minorStr.c_str());
+ return true;
+}
+
+// Enumerates all OpenCL platforms and their devices once at startup, filling
+// global_platforms / global_devices with queried capabilities.
+// Returns the total device count (0 when no OpenCL runtime is available).
+static int initializeOpenCLDevices()
+{
+ assert(global_devices.size() == 0);
+
+ std::vector<cl::Platform> platforms;
+ try
+ {
+ openCLSafeCall(cl::Platform::get(&platforms));
+ }
+ catch (cv::Exception& e)
+ {
+ return 0; // OpenCL not found
+ }
+
+ global_platforms.resize(platforms.size());
+
+ for (size_t i = 0; i < platforms.size(); ++i)
+ {
+ PlatformInfoImpl& platformInfo = global_platforms[i];
+ platformInfo.info._id = i;
+
+ cl::Platform& platform = platforms[i];
+
+ // query platform-level strings and parse the version number
+ platformInfo.platform_id = platform();
+ openCLSafeCall(platform.getInfo(CL_PLATFORM_PROFILE, &platformInfo.info.platformProfile));
+ openCLSafeCall(platform.getInfo(CL_PLATFORM_VERSION, &platformInfo.info.platformVersion));
+ openCLSafeCall(platform.getInfo(CL_PLATFORM_NAME, &platformInfo.info.platformName));
+ openCLSafeCall(platform.getInfo(CL_PLATFORM_VENDOR, &platformInfo.info.platformVendor));
+ openCLSafeCall(platform.getInfo(CL_PLATFORM_EXTENSIONS, &platformInfo.info.platformExtensons));
+
+ parseOpenCLVersion(platformInfo.info.platformVersion,
+ platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor);
+
+ // a platform with no devices is not an error; anything else is verified
+ std::vector<cl::Device> devices;
+ cl_int status = platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
+ if(status != CL_DEVICE_NOT_FOUND)
+ openCLVerifyCall(status);
+
+ if(devices.size() > 0)
+ {
+ int baseIndx = global_devices.size();
+ global_devices.resize(baseIndx + devices.size());
+ platformInfo.deviceIDs.resize(devices.size());
+ platformInfo.info.devices.resize(devices.size());
+
+ for(size_t j = 0; j < devices.size(); ++j)
+ {
+ cl::Device& device = devices[j];
+
+ DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j];
+ deviceInfo.info._id = baseIndx + j;
+ deviceInfo.platform_id = platform();
+ deviceInfo.device_id = device();
+
+ deviceInfo.info.platform = &platformInfo.info;
+ platformInfo.deviceIDs[j] = deviceInfo.info._id;
+
+ cl_device_type type = -1;
+ openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type));
+ deviceInfo.info.deviceType = DeviceType(type);
+
+ // device identification strings
+ openCLSafeCall(device.getInfo(CL_DEVICE_PROFILE, &deviceInfo.info.deviceProfile));
+ openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion));
+ openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName));
+ openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor));
+ cl_uint vendorID = -1;
+ openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID));
+ deviceInfo.info.deviceVendorId = vendorID;
+ openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion));
+ openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions));
+
+ parseOpenCLVersion(deviceInfo.info.deviceVersion,
+ deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor);
+
+ // work-group / compute-unit / local-memory limits
+ size_t maxWorkGroupSize = 0;
+ openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &maxWorkGroupSize));
+ deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize;
+
+ cl_uint maxDimensions = 0;
+ openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &maxDimensions));
+ std::vector<size_t> maxWorkItemSizes(maxDimensions);
+ openCLSafeCall(clGetDeviceInfo(device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions,
+ (void *)&maxWorkItemSizes[0], 0));
+ deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes;
+
+ cl_uint maxComputeUnits = 0;
+ openCLSafeCall(device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &maxComputeUnits));
+ deviceInfo.info.maxComputeUnits = maxComputeUnits;
+
+ cl_ulong localMemorySize = 0;
+ openCLSafeCall(device.getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &localMemorySize));
+ deviceInfo.info.localMemorySize = (size_t)localMemorySize;
+
+
+ cl_bool unifiedMemory = false;
+ openCLSafeCall(device.getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &unifiedMemory));
+ deviceInfo.info.isUnifiedMemory = unifiedMemory != 0;
+
+ //initialize extra options for compilation. Currently only fp64 is included.
+ //Assume 4KB is enough to store all possible extensions.
+ openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions));
+
+ size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64");
+ if(fp64_khr != std::string::npos)
+ {
+ deviceInfo.info.compilationExtraOptions += "-D DOUBLE_SUPPORT";
+ deviceInfo.info.haveDoubleSupport = true;
+ }
+ else
+ {
+ deviceInfo.info.haveDoubleSupport = false;
+ }
+ }
+ }
+ }
+
+ // second pass: wire PlatformInfo::devices pointers only after global_devices
+ // has reached its final size, so the pointers stay stable
+ for (size_t i = 0; i < platforms.size(); ++i)
+ {
+ PlatformInfoImpl& platformInfo = global_platforms[i];
+ for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j)
+ {
+ DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]];
+ platformInfo.info.devices[j] = &deviceInfo.info;
+ }
+ }
+
+ return global_devices.size();
+}
+
+
+// Default-constructs an invalid DeviceInfo (_id == -1); real values are filled
+// in by initializeOpenCLDevices().
+DeviceInfo::DeviceInfo()
+ : _id(-1), deviceType(DeviceType(0)),
+ deviceVendorId(-1),
+ maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0),
+ deviceVersionMajor(0), deviceVersionMinor(0),
+ haveDoubleSupport(false), isUnifiedMemory(false),
+ platform(NULL)
+{
+ // nothing
+}
+
+// Default-constructs an invalid PlatformInfo (_id == -1); real values are
+// filled in by initializeOpenCLDevices().
+PlatformInfo::PlatformInfo()
+ : _id(-1),
+ platformVersionMajor(0), platformVersionMinor(0)
+{
+ // nothing
+}
+
+//////////////////////////////// OpenCL context ////////////////////////
+//This is a global singleton class used to represent an OpenCL context.
+//It owns the cl_context and cl_command_queue and releases them in its destructor.
+class ContextImpl : public Context
+{
+public:
+ const cl_device_id clDeviceID;
+ cl_context clContext;
+ cl_command_queue clCmdQueue;
+ const DeviceInfo& deviceInfo;
+
+protected:
+ // Construction only records the device; clContext/clCmdQueue are assigned
+ // afterwards by setContext().
+ ContextImpl(const DeviceInfo& deviceInfo, cl_device_id clDeviceID)
+ : clDeviceID(clDeviceID), clContext(NULL), clCmdQueue(NULL), deviceInfo(deviceInfo)
+ {
+ // nothing
+ }
+ ~ContextImpl();
+public:
+
+ static ContextImpl* getContext();
+ static void setContext(const DeviceInfo* deviceInfo);
+
+ bool supportsFeature(FEATURE_TYPE featureType) const;
+
+ static void cleanupContext(void);
+};
+
+// currentContextMutex guards swaps of currentContext in setContext()/cleanupContext();
+// NOTE(review): reads in getContext() are unsynchronized — confirm acceptable.
+static cv::Mutex currentContextMutex;
+static ContextImpl* currentContext = NULL;
+
+// Returns the process-wide current context (NULL until setDevice()/setContext()).
+Context* Context::getContext()
+{
+ return currentContext;
+}
+
+// Forwards to ContextImpl::supportsFeature (every Context is a ContextImpl).
+bool Context::supportsFeature(FEATURE_TYPE featureType) const
+{
+ return ((ContextImpl*)this)->supportsFeature(featureType);
+}
+
+// Returns the DeviceInfo this context was created for.
+const DeviceInfo& Context::getDeviceInfo() const
+{
+ return ((ContextImpl*)this)->deviceInfo;
+}
+
+// Returns a pointer to the stored cl_context (cast back via (cl_context*) by callers).
+const void* Context::getOpenCLContextPtr() const
+{
+ return &(((ContextImpl*)this)->clContext);
+}
+
+// Returns a pointer to the stored cl_command_queue.
+const void* Context::getOpenCLCommandQueuePtr() const
+{
+ return &(((ContextImpl*)this)->clCmdQueue);
+}
+
+// Returns a pointer to the stored cl_device_id.
+const void* Context::getOpenCLDeviceIDPtr() const
+{
+ return &(((ContextImpl*)this)->clDeviceID);
+}
+
+
+// Maps a FEATURE_TYPE query onto the cached DeviceInfo capabilities.
+// Raises CV_StsBadArg for values outside the enum.
+bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const
+{
+ switch (featureType)
+ {
+ case FEATURE_CL_DOUBLE:
+ return deviceInfo.haveDoubleSupport;
+ case FEATURE_CL_UNIFIED_MEM:
+ return deviceInfo.isUnifiedMemory;
+ case FEATURE_CL_VER_1_2:
+ // OpenCL version >= 1.2
+ return deviceInfo.deviceVersionMajor > 1 || (deviceInfo.deviceVersionMajor == 1 && deviceInfo.deviceVersionMinor >= 2);
+ }
+ CV_Error(CV_StsBadArg, "Invalid feature type");
+ return false;
+}
+
+#if defined(WIN32)
+// Set by DllMain on DLL_PROCESS_DETACH after ExitProcess; checked in
+// ~ContextImpl() to skip OpenCL releases that could hang at that point.
+static bool __termination = false;
+#endif
+
+// Tears down FFT/clBlas helper state, then releases the command queue and
+// context (skipped on Windows process termination — see __termination).
+ContextImpl::~ContextImpl()
+{
+ fft_teardown();
+ clBlasTeardown();
+
+#ifdef WIN32
+ // if process is on termination stage (ExitProcess was called and other threads were terminated)
+ // then disable command queue release because it may cause program hang
+ if (!__termination)
+#endif
+ {
+ if(clCmdQueue)
+ {
+ openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); // some cleanup problems are here
+ }
+
+ if(clContext)
+ {
+ openCLSafeCall(clReleaseContext(clContext));
+ }
+ }
+ clCmdQueue = NULL;
+ clContext = NULL;
+}
+
+// Destroys the current context (if any) under the lock; called at module unload.
+void ContextImpl::cleanupContext(void)
+{
+ cv::AutoLock lock(currentContextMutex);
+ if (currentContext)
+ delete currentContext;
+ currentContext = NULL;
+}
+
+// Creates a cl_context and command queue for the given (previously enumerated)
+// device and installs them as the current context; the previously installed
+// context, if any, is destroyed after the swap, outside the lock.
+void ContextImpl::setContext(const DeviceInfo* deviceInfo)
+{
+ // deviceInfo must point into global_devices (i.e. come from getOpenCLDevices)
+ CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size());
+
+ DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id];
+ CV_Assert(deviceInfo == &infoImpl.info);
+
+ cl_int status = 0;
+ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 };
+ cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status);
+ openCLVerifyCall(status);
+ // TODO add CL_QUEUE_PROFILING_ENABLE
+ cl_command_queue clCmdQueue = clCreateCommandQueue(clContext, infoImpl.device_id, 0, &status);
+ openCLVerifyCall(status);
+
+ ContextImpl* ctx = new ContextImpl(infoImpl.info, infoImpl.device_id);
+ ctx->clCmdQueue = clCmdQueue;
+ ctx->clContext = clContext;
+
+ ContextImpl* old = NULL;
+ {
+ cv::AutoLock lock(currentContextMutex);
+ old = currentContext;
+ currentContext = ctx;
+ }
+ if (old != NULL)
+ {
+ // deleting outside the lock releases the old CL context/queue
+ delete old;
+ }
+}
+
+// Same as Context::getContext(), typed as the implementation class.
+ContextImpl* ContextImpl::getContext()
+{
+ return currentContext;
+}
+
+// Fills `platforms` with pointers to all enumerated platforms; returns the count.
+int getOpenCLPlatforms(PlatformsInfo& platforms)
+{
+ platforms.clear();
+
+ for (size_t id = 0; id < global_platforms.size(); ++id)
+ {
+ PlatformInfoImpl& impl = global_platforms[id];
+ platforms.push_back(&impl.info);
+ }
+
+ return platforms.size();
+}
+
+// Fills `devices` with enumerated devices matching `deviceType`, optionally
+// restricted to one platform; returns the number found (0 for an unknown type).
+// NOTE(review): the filter requires ALL bits of deviceType to be present in the
+// device's type ((x & t) == t); confirm this behaves as intended when callers
+// pass CVCL_DEVICE_TYPE_ALL rather than a single type bit.
+int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, const PlatformInfo* platform)
+{
+ devices.clear();
+
+ switch(deviceType)
+ {
+ case CVCL_DEVICE_TYPE_DEFAULT:
+ case CVCL_DEVICE_TYPE_CPU:
+ case CVCL_DEVICE_TYPE_GPU:
+ case CVCL_DEVICE_TYPE_ACCELERATOR:
+ case CVCL_DEVICE_TYPE_ALL:
+ break;
+ default:
+ return 0;
+ }
+
+ if (platform == NULL)
+ {
+ // search every enumerated device
+ for (size_t id = 0; id < global_devices.size(); ++id)
+ {
+ DeviceInfoImpl& deviceInfo = global_devices[id];
+ if (((int)deviceInfo.info.deviceType & deviceType) == deviceType)
+ {
+ devices.push_back(&deviceInfo.info);
+ }
+ }
+ }
+ else
+ {
+ // restrict the search to the requested platform
+ for (size_t id = 0; id < platform->devices.size(); ++id)
+ {
+ const DeviceInfo* deviceInfo = platform->devices[id];
+ if (((int)deviceInfo->deviceType & deviceType) == deviceType)
+ {
+ devices.push_back(deviceInfo);
+ }
+ }
+ }
+
+ return (int)devices.size();
+}
+
+// Makes `info`'s device the current one (creates a fresh context/queue for it).
+void setDevice(const DeviceInfo* info)
+{
+ ContextImpl::setContext(info);
+}
+
+// Convenience wrapper over the current context.
+// NOTE(review): dereferences getContext() without a NULL check — crashes if no
+// device has been selected yet; confirm callers guarantee a context exists.
+bool supportsFeature(FEATURE_TYPE featureType)
+{
+ return Context::getContext()->supportsFeature(featureType);
+}
+
+// Static-initialization hook: enumerates OpenCL devices when the library loads
+// and tears the current context down when it unloads.
+struct __Module
+{
+ __Module() { initializeOpenCLDevices(); }
+ ~__Module() { ContextImpl::cleanupContext(); }
+};
+static __Module __module;
+
+
+}//namespace ocl
+}//namespace cv
+
+
+#if defined(WIN32) && defined(CVAPI_EXPORTS)
+
+// Flags process termination so ~ContextImpl() skips OpenCL releases that could
+// hang once other threads have already been killed (see __termination above).
+extern "C"
+BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved)
+{
+ if (fdwReason == DLL_PROCESS_DETACH)
+ {
+ if (lpReserved != NULL) // called after ExitProcess() call
+ cv::ocl::__termination = true;
+ }
+ return TRUE;
+}
+
+#endif
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Guoping Long, longguoping@gmail.com
+// Niko Li, newlife20080214@gmail.com
+// Yao Wang, bitwangyaoyao@gmail.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other oclMaterials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+#include <fstream>
+#include "binarycaching.hpp"
+
+#undef __CL_ENABLE_EXCEPTIONS
+#include <CL/cl.hpp>
+
+//#define PRINT_KERNEL_RUN_TIME
+// number of launches averaged when PRINT_KERNEL_RUN_TIME is enabled
+#define RUN_TIMES 100
+// AMD-specific flag; fall back to 0 so the value table below compiles on
+// platforms whose CL headers don't define it
+#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD
+#define CL_MEM_USE_PERSISTENT_MEM_AMD 0
+#endif
+//#define AMD_DOUBLE_DIFFER
+
+namespace cv { namespace ocl {
+
+// Process-wide defaults for buffer creation; the two tables translate
+// DevMemType / DevMemRW enum values into cl_mem_flags (indexed by enum value).
+DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
+DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
+int gDevMemTypeValueMap[5] = {0,
+ CL_MEM_ALLOC_HOST_PTR,
+ CL_MEM_USE_HOST_PTR,
+ CL_MEM_COPY_HOST_PTR,
+ CL_MEM_USE_PERSISTENT_MEM_AMD};
+int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
+
+// Blocks until all commands queued on the current context's queue have completed.
+void finish()
+{
+ clFinish(getClCommandQueue(Context::getContext()));
+}
+
+// True when the currently selected device reports type CPU.
+bool isCpuDevice()
+{
+ const DeviceInfo& info = Context::getContext()->getDeviceInfo();
+ return (info.deviceType == CVCL_DEVICE_TYPE_CPU);
+}
+
+// Returns the kernel's preferred work-group size multiple (wavefront/warp size)
+// on the current device; CPU devices always report 1 without querying.
+size_t queryWaveFrontSize(cl_kernel kernel)
+{
+ const DeviceInfo& info = Context::getContext()->getDeviceInfo();
+ if (info.deviceType == CVCL_DEVICE_TYPE_CPU)
+ return 1;
+ size_t wavefront = 0;
+ CV_Assert(kernel != NULL);
+ openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(Context::getContext()),
+ CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &wavefront, NULL));
+ return wavefront;
+}
+
+
+// Blocking read of `size` bytes from a device buffer into host memory.
+void openCLReadBuffer(Context *ctx, cl_mem dst_buffer, void *host_buffer, size_t size)
+{
+ cl_int status;
+ status = clEnqueueReadBuffer(getClCommandQueue(ctx), dst_buffer, CL_TRUE, 0,
+ size, host_buffer, 0, NULL, NULL);
+ openCLVerifyCall(status);
+}
+
+// Allocates a device buffer of `size` bytes with the given cl_mem_flags.
+cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size)
+{
+ cl_int status;
+ cl_mem buffer = clCreateBuffer(getClContext(ctx), (cl_mem_flags)flag, size, NULL, &status);
+ openCLVerifyCall(status);
+ return buffer;
+}
+
+// Pitched allocation using the process-wide default RW/memory-type flags.
+void openCLMallocPitch(Context *ctx, void **dev_ptr, size_t *pitch,
+ size_t widthInBytes, size_t height)
+{
+ openCLMallocPitchEx(ctx, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType);
+}
+
+// Allocates widthInBytes*height bytes as one linear buffer. No row padding is
+// applied: *pitch is simply set to widthInBytes.
+void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
+ size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
+{
+ cl_int status;
+ *dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+ widthInBytes * height, 0, &status);
+ openCLVerifyCall(status);
+ *pitch = widthInBytes;
+}
+
+// Blocking 2D host<->device copy. Uses a single contiguous transfer when the
+// destination rows are packed (pitch == width), for single rows, or when
+// channels == 3; otherwise falls back to a rect (row-by-row pitched) transfer.
+// NOTE(review): the channels == 3 fast path copies width*height bytes linearly,
+// presumably because 3-channel mats are stored packed — confirm with callers.
+void openCLMemcpy2D(Context *ctx, void *dst, size_t dpitch,
+ const void *src, size_t spitch,
+ size_t width, size_t height, openCLMemcpyKind kind, int channels)
+{
+ size_t buffer_origin[3] = {0, 0, 0};
+ size_t host_origin[3] = {0, 0, 0};
+ size_t region[3] = {width, height, 1};
+ if(kind == clMemcpyHostToDevice)
+ {
+ if(dpitch == width || channels == 3 || height == 1)
+ {
+ openCLSafeCall(clEnqueueWriteBuffer(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
+ 0, width * height, src, 0, NULL, NULL));
+ }
+ else
+ {
+ openCLSafeCall(clEnqueueWriteBufferRect(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
+ buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
+ }
+ }
+ else if(kind == clMemcpyDeviceToHost)
+ {
+ if(spitch == width || channels == 3 || height == 1)
+ {
+ openCLSafeCall(clEnqueueReadBuffer(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
+ 0, width * height, dst, 0, NULL, NULL));
+ }
+ else
+ {
+ openCLSafeCall(clEnqueueReadBufferRect(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
+ buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
+ }
+ }
+}
+
+// Device-to-device 2D copy. Byte offsets are decomposed into (x, y) origins
+// using each buffer's pitch (offset % pitch, offset / pitch).
+void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset,
+ const void *src, size_t spitch,
+ size_t width, size_t height, int src_offset)
+{
+ size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
+ size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
+ size_t region[3] = {width, height, 1};
+
+ openCLSafeCall(clEnqueueCopyBufferRect(getClCommandQueue(ctx), (cl_mem)src, (cl_mem)dst, src_origin, dst_origin,
+ region, spitch, 0, dpitch, 0, 0, 0, 0));
+}
+
+// Releases a device buffer previously created by the allocation helpers above.
+void openCLFree(void *devPtr)
+{
+ openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
+}
+
+// Convenience overload: no extra build options.
+cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName)
+{
+ return openCLGetKernelFromSource(ctx, source, kernelName, NULL);
+}
+
+// Builds (or fetches from the ProgramCache) the program for `source` and
+// creates the named kernel from it. Caller owns the returned cl_kernel.
+cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName,
+ const char *build_options)
+{
+ cl_kernel kernel;
+ cl_int status = 0;
+ CV_Assert(ProgramCache::getProgramCache() != NULL);
+ cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options);
+ CV_Assert(program != NULL);
+ kernel = clCreateKernel(program, kernelName.c_str(), &status);
+ openCLVerifyCall(status);
+ return kernel;
+}
+
+// Asserts that the requested local work size fits the device's per-dimension
+// limits, the kernel's own work-group limit, and the device-wide maximum.
+void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThreads)
+{
+ size_t kernelWorkGroupSize;
+ openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(ctx),
+ CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
+ CV_Assert( localThreads[0] <= ctx->getDeviceInfo().maxWorkItemSizes[0] );
+ CV_Assert( localThreads[1] <= ctx->getDeviceInfo().maxWorkItemSizes[1] );
+ CV_Assert( localThreads[2] <= ctx->getDeviceInfo().maxWorkItemSizes[2] );
+ CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize );
+ CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= ctx->getDeviceInfo().maxWorkGroupSize );
+}
+
+#ifdef PRINT_KERNEL_RUN_TIME
+// Accumulated across RUN_TIMES launches by openCLExecuteKernel_().
+static double total_execute_time = 0;
+static double total_kernel_time = 0;
+#endif
+// Core kernel launcher: mangles the kernel name with channel/depth suffixes,
+// builds/fetches the kernel, sets its arguments, enqueues it, and releases it.
+// When PRINT_KERNEL_RUN_TIME is defined, also collects profiling timings.
+void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
+ int depth, const char *build_options)
+{
+ //construct kernel name
+ //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
+ //for example split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
+ stringstream idxStr;
+ if(channels != -1)
+ idxStr << "_C" << channels;
+ if(depth != -1)
+ idxStr << "_D" << depth;
+ kernelName += idxStr.str();
+
+ cl_kernel kernel;
+ kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);
+
+ if ( localThreads != NULL)
+ {
+ // round the global size up to a multiple of the local size, as OpenCL requires
+ globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
+ globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
+ globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
+
+ cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
+ }
+ for(size_t i = 0; i < args.size(); i ++)
+ openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
+
+#ifndef PRINT_KERNEL_RUN_TIME
+ openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
+ localThreads, 0, NULL, NULL));
+#else
+ // profiled path: time the launch via an event and accumulate the totals
+ cl_event event = NULL;
+ openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
+ localThreads, 0, NULL, &event));
+
+ cl_ulong start_time, end_time, queue_time;
+ double execute_time = 0;
+ double total_time = 0;
+
+ openCLSafeCall(clWaitForEvents(1, &event));
+ openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
+ sizeof(cl_ulong), &start_time, 0));
+
+ openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
+ sizeof(cl_ulong), &end_time, 0));
+
+ openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
+ sizeof(cl_ulong), &queue_time, 0));
+
+ // nanoseconds -> milliseconds
+ execute_time = (double)(end_time - start_time) / (1000 * 1000);
+ total_time = (double)(end_time - queue_time) / (1000 * 1000);
+
+ total_execute_time += execute_time;
+ total_kernel_time += total_time;
+ clReleaseEvent(event);
+#endif
+
+ clFlush(getClCommandQueue(ctx));
+ openCLSafeCall(clReleaseKernel(kernel));
+}
+
+// Convenience overload: no extra build options.
+void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args, int channels, int depth)
+{
+ openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
+ channels, depth, NULL);
+}
+// Launches the kernel once; with PRINT_KERNEL_RUN_TIME defined, instead runs it
+// RUN_TIMES times and prints average execute/total timings to stdout.
+void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
+
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
+ build_options);
+#else
+ // index into data_type is the OpenCV depth constant (CV_8U..CV_64F)
+ string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
+ cout << endl;
+ cout << "Function Name: " << kernelName;
+ if(depth >= 0)
+ cout << " |data type: " << data_type[depth];
+ cout << " |channels: " << channels;
+ cout << " |Time Unit: " << "ms" << endl;
+
+ total_execute_time = 0;
+ total_kernel_time = 0;
+ cout << "-------------------------------------" << endl;
+
+ cout << setiosflags(ios::left) << setw(15) << "excute time";
+ cout << setiosflags(ios::left) << setw(15) << "lauch time";
+ cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl;
+ int i = 0;
+ for(i = 0; i < RUN_TIMES; i++)
+ openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
+ build_options);
+
+ cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
+ cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
+#endif
+}
+
+double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+ bool finish, bool measureKernelTime, bool cleanUp)
+
+{
+ //construct kernel name
+    //The rule is functionName_Cn_Dn: C represents the channel count, D represents the data type depth, and n is an integer
+    //for example, split_C2_D2 is the split kernel with channels = 2 and data type depth = 2 (data type is char)
+ stringstream idxStr;
+ if(channels != -1)
+ idxStr << "_C" << channels;
+ if(depth != -1)
+ idxStr << "_D" << depth;
+ kernelName += idxStr.str();
+
+ cl_kernel kernel;
+ kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);
+
+ double kernelTime = 0.0;
+
+ if( globalThreads != NULL)
+ {
+ if ( localThreads != NULL)
+ {
+ globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
+ globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
+ globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
+
+ //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
+ cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
+ }
+ for(size_t i = 0; i < args.size(); i ++)
+ openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
+
+ if(measureKernelTime == false)
+ {
+ openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
+ localThreads, 0, NULL, NULL));
+ }
+ else
+ {
+ cl_event event = NULL;
+ openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
+ localThreads, 0, NULL, &event));
+
+ cl_ulong end_time, queue_time;
+
+ openCLSafeCall(clWaitForEvents(1, &event));
+
+ openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
+ sizeof(cl_ulong), &end_time, 0));
+
+ openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
+ sizeof(cl_ulong), &queue_time, 0));
+
+ kernelTime = (double)(end_time - queue_time) / (1000 * 1000);
+
+ clReleaseEvent(event);
+ }
+ }
+
+ if(finish)
+ {
+ clFinish(getClCommandQueue(ctx));
+ }
+
+ if(cleanUp)
+ {
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+
+ return kernelTime;
+}
+
+//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName,
+// size_t globalThreads[3], size_t localThreads[3],
+// vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+// bool finish, bool measureKernelTime, bool cleanUp)
+//
+//{
+// std::vector<std::string> fsource;
+// for (int i = 0 ; i < numFiles ; i++)
+// {
+// std::string str;
+// if (convertToString(fileName[i], str) >= 0)
+// fsource.push_back(str);
+// }
+// const char **source = new const char *[numFiles];
+// for (int i = 0 ; i < numFiles ; i++)
+// source[i] = fsource[i].c_str();
+// double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads,
+// args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
+// fsource.clear();
+// delete []source;
+// return kernelTime;
+//}
+
+cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
+ const size_t size)
+{
+ int status;
+ cl_mem con_struct;
+
+ con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
+ openCLSafeCall(status);
+
+ openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size,
+ value, 0, 0, 0));
+
+ return con_struct;
+
+}
+
+}//namespace ocl
+}//namespace cv
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Guoping Long, longguoping@gmail.com
+// Niko Li, newlife20080214@gmail.com
+// Yao Wang, bitwangyaoyao@gmail.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other oclMaterials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+#include <fstream>
+#include "binarycaching.hpp"
+
+#undef __CL_ENABLE_EXCEPTIONS
+#include <CL/cl.hpp>
+
+namespace cv { namespace ocl {
+/*
+ * The binary caching system to eliminate redundant program source compilation.
+ * Strictly, this is not a cache because we do not implement evictions right now.
+ * We shall add such features to trade off memory consumption against performance when necessary.
+ */
+
+std::auto_ptr<ProgramCache> _programCache;
+ProgramCache* ProgramCache::getProgramCache()
+{
+ if (NULL == _programCache.get())
+ _programCache.reset(new ProgramCache());
+ return _programCache.get();
+}
+
+ProgramCache::ProgramCache()
+{
+ codeCache.clear();
+ cacheSize = 0;
+}
+
+ProgramCache::~ProgramCache()
+{
+ releaseProgram();
+}
+
+cl_program ProgramCache::progLookup(string srcsign)
+{
+ map<string, cl_program>::iterator iter;
+ iter = codeCache.find(srcsign);
+ if(iter != codeCache.end())
+ return iter->second;
+ else
+ return NULL;
+}
+
+void ProgramCache::addProgram(string srcsign , cl_program program)
+{
+ if(!progLookup(srcsign))
+ {
+ codeCache.insert(map<string, cl_program>::value_type(srcsign, program));
+ }
+}
+
+void ProgramCache::releaseProgram()
+{
+ map<string, cl_program>::iterator iter;
+ for(iter = codeCache.begin(); iter != codeCache.end(); iter++)
+ {
+ openCLSafeCall(clReleaseProgram(iter->second));
+ }
+ codeCache.clear();
+ cacheSize = 0;
+}
+
+static int enable_disk_cache =
+#ifdef _DEBUG
+ false;
+#else
+ true;
+#endif
+static int update_disk_cache = false;
+static String binpath = "";
+
+void setBinaryDiskCache(int mode, String path)
+{
+ if(mode == CACHE_NONE)
+ {
+ update_disk_cache = 0;
+ enable_disk_cache = 0;
+ return;
+ }
+ update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE;
+ enable_disk_cache |=
+#ifdef _DEBUG
+ (mode & CACHE_DEBUG) == CACHE_DEBUG;
+#else
+ (mode & CACHE_RELEASE) == CACHE_RELEASE;
+#endif
+ if(enable_disk_cache && !path.empty())
+ {
+ binpath = path;
+ }
+}
+
+void setBinpath(const char *path)
+{
+ binpath = path;
+}
+
+int savetofile(const Context*, cl_program &program, const char *fileName)
+{
+ size_t binarySize;
+ openCLSafeCall(clGetProgramInfo(program,
+ CL_PROGRAM_BINARY_SIZES,
+ sizeof(size_t),
+ &binarySize, NULL));
+ char* binary = (char*)malloc(binarySize);
+ if(binary == NULL)
+ {
+ CV_Error(CV_StsNoMem, "Failed to allocate host memory.");
+ }
+ openCLSafeCall(clGetProgramInfo(program,
+ CL_PROGRAM_BINARIES,
+ sizeof(char *),
+ &binary,
+ NULL));
+
+ FILE *fp = fopen(fileName, "wb+");
+ if(fp != NULL)
+ {
+ fwrite(binary, binarySize, 1, fp);
+ free(binary);
+ fclose(fp);
+ }
+ return 1;
+}
+
+cl_program ProgramCache::getProgram(const Context *ctx, const char **source, string kernelName,
+ const char *build_options)
+{
+ cl_program program;
+ cl_int status = 0;
+ stringstream src_sign;
+ string srcsign;
+ string filename;
+
+ if (NULL != build_options)
+ {
+ src_sign << (int64)(*source) << getClContext(ctx) << "_" << build_options;
+ }
+ else
+ {
+ src_sign << (int64)(*source) << getClContext(ctx);
+ }
+ srcsign = src_sign.str();
+
+ program = NULL;
+ program = ProgramCache::getProgramCache()->progLookup(srcsign);
+
+ if (!program)
+ {
+        // configure the build options for this program
+ std::string all_build_options;
+ if (!ctx->getDeviceInfo().compilationExtraOptions.empty())
+ all_build_options += ctx->getDeviceInfo().compilationExtraOptions;
+ if (build_options != NULL)
+ {
+ all_build_options += " ";
+ all_build_options += build_options;
+ }
+ filename = binpath + kernelName + "_" + ctx->getDeviceInfo().deviceName + all_build_options + ".clb";
+
+ FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL;
+ if(fp == NULL || update_disk_cache)
+ {
+ if(fp != NULL)
+ fclose(fp);
+
+ program = clCreateProgramWithSource(
+ getClContext(ctx), 1, source, NULL, &status);
+ openCLVerifyCall(status);
+ cl_device_id device = getClDeviceID(ctx);
+ status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL);
+ if(status == CL_SUCCESS && enable_disk_cache)
+ savetofile(ctx, program, filename.c_str());
+ }
+ else
+ {
+ fseek(fp, 0, SEEK_END);
+ size_t binarySize = ftell(fp);
+ fseek(fp, 0, SEEK_SET);
+ char *binary = new char[binarySize];
+ CV_Assert(1 == fread(binary, binarySize, 1, fp));
+ fclose(fp);
+ cl_int status = 0;
+ cl_device_id device = getClDeviceID(ctx);
+ program = clCreateProgramWithBinary(getClContext(ctx),
+ 1,
+ &device,
+ (const size_t *)&binarySize,
+ (const unsigned char **)&binary,
+ NULL,
+ &status);
+ openCLVerifyCall(status);
+ status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL);
+ delete[] binary;
+ }
+
+ if(status != CL_SUCCESS)
+ {
+ if(status == CL_BUILD_PROGRAM_FAILURE)
+ {
+ cl_int logStatus;
+ char *buildLog = NULL;
+ size_t buildLogSize = 0;
+ logStatus = clGetProgramBuildInfo(program,
+ getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize,
+ buildLog, &buildLogSize);
+ if(logStatus != CL_SUCCESS)
+ std::cout << "Failed to build the program and get the build info." << endl;
+ buildLog = new char[buildLogSize];
+ CV_DbgAssert(!!buildLog);
+ memset(buildLog, 0, buildLogSize);
+ openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx),
+ CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
+ std::cout << "\n\t\t\tBUILD LOG\n";
+ std::cout << buildLog << endl;
+ delete [] buildLog;
+ }
+ openCLVerifyCall(status);
+ }
+    // Cache the compiled program for future use (the cache key already encodes source, context and build options)
+ if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE)
+ this->addProgram(srcsign, program);
+ else
+ cout << "Warning: code cache has been full.\n";
+ }
+ return program;
+}
+
+//// Converts the contents of a file into a string
+//static int convertToString(const char *filename, std::string& s)
+//{
+// size_t size;
+// char* str;
+//
+// std::fstream f(filename, (std::fstream::in | std::fstream::binary));
+// if(f.is_open())
+// {
+// size_t fileSize;
+// f.seekg(0, std::fstream::end);
+// size = fileSize = (size_t)f.tellg();
+// f.seekg(0, std::fstream::beg);
+//
+// str = new char[size+1];
+// if(!str)
+// {
+// f.close();
+// return -1;
+// }
+//
+// f.read(str, fileSize);
+// f.close();
+// str[size] = '\0';
+//
+// s = str;
+// delete[] str;
+// return 0;
+// }
+// printf("Error: Failed to open file %s\n", filename);
+// return -1;
+//}
+
+} // namespace ocl
+} // namespace cv
case CL_INVALID_GLOBAL_WORK_SIZE:
return "CL_INVALID_GLOBAL_WORK_SIZE";
//case CL_INVALID_PROPERTY:
- // return "CL_INVALID_PROPERTY";
+ // return "CL_INVALID_PROPERTY";
//case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:
- // return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
+ // return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
//case CL_PLATFORM_NOT_FOUND_KHR:
- // return "CL_PLATFORM_NOT_FOUND_KHR";
- // //case CL_INVALID_PROPERTY_EXT:
- // // return "CL_INVALID_PROPERTY_EXT";
+ // return "CL_PLATFORM_NOT_FOUND_KHR";
+ // //case CL_INVALID_PROPERTY_EXT:
+ // // return "CL_INVALID_PROPERTY_EXT";
//case CL_DEVICE_PARTITION_FAILED_EXT:
- // return "CL_DEVICE_PARTITION_FAILED_EXT";
+ // return "CL_DEVICE_PARTITION_FAILED_EXT";
//case CL_INVALID_PARTITION_COUNT_EXT:
- // return "CL_INVALID_PARTITION_COUNT_EXT";
+ // return "CL_INVALID_PARTITION_COUNT_EXT";
//default:
- // return "unknown error code";
+ // return "unknown error code";
}
static char buf[256];
sprintf(buf, "%d", err);
{
fft_setup();
- bool is_1d_input = (_dft_size.height == 1);
- int is_row_dft = flags & DFT_ROWS;
+ bool is_1d_input = (_dft_size.height == 1);
+ int is_row_dft = flags & DFT_ROWS;
int is_scaled_dft = flags & DFT_SCALE;
- int is_inverse = flags & DFT_INVERSE;
+ int is_inverse = flags & DFT_INVERSE;
- //clAmdFftResultLocation place;
- clAmdFftLayout inLayout;
- clAmdFftLayout outLayout;
- clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
+ //clAmdFftResultLocation place;
+ clAmdFftLayout inLayout;
+ clAmdFftLayout outLayout;
+ clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
- size_t batchSize = is_row_dft ? dft_size.height : 1;
+ size_t batchSize = is_row_dft ? dft_size.height : 1;
size_t clLengthsIn[ 3 ] = {1, 1, 1};
size_t clStridesIn[ 3 ] = {1, 1, 1};
//size_t clLengthsOut[ 3 ] = {1, 1, 1};
size_t clStridesOut[ 3 ] = {1, 1, 1};
- clLengthsIn[0] = dft_size.width;
- clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
- clStridesIn[0] = 1;
- clStridesOut[0] = 1;
+ clLengthsIn[0] = dft_size.width;
+ clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
+ clStridesIn[0] = 1;
+ clStridesOut[0] = 1;
switch(_type)
{
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
- openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getoclContext(), dim, clLengthsIn ) );
+ openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
//ready to bake
- openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getoclCommandQueue(), NULL, NULL ) );
+ openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) );
}
cv::ocl::FftPlan::~FftPlan()
{
// similar assertions with cuda module
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
- //bool is_1d_input = (src.rows == 1);
- //int is_row_dft = flags & DFT_ROWS;
- //int is_scaled_dft = flags & DFT_SCALE;
- int is_inverse = flags & DFT_INVERSE;
- bool is_complex_input = src.channels() == 2;
- bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
+ //bool is_1d_input = (src.rows == 1);
+ //int is_row_dft = flags & DFT_ROWS;
+ //int is_scaled_dft = flags & DFT_SCALE;
+ int is_inverse = flags & DFT_INVERSE;
+ bool is_complex_input = src.channels() == 2;
+ bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
// We don't support real-to-real transform
if (buffersize)
{
cl_int medstatus;
- clMedBuffer = clCreateBuffer ( (cl_context)src.clCxt->oclContext(), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
+ clMedBuffer = clCreateBuffer ( *(cl_context*)(src.clCxt->getOpenCLContextPtr()), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
openCLSafeCall( medstatus );
}
- cl_command_queue clq = (cl_command_queue)src.clCxt->oclCommandQueue();
+ cl_command_queue clq = *(cl_command_queue*)(src.clCxt->getOpenCLCommandQueuePtr());
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1,
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
{
- if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
+ if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
int offb = src2.offset;
int offc = dst.offset;
- cl_command_queue clq = (cl_command_queue)src1.clCxt->oclCommandQueue();
+ cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr();
switch(src1.type())
{
case CV_32FC1:
CV_DbgAssert(points.type() == CV_32FC2);
points_v.resize(points.cols);
openCLSafeCall(clEnqueueReadBuffer(
- *reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
+ *(cl_command_queue*)getClCommandQueuePtr(),
reinterpret_cast<cl_mem>(points.data),
CL_TRUE,
0,
if( gimg.cols < minSize.width || gimg.rows < minSize.height )
CV_Error(CV_StsError, "Image too small");
- cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
+ cl_command_queue qu = getClCommandQueue(Context::getContext());
if( (flags & CV_HAAR_SCALE_IMAGE) )
{
CvSize winSize0 = cascade->orig_window_size;
size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 };
- size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->getDeviceInfo().maxComputeUnits) *localThreads[0],
localThreads[1], 1
};
int outputsz = 256 * globalThreads[0] / localThreads[0];
int grp_per_CU = 12;
size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 };
- size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->getDeviceInfo().maxComputeUnits *localThreads[0],
localThreads[1], 1 };
int outputsz = 256 * globalThreads[0] / localThreads[0];
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
int blocksize = 8;
int grp_per_CU = 12;
size_t localThreads[3] = { blocksize, blocksize, 1 };
- size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->getDeviceInfo().maxComputeUnits *localThreads[0],
localThreads[1],
1 };
int outputsz = 256 * globalThreads[0] / localThreads[0];
}
int *candidate;
- cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
+ cl_command_queue qu = getClCommandQueue(Context::getContext());
if( (flags & CV_HAAR_SCALE_IMAGE) )
{
int indexy = 0;
GpuHidHaarStageClassifier *stage;
GpuHidHaarClassifier *classifier;
GpuHidHaarTreeNode *node;
- cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
+ cl_command_queue qu = getClCommandQueue(Context::getContext());
if( (flags & CV_HAAR_SCALE_IMAGE) )
{
gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
CvSize sz;
CvSize winSize0 = oldCascade->orig_window_size;
detect_piramid_info *scaleinfo;
- cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
+ cl_command_queue qu = getClCommandQueue(Context::getContext());
if (flags & CV_HAAR_SCALE_IMAGE)
{
for(factor = 1.f;; factor *= scaleFactor)
effect_size = Size(0, 0);
- if (queryDeviceInfo<IS_CPU_DEVICE, bool>())
+ if (isCpuDevice())
hog_device_cpu = true;
else
hog_device_cpu = false;
else
{
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
- int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+ size_t wave_size = queryWaveFrontSize(kernel);
char opt[32] = {0};
- sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
+ sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, opt);
}
else
{
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
- int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+ size_t wave_size = queryWaveFrontSize(kernel);
char opt[32] = {0};
- sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
+ sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, opt);
}
else
{
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
- int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+ size_t wave_size = queryWaveFrontSize(kernel);
char opt[32] = {0};
- sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
+ sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, opt);
}
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
- if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
+ if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
{
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
}
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
- if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
+ if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
{
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
}
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
- if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
+ if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
{
args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
string kernelName = "warpAffine" + s[interpolation];
- if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
+ if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
{
cl_int st;
- coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
+ coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
+ openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
}
else
{
{
float_coeffs[m][n] = coeffs[m][n];
}
- coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
+ coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
+ openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
}
//TODO: improve this kernel
string s[3] = {"NN", "Linear", "Cubic"};
string kernelName = "warpPerspective" + s[interpolation];
- if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
+ if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
{
cl_int st;
- coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
+ coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
+ openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
}
else
{
for(int n = 0; n < 3; n++)
float_coeffs[m][n] = coeffs[m][n];
- coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
+ coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
+ openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
}
//TODO: improve this kernel
size_t blkSizeX = 16, blkSizeY = 16;
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
{
CV_Assert(src.type() == CV_8UC1);
- if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
double k, int borderType)
{
- if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
{
- if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
String kernelName = "calcLut";
size_t localThreads[3] = { 32, 8, 1 };
size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
- bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+ bool is_cpu = isCpuDevice();
if (is_cpu)
openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU");
else
{
cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName);
- int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+ size_t wave_size = queryWaveFrontSize(kernel);
openCLSafeCall(clReleaseKernel(kernel));
static char opt[20] = {0};
- sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
+ sprintf(opt, " -D WAVE_SIZE=%d", (int)wave_size);
openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt);
}
}
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Guoping Long, longguoping@gmail.com
-// Niko Li, newlife20080214@gmail.com
-// Yao Wang, bitwangyaoyao@gmail.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include <iomanip>
-#include <fstream>
-#include "binarycaching.hpp"
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace std;
-using std::cout;
-using std::endl;
-
-//#define PRINT_KERNEL_RUN_TIME
-#define RUN_TIMES 100
-#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD
-#define CL_MEM_USE_PERSISTENT_MEM_AMD 0
-#endif
-//#define AMD_DOUBLE_DIFFER
-
-namespace cv
-{
- namespace ocl
- {
- extern void fft_teardown();
- extern void clBlasTeardown();
- /*
- * The binary caching system to eliminate redundant program source compilation.
- * Strictly, this is not a cache because we do not implement evictions right now.
- * We shall add such features to trade-off memory consumption and performance when necessary.
- */
- auto_ptr<ProgramCache> ProgramCache::programCache;
- ProgramCache *programCache = NULL;
- DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
- DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
- int gDevMemTypeValueMap[5] = {0,
- CL_MEM_ALLOC_HOST_PTR,
- CL_MEM_USE_HOST_PTR,
- CL_MEM_COPY_HOST_PTR,
- CL_MEM_USE_PERSISTENT_MEM_AMD};
- int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
-
- ProgramCache::ProgramCache()
- {
- codeCache.clear();
- cacheSize = 0;
- }
-
- ProgramCache::~ProgramCache()
- {
- releaseProgram();
- }
-
- cl_program ProgramCache::progLookup(string srcsign)
- {
- map<string, cl_program>::iterator iter;
- iter = codeCache.find(srcsign);
- if(iter != codeCache.end())
- return iter->second;
- else
- return NULL;
- }
-
- void ProgramCache::addProgram(string srcsign , cl_program program)
- {
- if(!progLookup(srcsign))
- {
- codeCache.insert(map<string, cl_program>::value_type(srcsign, program));
- }
- }
-
- void ProgramCache::releaseProgram()
- {
- map<string, cl_program>::iterator iter;
- for(iter = codeCache.begin(); iter != codeCache.end(); iter++)
- {
- openCLSafeCall(clReleaseProgram(iter->second));
- }
- codeCache.clear();
- cacheSize = 0;
- }
- struct Info::Impl
- {
- cl_platform_id oclplatform;
- std::vector<cl_device_id> devices;
- std::vector<std::string> devName;
- std::string clVersion;
-
- cl_context oclcontext;
- cl_command_queue clCmdQueue;
- int devnum;
- size_t maxWorkGroupSize;
- cl_uint maxDimensions; // == maxWorkItemSizes.size()
- std::vector<size_t> maxWorkItemSizes;
- cl_uint maxComputeUnits;
- char extra_options[512];
- int double_support;
- int unified_memory; //1 means integrated GPU, otherwise this value is 0
- int refcounter;
-
- Impl();
-
- void setDevice(void *ctx, void *q, int devnum);
-
- void release()
- {
- if(1 == CV_XADD(&refcounter, -1))
- {
- releaseResources();
- delete this;
- }
- }
-
- Impl* copy()
- {
- CV_XADD(&refcounter, 1);
- return this;
- }
-
- private:
- Impl(const Impl&);
- Impl& operator=(const Impl&);
- void releaseResources();
- };
-
- // global variables to hold binary cache properties
- static int enable_disk_cache =
-#ifdef _DEBUG
- false;
-#else
- true;
-#endif
- static int update_disk_cache = false;
- static String binpath = "";
-
- Info::Impl::Impl()
- :oclplatform(0),
- oclcontext(0),
- clCmdQueue(0),
- devnum(-1),
- maxWorkGroupSize(0),
- maxDimensions(0),
- maxComputeUnits(0),
- double_support(0),
- unified_memory(0),
- refcounter(1)
- {
- memset(extra_options, 0, 512);
- }
-
- void Info::Impl::releaseResources()
- {
- devnum = -1;
-
- if(clCmdQueue)
- {
- //temporarily disable command queue release as it causes program hang at exit
- //openCLSafeCall(clReleaseCommandQueue(clCmdQueue));
- clCmdQueue = 0;
- }
-
- if(oclcontext)
- {
- openCLSafeCall(clReleaseContext(oclcontext));
- oclcontext = 0;
- }
- }
-
- void Info::Impl::setDevice(void *ctx, void *q, int dnum)
- {
- if((ctx && q) || devnum != dnum)
- releaseResources();
-
- CV_Assert(dnum >= 0 && dnum < (int)devices.size());
- devnum = dnum;
- if(ctx && q)
- {
- oclcontext = (cl_context)ctx;
- clCmdQueue = (cl_command_queue)q;
- clRetainContext(oclcontext);
- clRetainCommandQueue(clCmdQueue);
- }
- else
- {
- cl_int status = 0;
- cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(oclplatform), 0 };
- oclcontext = clCreateContext(cps, 1, &devices[devnum], 0, 0, &status);
- openCLVerifyCall(status);
- clCmdQueue = clCreateCommandQueue(oclcontext, devices[devnum], CL_QUEUE_PROFILING_ENABLE, &status);
- openCLVerifyCall(status);
- }
-
- openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&maxWorkGroupSize, 0));
- openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void *)&maxDimensions, 0));
- maxWorkItemSizes.resize(maxDimensions);
- openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxDimensions, (void *)&maxWorkItemSizes[0], 0));
- openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), (void *)&maxComputeUnits, 0));
-
- cl_bool unfymem = false;
- openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool), (void *)&unfymem, 0));
- unified_memory = unfymem ? 1 : 0;
-
- //initialize extra options for compilation. Currently only fp64 is included.
- //Assume 4KB is enough to store all possible extensions.
- const int EXT_LEN = 4096 + 1 ;
- char extends_set[EXT_LEN];
- size_t extends_size;
- openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size));
- extends_set[EXT_LEN - 1] = 0;
- size_t fp64_khr = std::string(extends_set).find("cl_khr_fp64");
-
- if(fp64_khr != std::string::npos)
- {
- sprintf(extra_options, "-D DOUBLE_SUPPORT");
- double_support = 1;
- }
- else
- {
- memset(extra_options, 0, 512);
- double_support = 0;
- }
- }
-
- ////////////////////////Common OpenCL specific calls///////////////
- int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type)
- {
- rw_type = gDeviceMemRW;
- mem_type = gDeviceMemType;
- return Context::getContext()->impl->unified_memory;
- }
-
- int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
- {
- if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) ||
- mem_type == DEVICE_MEM_UHP ||
- mem_type == DEVICE_MEM_CHP )
- return -1;
- gDeviceMemRW = rw_type;
- gDeviceMemType = mem_type;
- return 0;
- }
-
- int getDevice(std::vector<Info> &oclinfo, int devicetype)
- {
- //TODO: cache oclinfo vector
- oclinfo.clear();
-
- switch(devicetype)
- {
- case CVCL_DEVICE_TYPE_DEFAULT:
- case CVCL_DEVICE_TYPE_CPU:
- case CVCL_DEVICE_TYPE_GPU:
- case CVCL_DEVICE_TYPE_ACCELERATOR:
- case CVCL_DEVICE_TYPE_ALL:
- break;
- default:
- return 0;
- }
-
- // Platform info
- cl_uint numPlatforms;
- openCLSafeCall(clGetPlatformIDs(0, 0, &numPlatforms));
- if(numPlatforms < 1) return 0;
-
- std::vector<cl_platform_id> platforms(numPlatforms);
- openCLSafeCall(clGetPlatformIDs(numPlatforms, &platforms[0], 0));
-
- char deviceName[256];
- int devcienums = 0;
- char clVersion[256];
- for (unsigned i = 0; i < numPlatforms; ++i)
- {
- cl_uint numsdev = 0;
- cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev);
- if(status != CL_DEVICE_NOT_FOUND)
- openCLVerifyCall(status);
-
- if(numsdev > 0)
- {
- devcienums += numsdev;
- std::vector<cl_device_id> devices(numsdev);
- openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, &devices[0], 0));
-
- Info ocltmpinfo;
- ocltmpinfo.impl->oclplatform = platforms[i];
- openCLSafeCall(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(clVersion), clVersion, NULL));
- ocltmpinfo.impl->clVersion = clVersion;
- for(unsigned j = 0; j < numsdev; ++j)
- {
- ocltmpinfo.impl->devices.push_back(devices[j]);
- openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(deviceName), deviceName, 0));
- ocltmpinfo.impl->devName.push_back(deviceName);
- ocltmpinfo.DeviceName.push_back(deviceName);
- }
- oclinfo.push_back(ocltmpinfo);
- }
- }
- if(devcienums > 0)
- {
- setDevice(oclinfo[0]);
- }
- return devcienums;
- }
-
- void setDevice(Info &oclinfo, int devnum)
- {
- oclinfo.impl->setDevice(0, 0, devnum);
- Context::setContext(oclinfo);
- }
-
- void setDeviceEx(Info &oclinfo, void *ctx, void *q, int devnum)
- {
- oclinfo.impl->setDevice(ctx, q, devnum);
- Context::setContext(oclinfo);
- }
-
- void *getoclContext()
- {
- return &(Context::getContext()->impl->oclcontext);
- }
-
- void *getoclCommandQueue()
- {
- return &(Context::getContext()->impl->clCmdQueue);
- }
-
- void finish()
- {
- clFinish(Context::getContext()->impl->clCmdQueue);
- }
-
- //template specializations of queryDeviceInfo
- template<>
- bool queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel)
- {
- Info::Impl* impl = Context::getContext()->impl;
- cl_device_type devicetype;
- openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
- CL_DEVICE_TYPE, sizeof(cl_device_type),
- &devicetype, NULL));
- return (devicetype == CVCL_DEVICE_TYPE_CPU);
- }
-
- template<typename _ty>
- static _ty queryWavesize(cl_kernel kernel)
- {
- size_t info = 0;
- Info::Impl* impl = Context::getContext()->impl;
- bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
- if(is_cpu)
- {
- return 1;
- }
- CV_Assert(kernel != NULL);
- openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum],
- CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &info, NULL));
- return static_cast<_ty>(info);
- }
-
- template<>
- size_t queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel)
- {
- return queryWavesize<size_t>(kernel);
- }
- template<>
- int queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel)
- {
- return queryWavesize<int>(kernel);
- }
-
- void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size)
- {
- cl_int status;
- status = clEnqueueReadBuffer(clCxt->impl->clCmdQueue, dst_buffer, CL_TRUE, 0,
- size, host_buffer, 0, NULL, NULL);
- openCLVerifyCall(status);
- }
-
- cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size)
- {
- cl_int status;
- cl_mem buffer = clCreateBuffer(clCxt->impl->oclcontext, (cl_mem_flags)flag, size, NULL, &status);
- openCLVerifyCall(status);
- return buffer;
- }
-
- void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
- size_t widthInBytes, size_t height)
- {
- openCLMallocPitchEx(clCxt, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType);
- }
-
- void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
- size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
- {
- cl_int status;
- *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
- widthInBytes * height, 0, &status);
- openCLVerifyCall(status);
- *pitch = widthInBytes;
- }
-
- void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
- const void *src, size_t spitch,
- size_t width, size_t height, openCLMemcpyKind kind, int channels)
- {
- size_t buffer_origin[3] = {0, 0, 0};
- size_t host_origin[3] = {0, 0, 0};
- size_t region[3] = {width, height, 1};
- if(kind == clMemcpyHostToDevice)
- {
- if(dpitch == width || channels == 3 || height == 1)
- {
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
- 0, width * height, src, 0, NULL, NULL));
- }
- else
- {
- openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
- buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
- }
- }
- else if(kind == clMemcpyDeviceToHost)
- {
- if(spitch == width || channels == 3 || height == 1)
- {
- openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
- 0, width * height, dst, 0, NULL, NULL));
- }
- else
- {
- openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
- buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
- }
- }
- }
-
- void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
- const void *src, size_t spitch,
- size_t width, size_t height, int src_offset)
- {
- size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
- size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
- size_t region[3] = {width, height, 1};
-
- openCLSafeCall(clEnqueueCopyBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, (cl_mem)dst, src_origin, dst_origin,
- region, spitch, 0, dpitch, 0, 0, 0, 0));
- }
-
- void openCLFree(void *devPtr)
- {
- openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
- }
- cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName)
- {
- return openCLGetKernelFromSource(clCxt, source, kernelName, NULL);
- }
-
- void setBinaryDiskCache(int mode, String path)
- {
- if(mode == CACHE_NONE)
- {
- update_disk_cache = 0;
- enable_disk_cache = 0;
- return;
- }
- update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE;
- enable_disk_cache |=
-#ifdef _DEBUG
- (mode & CACHE_DEBUG) == CACHE_DEBUG;
-#else
- (mode & CACHE_RELEASE) == CACHE_RELEASE;
-#endif
- if(enable_disk_cache && !path.empty())
- {
- binpath = path;
- }
- }
-
- void setBinpath(const char *path)
- {
- binpath = path;
- }
-
- int savetofile(const Context*, cl_program &program, const char *fileName)
- {
- size_t binarySize;
- openCLSafeCall(clGetProgramInfo(program,
- CL_PROGRAM_BINARY_SIZES,
- sizeof(size_t),
- &binarySize, NULL));
- char* binary = (char*)malloc(binarySize);
- if(binary == NULL)
- {
- CV_Error(CV_StsNoMem, "Failed to allocate host memory.");
- }
- openCLSafeCall(clGetProgramInfo(program,
- CL_PROGRAM_BINARIES,
- sizeof(char *),
- &binary,
- NULL));
-
- FILE *fp = fopen(fileName, "wb+");
- if(fp != NULL)
- {
- fwrite(binary, binarySize, 1, fp);
- free(binary);
- fclose(fp);
- }
- return 1;
- }
-
- cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName,
- const char *build_options)
- {
- cl_kernel kernel;
- cl_program program ;
- cl_int status = 0;
- stringstream src_sign;
- string srcsign;
- string filename;
- CV_Assert(programCache != NULL);
-
- if(NULL != build_options)
- {
- src_sign << (int64)(*source) << clCxt->impl->oclcontext << "_" << build_options;
- }
- else
- {
- src_sign << (int64)(*source) << clCxt->impl->oclcontext;
- }
- srcsign = src_sign.str();
-
- program = NULL;
- program = programCache->progLookup(srcsign);
-
- if(!program)
- {
- //config build programs
- char all_build_options[1024];
- memset(all_build_options, 0, 1024);
- char zeromem[512] = {0};
- if(0 != memcmp(clCxt -> impl->extra_options, zeromem, 512))
- strcat(all_build_options, clCxt -> impl->extra_options);
- strcat(all_build_options, " ");
- if(build_options != NULL)
- strcat(all_build_options, build_options);
- if(all_build_options != NULL)
- {
- filename = binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + all_build_options + ".clb";
- }
- else
- {
- filename = binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb";
- }
-
- FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL;
- if(fp == NULL || update_disk_cache)
- {
- if(fp != NULL)
- fclose(fp);
-
- program = clCreateProgramWithSource(
- clCxt->impl->oclcontext, 1, source, NULL, &status);
- openCLVerifyCall(status);
- status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL);
- if(status == CL_SUCCESS && enable_disk_cache)
- savetofile(clCxt, program, filename.c_str());
- }
- else
- {
- fseek(fp, 0, SEEK_END);
- size_t binarySize = ftell(fp);
- fseek(fp, 0, SEEK_SET);
- char *binary = new char[binarySize];
- CV_Assert(1 == fread(binary, binarySize, 1, fp));
- fclose(fp);
- cl_int status = 0;
- program = clCreateProgramWithBinary(clCxt->impl->oclcontext,
- 1,
- &(clCxt->impl->devices[clCxt->impl->devnum]),
- (const size_t *)&binarySize,
- (const unsigned char **)&binary,
- NULL,
- &status);
- openCLVerifyCall(status);
- status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL);
- delete[] binary;
- }
-
- if(status != CL_SUCCESS)
- {
- if(status == CL_BUILD_PROGRAM_FAILURE)
- {
- cl_int logStatus;
- char *buildLog = NULL;
- size_t buildLogSize = 0;
- logStatus = clGetProgramBuildInfo(program,
- clCxt->impl->devices[clCxt->impl->devnum], CL_PROGRAM_BUILD_LOG, buildLogSize,
- buildLog, &buildLogSize);
- if(logStatus != CL_SUCCESS)
- cout << "Failed to build the program and get the build info." << endl;
- buildLog = new char[buildLogSize];
- CV_DbgAssert(!!buildLog);
- memset(buildLog, 0, buildLogSize);
- openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[clCxt->impl->devnum],
- CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
- cout << "\n\t\t\tBUILD LOG\n";
- cout << buildLog << endl;
- delete [] buildLog;
- }
- openCLVerifyCall(status);
- }
- //Cache the binary for future use if build_options is null
- if( (programCache->cacheSize += 1) < programCache->MAX_PROG_CACHE_SIZE)
- programCache->addProgram(srcsign, program);
- else
- cout << "Warning: code cache has been full.\n";
- }
- kernel = clCreateKernel(program, kernelName.c_str(), &status);
- openCLVerifyCall(status);
- return kernel;
- }
-
- void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads)
- {
- size_t kernelWorkGroupSize;
- openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[clCxt->impl->devnum],
- CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
- CV_Assert( localThreads[0] <= clCxt->impl->maxWorkItemSizes[0] );
- CV_Assert( localThreads[1] <= clCxt->impl->maxWorkItemSizes[1] );
- CV_Assert( localThreads[2] <= clCxt->impl->maxWorkItemSizes[2] );
- CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize );
- CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= clCxt->impl->maxWorkGroupSize );
- }
-
- static inline size_t roundUp(size_t sz, size_t n)
- {
- // we don't assume that n is a power of 2 (see alignSize)
- // equal to divUp(sz, n) * n
- size_t t = sz + n - 1;
- size_t rem = t % n;
- size_t result = t - rem;
- return result;
- }
-
-#ifdef PRINT_KERNEL_RUN_TIME
- static double total_execute_time = 0;
- static double total_kernel_time = 0;
-#endif
- void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
- size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
- int depth, const char *build_options)
- {
- //construct kernel name
- //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
- //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
- stringstream idxStr;
- if(channels != -1)
- idxStr << "_C" << channels;
- if(depth != -1)
- idxStr << "_D" << depth;
- kernelName += idxStr.str();
-
- cl_kernel kernel;
- kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
-
- if ( localThreads != NULL)
- {
- globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
- globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
- globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
-
- cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
- }
- for(size_t i = 0; i < args.size(); i ++)
- openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
-
-#ifndef PRINT_KERNEL_RUN_TIME
- openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
- localThreads, 0, NULL, NULL));
-#else
- cl_event event = NULL;
- openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
- localThreads, 0, NULL, &event));
-
- cl_ulong start_time, end_time, queue_time;
- double execute_time = 0;
- double total_time = 0;
-
- openCLSafeCall(clWaitForEvents(1, &event));
- openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
- sizeof(cl_ulong), &start_time, 0));
-
- openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
- sizeof(cl_ulong), &end_time, 0));
-
- openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
- sizeof(cl_ulong), &queue_time, 0));
-
- execute_time = (double)(end_time - start_time) / (1000 * 1000);
- total_time = (double)(end_time - queue_time) / (1000 * 1000);
-
- total_execute_time += execute_time;
- total_kernel_time += total_time;
- clReleaseEvent(event);
-#endif
-
- clFlush(clCxt->impl->clCmdQueue);
- openCLSafeCall(clReleaseKernel(kernel));
- }
-
- void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth)
- {
- openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args,
- channels, depth, NULL);
- }
- void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
-
- {
-#ifndef PRINT_KERNEL_RUN_TIME
- openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
- build_options);
-#else
- string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
- cout << endl;
- cout << "Function Name: " << kernelName;
- if(depth >= 0)
- cout << " |data type: " << data_type[depth];
- cout << " |channels: " << channels;
- cout << " |Time Unit: " << "ms" << endl;
-
- total_execute_time = 0;
- total_kernel_time = 0;
- cout << "-------------------------------------" << endl;
-
- cout << setiosflags(ios::left) << setw(15) << "excute time";
- cout << setiosflags(ios::left) << setw(15) << "lauch time";
- cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl;
- int i = 0;
- for(i = 0; i < RUN_TIMES; i++)
- openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
- build_options);
-
- cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
- cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
-#endif
- }
-
- double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
- bool finish, bool measureKernelTime, bool cleanUp)
-
- {
- //construct kernel name
- //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
- //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
- stringstream idxStr;
- if(channels != -1)
- idxStr << "_C" << channels;
- if(depth != -1)
- idxStr << "_D" << depth;
- kernelName += idxStr.str();
-
- cl_kernel kernel;
- kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
-
- double kernelTime = 0.0;
-
- if( globalThreads != NULL)
- {
- if ( localThreads != NULL)
- {
- globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
- globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
- globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
-
- //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
- cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
- }
- for(size_t i = 0; i < args.size(); i ++)
- openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
-
- if(measureKernelTime == false)
- {
- openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
- localThreads, 0, NULL, NULL));
- }
- else
- {
- cl_event event = NULL;
- openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
- localThreads, 0, NULL, &event));
-
- cl_ulong end_time, queue_time;
-
- openCLSafeCall(clWaitForEvents(1, &event));
-
- openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
- sizeof(cl_ulong), &end_time, 0));
-
- openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
- sizeof(cl_ulong), &queue_time, 0));
-
- kernelTime = (double)(end_time - queue_time) / (1000 * 1000);
-
- clReleaseEvent(event);
- }
- }
-
- if(finish)
- {
- clFinish(clCxt->impl->clCmdQueue);
- }
-
- if(cleanUp)
- {
- openCLSafeCall(clReleaseKernel(kernel));
- }
-
- return kernelTime;
- }
-
- // Converts the contents of a file into a string
- static int convertToString(const char *filename, std::string& s)
- {
- size_t size;
- char* str;
-
- std::fstream f(filename, (std::fstream::in | std::fstream::binary));
- if(f.is_open())
- {
- size_t fileSize;
- f.seekg(0, std::fstream::end);
- size = fileSize = (size_t)f.tellg();
- f.seekg(0, std::fstream::beg);
-
- str = new char[size+1];
- if(!str)
- {
- f.close();
- return -1;
- }
-
- f.read(str, fileSize);
- f.close();
- str[size] = '\0';
-
- s = str;
- delete[] str;
- return 0;
- }
- printf("Error: Failed to open file %s\n", filename);
- return -1;
- }
-
- double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
- bool finish, bool measureKernelTime, bool cleanUp)
-
- {
- std::vector<std::string> fsource;
- for (int i = 0 ; i < numFiles ; i++)
- {
- std::string str;
- if (convertToString(fileName[i], str) >= 0)
- fsource.push_back(str);
- }
- const char **source = new const char *[numFiles];
- for (int i = 0 ; i < numFiles ; i++)
- source[i] = fsource[i].c_str();
- double kernelTime = openCLExecuteKernelInterop(clCxt ,source, kernelName, globalThreads, localThreads,
- args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
- fsource.clear();
- delete []source;
- return kernelTime;
- }
-
- cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
- const size_t size)
- {
- int status;
- cl_mem con_struct;
-
- con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
- openCLSafeCall(status);
-
- openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size,
- value, 0, 0, 0));
-
- return con_struct;
-
- }
-
- /////////////////////////////OpenCL initialization/////////////////
- auto_ptr<Context> Context::clCxt;
- int Context::val = 0;
- static Mutex cs;
- static volatile int context_tear_down = 0;
-
- bool initialized()
- {
- return *((volatile int*)&Context::val) != 0 &&
- Context::clCxt->impl->clCmdQueue != NULL&&
- Context::clCxt->impl->oclcontext != NULL;
- }
-
- Context* Context::getContext()
- {
- if(*((volatile int*)&val) != 1)
- {
- AutoLock al(cs);
- if(*((volatile int*)&val) != 1)
- {
- if (context_tear_down)
- return clCxt.get();
- if( 0 == clCxt.get())
- clCxt.reset(new Context);
- std::vector<Info> oclinfo;
- CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0);
-
- *((volatile int*)&val) = 1;
- }
- }
- return clCxt.get();
- }
-
- void Context::setContext(Info &oclinfo)
- {
- AutoLock guard(cs);
- if(*((volatile int*)&val) != 1)
- {
- if( 0 == clCxt.get())
- clCxt.reset(new Context);
-
- clCxt.get()->impl = oclinfo.impl->copy();
-
- *((volatile int*)&val) = 1;
- }
- else
- {
- clCxt.get()->impl->release();
- clCxt.get()->impl = oclinfo.impl->copy();
- }
- }
-
- Context::Context()
- {
- impl = 0;
- programCache = ProgramCache::getProgramCache();
- }
-
- Context::~Context()
- {
- release();
- }
-
- void Context::release()
- {
- if (impl)
- impl->release();
- programCache->releaseProgram();
- }
-
- bool Context::supportsFeature(int ftype) const
- {
- switch(ftype)
- {
- case CL_DOUBLE:
- return impl->double_support == 1;
- case CL_UNIFIED_MEM:
- return impl->unified_memory == 1;
- case CL_VER_1_2:
- return impl->clVersion.find("OpenCL 1.2") != string::npos;
- default:
- return false;
- }
- }
-
- size_t Context::computeUnits() const
- {
- return impl->maxComputeUnits;
- }
-
- unsigned long queryLocalMemInfo()
- {
- Info::Impl* impl = Context::getContext()->impl;
- cl_ulong local_memory_size = 0;
- clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), (void*)&local_memory_size, 0);
- return local_memory_size;
- }
-
- void* Context::oclContext()
- {
- return impl->oclcontext;
- }
-
- void* Context::oclCommandQueue()
- {
- return impl->clCmdQueue;
- }
-
- Info::Info()
- {
- impl = new Impl;
- }
-
- void Info::release()
- {
- fft_teardown();
- clBlasTeardown();
- impl->release();
- impl = new Impl;
- DeviceName.clear();
- }
-
- Info::~Info()
- {
- fft_teardown();
- clBlasTeardown();
- impl->release();
- }
-
- Info &Info::operator = (const Info &m)
- {
- impl->release();
- impl = m.impl->copy();
- DeviceName = m.DeviceName;
- return *this;
- }
-
- Info::Info(const Info &m)
- {
- impl = m.impl->copy();
- DeviceName = m.DeviceName;
- }
- }//namespace ocl
-
-}//namespace cv
//M*/
#include "precomp.hpp"
+#include "opencl_kernels.hpp"
+
using namespace cv;
using namespace cv::ocl;
-namespace cv
-{
- namespace ocl
- {
- extern const char* knearest;//knearest
- }
-}
-
KNearestNeighbour::KNearestNeighbour()
{
clear();
k1 = MIN( k1, k );
String kernel_name = "knn_find_nearest";
- cl_ulong local_memory_size = queryLocalMemInfo();
+ cl_ulong local_memory_size = (cl_ulong)Context::getContext()->getDeviceInfo().localMemorySize;
int nThreads = local_memory_size / (2 * k * 4);
if(nThreads >= 256)
nThreads = 256;
size_t global_thread[] = {1, samples.rows, 1};
char build_option[50];
- if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE))
+ if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
sprintf(build_option, " ");
}else
Size wholeSize;
Point ofs;
m.locateROI(wholeSize, ofs);
-
create(wholeSize, m.type());
if (m.channels() == 3)
int pitch = wholeSize.width * 3 * m.elemSize1();
int tail_padding = m.elemSize1() * 3072;
int err;
- cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
+ cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE,
(pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
openCLVerifyCall(err);
openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
convert_C3C4(temp, *this);
-
openCLSafeCall(clReleaseMemObject(temp));
}
else
int pitch = wholecols * 3 * m.elemSize1();
int tail_padding = m.elemSize1() * 3072;
int err;
- cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
+ cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE,
(pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
openCLVerifyCall(err);
convert_C4C3(*this, temp);
openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3);
-
openCLSafeCall(clReleaseMemObject(temp));
}
else
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
{
- if (!clCxt->supportsFeature(Context::CL_DOUBLE) &&
+ if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) &&
(depth() == CV_64F || dst.depth() == CV_64F))
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
#ifdef CL_VERSION_1_2
// this enables backwards portability to
// run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
- if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
+ if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) &&
dst.offset == 0 && dst.cols == dst.wholecols)
{
const int sizeofMap[][7] =
};
int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];
- clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
+ clEnqueueFillBuffer(getClCommandQueue(dst.clCxt),
(cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
0, dst.step * dst.rows, 0, NULL, NULL);
}
for(size_t i = 0; i < args.size(); i ++)
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
- openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, NULL));
switch(finish_mode)
{
case CLFINISH:
- clFinish((cl_command_queue)clCxt->oclCommandQueue());
+ clFinish(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr());
case CLFLUSH:
- clFlush((cl_command_queue)clCxt->oclCommandQueue());
+ clFlush(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr());
break;
case DISABLE:
default:
#ifdef CL_VERSION_1_2
//this enables backwards portability to
//run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
- if(Context::getContext()->supportsFeature(Context::CL_VER_1_2))
+ if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2))
{
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+ texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
}
else
#endif
{
texture = clCreateImage2D(
- (cl_context)mat.clCxt->oclContext(),
+ *(cl_context*)mat.clCxt->getOpenCLContextPtr(),
CL_MEM_READ_WRITE,
&format,
mat.cols,
cl_mem devData;
if (mat.cols * mat.elemSize() != mat.step)
{
- devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows
+ devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows
* mat.elemSize(), NULL, NULL);
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
- clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
+ clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin,
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
- clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
+ clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
}
else
{
devData = (cl_mem)mat.data;
}
- clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0);
+ clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0);
if ((mat.cols * mat.elemSize() != mat.step))
{
- clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
+ clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
clReleaseMemObject(devData);
}
try
{
cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
- finish();
+ cv::ocl::finish();
_support = true;
}
catch (const cv::Exception& e)
bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
- if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE) && is_float)
+ if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && is_float)
{
CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
}
cv::Mat dst(dst_a);
a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
- if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE))
+ if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
for (int i = 0; i < contour->total; ++i)
{
// Third party copyrights are property of their respective owners.
//
// @Authors
-// Dachuan Zhao, dachuan@multicorewareinc.com
-// Yao Wang, yao@multicorewareinc.com
+// Dachuan Zhao, dachuan@multicorewareinc.com
+// Yao Wang, yao@multicorewareinc.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
- bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+ bool is_cpu = isCpuDevice();
if (is_cpu)
{
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
stringstream idxStr;
idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth();
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
- int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+ int wave_size = (int)queryWaveFrontSize(kernel);
openCLSafeCall(clReleaseKernel(kernel));
static char opt[32] = {0};
// Third party copyrights are property of their respective owners.
//
// @Authors
-// Zhang Chunpeng chunpeng@multicorewareinc.com
-// Yao Wang, yao@multicorewareinc.com
+// Zhang Chunpeng chunpeng@multicorewareinc.com
+// Yao Wang, yao@multicorewareinc.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
{
static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
{
- if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F)
+ if(!mat_dst.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_dst.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
{
- if(!mat_src.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_src.type() == CV_64F)
+ if(!mat_src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th));
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step));
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&rthis.min_disp_th));
openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&cdisp_step1));
openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&msg_step));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.max_data_term));
openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&left.step));
openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&rthis.min_disp_th));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost,
openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_float), (void *)&rthis.max_data_term));
openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&left.step));
openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&rthis.min_disp_th));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis,
openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2));
openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1));
openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
////////////////////////////////////////////////////////////////////////////////////////////////
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&msg_step));
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&nr_plane));
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&msg_step));
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
- openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL));
- clFinish(*(cl_command_queue*)getoclCommandQueue());
+ clFinish(*(cl_command_queue*)getClCommandQueuePtr());
openCLSafeCall(clReleaseKernel(kernel));
}
}
con_struct -> cmax_disc_term = max_disc_term;
con_struct -> cdisc_single_jump = disc_single_jump;
- cl_con_struct = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), (void *)con_struct,
+ Context* clCtx = Context::getContext();
+ cl_context clContext = *(cl_context*)(clCtx->getOpenCLContextPtr());
+ cl_command_queue clCmdQueue = *(cl_command_queue*)(clCtx->getOpenCLCommandQueuePtr());
+ cl_con_struct = load_constant(clContext, clCmdQueue, (void *)con_struct,
sizeof(con_struct_t));
delete con_struct;
// Third party copyrights are property of their respective owners.
//
// @Authors
-// Jin Ma, jin@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
const char *keys =
"{ h | help | false | print help message }"
"{ t | type | gpu | set device type:cpu or gpu}"
- "{ p | platform | 0 | set platform id }"
+ "{ p | platform | -1 | set platform id }"
"{ d | device | 0 | set device id }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
- cout << "Avaible options besides goole test option:" << endl;
+ cout << "Available options besides google test option:" << endl;
cmd.printParams();
return 0;
}
string type = cmd.get<string>("type");
- unsigned int pid = cmd.get<unsigned int>("platform");
+ int pid = cmd.get<int>("platform");
int device = cmd.get<int>("device");
print_info();
{
flag = CVCL_DEVICE_TYPE_CPU;
}
- std::vector<cv::ocl::Info> oclinfo;
- int devnums = getDevice(oclinfo, flag);
- if(devnums <= device || device < 0)
+
+ cv::ocl::PlatformsInfo platformsInfo;
+ cv::ocl::getOpenCLPlatforms(platformsInfo);
+ if (pid >= (int)platformsInfo.size())
{
- std::cout << "device invalid\n";
- return -1;
+ std::cout << "platform is invalid\n";
+ return 1;
}
- if(pid >= oclinfo.size())
+
+ cv::ocl::DevicesInfo devicesInfo;
+ int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]);
+ if (device < 0 || device >= devnums)
{
- std::cout << "platform invalid\n";
- return -1;
+ std::cout << "device/platform invalid\n";
+ return 1;
}
- setDevice(oclinfo[pid], device);
-
+ cv::ocl::setDevice(devicesInfo[device]);
setBinaryDiskCache(CACHE_UPDATE);
- cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
+ cout << "Device type: " << type << endl
+ << "Platform name: " << devicesInfo[device]->platform->platformName << endl
+ << "Device name: " << devicesInfo[device]->deviceName << endl;
return RUN_ALL_TESTS();
}
Combine(Values(szSmall64, szSmall128),
Values(MatType(CV_8UC1), MatType(CV_8UC3))))
{
- std::vector<cv::ocl::Info>info;
- cv::ocl::getDevice(info);
-
declare.time(5 * 60);
const Size size = std::tr1::get<0>(GetParam());
cl_mem c_btvRegWeights;
size_t count = btvWeights_size * sizeof(float);
c_btvRegWeights = openCLCreateBuffer(clCxt, CL_MEM_READ_ONLY, count);
- int cl_safe_check = clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL);
+ int cl_safe_check = clEnqueueWriteBuffer(getClCommandQueue(clCxt), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL);
CV_Assert(cl_safe_check == CL_SUCCESS);
args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data));
#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL)
TEST_F(SuperResolution, BTVL1_OCL)
{
- std::vector<cv::ocl::Info> infos;
- cv::ocl::getDevice(infos);
RunTest(cv::superres::createSuperResolution_BTVL1_OCL());
}
#endif