bool runTask(bool sync, const Queue& q=Queue());
size_t workGroupSize() const;
+ // Queries CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE for this kernel on the
+ // default device; yields 0 if the kernel is not initialized or the query fails.
+ size_t preferedWorkGroupSizeMultiple() const;
bool compileWorkGroupSize(size_t wsz[]) const;
size_t localMemSize() const;
sizeof(val), &val, &retsz) >= 0 ? val : 0;
}
+// Asks OpenCL for CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE of this kernel,
+// using the device returned by Device::getDefault().
+// Returns 0 when `p` is null or when clGetKernelWorkGroupInfo reports a
+// negative (error) status; otherwise returns the value written by the query.
+size_t Kernel::preferedWorkGroupSizeMultiple() const
+{
+    if(!p)
+        return 0;
+
+    size_t multiple = 0;
+    size_t bytesWritten = 0;
+    cl_device_id defaultDevice = (cl_device_id)Device::getDefault().ptr();
+
+    // Non-negative status means the query succeeded and `multiple` is valid.
+    if(clGetKernelWorkGroupInfo(p->handle, defaultDevice,
+                                CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
+                                sizeof(multiple), &multiple, &bytesWritten) >= 0)
+        return multiple;
+
+    return 0;
+}
+
bool Kernel::compileWorkGroupSize(size_t wsz[]) const
{
if(!p || !wsz)
const int tilesX, const int tilesY, const cv::Size tileSize,
const int clipLimit, const float lutScale)
{
+ cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
+
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
if(is_cpu)
opts = "-D CPU ";
else
- opts = cv::format("-D WAVE_SIZE=%d", cv::ocl::Device::getDefault().maxWorkGroupSize());
+ opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple());
cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts);
if(k.empty())