// Patch hunk for the three-argument integral() overload.
// Lines starting with '-' are the implementation being removed, lines starting with '+'
// are its replacement, and unprefixed lines are unchanged context.
//
// Visible behaviour (both versions): allocate temporaries t_sum/t_sqsum with
// create(src.cols, src.rows, ...), allocate the outputs sum/sqsum with
// create(src.rows + 1, src.cols + 1, ...), then run two kernels from imgproc_integral:
// "integral_cols" (reads src, writes t_sum/t_sqsum) followed by "integral_rows"
// (reads t_sum/t_sqsum, writes sum/sqsum).
//
// What the patch changes:
//  * the feature flag is spelled ocl::FEATURE_CL_DOUBLE instead of FEATURE_CL_DOUBLE;
//  * a `return` is added after CV_Error;
//  * the type of t_sum/sum is no longer chosen from the pixel count (CV_32SC1 up to
//    2901 * 2901 pixels, CV_32FC1 above) but from src.depth(): CV_32S for CV_8U,
//    otherwise CV_64F. t_sqsum/sqsum stay CV_32FC1.
//
// NOTE(review): `vcols` is used for the first launch size but is not declared in the
// visible lines - presumably declared in lines this hunk omits; confirm.
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
{
// Only 8-bit single-channel input passes this assertion.
// NOTE(review): with this assert in place the CV_64F checks below look unreachable - confirm.
CV_Assert(src.type() == CV_8UC1);
- if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
+ if(!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
+ return;
}
+
// src.offset is split into a quotient (offset) and remainder (pre_invalid) with respect
// to vlen; both values are passed to the "integral_cols" kernel below.
int vlen = 4;
int offset = src.offset / vlen;
int pre_invalid = src.offset % vlen;
oclMat t_sum , t_sqsum;
// The outputs are one row and one column larger than src.
int w = src.cols + 1, h = src.rows + 1;
- int depth;
- if( src.cols * src.rows <= 2901 * 2901 ) //2901 is the maximum size for int when all values are 255
- {
- t_sum.create(src.cols, src.rows, CV_32SC1);
- sum.create(h, w, CV_32SC1);
- }
- else
- {
- //Use float to prevent overflow
- t_sum.create(src.cols, src.rows, CV_32FC1);
- sum.create(h, w, CV_32FC1);
- }
- t_sqsum.create(src.cols, src.rows, CV_32FC1);
- sqsum.create(h, w, CV_32FC1);
- depth = sum.depth();
- int sum_offset = sum.offset / vlen;
- int sqsum_offset = sqsum.offset / vlen;
-
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
- size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
- args.clear();
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
- size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
// Replacement implementation starts here. The kernel arguments, launch sizes and kernel
// names are the same as in the removed lines; only the depth/type selection differs.
// The selected depth is also passed as the last argument of openCLExecuteKernel
// (NOTE(review): presumably selects a depth-specific kernel variant - confirm against
// openCLExecuteKernel).
+ int depth = src.depth() == CV_8U ? CV_32S : CV_64F;
+ int type = CV_MAKE_TYPE(depth, 1);
+
+ t_sum.create(src.cols, src.rows, type);
+ sum.create(h, w, type);
+
+ t_sqsum.create(src.cols, src.rows, CV_32FC1);
+ sqsum.create(h, w, CV_32FC1);
+
+ int sum_offset = sum.offset / vlen;
+ int sqsum_offset = sqsum.offset / vlen;
+
+ vector<pair<size_t , const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+ size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
+ openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
+
+ args.clear();
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
+ size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
+ openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
}
// Patch hunk for the two-argument integral() overload (sum only).
// Lines starting with '-' are removed, lines starting with '+' are added, unprefixed
// lines are context.
//
// NOTE(review): the opening brace and the declarations of `vlen`, `offset`,
// `pre_invalid` and `vcols` are not part of the visible lines - presumably they sit in
// the part of the function this hunk omits; confirm against the full file.
//
// Visible behaviour (both versions): allocate t_sum with create(src.cols, src.rows, ...)
// and sum with create(src.rows + 1, src.cols + 1, ...), then run two kernels from
// imgproc_integral_sum: "integral_sum_cols" (reads src, writes t_sum) followed by
// "integral_sum_rows" (reads t_sum, writes sum).
//
// What the patch changes: the type of t_sum/sum is no longer chosen from the pixel count
// (CV_32SC1 up to 2901 * 2901 pixels, CV_32FC1 above) but from src.depth(): CV_32S for
// CV_8U, otherwise CV_32F.
// NOTE(review): the fallback depth here is CV_32F, while the added lines of the
// three-argument overload use CV_64F - confirm the difference is intended.
void integral(const oclMat &src, oclMat &sum)
oclMat t_sum;
// The output is one row and one column larger than src.
int w = src.cols + 1, h = src.rows + 1;
- int depth;
- if(src.cols * src.rows <= 2901 * 2901)
- {
- t_sum.create(src.cols, src.rows, CV_32SC1);
- sum.create(h, w, CV_32SC1);
- }else
- {
- t_sum.create(src.cols, src.rows, CV_32FC1);
- sum.create(h, w, CV_32FC1);
- }
- depth = sum.depth();
- int sum_offset = sum.offset / vlen;
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
- size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
- args.clear();
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
- size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
// Replacement implementation starts here. The kernel arguments, launch sizes and kernel
// names are the same as in the removed lines; only the depth/type selection differs.
+ int depth = src.depth() == CV_8U ? CV_32S : CV_32F;
+ int type = CV_MAKE_TYPE(depth, 1);
+
+ t_sum.create(src.cols, src.rows, type);
+ sum.create(h, w, type);
+
+ int sum_offset = sum.offset / vlen;
+ vector<pair<size_t , const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+ size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
+ openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
+
+ args.clear();
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
+ size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
+ openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
}
/////////////////////// corner //////////////////////////////