// Host-side launcher for the "columnSum" OpenCL kernel in program imgproc_columnsum.
// Asserts that src is CV_32FC1, (re)creates dst with src's size and type, then
// runs the kernel with the src/dst buffers, the matrix dimensions, and the
// step/offset values described below.
// NOTE(review): this text is in patch form; lines starting with '-' appear to be
// the previous version and lines starting with '+' the replacement. Confirm
// before reading the two sets as one function body.
void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
{
CV_Assert(src.type() == CV_32FC1);
-
dst.create(src.size(), src.type());
- Context *clCxt = src.clCxt;
-
- const std::string kernelName = "columnSum";
// Step and offset are divided by elemSize() so the kernel receives them in
// units of elements (presumably converting from bytes; verify against oclMat).
+ int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
+ int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
// Kernel arguments are pushed in positional order: src buffer, dst buffer,
// cols, rows, src step, dst step, and (in the '+' version) src offset, dst offset.
std::vector< pair<size_t, const void *> > args;
-
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src_step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset));
// 1-D launch: global size is dst.cols in dimension 0, local size is 256.
// NOTE(review): dst.cols need not be a multiple of 256; whether
// openCLExecuteKernel pads the global size is not visible here - confirm.
size_t globalThreads[3] = {dst.cols, 1, 1};
size_t localThreads[3] = {256, 1, 1};
// The channel count and depth are forwarded to openCLExecuteKernel; presumably
// they select the kernel variant by name suffix (e.g. _C1_D5) - TODO confirm.
- openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(src.clCxt, &imgproc_columnsum, "columnSum", globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
//
//M*/
-#pragma OPENCL EXTENSION cl_amd_printf : enable
-#if defined (__ATI__)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-
-#elif defined (__NVIDIA__)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-
////////////////////////////////////////////////////////////////////
///////////////////////// columnSum ////////////////////////////////
////////////////////////////////////////////////////////////////////
// columnSum_C1_D5: running sum down each column of a float matrix.
// For the column x = get_global_id(0), row y of dst receives the sum of the
// src values in rows 0..y of that column. Work-items whose x is not below the
// column count do nothing.
// Parameters ('+' signature):
//   src, dst               - float buffers, read and written respectively
//   cols, rows             - matrix dimensions used as the loop/guard bounds
//   src_step, dst_step     - added to the index once per row; used directly as
//                            float-element strides (no shift is applied)
//   src_offset, dst_offset - added to x to form the starting index in each buffer
// NOTE(review): this text is in patch form; '-' lines appear to be the previous
// version (which shifted the steps right by 2 and had no offsets), '+' lines
// the replacement. The _C1_D5 suffix presumably denotes 1 channel / depth 5.
-/// CV_32FC1
-__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep)
+
+__kernel void columnSum_C1_D5(__global float * src, __global float * dst,
+ int cols, int rows, int src_step, int dst_step, int src_offset, int dst_offset)
{
const int x = get_global_id(0);
- srcStep >>= 2;
- dstStep >>= 2;
-
- if (x < srcCols)
+ if (x < cols)
{
- int srcIdx = x ;
- int dstIdx = x ;
+ int srcIdx = x + src_offset;
+ int dstIdx = x + dst_offset;
// Accumulator for the current column; each row's partial total is stored to dst.
float sum = 0;
- for (int y = 0; y < srcRows; ++y)
+ for (int y = 0; y < rows; ++y)
{
sum += src[srcIdx];
dst[dstIdx] = sum;
// Advance both indices to the same column in the next row.
- srcIdx += srcStep;
- dstIdx += dstStep;
+ srcIdx += src_step;
+ dstIdx += dst_step;
}
}
}