filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
}
}
- ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
+ ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
{
using namespace ::cv::ocl::canny;
- calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+ oclMat mapBuf = buf.edgeBuf(Rect(0, buf.edgeBuf.rows / 2, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
- edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
+ calcMap_gpu(buf.dx, buf.dy, magBuf, mapBuf, dst.rows, dst.cols, low_thresh, high_thresh);
- edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
+ edgesHysteresisLocal_gpu(mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
- getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
+ edgesHysteresisGlobal_gpu(mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
+
+ getEdges_gpu(mapBuf, dst, dst.rows, dst.cols);
}
}
buf.create(src.size(), apperture_size);
buf.edgeBuf.setTo(Scalar::all(0));
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+
if (apperture_size == 3)
{
calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
- calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+ calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
}
else
{
buf.filterDX->apply(src, buf.dx);
buf.filterDY->apply(src, buf.dy);
- calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+ calcMagnitude_gpu(buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
}
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
buf.dy = dy;
buf.create(dx.size(), -1);
buf.edgeBuf.setTo(Scalar::all(0));
- calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
+
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+
+ calcMagnitude_gpu(buf.dx, buf.dy, magBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- char build_options [15] = "";
- if(L2Grad)
- {
- strcat(build_options, "-D L2GRAD");
- }
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ const char * build_options = L2Grad ? "-D L2GRAD":"";
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
{
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- char build_options [15] = "";
- if(L2Grad)
- {
- strcat(build_options, "-D L2GRAD");
- }
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ const char * build_options = L2Grad ? "-D L2GRAD":"";
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
string kernelName = "calcMap";
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ mag += mag_offset;
+ map += map_offset;
+
__local float smem[18][18];
int gidx = get_global_id(0);
(
__global int * map,
__global ushort2 * st,
- volatile __global unsigned int * counter,
+ __global unsigned int * counter,
int rows,
int cols,
int map_step,
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ map += map_offset;
+
__local int smem[18][18];
int gidx = get_global_id(0);
if(ly < 14)
{
smem[ly][lx] =
- map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset];
+ map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
}
if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
{
smem[ly + 14][lx] =
- map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset];
+ map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
}
barrier(CLK_LOCAL_MEM_FENCE);
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
+
#define stack_size 512
__kernel
-void edgesHysteresisGlobal
+void
+__attribute__((reqd_work_group_size(128,1,1)))
+edgesHysteresisGlobal
(
__global int * map,
__global ushort2 * st1,
__global ushort2 * st2,
- volatile __global int * counter,
+ __global int * counter,
int rows,
int cols,
int count,
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ map += map_offset;
+
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int grp_idx = get_group_id(0);
int grp_idy = get_group_id(1);
- volatile __local unsigned int s_counter;
+ __local unsigned int s_counter;
__local unsigned int s_ind;
__local ushort2 s_st[stack_size];
pos.x += c_dx[lidx & 7];
pos.y += c_dy[lidx & 7];
- if (map[pos.x + map_offset + pos.y * map_step] == 1)
+ if (map[pos.x + pos.y * map_step] == 1)
{
- map[pos.x + map_offset + pos.y * map_step] = 2;
+ map[pos.x + pos.y * map_step] = 2;
ind = atomic_inc(&s_counter);
if(gidy < rows && gidx < cols)
{
- dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1));
+ dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
}
}
#include "precomp.hpp"
#ifdef HAVE_OPENCL
-#define SHOW_RESULT 0
////////////////////////////////////////////////////////
// Canny
bool useL2gradient;
cv::Mat edges_gold;
- //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
}
};
double low_thresh = 50.0;
double high_thresh = 100.0;
- cv::resize(img, img, cv::Size(512, 384));
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
cv::ocl::oclMat edges;
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
- char filename [100];
- sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
-
cv::Mat edges_gold;
cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
-#if SHOW_RESULT
- cv::Mat edges_x2, ocl_edges(edges);
- edges_x2.create(edges.rows, edges.cols * 2, edges.type());
- edges_x2.setTo(0);
- cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
- cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
- cv::namedWindow("Canny result (left: cpu, right: ocl)");
- cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
- cv::waitKey();
-#endif //OUTPUT_RESULT
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
}
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
+INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
#endif
\ No newline at end of file