1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Peng Xiao, pengxiao@outlook.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
46 #include "precomp.hpp"
49 using namespace cv::ocl;
// NOTE(review): presumably selects the host (CPU_STL) sorter over the GPU sorters
// inside operator() - confirm at the use site.
51 static bool use_cpu_sorter = true;
57 ///////////////////////////OpenCL kernel strings///////////////////////////
// Kernel source string, defined in another translation unit; its address is
// handed to openCLExecuteKernel by every kernel launch in this file.
58 extern const char *imgproc_gftt;
// Work-group size: used as localThreads[0] by the bitonic and selection sorters
// and to size the selection sorter's local buffer.
71 const int GROUP_SIZE = 256;
// Template header parameterised on the sort method; Sorter is specialised
// below for CPU_STL, BITONIC and SELECTION.
73 template<SortMethod method>
79 //TODO(pengx): optimize the GPU sorter's performance so that the CPU sorter can be removed.
// Sorter specialisation that orders the corner candidates on the host with std::sort.
81 struct Sorter<CPU_STL>
// This variant receives the eigenvalue image as a plain oclMat.
83 typedef oclMat EigType;
// Comparator handed to std::sort. For each point, s[0] is used as the column and
// s[1] as the row when looking up its value in the static matrix mat_eig.
// NOTE(review): the name suggests a "greater than" ordering (largest value first) -
// confirm against the return expression.
88 static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
90 float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
91 float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
// Sorts the first `count` entries of `corners` using clfloat2Gt.
// NOTE(review): mat_eig is presumably filled from eig_tex before the sort - confirm.
94 static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
// Scoped lock on the static mutex cs. NOTE(review): presumably guards the shared
// static mat_eig that the comparator reads - confirm.
96 cv::AutoLock lock(cs);
97 //temporarily use STL's sort function
// Convert the oclMat to a host Mat so it can be iterated by std::sort.
98 Mat mat_corners = corners;
100 std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
// Assign the sorted host matrix back to the caller's oclMat.
101 corners = mat_corners;
// Out-of-class definitions of the static members of Sorter<CPU_STL>:
// cs is the mutex locked in sortCorners_caller, mat_eig the host matrix read by clfloat2Gt.
104 cv::Mutex Sorter<CPU_STL>::cs;
105 cv::Mat Sorter<CPU_STL>::mat_eig;
// Sorter specialisation that sorts on the device with the "sortCorners_bitonicSort" kernel.
108 struct Sorter<BITONIC>
110 typedef TextureCL EigType;
// Launches the kernel once per (stage, passOfStage) pair over `corners`.
// `count` is expected to be a power of two (see the note on numStages below).
112 static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
114 Context * cxt = Context::getContext();
// count / 2 work-items in total, requested in groups of GROUP_SIZE.
115 size_t globalThreads[3] = {count / 2, 1, 1};
116 size_t localThreads[3] = {GROUP_SIZE, 1, 1};
118 // 2^numStages should be equal to count or the output is invalid
// Halves i from count while it is greater than 1.
// NOTE(review): presumably increments numStages once per iteration - confirm.
120 for(int i = count; i > 1; i >>= 1)
125 std::vector< std::pair<size_t, const void *> > args(argc);
126 std::string kernelname = "sortCorners_bitonicSort";
// NOTE(review): argument 0 passes the address of the TextureCL object itself with
// sizeof(cl_mem) - confirm this resolves to the texture's cl_mem handle.
127 args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
128 args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
129 args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
130 for(int stage = 0; stage < numStages; ++stage)
// args[3] and args[4] point at the loop variables, so each launch reads the
// current stage / pass values.
132 args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
// Stage s runs s + 1 passes.
133 for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
135 args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
136 openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
// Sorter specialisation that sorts on the device with two kernel launches:
// "sortCorners_selectionSortLocal" followed by "sortCorners_selectionSortFinal".
143 struct Sorter<SELECTION>
145 typedef TextureCL EigType;
// Sorts `corners` (`count` entries) using the eigenvalue texture `eig_tex`.
147 static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
149 Context * cxt = Context::getContext();
// One work-item per corner, requested in groups of GROUP_SIZE.
151 size_t globalThreads[3] = {count, 1, 1};
152 size_t localThreads[3] = {GROUP_SIZE, 1, 1};
154 std::vector< std::pair<size_t, const void *> > args;
156 std::string kernelname = "sortCorners_selectionSortLocal";
// Bytes needed for one cl_float2 per work-item of a group.
157 int lds_size = GROUP_SIZE * sizeof(cl_float2);
158 args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
159 args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
160 args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
// Size-only argument with a NULL pointer.
// NOTE(review): presumably forwarded to clSetKernelArg to reserve __local memory - confirm.
161 args.push_back( std::make_pair( lds_size, (void*)NULL) );
163 openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
// Second launch, with the kernel name switched to the final pass.
166 kernelname = "sortCorners_selectionSortFinal";
168 openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
// Launches the "findCorners" kernel over the eigenvalue image `eig` and returns
// the value of the device-side counter, capped at max_count.
172 int findCorners_caller(
173 const TextureCL& eig,
174 const float threshold,
180 Context * cxt = Context::getContext();
182 std::vector< std::pair<size_t, const void*> > args;
183 std::string kernelname = "findCorners";
// Mask row step divided by the size of one channel element, i.e. the row
// stride expressed in elements instead of bytes.
185 const int mask_strip = mask.step / mask.elemSize1();
// 1x1 int matrix passed as the kernel's last argument and read back as the result.
// NOTE(review): confirm the counter is zeroed before the launch.
187 oclMat g_counter(1, 1, CV_32SC1);
190 args.push_back(make_pair( sizeof(cl_mem), (void*)&eig ));
191 args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data ));
192 args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data ));
193 args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip));
194 args.push_back(make_pair( sizeof(cl_float), (void*)&threshold ));
195 args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows ));
196 args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols ));
197 args.push_back(make_pair( sizeof(cl_int), (void*)&max_count ));
198 args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
// One work-item per eig pixel, requested in 16x16 groups.
200 size_t globalThreads[3] = {eig.cols, eig.rows, 1};
201 size_t localThreads[3] = {16, 16, 1};
// The WITH_MASK build option is passed only when a mask is supplied.
203 const char * opt = mask.empty() ? "" : "-D WITH_MASK";
204 openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt);
// Convert the counter to a host Mat, read it, and clamp it to max_count.
205 return std::min(Mat(g_counter).at<int>(0), max_count);
// Detects corners in `image` and writes them to `corners` as a row of CV_32FC2 points.
// `mask` may be empty; otherwise it must be CV_8UC1 and the same size as `image`.
// Steps visible here: compute a per-pixel response into eig_, collect candidates
// above maxVal * qualityLevel, sort them, then either copy the leading entries or
// filter them by minDistance on the host.
209 void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
// Parameter sanity checks (CV_Assert raises an OpenCV error when a condition fails).
211 CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
212 CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
214 CV_DbgAssert(support_image2d());
// Response image: one float per input pixel.
216 ensureSizeIsEnough(image.size(), CV_32F, eig_);
// NOTE(review): the Harris branch also calls cornerMinEigenVal_dxdy, passing harrisK
// as a seventh argument - confirm whether cornerHarris_dxdy was intended here.
218 if (useHarrisDetector)
219 cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
221 cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
// Only the maximum of eig_ is requested (the minimum output pointer is 0).
224 minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_);
// Candidate buffer: a single row of max(1000, 5% of the pixel count) CV_32FC2 entries.
226 ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
228 Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
// `total` is the number of candidates returned by findCorners_caller; the
// threshold argument is the quality level scaled by the maximum response.
229 int total = findCorners_caller(
231 static_cast<float>(maxVal * qualityLevel),
// Host-side sort of the candidates.
// NOTE(review): presumably taken when use_cpu_sorter is set - confirm.
243 Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
247 //if total is power of 2
// (total - 1) & total is zero exactly when total has at most one bit set.
248 if(((total - 1) & (total)) == 0)
250 Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
// NOTE(review): presumably the fallback for totals that are not a power of two - confirm.
254 Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
// Copy the leading entries of tmpCorners_ to the output: min(maxCorners, total)
// of them when maxCorners > 0, otherwise all `total`.
// NOTE(review): presumably the path taken when no minimum-distance filtering is needed - confirm.
260 Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
261 tmpCorners_(roi_range).copyTo(corners);
// Host-side minimum-distance filtering: download the candidates into tmp and
// collect the accepted ones in tmp2.
265 vector<Point2f> tmp(total);
266 downloadPoints(tmpCorners_, tmp);
268 vector<Point2f> tmp2;
// Bucket grid with square cells of side cell_size covering the image (dimensions rounded up).
// NOTE(review): cell_size is 0 when minDistance rounds to 0, which would divide by
// zero below - confirm a guard on minDistance precedes this code.
271 const int cell_size = cvRound(minDistance);
272 const int grid_width = (image.cols + cell_size - 1) / cell_size;
273 const int grid_height = (image.rows + cell_size - 1) / cell_size;
275 std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
277 for (int i = 0; i < total; ++i)
// Grid cell containing the current point p.
283 int x_cell = static_cast<int>(p.x / cell_size);
284 int y_cell = static_cast<int>(p.y / cell_size);
// Clamp the range of cells to inspect to the grid bounds.
292 x1 = std::max(0, x1);
293 y1 = std::max(0, y1);
294 x2 = std::min(grid_width - 1, x2);
295 y2 = std::min(grid_height - 1, y2);
297 for (int yy = y1; yy <= y2; yy++)
299 for (int xx = x1; xx <= x2; xx++)
301 vector<Point2f>& m = grid[yy * grid_width + xx];
305 for(size_t j = 0; j < m.size(); j++)
307 float dx = p.x - m[j].x;
308 float dy = p.y - m[j].y;
// Squared-distance test against minDistance (avoids a square root).
310 if (dx * dx + dy * dy < minDistance * minDistance)
// Record p in its own cell so later candidates are tested against it.
324 grid[y_cell * grid_width + x_cell].push_back(p);
// True once maxCorners points have been collected in tmp2 (when a limit is set).
328 if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
// Upload the accepted points as a 1 x N CV_32FC2 row.
// NOTE(review): &tmp2[0] requires tmp2 to be non-empty - confirm total > 0 is guaranteed here.
333 corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
336 void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector<Point2f> &points_v)
338 CV_DbgAssert(points.type() == CV_32FC2);
339 points_v.resize(points.cols);
340 openCLSafeCall(clEnqueueReadBuffer(
341 *reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
342 reinterpret_cast<cl_mem>(points.data),
345 points.cols * sizeof(Point2f),