CV_EXPORTS void setBinpath(const char *path);
//The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
+ //returns cl_context *
CV_EXPORTS void* getoclContext();
-
+ //returns cl_command_queue *
CV_EXPORTS void* getoclCommandQueue();
//explicit call clFinish. The global command queue will be used.
// support all C1 types
CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+ CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
//! finds global minimum and maximum array elements and returns their values with locations
// support all C1 types
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
public:
explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
};
+
+ class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
+ {
+ public:
+ explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+ int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
+
+ //! return 1 rows matrix with CV_32FC2 type
+ void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
+ //! download points of type Point2f to a vector. the vector's content will be erased
+ void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
+
+ int maxCorners;
+ double qualityLevel;
+ double minDistance;
+
+ int blockSize;
+ bool useHarrisDetector;
+ double harrisK;
+ void releaseMemory()
+ {
+ Dx_.release();
+ Dy_.release();
+ eig_.release();
+ minMaxbuf_.release();
+ tmpCorners_.release();
+ }
+ private:
+ oclMat Dx_;
+ oclMat Dy_;
+ oclMat eig_;
+ oclMat minMaxbuf_;
+ oclMat tmpCorners_;
+ };
+
+ inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
+ int blockSize_, bool useHarrisDetector_, double harrisK_)
+ {
+ maxCorners = maxCorners_;
+ qualityLevel = qualityLevel_;
+ minDistance = minDistance_;
+ blockSize = blockSize_;
+ useHarrisDetector = useHarrisDetector_;
+ harrisK = harrisK_;
+ }
+
/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
class CV_EXPORTS PyrLKOpticalFlow
{
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
void CV_EXPORTS releaseTexture(cl_mem& texture);
+ //Represents an image texture object
+ class CV_EXPORTS TextureCL
+ {
+ public:
+ TextureCL(cl_mem tex, int r, int c, int t)
+ : tex_(tex), rows(r), cols(c), type(t) {}
+ ~TextureCL()
+ {
+ openCLFree(tex_);
+ }
+ operator cl_mem()
+ {
+ return tex_;
+ }
+ cl_mem const tex_;
+ const int rows;
+ const int cols;
+ const int type;
+ private:
+ //disable assignment
+ void operator=(const TextureCL&);
+ };
+ // bind oclMat to OpenCL image textures and retunrs an TextureCL object
+ // note:
+ // for faster clamping, there is no buffer padding for the constructed texture
+ Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
+
// returns whether the current context supports image2d_t format or not
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
};
template<DEVICE_INFO _it, typename _ty>
_ty queryDeviceInfo(cl_kernel kernel = NULL);
- //info should have been pre-allocated
+
template<>
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
template<>
}
}
-template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
+template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
+ const oclMat &mask, oclMat &buf)
{
size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0);
groupnum = groupnum * 2;
int vlen = 8;
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
- Context *clCxt = src.clCxt;
- cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
- *minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
+
+ ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
+
+ cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
+
if (mask.empty())
{
- arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax");
+ arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax");
}
else
{
- arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask");
+ arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask");
}
- T *p = new T[groupnum * vlen * 2];
- memset(p, 0, dbsize);
- openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
- if(minVal != NULL){
+
+ Mat matbuf = Mat(buf);
+ T *p = matbuf.ptr<T>();
+ if(minVal != NULL)
+ {
+ *minVal = std::numeric_limits<double>::max();
for(int i = 0; i < vlen * (int)groupnum; i++)
{
*minVal = *minVal < p[i] ? *minVal : p[i];
}
}
- if(maxVal != NULL){
+ if(maxVal != NULL)
+ {
+ *maxVal = -std::numeric_limits<double>::max();
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
{
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
}
}
- delete[] p;
- openCLFree(dstBuffer);
}
-typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
+typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{
+ oclMat buf;
+ minMax_buf(src, minVal, maxVal, mask, buf);
+}
+void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
+{
CV_Assert(src.oclchannels() == 1);
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
};
minMaxFunc func;
func = functab[src.depth()];
- func(src, minVal, maxVal, mask);
+ func(src, minVal, maxVal, mask, buf);
}
//////////////////////////////////////////////////////////////////////////////
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other oclMaterials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include <iomanip>
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::ocl;
+
+static bool use_cpu_sorter = true;
+
+namespace cv
+{
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *imgproc_gfft;
+ }
+}
+
+namespace
+{
+enum SortMethod
+{
+ CPU_STL,
+ BITONIC,
+ SELECTION
+};
+
+const int GROUP_SIZE = 256;
+
+template<SortMethod method>
+struct Sorter
+{
+ //typedef EigType;
+};
+
+//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
+template<>
+struct Sorter<CPU_STL>
+{
+ typedef oclMat EigType;
+ static cv::Mutex cs;
+ static Mat mat_eig;
+
+ //prototype
+ static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
+ {
+ float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
+ float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
+ return v1 > v2;
+ }
+ static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+ {
+ cv::AutoLock lock(cs);
+ //temporarily use STL's sort function
+ Mat mat_corners = corners;
+ mat_eig = eig_tex;
+ std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
+ corners = mat_corners;
+ }
+};
+cv::Mutex Sorter<CPU_STL>::cs;
+cv::Mat Sorter<CPU_STL>::mat_eig;
+
+template<>
+struct Sorter<BITONIC>
+{
+ typedef TextureCL EigType;
+
+ static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+ {
+ Context * cxt = Context::getContext();
+ size_t globalThreads[3] = {count / 2, 1, 1};
+ size_t localThreads[3] = {GROUP_SIZE, 1, 1};
+
+ // 2^numStages should be equal to count or the output is invalid
+ int numStages = 0;
+ for(int i = count; i > 1; i >>= 1)
+ {
+ ++numStages;
+ }
+ const int argc = 5;
+ std::vector< std::pair<size_t, const void *> > args(argc);
+ std::string kernelname = "sortCorners_bitonicSort";
+ args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
+ args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
+ args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
+ for(int stage = 0; stage < numStages; ++stage)
+ {
+ args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
+ for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
+ {
+ args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
+ openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+ }
+ }
+ }
+};
+
+template<>
+struct Sorter<SELECTION>
+{
+ typedef TextureCL EigType;
+
+ static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+ {
+ Context * cxt = Context::getContext();
+
+ size_t globalThreads[3] = {count, 1, 1};
+ size_t localThreads[3] = {GROUP_SIZE, 1, 1};
+
+ std::vector< std::pair<size_t, const void *> > args;
+ //local
+ std::string kernelname = "sortCorners_selectionSortLocal";
+ int lds_size = GROUP_SIZE * sizeof(cl_float2);
+ args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
+ args.push_back( std::make_pair( lds_size, (void*)NULL) );
+
+ openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+
+ //final
+ kernelname = "sortCorners_selectionSortFinal";
+ args.pop_back();
+ openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+ }
+};
+
+int findCorners_caller(
+ const TextureCL& eig,
+ const float threshold,
+ const oclMat& mask,
+ oclMat& corners,
+ const int max_count)
+{
+ std::vector<int> k;
+ Context * cxt = Context::getContext();
+
+ std::vector< std::pair<size_t, const void*> > args;
+ std::string kernelname = "findCorners";
+
+ const int mask_strip = mask.step / mask.elemSize1();
+
+ oclMat g_counter(1, 1, CV_32SC1);
+ g_counter.setTo(0);
+
+ args.push_back(make_pair( sizeof(cl_mem), (void*)&eig ));
+ args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data ));
+ args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data ));
+ args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip));
+ args.push_back(make_pair( sizeof(cl_float), (void*)&threshold ));
+ args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows ));
+ args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols ));
+ args.push_back(make_pair( sizeof(cl_int), (void*)&max_count ));
+ args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
+
+ size_t globalThreads[3] = {eig.cols, eig.rows, 1};
+ size_t localThreads[3] = {16, 16, 1};
+
+ const char * opt = mask.empty() ? "" : "-D WITH_MASK";
+ openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt);
+ return std::min(Mat(g_counter).at<int>(0), max_count);
+}
+}//unnamed namespace
+
+void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
+{
+ CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
+ CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
+
+ CV_DbgAssert(support_image2d());
+
+ ensureSizeIsEnough(image.size(), CV_32F, eig_);
+
+ if (useHarrisDetector)
+ cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
+ else
+ cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
+
+ double maxVal = 0;
+ minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_);
+
+ ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
+
+ Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
+ int total = findCorners_caller(
+ *eig_tex,
+ static_cast<float>(maxVal * qualityLevel),
+ mask,
+ tmpCorners_,
+ tmpCorners_.cols);
+
+ if (total == 0)
+ {
+ corners.release();
+ return;
+ }
+ if(use_cpu_sorter)
+ {
+ Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
+ }
+ else
+ {
+ //if total is power of 2
+ if(((total - 1) & (total)) == 0)
+ {
+ Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
+ }
+ else
+ {
+ Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
+ }
+ }
+
+ if (minDistance < 1)
+ {
+ corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1));
+ }
+ else
+ {
+ vector<Point2f> tmp(total);
+ downloadPoints(tmpCorners_, tmp);
+
+ vector<Point2f> tmp2;
+ tmp2.reserve(total);
+
+ const int cell_size = cvRound(minDistance);
+ const int grid_width = (image.cols + cell_size - 1) / cell_size;
+ const int grid_height = (image.rows + cell_size - 1) / cell_size;
+
+ std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
+
+ for (int i = 0; i < total; ++i)
+ {
+ Point2f p = tmp[i];
+
+ bool good = true;
+
+ int x_cell = static_cast<int>(p.x / cell_size);
+ int y_cell = static_cast<int>(p.y / cell_size);
+
+ int x1 = x_cell - 1;
+ int y1 = y_cell - 1;
+ int x2 = x_cell + 1;
+ int y2 = y_cell + 1;
+
+ // boundary check
+ x1 = std::max(0, x1);
+ y1 = std::max(0, y1);
+ x2 = std::min(grid_width - 1, x2);
+ y2 = std::min(grid_height - 1, y2);
+
+ for (int yy = y1; yy <= y2; yy++)
+ {
+ for (int xx = x1; xx <= x2; xx++)
+ {
+ vector<Point2f>& m = grid[yy * grid_width + xx];
+
+ if (!m.empty())
+ {
+ for(size_t j = 0; j < m.size(); j++)
+ {
+ float dx = p.x - m[j].x;
+ float dy = p.y - m[j].y;
+
+ if (dx * dx + dy * dy < minDistance * minDistance)
+ {
+ good = false;
+ goto break_out;
+ }
+ }
+ }
+ }
+ }
+
+ break_out:
+
+ if(good)
+ {
+ grid[y_cell * grid_width + x_cell].push_back(p);
+
+ tmp2.push_back(p);
+
+ if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
+ break;
+ }
+ }
+
+ corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
+ }
+}
+void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector<Point2f> &points_v)
+{
+ CV_DbgAssert(points.type() == CV_32FC2);
+ points_v.resize(points.cols);
+ openCLSafeCall(clEnqueueReadBuffer(
+ *reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
+ reinterpret_cast<cl_mem>(points.data),
+ CL_TRUE,
+ 0,
+ points.cols * sizeof(Point2f),
+ &points_v[0],
+ 0,
+ NULL,
+ NULL));
+}
+
+
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
double k, int borderType)
{
+ oclMat dx, dy;
+ cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType);
+ }
+
+ void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
+ double k, int borderType)
+ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
- oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
- extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
+ extractCovData(src, dx, dy, blockSize, ksize, borderType);
dst.create(src.size(), CV_32F);
- corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), Dx, Dy, dst, borderType);
+ corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
}
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
{
+ oclMat dx, dy;
+ cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType);
+ }
+
+ void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
+ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
- oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
- extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
+ extractCovData(src, dx, dy, blockSize, ksize, borderType);
dst.create(src.size(), CV_32F);
- corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType);
+ corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
}
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
format.image_channel_order = CL_RGBA;
break;
default:
- CV_Error(-1, "Image forma is not supported");
+ CV_Error(-1, "Image format is not supported");
break;
}
#ifdef CL_VERSION_1_2
openCLSafeCall(err);
return texture;
}
+ Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
+ {
+ return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type()));
+ }
+
void releaseTexture(cl_mem& texture)
{
openCLFree(texture);
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other oclMaterials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef WITH_MASK
+#define WITH_MASK 0
+#endif
+
+__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
+{
+ return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
+}
+
+inline float ELEM_FLT2(image2d_t _eig, float2 pt)
+{
+ return read_imagef(_eig, sampler, pt).x;
+}
+
+__kernel
+ void findCorners
+ (
+ image2d_t eig,
+ __global const char * mask,
+ __global float2 * corners,
+ const int mask_strip,// in pixels
+ const float threshold,
+ const int rows,
+ const int cols,
+ const int max_count,
+ __global int * g_counter
+ )
+{
+ const int j = get_global_id(0);
+ const int i = get_global_id(1);
+
+ if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1
+#if WITH_MASK
+ && mask[i * mask_strip + j] != 0
+#endif
+ )
+ {
+ const float val = ELEM_INT2(eig, j, i);
+
+ if (val > threshold)
+ {
+ float maxVal = val;
+
+ maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
+
+ maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal);
+
+ maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal);
+
+ if (val == maxVal)
+ {
+ const int ind = atomic_inc(g_counter);
+
+ if (ind < max_count)
+ corners[ind] = (float2)(j, i);
+ }
+ }
+ }
+}
+
+//bitonic sort
+__kernel
+ void sortCorners_bitonicSort
+ (
+ image2d_t eig,
+ __global float2 * corners,
+ const int count,
+ const int stage,
+ const int passOfStage
+ )
+{
+ const int threadId = get_global_id(0);
+ if(threadId >= count / 2)
+ {
+ return;
+ }
+
+ const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 is descent
+
+ const int pairDistance = 1 << (stage - passOfStage);
+ const int blockWidth = 2 * pairDistance;
+
+ const int leftId = min( (threadId % pairDistance)
+ + (threadId / pairDistance) * blockWidth, count );
+
+ const int rightId = min( leftId + pairDistance, count );
+
+ const float2 leftPt = corners[leftId];
+ const float2 rightPt = corners[rightId];
+
+ const float leftVal = ELEM_FLT2(eig, leftPt);
+ const float rightVal = ELEM_FLT2(eig, rightPt);
+
+ const bool compareResult = leftVal > rightVal;
+
+ float2 greater = compareResult ? leftPt:rightPt;
+ float2 lesser = compareResult ? rightPt:leftPt;
+
+ corners[leftId] = sortOrder ? lesser : greater;
+ corners[rightId] = sortOrder ? greater : lesser;
+}
+
+//selection sort for gfft
+//kernel is ported from Bolt library:
+//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
+// Local sort will firstly sort elements of each workgroup using selection sort
+// its performance is O(n)
+__kernel
+ void sortCorners_selectionSortLocal
+ (
+ image2d_t eig,
+ __global float2 * corners,
+ const int count,
+ __local float2 * scratch
+ )
+{
+ int i = get_local_id(0); // index in workgroup
+ int numOfGroups = get_num_groups(0); // index in workgroup
+ int groupID = get_group_id(0);
+ int wg = get_local_size(0); // workgroup size = block size
+ int n; // number of elements to be processed for this work group
+
+ int offset = groupID * wg;
+ int same = 0;
+ corners += offset;
+ n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
+ float2 pt1, pt2;
+
+ pt1 = corners[min(i, n)];
+ scratch[i] = pt1;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if(i >= n)
+ {
+ return;
+ }
+
+ float val1 = ELEM_FLT2(eig, pt1);
+ float val2;
+
+ int pos = 0;
+ for (int j=0;j<n;++j)
+ {
+ pt2 = scratch[j];
+ val2 = ELEM_FLT2(eig, pt2);
+ if(val2 > val1)
+ pos++;//calculate the rank of this element in this work group
+ else
+ {
+ if(val1 > val2)
+ continue;
+ else
+ {
+ // val1 and val2 are same
+ same++;
+ }
+ }
+ }
+ for (int j=0; j< same; j++)
+ corners[pos + j] = pt1;
+}
+__kernel
+ void sortCorners_selectionSortFinal
+ (
+ image2d_t eig,
+ __global float2 * corners,
+ const int count
+ )
+{
+ const int i = get_local_id(0); // index in workgroup
+ const int numOfGroups = get_num_groups(0); // index in workgroup
+ const int groupID = get_group_id(0);
+ const int wg = get_local_size(0); // workgroup size = block size
+ int pos = 0, same = 0;
+ const int offset = get_group_id(0) * wg;
+ const int remainder = count - wg*(numOfGroups-1);
+
+ if((offset + i ) >= count)
+ return;
+ float2 pt1, pt2;
+ pt1 = corners[groupID*wg + i];
+
+ float val1 = ELEM_FLT2(eig, pt1);
+ float val2;
+
+ for(int j=0; j<numOfGroups-1; j++ )
+ {
+ for(int k=0; k<wg; k++)
+ {
+ pt2 = corners[j*wg + k];
+ val2 = ELEM_FLT2(eig, pt2);
+ if(val1 > val2)
+ break;
+ else
+ {
+ //Increment only if the value is not the same.
+ if( val2 > val1 )
+ pos++;
+ else
+ same++;
+ }
+ }
+ }
+
+ for(int k=0; k<remainder; k++)
+ {
+ pt2 = corners[(numOfGroups-1)*wg + k];
+ val2 = ELEM_FLT2(eig, pt2);
+ if(val1 > val2)
+ break;
+ else
+ {
+ //Don't increment if the value is the same.
+ //Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
+ if(val2 > val1)
+ pos++;
+ else
+ same++;
+ }
+ }
+ for (int j=0; j< same; j++)
+ corners[pos + j] = pt1;
+}
+
using namespace std;
extern string workdir;
+
+
+//////////////////////////////////////////////////////
+// GoodFeaturesToTrack
+namespace
+{
+ IMPLEMENT_PARAM_CLASS(MinDistance, double)
+}
+PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance)
+{
+ double minDistance;
+
+ virtual void SetUp()
+ {
+ minDistance = GET_PARAM(0);
+ }
+};
+
+TEST_P(GoodFeaturesToTrack, Accuracy)
+{
+ cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+ ASSERT_FALSE(frame.empty());
+
+ int maxCorners = 1000;
+ double qualityLevel = 0.01;
+
+ cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+
+ cv::ocl::oclMat d_pts;
+ detector(oclMat(frame), d_pts);
+
+ ASSERT_FALSE(d_pts.empty());
+
+ std::vector<cv::Point2f> pts(d_pts.cols);
+
+ detector.downloadPoints(d_pts, pts);
+
+ std::vector<cv::Point2f> pts_gold;
+ cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance);
+
+ ASSERT_EQ(pts_gold.size(), pts.size());
+
+ size_t mistmatch = 0;
+ for (size_t i = 0; i < pts.size(); ++i)
+ {
+ cv::Point2i a = pts_gold[i];
+ cv::Point2i b = pts[i];
+
+ bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
+
+ if (!eq)
+ ++mistmatch;
+ }
+
+ double bad_ratio = static_cast<double>(mistmatch) / pts.size();
+
+ ASSERT_LE(bad_ratio, 0.01);
+}
+
+TEST_P(GoodFeaturesToTrack, EmptyCorners)
+{
+ int maxCorners = 1000;
+ double qualityLevel = 0.01;
+
+ cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+
+ cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
+ cv::ocl::oclMat corners(1, maxCorners, CV_32FC2);
+
+ detector(src, corners);
+
+ ASSERT_TRUE(corners.empty());
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
+ testing::Values(MinDistance(0.0), MinDistance(3.0)));
+
//////////////////////////////////////////////////////////////////////////
PARAM_TEST_CASE(TVL1, bool)
{