// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf);
- ///////////////////////////////////////////// Canny /////////////////////////////////////////////
- struct CV_EXPORTS CannyBuf;
-
- //! compute edges of the input image using Canny operator
- // Support CV_8UC1 only
- CV_EXPORTS void Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
- CV_EXPORTS void Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
- CV_EXPORTS void Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
- CV_EXPORTS void Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
-
- struct CV_EXPORTS CannyBuf
- {
- CannyBuf() {}
- explicit CannyBuf(const Size& image_size, int apperture_size = 3) {create(image_size, apperture_size);}
- CannyBuf(const oclMat& dx_, const oclMat& dy_);
-
- void create(const Size& image_size, int apperture_size = 3);
-
- void release();
-
- oclMat dx, dy;
- oclMat dx_buf, dy_buf;
- oclMat edgeBuf;
- oclMat trackBuf1, trackBuf2;
- oclMat counter;
- Ptr<FilterEngine_GPU> filterDX, filterDY;
+ ///////////////////////////////////////////// Canny /////////////////////////////////////////////\r
+ struct CV_EXPORTS CannyBuf;\r
+\r
+ //! Compute edges of the input image using the Canny operator. Overloads take either the source image or a dx/dy pair, each with or without a caller-supplied CannyBuf.\r
+ // Supports CV_8UC1 only\r
+ CV_EXPORTS void Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);\r
+ CV_EXPORTS void Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);\r
+ CV_EXPORTS void Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);\r
+ CV_EXPORTS void Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);\r
+\r
+ // Working buffers that can be passed to the Canny overloads taking a CannyBuf&.\r
+ struct CV_EXPORTS CannyBuf\r
+ {\r
+ // 'counter' is a raw pointer, so both inline constructors zero it explicitly;\r
+ // otherwise a default-constructed CannyBuf would carry an indeterminate pointer value.\r
+ CannyBuf() : counter(0) {}\r
+ explicit CannyBuf(const Size& image_size, int apperture_size = 3) : counter(0) {create(image_size, apperture_size);}\r
+ // Construct from existing dx/dy matrices (defined out of line).\r
+ CannyBuf(const oclMat& dx_, const oclMat& dy_);\r
+\r
+ // Presumably (re)allocates the buffers for the given image and aperture size; defined out of line - TODO confirm.\r
+ void create(const Size& image_size, int apperture_size = 3);\r
+\r
+ // Presumably frees the buffers; defined out of line - TODO confirm.\r
+ void release();\r
+\r
+ oclMat dx, dy;\r
+ oclMat dx_buf, dy_buf;\r
+ oclMat edgeBuf;\r
+ oclMat trackBuf1, trackBuf2;\r
+ // Untyped handle. NOTE(review): its allocation and release happen in the out-of-line members;\r
+ // confirm they treat a null value as "not allocated".\r
+ void * counter;\r
+ Ptr<FilterEngine_GPU> filterDX, filterDY;\r
};
#ifdef HAVE_CLAMDFFT
const oclMat& src3, double beta, oclMat& dst, int flags = 0);
#endif
- //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
- struct CV_EXPORTS HOGDescriptor
- {
- enum { DEFAULT_WIN_SIGMA = -1 };
- enum { DEFAULT_NLEVELS = 64 };
- enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
-
- HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
- Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
- int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
- double threshold_L2hys=0.2, bool gamma_correction=true,
- int nlevels=DEFAULT_NLEVELS);
-
- size_t getDescriptorSize() const;
- size_t getBlockHistogramSize() const;
-
- void setSVMDetector(const vector<float>& detector);
-
- static vector<float> getDefaultPeopleDetector();
- static vector<float> getPeopleDetector48x96();
- static vector<float> getPeopleDetector64x128();
-
- void detect(const oclMat& img, vector<Point>& found_locations,
- double hit_threshold=0, Size win_stride=Size(),
- Size padding=Size());
-
- void detectMultiScale(const oclMat& img, vector<Rect>& found_locations,
- double hit_threshold=0, Size win_stride=Size(),
- Size padding=Size(), double scale0=1.05,
- int group_threshold=2);
-
- void getDescriptors(const oclMat& img, Size win_stride,
- oclMat& descriptors,
- int descr_format=DESCR_FORMAT_COL_BY_COL);
-
- Size win_size;
- Size block_size;
- Size block_stride;
- Size cell_size;
- int nbins;
- double win_sigma;
- double threshold_L2hys;
- bool gamma_correction;
- int nlevels;
-
- protected:
- void computeBlockHistograms(const oclMat& img);
- void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle);
-
- double getWinSigma() const;
- bool checkDetectorSize() const;
-
- static int numPartsWithin(int size, int part_size, int stride);
- static Size numPartsWithin(Size size, Size part_size, Size stride);
-
- // Coefficients of the separating plane
- float free_coef;
- oclMat detector;
-
- // Results of the last classification step
- oclMat labels;
- Mat labels_host;
-
- // Results of the last histogram evaluation step
- oclMat block_hists;
-
- // Gradients conputation results
- oclMat grad, qangle;
-
- std::vector<oclMat> image_scales;
- };
-
- //! Speeded up robust features, port from GPU module.
- ////////////////////////////////// SURF //////////////////////////////////////////
- class CV_EXPORTS SURF_OCL
- {
- public:
- enum KeypointLayout
- {
- X_ROW = 0,
- Y_ROW,
- LAPLACIAN_ROW,
- OCTAVE_ROW,
- SIZE_ROW,
- ANGLE_ROW,
- HESSIAN_ROW,
- ROWS_COUNT
- };
-
- //! the default constructor
- SURF_OCL();
- //! the full constructor taking all the necessary parameters
- explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4,
- int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
-
- //! returns the descriptor size in float's (64 or 128)
- int descriptorSize() const;
-
- //! upload host keypoints to device memory
- void uploadKeypoints(const vector<cv::KeyPoint>& keypoints, oclMat& keypointsocl);
- //! download keypoints from device to host memory
- void downloadKeypoints(const oclMat& keypointsocl, vector<KeyPoint>& keypoints);
-
- //! download descriptors from device to host memory
- void downloadDescriptors(const oclMat& descriptorsocl, vector<float>& descriptors);
-
- //! finds the keypoints using fast hessian detector used in SURF
- //! supports CV_8UC1 images
- //! keypoints will have nFeature cols and 6 rows
- //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
- //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
- //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
- //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
- //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
- //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
- //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
- void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints);
- //! finds the keypoints and computes their descriptors.
- //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
- void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors,
- bool useProvidedKeypoints = false);
-
- void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints);
- void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors,
- bool useProvidedKeypoints = false);
-
- void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,
- bool useProvidedKeypoints = false);
-
- void releaseMemory();
-
- // SURF parameters
- float hessianThreshold;
- int nOctaves;
- int nOctaveLayers;
- bool extended;
- bool upright;
-
- //! max keypoints = min(keypointsRatio * img.size().area(), 65535)
- float keypointsRatio;
-
- oclMat sum, mask1, maskSum, intBuffer;
-
- oclMat det, trace;
-
- oclMat maxPosBuffer;
-
- };
+ //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////\r
+ struct CV_EXPORTS HOGDescriptor\r
+ {\r
+ enum { DEFAULT_WIN_SIGMA = -1 };\r
+ enum { DEFAULT_NLEVELS = 64 };\r
+ enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };\r
+\r
+ HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),\r
+ Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),\r
+ int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,\r
+ double threshold_L2hys=0.2, bool gamma_correction=true,\r
+ int nlevels=DEFAULT_NLEVELS);\r
+\r
+ size_t getDescriptorSize() const;\r
+ size_t getBlockHistogramSize() const;\r
+\r
+ void setSVMDetector(const vector<float>& detector);\r
+\r
+ static vector<float> getDefaultPeopleDetector();\r
+ static vector<float> getPeopleDetector48x96();\r
+ static vector<float> getPeopleDetector64x128();\r
+\r
+ void detect(const oclMat& img, vector<Point>& found_locations,\r
+ double hit_threshold=0, Size win_stride=Size(),\r
+ Size padding=Size());\r
+\r
+ void detectMultiScale(const oclMat& img, vector<Rect>& found_locations,\r
+ double hit_threshold=0, Size win_stride=Size(),\r
+ Size padding=Size(), double scale0=1.05,\r
+ int group_threshold=2);\r
+\r
+ void getDescriptors(const oclMat& img, Size win_stride,\r
+ oclMat& descriptors,\r
+ int descr_format=DESCR_FORMAT_COL_BY_COL);\r
+\r
+ Size win_size;\r
+ Size block_size;\r
+ Size block_stride;\r
+ Size cell_size;\r
+ int nbins;\r
+ double win_sigma;\r
+ double threshold_L2hys;\r
+ bool gamma_correction;\r
+ int nlevels;\r
+\r
+ protected:\r
+ // Initialize the internal buffers; this only needs to be done once for a multi-scale detection\r
+ void init_buffer(const oclMat& img, Size win_stride);\r
+\r
+ void computeBlockHistograms(const oclMat& img);\r
+ void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle);\r
+\r
+ double getWinSigma() const;\r
+ bool checkDetectorSize() const;\r
+\r
+ static int numPartsWithin(int size, int part_size, int stride);\r
+ static Size numPartsWithin(Size size, Size part_size, Size stride);\r
+\r
+ // Coefficients of the separating plane\r
+ float free_coef;\r
+ oclMat detector;\r
+\r
+ // Results of the last classification step (device-side labels and a host-side Mat)\r
+ oclMat labels;\r
+ Mat labels_host;\r
+\r
+ // Results of the last histogram evaluation step\r
+ oclMat block_hists;\r
+\r
+ // Gradient computation results\r
+ oclMat grad, qangle;\r
+\r
+ // Scaled image (a single buffer; the removed version of this struct kept a std::vector<oclMat> image_scales)\r
+ oclMat image_scale;\r
+\r
+ // Effective size of the input image (may differ from the original size after scaling)\r
+ Size effect_size;\r
+ };\r
+
+ //! Speeded-up robust features (SURF), ported from the GPU module.\r
+ ////////////////////////////////// SURF //////////////////////////////////////////\r
+ class CV_EXPORTS SURF_OCL\r
+ {\r
+ public:\r
+ enum KeypointLayout\r
+ {\r
+ X_ROW = 0,\r
+ Y_ROW,\r
+ LAPLACIAN_ROW,\r
+ OCTAVE_ROW,\r
+ SIZE_ROW,\r
+ ANGLE_ROW,\r
+ HESSIAN_ROW,\r
+ ROWS_COUNT\r
+ };\r
+\r
+ //! the default constructor\r
+ SURF_OCL();\r
+ //! the full constructor taking all the necessary parameters\r
+ explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4,\r
+ int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);\r
+\r
+ //! returns the descriptor size in floats (64 or 128)\r
+ int descriptorSize() const;\r
+ \r
+ //! upload host keypoints to device memory\r
+ void uploadKeypoints(const vector<cv::KeyPoint>& keypoints, oclMat& keypointsocl);\r
+ //! download keypoints from device to host memory\r
+ void downloadKeypoints(const oclMat& keypointsocl, vector<KeyPoint>& keypoints);\r
+\r
+ //! download descriptors from device to host memory\r
+ void downloadDescriptors(const oclMat& descriptorsocl, vector<float>& descriptors);\r
+\r
+ //! finds the keypoints using fast hessian detector used in SURF\r
+ //! supports CV_8UC1 images\r
+ //! keypoints will have nFeature cols and ROWS_COUNT (7) rows, one row per KeypointLayout entry\r
+ //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature\r
+ //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature\r
+ //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature\r
+ //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature\r
+ //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature\r
+ //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature\r
+ //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature\r
+ void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints);\r
+ //! finds the keypoints and computes their descriptors.\r
+ //! Optionally it can compute descriptors for the user-provided keypoints and recompute the keypoints' direction\r
+ void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors,\r
+ bool useProvidedKeypoints = false);\r
+\r
+ void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints);\r
+ void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors,\r
+ bool useProvidedKeypoints = false);\r
+\r
+ void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,\r
+ bool useProvidedKeypoints = false);\r
+\r
+ void releaseMemory();\r
+\r
+ // SURF parameters\r
+ float hessianThreshold;\r
+ int nOctaves;\r
+ int nOctaveLayers;\r
+ bool extended;\r
+ bool upright;\r
+\r
+ //! max keypoints = min(keypointsRatio * img.size().area(), 65535)\r
+ float keypointsRatio;\r
+\r
+ oclMat sum, mask1, maskSum, intBuffer;\r
+\r
+ oclMat det, trace;\r
+\r
+ oclMat maxPosBuffer;\r
+\r
+ };\r
}
}
#include "opencv2/ocl/matrix_operations.hpp"
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Fangfang Bai, fangfang@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+
+#ifdef HAVE_OPENCL
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+// Fixture for the blendLinear performance test. SetUp() unpacks the two test
+// parameters; the test body combines them with CV_MAKETYPE(type, channels).
+PARAM_TEST_CASE(Blend, MatType, int)
+{
+    int type;      // test parameter 0 (MatType)
+    int channels;  // test parameter 1
+    std::vector<cv::ocl::Info> oclinfo;  // not populated by this fixture
+
+    virtual void SetUp()
+    {
+        // No device enumeration or setBinpath call is made here.
+        channels = GET_PARAM(1);
+        type = GET_PARAM(0);
+    }
+};
+
+// Performance test for cv::ocl::blendLinear. Prints the average time per iteration,
+// once for the whole round trip (oclMat construction from host data, blend, download)
+// and once for the blendLinear call alone.
+TEST_P(Blend, Performance)
+{
+    const int mat_type = CV_MAKETYPE(type, channels);
+    const double max_value = (type == CV_8U) ? 255.0 : 1.0;
+
+    cv::Size size(MWIDTH, MHEIGHT);
+    cv::Mat img1_host = randomMat(size, mat_type, 0, max_value);
+    cv::Mat img2_host = randomMat(size, mat_type, 0, max_value);
+    cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
+    cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
+
+    // The image/weight device matrices are created inside the timed loop, so only the
+    // destination is allocated up front. (Previously four more oclMat objects were
+    // allocated here and then shadowed by the loop-local ones, i.e. never used.)
+    cv::ocl::oclMat gdst(size, mat_type);
+
+    double totalgputick_all = 0;
+    double totalgputick_kernel = 0;
+
+    // LOOP_TIMES + 1 iterations: iteration 0 runs but is excluded from the totals.
+    for (int j = 0; j < LOOP_TIMES + 1; j++)
+    {
+        double t1 = (double)cvGetTickCount(); // start of the full round trip
+        cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
+        cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
+        cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
+        // Fixed: this used to be built from weights1 again, so weights2 was never blended.
+        cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights2);
+
+        double t2 = (double)cvGetTickCount(); // start of the blendLinear call only
+        cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
+        t2 = (double)cvGetTickCount() - t2;
+
+        cv::Mat m;
+        gdst.download(m);
+        t1 = (double)cvGetTickCount() - t1;
+
+        if (j == 0)
+        {
+            continue;
+        }
+
+        totalgputick_all += t1;
+        totalgputick_kernel += t2;
+    }
+
+    // Ticks are divided by (tick frequency * LOOP_TIMES * 1000) and reported as "ms".
+    cout << "average gpu total runtime is  " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+
+    cout << "average gpu runtime without data transfering  is  " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
+ Values(CV_8U, CV_32F), Values(1, 4))); // every combination of type {CV_8U, CV_32F} and channels {1, 4}
+#endif
\ No newline at end of file
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Fangfang Bai, fangfang@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+#ifdef HAVE_OPENCL
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+#define FILTER_IMAGE "../../../samples/gpu/road.png"
+
+#ifndef MWC_TEST_UTILITY
+#define MWC_TEST_UTILITY
+
+// Param class: IMPLEMENT_PARAM_CLASS(name, type) defines a class 'name' wrapping one 'type' value (implicitly convertible from and to 'type') plus a PrintTo overload that streams "name(value)".
+#ifndef IMPLEMENT_PARAM_CLASS
+#define IMPLEMENT_PARAM_CLASS(name, type) \
+class name \
+ { \
+ public: \
+ name ( type arg = type ()) : val_(arg) {} \
+ operator type () const {return val_;} \
+ private: \
+ type val_; \
+ }; \
+ inline void PrintTo( name param, std::ostream* os) \
+ { \
+ *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
+ }
+
+IMPLEMENT_PARAM_CLASS(Channels, int)
+#endif // IMPLEMENT_PARAM_CLASS (note: Channels above is only defined when the macro was not already defined)
+#endif // MWC_TEST_UTILITY
+
+////////////////////////////////////////////////////////
+// Canny1
+
+IMPLEMENT_PARAM_CLASS(AppertureSize, int); // test-parameter wrapper; its value is passed to cv::ocl::Canny as apperture_size
+IMPLEMENT_PARAM_CLASS(L2gradient, bool); // test-parameter wrapper; its value is passed to cv::ocl::Canny as L2gradient
+
+// Fixture for the Canny performance test. SetUp() unpacks the
+// (AppertureSize, L2gradient) parameter tuple into plain members.
+PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
+{
+    int apperture_size;   // test parameter 0
+    bool useL2gradient;   // test parameter 1
+
+    virtual void SetUp()
+    {
+        // No device enumeration is done in this fixture.
+        useL2gradient = GET_PARAM(1);
+        apperture_size = GET_PARAM(0);
+    }
+};
+
+// Performance test for cv::ocl::Canny on FILTER_IMAGE (read as grayscale). Prints the
+// average time per iteration for the full round trip (upload, Canny, download) and for
+// the Canny call alone.
+TEST_P(Canny1, Performance)
+{
+    cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
+
+    double low_thresh = 100.0;
+    double high_thresh = 150.0;
+
+    // The unused host matrix 'edges_gold' was dropped; nothing is compared against a reference here.
+    cv::ocl::oclMat edges;
+
+    double totalgputick = 0;
+    double totalgputick_kernel = 0;
+
+    // LOOP_TIMES + 1 iterations: iteration 0 runs but is excluded from the totals.
+    for (int j = 0; j < LOOP_TIMES + 1; j++)
+    {
+        double t1 = (double)cvGetTickCount(); // start of the full round trip
+
+        cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img); // upload
+
+        double t2 = (double)cvGetTickCount(); // start of the Canny call only
+        cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
+        t2 = (double)cvGetTickCount() - t2;
+
+        cv::Mat cpu_dst;
+        edges.download(cpu_dst); // download
+
+        t1 = (double)cvGetTickCount() - t1;
+
+        if (j == 0)
+            continue;
+
+        totalgputick += t1;
+        totalgputick_kernel += t2;
+    }
+
+    // Ticks are divided by (tick frequency * LOOP_TIMES * 1000) and reported as "ms".
+    cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
+ testing::Values(AppertureSize(3), AppertureSize(5)),
+ testing::Values(L2gradient(false), L2gradient(true)))); // every combination of aperture size {3, 5} and L2gradient {false, true}
+
+
+
+#endif //Have opencl
\ No newline at end of file
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Fangfang Bai fangfang@multicorewareinc.com
+//
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+///////////////////////////////////////////////////////////////////////////////
+/// ColumnSum
+
+#ifdef HAVE_OPENCL
+
+////////////////////////////////////////////////////////////////////////
+// ColumnSum
+
+// Fixture for the columnSum performance test. It is declared without test
+// parameters and SetUp() has nothing to prepare.
+PARAM_TEST_CASE(ColumnSum)
+{
+    cv::Mat src;
+
+    virtual void SetUp()
+    {
+        // Intentionally empty: no device enumeration is done in this fixture.
+    }
+};
+
+// Times cv::ocl::columnSum on a random MWIDTH x MHEIGHT CV_32FC1 matrix and prints the
+// average per-iteration cost, with and without the host/device transfers.
+// NOTE(review): the fixture is declared with PARAM_TEST_CASE but registered via TEST_F -
+// confirm this is intended.
+TEST_F(ColumnSum, Performance)
+{
+    cv::Size size(MWIDTH,MHEIGHT);
+    cv::Mat host_src = randomMat(size, CV_32FC1);
+    cv::ocl::oclMat d_dst;
+
+    double ticks_total = 0;
+    double ticks_kernel = 0;
+
+    // One extra iteration is executed; the first one (iter == 0) is not accumulated.
+    for (int iter = 0; iter <= LOOP_TIMES; ++iter)
+    {
+        const double round_begin = (double)cvGetTickCount();
+
+        cv::ocl::oclMat d_src(host_src);
+
+        const double kernel_begin = (double)cvGetTickCount();
+        cv::ocl::columnSum(d_src,d_dst);
+        const double kernel_ticks = (double)cvGetTickCount() - kernel_begin;
+
+        cv::Mat cpu_dst;
+        d_dst.download (cpu_dst);
+
+        const double round_ticks = (double)cvGetTickCount() - round_begin;
+
+        if (iter != 0)
+        {
+            ticks_total = round_ticks + ticks_total;
+            ticks_kernel = kernel_ticks + ticks_kernel;
+        }
+    }
+
+    cout << "average gpu runtime is  " << ticks_total/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    cout << "average gpu runtime without data transfer is  " << ticks_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+}
+
+
+
+#endif
\ No newline at end of file
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Fangfangbai, fangfang@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+using namespace std;
+#ifdef HAVE_CLAMDFFT
+////////////////////////////////////////////////////////////////////////////
+// Dft
+// Fixture for the DFT tests: unpacks (size, row-wise flag) and queries the OpenCL devices.
+PARAM_TEST_CASE(Dft, cv::Size, bool)
+{
+    cv::Size dft_size;   // test parameter 0
+    bool dft_rows;       // test parameter 1; selects cv::DFT_ROWS in the C2C test
+    vector<cv::ocl::Info> info;
+
+    virtual void SetUp()
+    {
+        dft_rows = GET_PARAM(1);
+        dft_size = GET_PARAM(0);
+        cv::ocl::getDevice(info);
+    }
+};
+
+// Times cv::ocl::dft on a random CV_32FC2 matrix of size dft_size (with cv::DFT_ROWS when
+// dft_rows is set) and prints the average per-iteration cost, with and without transfers.
+TEST_P(Dft, C2C)
+{
+    cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
+    const int flags = dft_rows ? cv::DFT_ROWS : 0;
+
+    cv::ocl::oclMat d_b;
+
+    double ticks_total = 0;
+    double ticks_kernel = 0;
+
+    // One extra iteration is executed; the first one (iter == 0) is not accumulated.
+    for (int iter = 0; iter <= LOOP_TIMES; ++iter)
+    {
+        const double round_begin = (double)cvGetTickCount();
+
+        cv::ocl::oclMat ga=cv::ocl::oclMat(a); // upload
+
+        const double kernel_begin = (double)cvGetTickCount();
+        cv::ocl::dft(ga, d_b, a.size(), flags);
+        const double kernel_ticks = (double)cvGetTickCount() - kernel_begin;
+
+        cv::Mat cpu_dst;
+        d_b.download (cpu_dst); // download
+
+        const double round_ticks = (double)cvGetTickCount() - round_begin;
+
+        if (iter != 0)
+        {
+            ticks_total = round_ticks + ticks_total;
+            ticks_kernel = kernel_ticks + ticks_kernel;
+        }
+    }
+
+    cout << "average gpu runtime is  " << ticks_total/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    cout << "average gpu runtime without data transfer is  " << ticks_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+}
+
+
+
+// Round trip: forward dft of a random CV_32FC1 matrix, then an inverse dft with real
+// output, and a check that the result is near the original input.
+TEST_P(Dft, R2CthenC2R)
+{
+    cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
+
+    // dft_rows is deliberately ignored here (row-wise mode is marked "not supported yet").
+    const int flags = 0;
+    const int inverse_flags = flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT;
+
+    cv::ocl::oclMat d_spectrum;
+    cv::ocl::oclMat d_c;
+
+    cv::ocl::dft(cv::ocl::oclMat(a), d_spectrum, a.size(), flags);
+    cv::ocl::dft(d_spectrum, d_c, a.size(), inverse_flags);
+
+    // Tolerance scales with the number of elements.
+    EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
+}
+
+//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
+// testing::Values(cv::Size(1280, 1024), cv::Size(1920, 1080),cv::Size(1800, 1500)),
+// testing::Values(false, true)));
+
+#endif // HAVE_CLAMDFFT
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Fangfang Bai, fangfang@multicorewareinc.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+using namespace std;
+#ifdef HAVE_CLAMDBLAS
+////////////////////////////////////////////////////////////////////////////
+// GEMM
+// Fixture for the GEMM benchmarks.
+// Parameters: (0) matrix element type, (1) matrix size, (2) GEMM flags.
+PARAM_TEST_CASE(Gemm, int, cv::Size, int)
+{
+    int type;                    // element type of the three input matrices
+    cv::Size mat_size;           // size shared by the three input matrices
+    int flags;                   // forwarded unchanged to cv::ocl::gemm
+    vector<cv::ocl::Info> info;  // filled by cv::ocl::getDevice in SetUp
+
+    // Queries the OpenCL devices and unpacks the test parameters.
+    virtual void SetUp()
+    {
+        cv::ocl::getDevice(info);
+
+        flags = GET_PARAM(2);
+        mat_size = GET_PARAM(1);
+        type = GET_PARAM(0);
+    }
+};
+
+// Benchmark for cv::ocl::gemm.
+// Runs LOOP_TIMES + 1 timed iterations, leaves the first one out of the
+// totals, and prints two averages in milliseconds: the whole
+// upload + gemm + download cycle, and the gemm call on its own.
+TEST_P(Gemm, Performance)
+{
+    cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
+    cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
+    cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
+    cv::ocl::oclMat ocl_dst;
+
+    double cycle_ticks_sum = 0;   // accumulated ticks, transfers included
+    double kernel_ticks_sum = 0;  // accumulated ticks of the gemm call only
+
+    for (int iter = 0; iter <= LOOP_TIMES; ++iter)
+    {
+        const double cycle_begin = (double)cvGetTickCount();
+
+        cv::ocl::oclMat ga(a);  // upload
+        cv::ocl::oclMat gb(b);  // upload
+        cv::ocl::oclMat gc(c);  // upload
+
+        const double kernel_begin = (double)cvGetTickCount();
+        cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags);
+        const double kernel_ticks = (double)cvGetTickCount() - kernel_begin;
+
+        cv::Mat cpu_dst;
+        ocl_dst.download(cpu_dst);  // download
+
+        const double cycle_ticks = (double)cvGetTickCount() - cycle_begin;
+
+        // Iteration 0 is measured but not counted.
+        if (iter != 0)
+        {
+            cycle_ticks_sum = cycle_ticks + cycle_ticks_sum;
+            kernel_ticks_sum = kernel_ticks + kernel_ticks_sum;
+        }
+    }
+
+    // Converts the tick totals into an average per counted iteration.
+    const double ticks_divisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    cout << "average gpu runtime is " << cycle_ticks_sum / ticks_divisor << "ms" << endl;
+    cout << "average gpu runtime without data transfer is " << kernel_ticks_sum / ticks_divisor << "ms" << endl;
+}
+
+
+// Instantiates Gemm for 2 element types x 2 matrix sizes x 4 flag values
+// (0, GEMM_1_T, GEMM_2_T, and both); the 64-bit float types are commented out.
+INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm,
+    testing::Combine(
+        testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
+        testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
+        testing::Values(0,
+                        cv::GEMM_1_T,
+                        cv::GEMM_2_T,
+                        cv::GEMM_1_T + cv::GEMM_2_T)));
+#endif
\ No newline at end of file
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Fangfang BAI, fangfang@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include "opencv2/core/core.hpp"
+#include <iomanip>
+using namespace std;
+
+
+
+#ifdef HAVE_OPENCL
+
+
+// Fixture for the HOG benchmarks.
+// Parameters: (0) detection window size, (1) input image type.
+PARAM_TEST_CASE(HOG,cv::Size,int)
+{
+    cv::Size winSize;
+    int type;
+    std::vector<cv::ocl::Info> oclinfo;  // filled by getDevice in SetUp
+
+    // Unpacks the parameters and asserts that getDevice reports at least one device.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(1);
+        winSize = GET_PARAM(0);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+    }
+};
+
+// Benchmark for cv::ocl::HOGDescriptor::getDescriptors.
+// Runs LOOP_TIMES + 1 timed iterations, leaves the first one out of the
+// totals, and prints the average time of the whole upload + compute +
+// download cycle and of the getDescriptors call alone.
+TEST_P(HOG, GetDescriptors)
+{
+    // Load image
+    // NOTE(review): "D:road.png" looks like a drive-relative Windows path — confirm the intended test-data location.
+    cv::Mat img_rgb = readImage("D:road.png");
+    ASSERT_FALSE(img_rgb.empty());
+
+    // Grayscale input for CV_8UC1, four-channel BGRA for every other type.
+    cv::Mat img;
+    if (type == CV_8UC1)
+        cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
+    else
+        cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+
+    // Default-constructed descriptor; the winSize fixture parameter is not read in this test.
+    cv::ocl::HOGDescriptor ocl_hog;
+    ocl_hog.gamma_correction = true;
+
+    cv::ocl::oclMat d_descriptors;
+
+    double cycle_ticks_sum = 0;   // accumulated ticks, transfers included
+    double kernel_ticks_sum = 0;  // accumulated ticks of getDescriptors only
+
+    for (int iter = 0; iter <= LOOP_TIMES; ++iter)
+    {
+        const double cycle_begin = (double)cvGetTickCount();
+
+        cv::ocl::oclMat d_img(img);  // upload
+
+        const double kernel_begin = (double)cvGetTickCount();
+        ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL);
+        const double kernel_ticks = (double)cvGetTickCount() - kernel_begin;
+
+        cv::Mat down_descriptors;
+        d_descriptors.download(down_descriptors);
+
+        const double cycle_ticks = (double)cvGetTickCount() - cycle_begin;
+
+        // Iteration 0 is measured but not counted.
+        if (iter != 0)
+        {
+            cycle_ticks_sum = cycle_ticks + cycle_ticks_sum;
+            kernel_ticks_sum = kernel_ticks + kernel_ticks_sum;
+        }
+    }
+
+    const double ticks_divisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    cout << "average gpu runtime is " << cycle_ticks_sum / ticks_divisor << "ms" << endl;
+    cout << "average gpu runtime without data transfer is " << kernel_ticks_sum / ticks_divisor << "ms" << endl;
+}
+
+
+// Benchmark for cv::ocl::HOGDescriptor::detect.
+// Runs LOOP_TIMES + 1 timed iterations, leaves the first one out of the
+// totals, and prints the average time of upload + detect and of the
+// detect call alone.
+TEST_P(HOG, Detect)
+{
+    // Load image
+    // NOTE(review): "D:road.png" looks like a drive-relative Windows path — confirm the intended test-data location.
+    cv::Mat img_rgb = readImage("D:road.png");
+    ASSERT_FALSE(img_rgb.empty());
+
+    // Grayscale input for CV_8UC1, four-channel BGRA for every other type.
+    cv::Mat img;
+    if (type == CV_8UC1)
+        cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
+    else
+        cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+
+    // Only 48x96 and 64x128 windows are kept; any other size becomes 64x128.
+    const bool window_is_known = (winSize == cv::Size(48, 96)) || (winSize == cv::Size(64, 128));
+    if (!window_is_known)
+        winSize = cv::Size(64, 128);
+
+    cv::ocl::HOGDescriptor ocl_hog(winSize);
+    ocl_hog.gamma_correction = true;
+
+    // CPU descriptor configured alongside; it is set up here but not run below.
+    cv::HOGDescriptor hog;
+    hog.winSize = winSize;
+    hog.gammaCorrection = true;
+
+    if (winSize.width == 48 && winSize.height == 96)
+    {
+        // daimler's base
+        ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96());
+        hog.setSVMDetector(hog.getDaimlerPeopleDetector());
+    }
+    else if (winSize.width == 64 && winSize.height == 128)
+    {
+        ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128());
+        hog.setSVMDetector(hog.getDefaultPeopleDetector());
+    }
+    else
+    {
+        // Not reached: winSize was restricted to the two sizes above.
+        ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
+        hog.setSVMDetector(hog.getDefaultPeopleDetector());
+    }
+
+    // OpenCL detection
+    std::vector<cv::Point> d_v_locations;
+
+    double cycle_ticks_sum = 0;   // accumulated ticks, upload included
+    double kernel_ticks_sum = 0;  // accumulated ticks of detect only
+
+    for (int iter = 0; iter <= LOOP_TIMES; ++iter)
+    {
+        const double cycle_begin = (double)cvGetTickCount();
+
+        cv::ocl::oclMat d_img(img);  // upload
+
+        const double kernel_begin = (double)cvGetTickCount();
+        ocl_hog.detect(d_img, d_v_locations, 0);
+        const double kernel_ticks = (double)cvGetTickCount() - kernel_begin;
+
+        const double cycle_ticks = (double)cvGetTickCount() - cycle_begin;
+
+        // Iteration 0 is measured but not counted.
+        if (iter != 0)
+        {
+            cycle_ticks_sum = cycle_ticks + cycle_ticks_sum;
+            kernel_ticks_sum = kernel_ticks + kernel_ticks_sum;
+        }
+    }
+
+    const double ticks_divisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    cout << "average gpu runtime is " << cycle_ticks_sum / ticks_divisor << "ms" << endl;
+    cout << "average gpu runtime without data transfer is " << kernel_ticks_sum / ticks_divisor << "ms" << endl;
+}
+
+
+// Instantiates HOG for 2 window sizes x 2 image types (4 combinations).
+INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG,
+    testing::Combine(
+        testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
+        testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
+
+
+#endif //HAVE_OPENCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Fangfang Bai, fangfang@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+#ifdef HAVE_OPENCL
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+#ifndef MWC_TEST_UTILITY
+#define MWC_TEST_UTILITY
+//////// Utility
+#ifndef DIFFERENT_SIZES
+#else
+#undef DIFFERENT_SIZES
+#endif
+#define DIFFERENT_SIZES testing::Values(cv::Size(256, 256), cv::Size(3000, 3000))
+
+// Param class
+#ifndef IMPLEMENT_PARAM_CLASS
+// Generates a small wrapper class `name` around a value of `type`:
+// implicitly constructible from `type`, implicitly convertible back to it,
+// plus a PrintTo overload that writes name(<value>) to the given stream.
+#define IMPLEMENT_PARAM_CLASS(name, type)                                   \
+    class name                                                              \
+    {                                                                       \
+    public:                                                                 \
+        name ( type initial = type ()) : val_(initial) {}                   \
+        operator type () const { return val_; }                             \
+                                                                            \
+    private:                                                                \
+        type val_;                                                          \
+    };                                                                      \
+    inline void PrintTo( name param, std::ostream* os)                      \
+    {                                                                       \
+        *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
+    }
+
+IMPLEMENT_PARAM_CLASS(Channels, int)
+#endif // IMPLEMENT_PARAM_CLASS
+#endif // MWC_TEST_UTILITY
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate
+// Value generator covering the six template-matching methods.
+#define ALL_TEMPLATE_METHODS testing::Values( \
+    TemplateMethod(cv::TM_SQDIFF), \
+    TemplateMethod(cv::TM_CCORR), \
+    TemplateMethod(cv::TM_CCOEFF), \
+    TemplateMethod(cv::TM_SQDIFF_NORMED), \
+    TemplateMethod(cv::TM_CCORR_NORMED), \
+    TemplateMethod(cv::TM_CCOEFF_NORMED))
+
+IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
+
+// Printable method names; the benchmarks index this table with the `method` parameter.
+// NOTE(review): the order is expected to follow the numeric values of the
+// cv::TM_* constants (TM_SQDIFF = 0 ... TM_CCOEFF_NORMED = 5) — confirm against the imgproc header.
+const char* TEMPLATE_METHOD_NAMES[6] = {
+    "TM_SQDIFF",
+    "TM_SQDIFF_NORMED",
+    "TM_CCORR",
+    "TM_CCORR_NORMED",
+    "TM_CCOEFF",
+    "TM_CCOEFF_NORMED"
+};
+
+// Fixture shared by the 8-bit and 32-bit float matchTemplate benchmarks.
+// Parameters: (0) image size, (1) template size, (2) channel count, (3) method.
+PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod)
+{
+    cv::Size size;        // image size
+    cv::Size templ_size;  // template size
+    int cn;               // channel count of image and template
+    int method;           // passed to cv::ocl::matchTemplate and used to index TEMPLATE_METHOD_NAMES
+    //vector<cv::ocl::Info> oclinfo;
+
+    // Unpacks the test parameters; the device query is currently disabled.
+    virtual void SetUp()
+    {
+        method = GET_PARAM(3);
+        cn = GET_PARAM(2);
+        templ_size = GET_PARAM(1);
+        size = GET_PARAM(0);
+        //int devnums = getDevice(oclinfo);
+        //CV_Assert(devnums > 0);
+    }
+};
+// 8-bit variant of the MatchTemplate fixture.
+struct MatchTemplate8U : MatchTemplate {};
+
+// Benchmark for cv::ocl::matchTemplate on CV_8U images.
+// Prints the parameter set, runs LOOP_TIMES + 1 timed iterations, leaves the
+// first one out of the totals, and prints the average time of the whole
+// upload + match + download cycle and of the matchTemplate call alone.
+TEST_P(MatchTemplate8U, Performance)
+{
+    std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
+    std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
+    std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
+    std::cout << "Channels: " << cn << std::endl;
+
+    const int mat_type = CV_MAKETYPE(CV_8U, cn);
+    cv::Mat image = randomMat(size, mat_type);
+    cv::Mat templ = randomMat(templ_size, mat_type);
+    cv::Mat dst_gold;  // not used by this benchmark
+    cv::ocl::oclMat dst;
+
+    double cycle_ticks_sum = 0;   // accumulated ticks, transfers included
+    double kernel_ticks_sum = 0;  // accumulated ticks of matchTemplate only
+
+    for (int iter = 0; iter <= LOOP_TIMES; ++iter)
+    {
+        const double cycle_begin = (double)cvGetTickCount();
+
+        cv::ocl::oclMat ocl_image(image);  // upload
+        cv::ocl::oclMat ocl_templ(templ);  // upload
+
+        const double kernel_begin = (double)cvGetTickCount();
+        cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
+        const double kernel_ticks = (double)cvGetTickCount() - kernel_begin;
+
+        cv::Mat cpu_dst;
+        dst.download(cpu_dst);  // download
+
+        const double cycle_ticks = (double)cvGetTickCount() - cycle_begin;
+
+        // Iteration 0 is measured but not counted.
+        if (iter != 0)
+        {
+            cycle_ticks_sum = cycle_ticks + cycle_ticks_sum;
+            kernel_ticks_sum = kernel_ticks + kernel_ticks_sum;
+        }
+    }
+
+    const double ticks_divisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    cout << "average gpu runtime is " << cycle_ticks_sum / ticks_divisor << "ms" << endl;
+    cout << "average gpu runtime without data transfer is " << kernel_ticks_sum / ticks_divisor << "ms" << endl;
+}
+
+
+// 32-bit float variant of the MatchTemplate fixture.
+struct MatchTemplate32F : MatchTemplate {};
+
+// Benchmark for cv::ocl::matchTemplate on CV_32F images.
+// Prints the parameter set, then measures the same way as the 8-bit
+// benchmark: LOOP_TIMES + 1 timed iterations with the first one left out of
+// the totals, so the reported averages cover exactly LOOP_TIMES runs and
+// are comparable with the other benchmarks in this suite.
+TEST_P(MatchTemplate32F, Performance)
+{
+    std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
+    std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
+    std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
+    std::cout << "Channels: " << cn << std::endl;
+
+    cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
+    cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
+    cv::ocl::oclMat dst;
+
+    double totalgputick = 0;         // accumulated ticks, transfers included
+    double totalgputick_kernel = 0;  // accumulated ticks of matchTemplate only
+
+    for (int j = 0; j < LOOP_TIMES + 1; j++)
+    {
+        double t1 = (double)cvGetTickCount();  // gpu start
+
+        cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);  // upload
+        cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);  // upload
+
+        double t2 = (double)cvGetTickCount();  // kernel start
+        cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
+        t2 = (double)cvGetTickCount() - t2;    // kernel end
+
+        cv::Mat cpu_dst;
+        dst.download(cpu_dst);  // download
+
+        t1 = (double)cvGetTickCount() - t1;    // gpu end
+
+        // The first iteration is measured but not counted, as in the other benchmarks.
+        if (j == 0)
+            continue;
+
+        totalgputick = t1 + totalgputick;
+        totalgputick_kernel = t2 + totalgputick_kernel;
+    }
+
+    cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+}
+
+
+// Instantiates MatchTemplate8U for 3 image sizes x 2 template sizes x
+// 2 channel counts x all six methods. The 30x30 template and the
+// 3-channel case are commented out.
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, testing::Combine(
+    testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
+    testing::Values(TemplateSize(cv::Size(5, 5)),
+                    TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
+    testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
+    ALL_TEMPLATE_METHODS));
+
+// Instantiates MatchTemplate32F for 3 image sizes x 2 template sizes x
+// 2 channel counts x 2 methods (TM_SQDIFF and TM_CCORR only).
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F,
+    testing::Combine(
+        testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
+        testing::Values(TemplateSize(cv::Size(5, 5)),
+                        TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
+        testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
+        testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))
+    )
+);
+
+#endif //HAVE_OPENCL
\ No newline at end of file
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// fangfang bai, fangfang@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+
+#ifdef HAVE_OPENCL
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+// Fixture for the pyrDown benchmark.
+// Parameters: (0) element depth, (1) channel count.
+PARAM_TEST_CASE(PyrDown, MatType, int)
+{
+    int type;      // depth; combined with channels via CV_MAKETYPE in the test body
+    int channels;
+
+    // Host-side matrices.
+    cv::Mat mat1;  // source image, assigned by the test body
+    cv::Mat dst;
+
+    //std::vector<cv::ocl::Info> oclinfo;
+
+    // Device-side matrices.
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    // Unpacks the test parameters; the device query is currently disabled.
+    virtual void SetUp()
+    {
+        channels = GET_PARAM(1);
+        type = GET_PARAM(0);
+        //int devnums = getDevice(oclinfo);
+        //CV_Assert(devnums > 0);
+    }
+};
+
+#define VARNAME(A) string(#A);
+
+////////////////////////////////PyrDown/////////////////////////////////////////////////
+// Benchmark for cv::ocl::pyrDown on an MWIDTH x MHEIGHT random image.
+// Runs LOOP_TIMES + 1 timed iterations, leaves the first one out of the
+// totals, and prints the average time of the whole upload + pyrDown +
+// download cycle and of the pyrDown call alone.
+TEST_P(PyrDown, Mat)
+{
+    const cv::Size extent(MWIDTH, MHEIGHT);
+    cv::RNG &rng = TS::ptr()->get_rng();
+    mat1 = randomMat(rng, extent, CV_MAKETYPE(type, channels), 5, 16, false);
+
+    // Local device matrices; the fixture members gmat1/gdst are not used here.
+    cv::ocl::oclMat d_result;
+
+    double cycle_ticks_sum = 0;   // accumulated ticks, transfers included
+    double kernel_ticks_sum = 0;  // accumulated ticks of pyrDown only
+
+    for (int iter = 0; iter <= LOOP_TIMES; ++iter)
+    {
+        const double cycle_begin = (double)cvGetTickCount();
+
+        cv::ocl::oclMat d_source(mat1);  // upload
+
+        const double kernel_begin = (double)cvGetTickCount();
+        cv::ocl::pyrDown(d_source, d_result);
+        const double kernel_ticks = (double)cvGetTickCount() - kernel_begin;
+
+        cv::Mat cpu_dst;
+        d_result.download(cpu_dst);  // download
+
+        const double cycle_ticks = (double)cvGetTickCount() - cycle_begin;
+
+        // Iteration 0 is measured but not counted.
+        if (iter != 0)
+        {
+            cycle_ticks_sum = cycle_ticks + cycle_ticks_sum;
+            kernel_ticks_sum = kernel_ticks + kernel_ticks_sum;
+        }
+    }
+
+    const double ticks_divisor = (double)cvGetTickFrequency()* LOOP_TIMES * 1000.;
+    cout << "average gpu runtime is " << cycle_ticks_sum / ticks_divisor << "ms" << endl;
+    cout << "average gpu runtime without data transfer is " << kernel_ticks_sum / ticks_divisor << "ms" << endl;
+}
+
+//********test****************
+// Instantiates PyrDown for 2 depths x 2 channel counts (4 combinations).
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown,
+    Combine(
+        Values(CV_8U, CV_32F),
+        Values(1, 4)));
+
+
+#endif // HAVE_OPENCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// fangfang bai fangfang@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/core/core.hpp"
+#include "precomp.hpp"
+#include <iomanip>
+#ifdef HAVE_OPENCL
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+
+// Fixture for the pyrUp benchmark.
+// Parameters: (0) element depth, (1) channel count.
+PARAM_TEST_CASE(PyrUp, MatType, int)
+{
+    int type;      // depth; combined with channels via CV_MAKETYPE in the test body
+    int channels;
+    //std::vector<cv::ocl::Info> oclinfo;
+
+    // Unpacks the test parameters; the device query is currently disabled.
+    virtual void SetUp()
+    {
+        channels = GET_PARAM(1);
+        type = GET_PARAM(0);
+        //int devnums = getDevice(oclinfo);
+        //CV_Assert(devnums > 0);
+    }
+};
+
+// Benchmark for cv::ocl::pyrUp on an MWIDTH x MHEIGHT random image.
+// Runs LOOP_TIMES + 1 timed iterations, leaves the first one out of the
+// totals, and prints the average time of the whole upload + pyrUp +
+// download cycle and of the pyrUp call alone.
+TEST_P(PyrUp, Performance)
+{
+    const cv::Size extent(MWIDTH, MHEIGHT);
+    cv::Mat src = randomMat(extent, CV_MAKETYPE(type, channels));
+    cv::Mat dst_gold;  // not used by this benchmark
+    cv::ocl::oclMat dst;
+
+    double cycle_ticks_sum = 0;   // accumulated ticks, transfers included
+    double kernel_ticks_sum = 0;  // accumulated ticks of pyrUp only
+
+    for (int iter = 0; iter <= LOOP_TIMES; ++iter)
+    {
+        const double cycle_begin = (double)cvGetTickCount();
+
+        cv::ocl::oclMat srcMat(src);  // upload
+
+        const double kernel_begin = (double)cvGetTickCount();
+        cv::ocl::pyrUp(srcMat, dst);
+        const double kernel_ticks = (double)cvGetTickCount() - kernel_begin;
+
+        cv::Mat cpu_dst;
+        dst.download(cpu_dst);  // download
+
+        const double cycle_ticks = (double)cvGetTickCount() - cycle_begin;
+
+        // Iteration 0 is measured but not counted.
+        if (iter != 0)
+        {
+            cycle_ticks_sum = cycle_ticks + cycle_ticks_sum;
+            kernel_ticks_sum = kernel_ticks + kernel_ticks_sum;
+        }
+    }
+
+    const double ticks_divisor = (double)cvGetTickFrequency()* LOOP_TIMES * 1000.;
+    cout << "average gpu runtime is " << cycle_ticks_sum / ticks_divisor << "ms" << endl;
+    cout << "average gpu runtime without data transfer is " << kernel_ticks_sum / ticks_divisor << "ms" << endl;
+}
+
+// Instantiates PyrUp for 2 depths x 2 channel counts (4 combinations).
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp,
+    Combine(
+        Values(CV_8U, CV_32F),
+        Values(1, 4)));
+
+#endif // HAVE_OPENCL
\ No newline at end of file
namespace cv
{
- namespace ocl
- {
- ///////////////////////////OpenCL kernel strings///////////////////////////
- extern const char *imgproc_canny;
- }
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ // Declared here and defined elsewhere; its address is handed to openCLExecuteKernel by the canny:: wrappers in this file.
+ extern const char *imgproc_canny;
+ }
}
cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_)
void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
{
- dx.create(image_size, CV_32SC1);
- dy.create(image_size, CV_32SC1);
-
- if(apperture_size == 3)
- {
- dx_buf.create(image_size, CV_32SC1);
- dy_buf.create(image_size, CV_32SC1);
- }
- else if(apperture_size > 0)
+ // Creates the buffers used by the Canny stages for an image of image_size.
+ // dx and dy are 32-bit integer, single-channel matrices of the image size.
+ dx.create(image_size, CV_32SC1);
+ dy.create(image_size, CV_32SC1);
+
+ // Aperture 3 creates the dx_buf/dy_buf matrices; any other positive aperture
+ // creates derivative filters instead (only if they do not exist yet).
+ if(apperture_size == 3)
+ {
+ dx_buf.create(image_size, CV_32SC1);
+ dy_buf.create(image_size, CV_32SC1);
+ }
+ else if(apperture_size > 0)
{
- Mat kx, ky;
+ Mat kx, ky;
if (!filterDX)
- {
- filterDX = createDerivFilter_GPU(CV_32F, CV_32F, 1, 0, apperture_size, BORDER_REPLICATE);
- }
+ {
+ // CV_8U source, CV_32S result (the replaced line used CV_32F for both).
+ filterDX = createDerivFilter_GPU(CV_8U, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
+ }
if (!filterDY)
- {
- filterDY = createDerivFilter_GPU(CV_32F, CV_32F, 0, 1, apperture_size, BORDER_REPLICATE);
- }
+ {
+ filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
+ }
}
- edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
-
- trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
- trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
+ // edgeBuf is two rows and two columns larger than the image.
+ edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
- counter.create(1,1, CV_32SC1);
+ // One CV_16UC2 element per image pixel in each tracking buffer.
+ trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
+ trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
+
+ // counter is created directly through clCreateBuffer and initialised from a host-side zero.
+ // NOTE(review): each call allocates a new buffer and nothing in this function releases a
+ // previously created one; the Canny() overloads in this file call buf.create() on every
+ // invocation, so a reused CannyBuf may leak the old buffer — confirm, and release or reuse it.
+ float counter_f [1] = { 0 };
+ int err = 0;
+ counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
+ openCLSafeCall(err);
}
void cv::ocl::CannyBuf::release()
edgeBuf.release();
trackBuf1.release();
trackBuf2.release();
- counter.release();
+ openCLFree(counter);
}
namespace cv { namespace ocl {
- namespace canny
- {
+ namespace canny
+ {
+ // Forward declarations of the host-side wrappers for the individual Canny stages.
void calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols);
void calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad);
void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh);
- void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols);
+ // Both hysteresis wrappers now take counter as an untyped pointer (void *) instead of an oclMat.
+ void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols);
- void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols);
+ void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols);
void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols);
- }
+ }
}}// cv::ocl
namespace
std::swap( low_thresh, high_thresh );
dst.create(src.size(), CV_8U);
- dst.setTo(Scalar::all(0));
+ //dst.setTo(Scalar::all(0));
buf.create(src.size(), apperture_size);
- buf.edgeBuf.setTo(Scalar::all(0));
- buf.counter.setTo(Scalar::all(0));
+ //buf.edgeBuf.setTo(Scalar::all(0));
if (apperture_size == 3)
{
}
else
{
- // FIXME:
- // current ocl implementation requires the src and dst having same type
- // convertTo is time consuming so this may be optimized later.
- oclMat src_omat32f = src;
- src.convertTo(src_omat32f, CV_32F); // FIXME
-
- buf.filterDX->apply(src_omat32f, buf.dx);
- buf.filterDY->apply(src_omat32f, buf.dy);
-
- buf.dx.convertTo(buf.dx, CV_32S); // FIXME
- buf.dy.convertTo(buf.dy, CV_32S); // FIXME
+ buf.filterDX->apply(src, buf.dx);
+ buf.filterDY->apply(src, buf.dy);
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
}
std::swap( low_thresh, high_thresh);
dst.create(dx.size(), CV_8U);
- dst.setTo(Scalar::all(0));
+ //dst.setTo(Scalar::all(0));
buf.dx = dx; buf.dy = dy;
buf.create(dx.size(), -1);
- buf.edgeBuf.setTo(Scalar::all(0));
- buf.counter.setTo(Scalar::all(0));
+ //buf.edgeBuf.setTo(Scalar::all(0));
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols)
{
- Context *clCxt = src.clCxt;
- string kernelName = "calcSobelRowPass";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
-
- size_t globalThreads[3] = {cols, rows, 1};
- size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ // Runs the "calcSobelRowPass" kernel from imgproc_canny on src's context.
+ Context *clCxt = src.clCxt;
+ string kernelName = "calcSobelRowPass";
+ vector< pair<size_t, const void *> > args;
+
+ // Kernel arguments, in order: the src, dx_buf and dy_buf buffers, the image extent
+ // (rows, cols), then step and offset for src, dx_buf and dy_buf.
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
+
+ // Global range is cols x rows, local range 16 x 16.
+ // NOTE(review): cols/rows are not rounded to a multiple of 16 here; presumably
+ // openCLExecuteKernel adjusts the global size — verify.
+ size_t globalThreads[3] = {cols, rows, 1};
+ size_t localThreads[3] = {16, 16, 1};
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad)
{
- Context *clCxt = dx_buf.clCxt;
- string kernelName = "calcMagnitude_buf";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
-
- size_t globalThreads[3] = {cols, rows, 1};
- size_t localThreads[3] = {16, 16, 1};
-
- char build_options [15] = "";
- if(L2Grad)
- {
- strcat(build_options, "-D L2GRAD");
- }
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ Context *clCxt = dx_buf.clCxt; // host-side launcher for the "calcMagnitude_buf" kernel; uses the context of dx_buf
+ string kernelName = "calcMagnitude_buf";
+ vector< pair<size_t, const void *> > args; // (size, pointer) pairs, pushed in the kernel's parameter order
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data)); // row-pass results read by the kernel
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data)); // buffers the kernel writes: dx, dy and the magnitude
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step)); // step/offset pairs follow, one pair per buffer above
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
+
+ size_t globalThreads[3] = {cols, rows, 1}; // global range requested as cols x rows
+ size_t localThreads[3] = {16, 16, 1}; // 16x16 work-groups
+
+ char build_options [15] = ""; // large enough for the only option appended below (9 characters plus the terminator)
+ if(L2Grad)
+ {
+ strcat(build_options, "-D L2GRAD"); // defines L2GRAD for the kernel source when the L2 gradient is requested
+ }
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad)
{
- Context *clCxt = dx.clCxt;
- string kernelName = "calcMagnitude";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
-
- size_t globalThreads[3] = {cols, rows, 1};
- size_t localThreads[3] = {16, 16, 1};
-
- char build_options [15] = "";
- if(L2Grad)
- {
- strcat(build_options, "-D L2GRAD");
- }
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ Context *clCxt = dx.clCxt; // host-side launcher for the "calcMagnitude" kernel; uses the context of dx
+ string kernelName = "calcMagnitude";
+ vector< pair<size_t, const void *> > args; // (size, pointer) pairs, pushed in the kernel's parameter order
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data)); // derivative buffers supplied by the caller
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data)); // magnitude buffer written by the kernel
+ args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step)); // step/offset pairs follow, one pair per buffer above
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
+
+ size_t globalThreads[3] = {cols, rows, 1}; // global range requested as cols x rows
+ size_t localThreads[3] = {16, 16, 1}; // 16x16 work-groups
+
+ char build_options [15] = ""; // large enough for the only option appended below (9 characters plus the terminator)
+ if(L2Grad)
+ {
+ strcat(build_options, "-D L2GRAD"); // defines L2GRAD for the kernel source when the L2 gradient is requested
+ }
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh)
{
- Context *clCxt = dx.clCxt;
-
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_float), (void *)&low_thresh));
- args.push_back( make_pair( sizeof(cl_float), (void *)&high_thresh));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
+ Context *clCxt = dx.clCxt; // host-side launcher for the edge-map kernel; uses the context of dx
+
+ vector< pair<size_t, const void *> > args; // (size, pointer) pairs handed to openCLExecuteKernel
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data)); // four device buffers: dx, dy, mag, map
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( make_pair( sizeof(cl_float), (void *)&low_thresh)); // both thresholds are passed as cl_float
+ args.push_back( make_pair( sizeof(cl_float), (void *)&high_thresh));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step)); // step/offset pairs follow, one pair per buffer above
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
#if CALCMAP_FIXED
- size_t globalThreads[3] = {cols, rows, 1};
- string kernelName = "calcMap";
- size_t localThreads[3] = {16, 16, 1};
+ size_t globalThreads[3] = {cols, rows, 1};
+ string kernelName = "calcMap"; // variant used when CALCMAP_FIXED is non-zero
+ size_t localThreads[3] = {16, 16, 1}; // 16x16 work-groups for "calcMap"
#else
- size_t globalThreads[3] = {cols, rows, 1};
- string kernelName = "calcMap_2";
- size_t localThreads[3] = {256, 1, 1};
+ size_t globalThreads[3] = {cols, rows, 1};
+ string kernelName = "calcMap_2"; // variant used otherwise
+ size_t localThreads[3] = {256, 1, 1}; // 256x1 work-groups for "calcMap_2"
#endif
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); // no extra build options for either variant
}
-void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols)
+void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols) // counter is now an opaque handle instead of an oclMat
{
- Context *clCxt = map.clCxt;
- string kernelName = "edgesHysteresisLocal";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
-
- size_t globalThreads[3] = {cols, rows, 1};
- size_t localThreads[3] = {16, 16, 1};
-
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ Context *clCxt = map.clCxt; // host-side launcher for the "edgesHysteresisLocal" kernel; uses the context of map
+ string kernelName = "edgesHysteresisLocal";
+ vector< pair<size_t, const void *> > args; // (size, pointer) pairs handed to openCLExecuteKernel
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&counter)); // the handle value itself is the cl_mem-sized argument, hence the address of the local pointer
+ args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map.step)); // only map has step/offset arguments; st1 and counter do not
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
+
+ size_t globalThreads[3] = {cols, rows, 1}; // global range requested as cols x rows
+ size_t localThreads[3] = {16, 16, 1}; // 16x16 work-groups
+
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols)
+// Runs the "edgesHysteresisGlobal" kernel repeatedly until the device-side
+// counter reads back as zero.
+//   map      - edge map updated in place by the kernel
+//   st1, st2 - work buffers; swapped after every pass
+//   counter  - opaque handle to a cl_mem holding one unsigned 32-bit value
+//              (presumably the number of entries queued by the previous
+//              pass -- confirm against the kernel source)
+//   rows, cols - image extent forwarded to the kernel
+void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols)
{
- unsigned int count = Mat(counter).at<unsigned int>(0);
-
- Context *clCxt = map.clCxt;
- string kernelName = "edgesHysteresisGlobal";
- vector< pair<size_t, const void *> > args;
- size_t localThreads[3] = {128, 1, 1};
+ cl_command_queue queue = Context::getContext()->impl->clCmdQueue;
+ unsigned int count = 0;
+ // Blocking read of the counter. The size is that of the host variable
+ // (the previous sizeof(float) only matched by coincidence) and the
+ // wait-list length is the integer 0, not a null pointer.
+ openCLSafeCall(clEnqueueReadBuffer(queue, (cl_mem)counter, CL_TRUE, 0, sizeof(count), &count, 0, NULL, NULL));
+ Context *clCxt = map.clCxt;
+ string kernelName = "edgesHysteresisGlobal";
+ vector< pair<size_t, const void *> > args;
+ size_t localThreads[3] = {128, 1, 1};
#define DIVUP(a, b) ((a)+(b)-1)/(b)
- while(count > 0)
- {
- counter.setTo(0);
- args.clear();
- size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
- args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&count));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
-
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
- count = Mat(counter).at<unsigned int>(0);
- std::swap(st1, st2);
- }
+ while(count > 0)
+ {
+ // Reset the device counter before each pass. This replaces the former
+ // counter.setTo(0); without it the value read back after the launch
+ // would include the previous pass's count.
+ const unsigned int zero = 0;
+ openCLSafeCall(clEnqueueWriteBuffer(queue, (cl_mem)counter, CL_TRUE, 0, sizeof(zero), &zero, 0, NULL, NULL));
+ args.clear();
+ // 128 work-items per pending entry, spread over a second dimension
+ // once there are more than 65535 entries.
+ size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&count));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
+
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ // Blocking read: the value produced by this pass decides whether to loop again.
+ openCLSafeCall(clEnqueueReadBuffer(queue, (cl_mem)counter, CL_TRUE, 0, sizeof(count), &count, 0, NULL, NULL));
+ // What this pass was given as st2 becomes st1 for the next one.
+ std::swap(st1, st2);
+ }
#undef DIVUP
}
void canny::getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols)
{
- Context *clCxt = map.clCxt;
- string kernelName = "getEdges";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
-
- size_t globalThreads[3] = {cols, rows, 1};
- size_t localThreads[3] = {16, 16, 1};
-
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ Context *clCxt = map.clCxt; // host-side launcher for the "getEdges" kernel; uses the context of map
+ string kernelName = "getEdges";
+ vector< pair<size_t, const void *> > args; // (size, pointer) pairs handed to openCLExecuteKernel
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data)); // two device buffers: map and dst
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map.step)); // step/offset pairs follow, one pair per buffer above
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
+
+ size_t globalThreads[3] = {cols, rows, 1}; // global range requested as cols x rows
+ size_t localThreads[3] = {16, 16, 1}; // 16x16 work-groups
+
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
#endif // HAVE_OPENCL
void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
{
- CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size());
+ CV_Assert(src.type() == CV_32FC1);
+
+ dst.create(src.size(), src.type());
Context *clCxt = src.clCxt;
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img,
float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
+
+ void resize( const oclMat &src, oclMat &dst, const Size sz);
}
}}}
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
+
+ effect_size = Size(0, 0);
}
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
CV_Assert(checkDetectorSize());
}
-void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle)
+void cv::ocl::HOGDescriptor::init_buffer(const oclMat& img, Size win_stride) // allocates the working buffers, all sized from img
{
- CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
+ if (!image_scale.empty()) // nothing is (re)allocated once image_scale holds data
+ return;
- grad.create(img.size(), CV_32FC2);
+ if (effect_size == Size(0, 0)) // (0, 0) is treated as "not set yet": fall back to the full image size
+ effect_size = img.size();
+ grad.create(img.size(), CV_32FC2);
qangle.create(img.size(), CV_8UC2);
+ const size_t block_hist_size = getBlockHistogramSize();
+ const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
+ block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F); // a single row: one histogram of block_hist_size floats per block of img
+
+ Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
+ labels.create(1, wins_per_img.area(), CV_8U); // a single row: one byte per detection window of img
+}
+
+void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle) // no longer creates grad/qangle itself
+{
+ CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); // only these two types have a case in the switch below
+
float angleScale = (float)(nbins / CV_PI);
switch (img.type())
{
case CV_8UC1:
- hog::compute_gradients_8UC1(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
+ hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); // extent comes from effect_size rather than img.rows/img.cols
break;
case CV_8UC4:
- hog::compute_gradients_8UC4(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);
+ hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); // extent comes from effect_size rather than img.rows/img.cols
break;
}
}
{
computeGradient(img, grad, qangle);
- size_t block_hist_size = getBlockHistogramSize();
- Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
-
- block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
-
- hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, grad, qangle, (float)getWinSigma(), block_hists);
+ hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
+ grad, qangle, (float)getWinSigma(), block_hists);
- hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, block_hists, (float)threshold_L2hys);
+ hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
+ block_hists, (float)threshold_L2hys);
}
{
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
+ init_buffer(img, win_stride);
+
computeBlockHistograms(img);
const size_t block_hist_size = getBlockHistogramSize();
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
- Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
+ Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
{
case DESCR_FORMAT_ROW_BY_ROW:
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
- win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors);
+ win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
break;
case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
- win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors);
+ win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
break;
default:
CV_Error(CV_StsBadArg, "Unknown descriptor format");
if (detector.empty())
return;
- computeBlockHistograms(img);
-
if (win_stride == Size())
win_stride = block_stride;
else
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
+ init_buffer(img, win_stride);
- Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
- labels.create(1, wins_per_img.area(), CV_8U);
+ computeBlockHistograms(img);
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
- win_stride.height, win_stride.width, img.rows, img.cols, block_hists,
+ win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists,
detector, (float)free_coef, (float)hit_threshold, labels);
labels.download(labels_host);
unsigned char* vec = labels_host.ptr();
+ Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
for (int i = 0; i < wins_per_img.area(); i++)
{
int y = i / wins_per_img.width;
Size win_stride, Size padding, double scale0, int group_threshold)
{
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
+ CV_Assert(scale0 > 1);
vector<double> level_scale;
double scale = 1.;
}
levels = std::max(levels, 1);
level_scale.resize(levels);
- image_scales.resize(levels);
std::vector<Rect> all_candidates;
vector<Point> locations;
+ if (win_stride == Size())
+ win_stride = block_stride;
+ else
+ CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
+ init_buffer(img, win_stride);
+ image_scale.create(img.size(), img.type());
+
for (size_t i = 0; i < level_scale.size(); i++)
{
scale = level_scale[i];
- Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
- oclMat smaller_img;
-
- if (sz == img.size())
- smaller_img = img;
+ effect_size = Size(cvRound(img.cols / scale), cvRound(img.rows / scale));
+ if (effect_size == img.size())
+ {
+ detect(img, locations, hit_threshold, win_stride, padding);
+ }
else
{
- image_scales[i].create(sz, img.type());
- resize(img, image_scales[i], image_scales[i].size(), 0, 0, INTER_LINEAR);
- smaller_img = image_scales[i];
+ hog::resize( img, image_scale, effect_size);
+ detect(image_scale, locations, hit_threshold, win_stride, padding);
}
-
- detect(smaller_img, locations, hit_threshold, win_stride, padding);
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
for (size_t j = 0; j < locations.size(); j++)
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size));
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
+// Launches the HOG resize kernel, producing an sz.width x sz.height result in
+// dst from the whole of src.
+//   src - source image, CV_8UC1 or CV_8UC4 (one kernel exists per type)
+//   dst - preallocated destination with the same channel count as src and at
+//         least sz.width x sz.height pixels; it is not (re)allocated here
+//   sz  - size of the scaled result; must be non-empty
+void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
+{
+ // Any other type would silently fall through to the 8UC4 kernel below.
+ CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC4 );
+ CV_Assert( (src.channels() == dst.channels()) );
+ // A zero extent would divide by zero in ifx/ify and request an empty global range.
+ CV_Assert( sz.width > 0 && sz.height > 0 );
+ // The kernel is only told sz, so dst must be able to hold that many pixels.
+ CV_Assert( dst.cols >= sz.width && dst.rows >= sz.height );
+ Context *clCxt = Context::getContext();
+
+ string kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel";
+ size_t blkSizeX = 16, blkSizeY = 16;
+ // Round the global range up to a whole number of 16x16 work-groups.
+ size_t glbSizeX = (sz.width + blkSizeX - 1) / blkSizeX * blkSizeX;
+ size_t glbSizeY = (sz.height + blkSizeY - 1) / blkSizeY * blkSizeY;
+ size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
+ size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
+
+ // Source pixels per destination pixel along each axis.
+ float ifx = (float)src.cols / sz.width;
+ float ify = (float)src.rows / sz.height;
+
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&sz.height));
+ args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
+ args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
+
+ openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+}
+
#endif
}
}
-__kernel void BlendLinear_C3_D0(
- __global uchar *dst,
- __global uchar *img1,
- __global uchar *img2,
- __global float *weight1,
- __global float *weight2,
- int rows,
- int cols,
- int istep,
- int wstep
- )
-{
- int idx = get_global_id(0);
- int idy = get_global_id(1);
- int x = idx / 3;
- int y = idy;
- if (x < cols && y < rows)
- {
- int pos = idy * istep + idx;
- int wpos = idy * (wstep /sizeof(float)) + x;
- float w1 = weight1[wpos];
- float w2 = weight2[wpos];
- dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
- }
-}
-
__kernel void BlendLinear_C4_D0(
__global uchar *dst,
__global uchar *img1,
}
}
-__kernel void BlendLinear_C3_D5(
- __global float *dst,
- __global float *img1,
- __global float *img2,
- __global float *weight1,
- __global float *weight2,
- int rows,
- int cols,
- int istep,
- int wstep
- )
-{
- int idx = get_global_id(0);
- int idy = get_global_id(1);
- int x = idx / 3;
- int y = idy;
- if (x < cols && y < rows)
- {
- int pos = idy * (istep / sizeof(float)) + idx;
- int wpos = idy * (wstep /sizeof(float)) + x;
- float w1 = weight1[wpos];
- float w2 = weight2[wpos];
- dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
- }
-}
-
__kernel void BlendLinear_C4_D5(
__global float *dst,
__global float *img1,
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
}
}
+
#ifdef L2GRAD
inline float calc(int x, int y)
{
- return sqrt((float)(x * x + y * y));
+ return sqrt((float)(x * x + y * y)); // variant compiled when L2GRAD is defined: Euclidean (L2) norm of (x, y)
}
#else
inline float calc(int x, int y)
{
- return (float)abs(x) + abs(y);
+ return (float)abs(x) + abs(y); // default variant: |x| + |y| (L1 norm); the cast applies to abs(x) only, the sum is done in float
}
#endif //
// dx_buf output dx buffer
// dy_buf output dy buffer
__kernel
-void calcSobelRowPass
-(
- __global const uchar * src,
- __global int * dx_buf,
- __global int * dy_buf,
- int rows,
- int cols,
- int src_step,
- int src_offset,
- int dx_buf_step,
- int dx_buf_offset,
- int dy_buf_step,
- int dy_buf_offset
-)
+ void calcSobelRowPass
+ (
+ __global const uchar * src,
+ __global int * dx_buf,
+ __global int * dy_buf,
+ int rows,
+ int cols,
+ int src_step,
+ int src_offset,
+ int dx_buf_step,
+ int dx_buf_offset,
+ int dy_buf_step,
+ int dy_buf_offset
+ )
{
- //src_step /= sizeof(*src);
- //src_offset /= sizeof(*src);
- dx_buf_step /= sizeof(*dx_buf);
- dx_buf_offset /= sizeof(*dx_buf);
- dy_buf_step /= sizeof(*dy_buf);
- dy_buf_offset /= sizeof(*dy_buf);
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- int lidx = get_local_id(0);
- int lidy = get_local_id(1);
-
- __local int smem[16][18];
-
- if(gidy < rows)
- {
- smem[lidy][lidx + 1] = src[gidx + gidy * src_step + src_offset];
- if(lidx == 0)
- {
- smem[lidy][0] = src[max(gidx - 1, 0) + gidy * src_step + src_offset];
- smem[lidy][17] = src[min(gidx + 16, cols - 1) + gidy * src_step + src_offset];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- if(gidx < cols)
- {
- dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset] =
- -smem[lidy][lidx] + smem[lidy][lidx + 2];
- dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset] =
- smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2];
- }
- }
+ // Horizontal half of the 3x3 Sobel filter. For each pixel, dx_buf receives
+ // right - left and dy_buf receives left + 2 * centre + right.
+ // Expects 16x16 work-groups (smem is sized for exactly that).
+
+ // src is uchar, so src_step and src_offset are already element counts;
+ // the int buffers have theirs converted to int units here.
+ dx_buf_step /= sizeof(*dx_buf);
+ dx_buf_offset /= sizeof(*dx_buf);
+ dy_buf_step /= sizeof(*dy_buf);
+ dy_buf_offset /= sizeof(*dy_buf);
+
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+
+ int lidx = get_local_id(0);
+ int lidy = get_local_id(1);
+
+ // 16 rows of 16 pixels plus one halo column on each side.
+ __local int smem[16][18];
+
+ // Work-items outside the image (when the global range is padded to whole
+ // work-groups) load a clamped, valid pixel instead of reading out of
+ // bounds; their values are never written back.
+ const int src_row = min(gidy, rows - 1) * src_step + src_offset;
+ smem[lidy][lidx + 1] = src[min(gidx, cols - 1) + src_row];
+ if(lidx == 0)
+ {
+ // The first work-item of each row also fills both halo columns,
+ // replicating the border pixel at the image edges.
+ smem[lidy][0] = src[min(max(gidx - 1, 0), cols - 1) + src_row];
+ smem[lidy][17] = src[min(gidx + 16, cols - 1) + src_row];
+ }
+ // The barrier is outside every conditional so that all work-items of the
+ // group reach it, including those past the last row.
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if(gidy < rows && gidx < cols)
+ {
+ dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset] =
+ -smem[lidy][lidx] + smem[lidy][lidx + 2];
+ dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset] =
+ smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2];
+ }
}
// calculate the magnitude of the filter pass combining both x and y directions
// dy direvitive in y direction output
// mag magnitude direvitive of xy output
__kernel
-void calcMagnitude_buf
-(
- __global const int * dx_buf,
- __global const int * dy_buf,
- __global int * dx,
- __global int * dy,
- __global float * mag,
- int rows,
- int cols,
- int dx_buf_step,
- int dx_buf_offset,
- int dy_buf_step,
- int dy_buf_offset,
- int dx_step,
- int dx_offset,
- int dy_step,
- int dy_offset,
- int mag_step,
- int mag_offset
-)
+ void calcMagnitude_buf
+ (
+ __global const int * dx_buf,
+ __global const int * dy_buf,
+ __global int * dx,
+ __global int * dy,
+ __global float * mag,
+ int rows,
+ int cols,
+ int dx_buf_step,
+ int dx_buf_offset,
+ int dy_buf_step,
+ int dy_buf_offset,
+ int dx_step,
+ int dx_offset,
+ int dy_step,
+ int dy_offset,
+ int mag_step,
+ int mag_offset
+ )
{
- dx_buf_step /= sizeof(*dx_buf);
- dx_buf_offset /= sizeof(*dx_buf);
- dy_buf_step /= sizeof(*dy_buf);
- dy_buf_offset /= sizeof(*dy_buf);
- dx_step /= sizeof(*dx);
- dx_offset /= sizeof(*dx);
- dy_step /= sizeof(*dy);
- dy_offset /= sizeof(*dy);
- mag_step /= sizeof(*mag);
- mag_offset /= sizeof(*mag);
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- int lidx = get_local_id(0);
- int lidy = get_local_id(1);
-
- __local int sdx[18][16];
- __local int sdy[18][16];
-
- if(gidx < cols)
- {
- sdx[lidy + 1][lidx] = dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset];
- sdy[lidy + 1][lidx] = dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset];
- if(lidy == 0)
- {
- sdx[0][lidx] = dx_buf[gidx + max(gidy - 1, 0) * dx_buf_step + dx_buf_offset];
- sdx[17][lidx] = dx_buf[gidx + min(gidy + 16, rows - 1) * dx_buf_step + dx_buf_offset];
-
- sdy[0][lidx] = dy_buf[gidx + max(gidy - 1, 0) * dy_buf_step + dy_buf_offset];
- sdy[17][lidx] = dy_buf[gidx + min(gidy + 16, rows - 1) * dy_buf_step + dy_buf_offset];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if(gidy < rows)
- {
- int x = sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx];
+ // Vertical half of the 3x3 Sobel filter plus the gradient magnitude.
+ // Combines three rows of dx_buf/dy_buf into the final derivatives, stores
+ // them in dx and dy, and writes calc(x, y) into mag at (gidx + 1, gidy + 1).
+ // Expects 16x16 work-groups (the local tiles are sized for exactly that).
+
+ // All steps and offsets are divided by the element size of their buffer
+ // before being used as indices.
+ dx_buf_step /= sizeof(*dx_buf);
+ dx_buf_offset /= sizeof(*dx_buf);
+ dy_buf_step /= sizeof(*dy_buf);
+ dy_buf_offset /= sizeof(*dy_buf);
+ dx_step /= sizeof(*dx);
+ dx_offset /= sizeof(*dx);
+ dy_step /= sizeof(*dy);
+ dy_offset /= sizeof(*dy);
+ mag_step /= sizeof(*mag);
+ mag_offset /= sizeof(*mag);
+
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+
+ int lidx = get_local_id(0);
+ int lidy = get_local_id(1);
+
+ // 16 columns of 16 values plus one halo row above and below.
+ __local int sdx[18][16];
+ __local int sdy[18][16];
+
+ // Work-items outside the image (when the global range is padded to whole
+ // work-groups) load a clamped, valid element instead of reading out of
+ // bounds; their values are never written back.
+ const int col = min(gidx, cols - 1);
+ const int row = min(gidy, rows - 1);
+ sdx[lidy + 1][lidx] = dx_buf[col + row * dx_buf_step + dx_buf_offset];
+ sdy[lidy + 1][lidx] = dy_buf[col + row * dy_buf_step + dy_buf_offset];
+ if(lidy == 0)
+ {
+ // The first work-item of each column also fills both halo rows,
+ // replicating the border row at the image edges.
+ const int row_above = min(max(gidy - 1, 0), rows - 1);
+ const int row_below = min(gidy + 16, rows - 1);
+ sdx[0][lidx] = dx_buf[col + row_above * dx_buf_step + dx_buf_offset];
+ sdx[17][lidx] = dx_buf[col + row_below * dx_buf_step + dx_buf_offset];
+
+ sdy[0][lidx] = dy_buf[col + row_above * dy_buf_step + dy_buf_offset];
+ sdy[17][lidx] = dy_buf[col + row_below * dy_buf_step + dy_buf_offset];
+ }
+ // The barrier is outside every conditional so that all work-items of the
+ // group reach it, including those past the last column.
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if(gidx < cols && gidy < rows)
+ {
+ int x = sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx];
int y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx];
-
- dx[gidx + gidy * dx_step + dx_offset] = x;
- dy[gidx + gidy * dy_step + dy_offset] = y;
- mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y);
- }
- }
+ dx[gidx + gidy * dx_step + dx_offset] = x;
+ dy[gidx + gidy * dy_step + dy_offset] = y;
+
+ // mag is addressed one element in from the top-left corner.
+ mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y);
+ }
}
// calculate the magnitude of the filter pass combining both x and y directions
// dy derivative in y direction output
// mag magnitude derivative of xy output
__kernel
-void calcMagnitude
-(
- __global const int * dx,
- __global const int * dy,
- __global float * mag,
- int rows,
- int cols,
- int dx_step,
- int dx_offset,
- int dy_step,
- int dy_offset,
- int mag_step,
- int mag_offset
-)
+ void calcMagnitude
+ (
+ __global const int * dx,
+ __global const int * dy,
+ __global float * mag,
+ int rows,
+ int cols,
+ int dx_step,
+ int dx_offset,
+ int dy_step,
+ int dy_offset,
+ int mag_step,
+ int mag_offset
+ )
{
- dx_step /= sizeof(*dx);
- dx_offset /= sizeof(*dx);
- dy_step /= sizeof(*dy);
- dy_offset /= sizeof(*dy);
- mag_step /= sizeof(*mag);
- mag_offset /= sizeof(*mag);
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- if(gidy < rows && gidx < cols)
- {
- mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] =
- calc(
- dx[gidx + gidy * dx_step + dx_offset],
- dy[gidx + gidy * dy_step + dy_offset]
- );
- }
+    // Steps and offsets are divided by the element size so they can be used
+    // directly as indices into the typed pointers.
+    dx_step    /= sizeof(*dx);
+    dx_offset  /= sizeof(*dx);
+    dy_step    /= sizeof(*dy);
+    dy_offset  /= sizeof(*dy);
+    mag_step   /= sizeof(*mag);
+    mag_offset /= sizeof(*mag);
+
+    const int col = get_global_id(0);
+    const int row = get_global_id(1);
+
+    // Work-items outside the rows x cols image have nothing to do.
+    if(row >= rows || col >= cols)
+        return;
+
+    const int grad_x = dx[col + row * dx_step + dx_offset];
+    const int grad_y = dy[col + row * dy_step + dy_offset];
+
+    // The magnitude is stored one column right and one row down of (col, row).
+    mag[(col + 1) + (row + 1) * mag_step + mag_offset] = calc(grad_x, grad_y);
}
//////////////////////////////////////////////////////////////////////////////////////////
// mag magnitudes calculated from calcMagnitude function
// map output containing raw edge types
__kernel
-void calcMap
-(
- __global const int * dx,
- __global const int * dy,
- __global const float * mag,
- __global int * map,
- int rows,
- int cols,
- float low_thresh,
- float high_thresh,
- int dx_step,
- int dx_offset,
- int dy_step,
- int dy_offset,
- int mag_step,
- int mag_offset,
- int map_step,
- int map_offset
-)
+ void calcMap
+ (
+ __global const int * dx,
+ __global const int * dy,
+ __global const float * mag,
+ __global int * map,
+ int rows,
+ int cols,
+ float low_thresh,
+ float high_thresh,
+ int dx_step,
+ int dx_offset,
+ int dy_step,
+ int dy_offset,
+ int mag_step,
+ int mag_offset,
+ int map_step,
+ int map_offset
+ )
{
- dx_step /= sizeof(*dx);
- dx_offset /= sizeof(*dx);
- dy_step /= sizeof(*dy);
- dy_offset /= sizeof(*dy);
- mag_step /= sizeof(*mag);
- mag_offset /= sizeof(*mag);
- map_step /= sizeof(*map);
- map_offset /= sizeof(*map);
-
- __local float smem[18][18];
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- int lidx = get_local_id(0);
- int lidy = get_local_id(1);
-
- int grp_idx = get_global_id(0) & 0xFFFFF0;
- int grp_idy = get_global_id(1) & 0xFFFFF0;
-
- int tid = lidx + lidy * 16;
- int lx = tid % 18;
- int ly = tid / 18;
- if(ly < 14)
- {
- smem[ly][lx] = mag[grp_idx + lx + (grp_idy + ly) * mag_step];
- }
- if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
- {
- smem[ly + 14][lx] = mag[grp_idx + lx + (grp_idy + ly + 14) * mag_step];
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if(gidy < rows && gidx < cols)
- {
- int x = dx[gidx + gidy * dx_step];
- int y = dy[gidx + gidy * dy_step];
- const int s = (x ^ y) < 0 ? -1 : 1;
- const float m = smem[lidy + 1][lidx + 1];
- x = abs(x);
- y = abs(y);
-
- // 0 - the pixel can not belong to an edge
- // 1 - the pixel might belong to an edge
- // 2 - the pixel does belong to an edge
- int edge_type = 0;
- if(m > low_thresh)
- {
- const int tg22x = x * TG22;
- const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
- y <<= CANNY_SHIFT;
- if(y < tg22x)
- {
- if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else if (y > tg67x)
- {
- if(m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else
- {
- if(m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- }
- map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
- }
+    // Steps and offsets are divided by the element size so they can be used
+    // directly as indices into the typed pointers.
+    dx_step    /= sizeof(*dx);
+    dx_offset  /= sizeof(*dx);
+    dy_step    /= sizeof(*dy);
+    dy_offset  /= sizeof(*dy);
+    mag_step   /= sizeof(*mag);
+    mag_offset /= sizeof(*mag);
+    map_step   /= sizeof(*map);
+    map_offset /= sizeof(*map);
+
+    // Local tile of magnitudes: a 16x16 block plus a one-element ring around it.
+    // The indexing below assumes a 16x16 work-group - TODO confirm against the host launch.
+    __local float smem[18][18];
+
+    int gidx = get_global_id(0);
+    int gidy = get_global_id(1);
+
+    int lidx = get_local_id(0);
+    int lidy = get_local_id(1);
+
+    // Clearing the low four bits rounds the global id down to a multiple of 16.
+    int grp_idx = get_global_id(0) & 0xFFFFF0;
+    int grp_idy = get_global_id(1) & 0xFFFFF0;
+
+    // Re-split the flattened local id over 18-wide rows so the work-items
+    // cooperatively fill the 18x18 tile: rows 0..13 first, then rows 14..17.
+    int tid = lidx + lidy * 16;
+    int lx = tid % 18;
+    int ly = tid / 18;
+    if(ly < 14)
+    {
+        // The offset is applied here the same way calcMagnitude applies it when writing mag.
+        smem[ly][lx] = mag[grp_idx + lx + (grp_idy + ly) * mag_step + mag_offset];
+    }
+    // NOTE(review): the first load above is unguarded while this one is bounded by
+    // rows/cols; tile rows that fail this test stay uninitialised - confirm the
+    // bounds against the size of the mag buffer allocated by the host.
+    if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
+    {
+        smem[ly + 14][lx] = mag[grp_idx + lx + (grp_idy + ly + 14) * mag_step + mag_offset];
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if(gidy < rows && gidx < cols)
+    {
+        int x = dx[gidx + gidy * dx_step + dx_offset];
+        int y = dy[gidx + gidy * dy_step + dy_offset];
+        // s is -1 when x and y have opposite signs, +1 otherwise; it selects
+        // which diagonal pair of neighbours is compared in the last branch.
+        const int s = (x ^ y) < 0 ? -1 : 1;
+        const float m = smem[lidy + 1][lidx + 1];
+        x = abs(x);
+        y = abs(y);
+
+        // 0 - the pixel can not belong to an edge
+        // 1 - the pixel might belong to an edge
+        // 2 - the pixel does belong to an edge
+        int edge_type = 0;
+        if(m > low_thresh)
+        {
+            // Fixed-point slope bounds; TG22 and CANNY_SHIFT are defined elsewhere
+            // in the file (presumably tan(22.5 deg) scaled by 2^CANNY_SHIFT - verify).
+            const int tg22x = x * TG22;
+            const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
+            y <<= CANNY_SHIFT;
+            if(y < tg22x)
+            {
+                // Compare against the left and right neighbours.
+                if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+            else if (y > tg67x)
+            {
+                // Compare against the upper and lower neighbours.
+                if(m > smem[lidy][lidx + 1] && m >= smem[lidy + 2][lidx + 1])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+            else
+            {
+                // Compare against the two diagonal neighbours chosen by s.
+                if(m > smem[lidy][lidx + 1 - s] && m > smem[lidy + 2][lidx + 1 + s])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+        }
+        // The map is written one column right and one row down of (gidx, gidy).
+        map[gidx + 1 + (gidy + 1) * map_step + map_offset] = edge_type;
+    }
}
// non local memory version
__kernel
-void calcMap_2
-(
- __global const int * dx,
- __global const int * dy,
- __global const float * mag,
- __global int * map,
- int rows,
- int cols,
- float low_thresh,
- float high_thresh,
- int dx_step,
- int dx_offset,
- int dy_step,
- int dy_offset,
- int mag_step,
- int mag_offset,
- int map_step,
- int map_offset
-)
+ void calcMap_2
+ (
+ __global const int * dx,
+ __global const int * dy,
+ __global const float * mag,
+ __global int * map,
+ int rows,
+ int cols,
+ float low_thresh,
+ float high_thresh,
+ int dx_step,
+ int dx_offset,
+ int dy_step,
+ int dy_offset,
+ int mag_step,
+ int mag_offset,
+ int map_step,
+ int map_offset
+ )
{
- dx_step /= sizeof(*dx);
- dx_offset /= sizeof(*dx);
- dy_step /= sizeof(*dy);
- dy_offset /= sizeof(*dy);
- mag_step /= sizeof(*mag);
- mag_offset /= sizeof(*mag);
- map_step /= sizeof(*map);
- map_offset /= sizeof(*map);
-
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- if(gidy < rows && gidx < cols)
- {
- int x = dx[gidx + gidy * dx_step];
- int y = dy[gidx + gidy * dy_step];
- const int s = (x ^ y) < 0 ? -1 : 1;
- const float m = mag[gidx + 1 + (gidy + 1) * mag_step];
- x = abs(x);
- y = abs(y);
-
- // 0 - the pixel can not belong to an edge
- // 1 - the pixel might belong to an edge
- // 2 - the pixel does belong to an edge
- int edge_type = 0;
- if(m > low_thresh)
- {
- const int tg22x = x * TG22;
- const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
- y <<= CANNY_SHIFT;
- if(y < tg22x)
- {
- if(m > mag[gidx + (gidy + 1) * mag_step] && m >= mag[gidx + 2 + (gidy + 1) * mag_step])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else if (y > tg67x)
- {
- if(m > mag[gidx + 1 + gidy* mag_step] && m >= mag[gidx + 1 + (gidy + 2) * mag_step])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else
- {
- if(m > mag[gidx + 1 - s + gidy * mag_step] && m > mag[gidx + 1 + s + (gidy + 2) * mag_step])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- }
- map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
- }
+    // Steps and offsets are divided by the element size so they can be used
+    // directly as indices into the typed pointers.
+    dx_step    /= sizeof(*dx);
+    dx_offset  /= sizeof(*dx);
+    dy_step    /= sizeof(*dy);
+    dy_offset  /= sizeof(*dy);
+    mag_step   /= sizeof(*mag);
+    mag_offset /= sizeof(*mag);
+    map_step   /= sizeof(*map);
+    map_offset /= sizeof(*map);
+
+    int gidx = get_global_id(0);
+    int gidy = get_global_id(1);
+
+    if(gidy < rows && gidx < cols)
+    {
+        int x = dx[gidx + gidy * dx_step + dx_offset];
+        int y = dy[gidx + gidy * dy_step + dy_offset];
+        // s is -1 when x and y have opposite signs, +1 otherwise; it selects
+        // which diagonal pair of neighbours is compared in the last branch.
+        const int s = (x ^ y) < 0 ? -1 : 1;
+        // mag is read one column right and one row down of (gidx, gidy); the
+        // offset is applied the same way calcMagnitude applies it when writing mag.
+        const float m = mag[gidx + 1 + (gidy + 1) * mag_step + mag_offset];
+        x = abs(x);
+        y = abs(y);
+
+        // 0 - the pixel can not belong to an edge
+        // 1 - the pixel might belong to an edge
+        // 2 - the pixel does belong to an edge
+        int edge_type = 0;
+        if(m > low_thresh)
+        {
+            // Fixed-point slope bounds; TG22 and CANNY_SHIFT are defined elsewhere
+            // in the file (presumably tan(22.5 deg) scaled by 2^CANNY_SHIFT - verify).
+            const int tg22x = x * TG22;
+            const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
+            y <<= CANNY_SHIFT;
+            if(y < tg22x)
+            {
+                // Compare against the left and right neighbours.
+                if(m > mag[gidx + (gidy + 1) * mag_step + mag_offset] && m >= mag[gidx + 2 + (gidy + 1) * mag_step + mag_offset])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+            else if (y > tg67x)
+            {
+                // Compare against the upper and lower neighbours.
+                if(m > mag[gidx + 1 + gidy * mag_step + mag_offset] && m >= mag[gidx + 1 + (gidy + 2) * mag_step + mag_offset])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+            else
+            {
+                // Compare against the two diagonal neighbours chosen by s.
+                if(m > mag[gidx + 1 - s + gidy * mag_step + mag_offset] && m > mag[gidx + 1 + s + (gidy + 2) * mag_step + mag_offset])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+        }
+        // The map is written one column right and one row down of (gidx, gidy).
+        map[gidx + 1 + (gidy + 1) * map_step + map_offset] = edge_type;
+    }
}
// [256, 1, 1] threaded, local memory version
__kernel
-void calcMap_3
-(
- __global const int * dx,
- __global const int * dy,
- __global const float * mag,
- __global int * map,
- int rows,
- int cols,
- float low_thresh,
- float high_thresh,
- int dx_step,
- int dx_offset,
- int dy_step,
- int dy_offset,
- int mag_step,
- int mag_offset,
- int map_step,
- int map_offset
-)
+ void calcMap_3
+ (
+ __global const int * dx,
+ __global const int * dy,
+ __global const float * mag,
+ __global int * map,
+ int rows,
+ int cols,
+ float low_thresh,
+ float high_thresh,
+ int dx_step,
+ int dx_offset,
+ int dy_step,
+ int dy_offset,
+ int mag_step,
+ int mag_offset,
+ int map_step,
+ int map_offset
+ )
{
- dx_step /= sizeof(*dx);
- dx_offset /= sizeof(*dx);
- dy_step /= sizeof(*dy);
- dy_offset /= sizeof(*dy);
- mag_step /= sizeof(*mag);
- mag_offset /= sizeof(*mag);
- map_step /= sizeof(*map);
- map_offset /= sizeof(*map);
-
- __local float smem[18][18];
-
- int lidx = get_local_id(0) % 16;
- int lidy = get_local_id(0) / 16;
-
- int grp_pix = get_global_id(0); // identifies which pixel is processing currently in the target block
- int grp_ind = get_global_id(1); // identifies which block of pixels is currently processing
-
- int grp_idx = (grp_ind % (cols/16)) * 16;
- int grp_idy = (grp_ind / (cols/16)) * 16; //(grp_ind / (cols/16)) * 16
-
- int gidx = grp_idx + lidx;
- int gidy = grp_idy + lidy;
-
- int tid = get_global_id(0) % 256;
- int lx = tid % 18;
- int ly = tid / 18;
- if(ly < 14)
- {
- smem[ly][lx] = mag[grp_idx + lx + (grp_idy + ly) * mag_step];
- }
- if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
- {
- smem[ly + 14][lx] = mag[grp_idx + lx + (grp_idy + ly + 14) * mag_step];
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if(gidy < rows && gidx < cols)
- {
- int x = dx[gidx + gidy * dx_step];
- int y = dy[gidx + gidy * dy_step];
- const int s = (x ^ y) < 0 ? -1 : 1;
- const float m = smem[lidy + 1][lidx + 1];
- x = abs(x);
- y = abs(y);
-
- // 0 - the pixel can not belong to an edge
- // 1 - the pixel might belong to an edge
- // 2 - the pixel does belong to an edge
- int edge_type = 0;
- if(m > low_thresh)
- {
- const int tg22x = x * TG22;
- const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
- y <<= CANNY_SHIFT;
- if(y < tg22x)
- {
- if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else if (y > tg67x)
- {
- if(m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else
- {
- if(m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- }
- map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
- }
+    // Steps and offsets are divided by the element size so they can be used
+    // directly as indices into the typed pointers.
+    dx_step    /= sizeof(*dx);
+    dx_offset  /= sizeof(*dx);
+    dy_step    /= sizeof(*dy);
+    dy_offset  /= sizeof(*dy);
+    mag_step   /= sizeof(*mag);
+    mag_offset /= sizeof(*mag);
+    map_step   /= sizeof(*map);
+    map_offset /= sizeof(*map);
+
+    // Local tile of magnitudes: a 16x16 block plus a one-element ring around it.
+    __local float smem[18][18];
+
+    // Dimension 0 enumerates the pixels of one 16x16 block; the local id is
+    // split into a column and a row inside that block.
+    int lidx = get_local_id(0) % 16;
+    int lidy = get_local_id(0) / 16;
+
+    // Dimension 1 identifies which block of pixels is being processed.
+    int grp_ind = get_global_id(1);
+
+    // NOTE(review): cols / 16 is 0 when cols < 16 (division by zero) and ignores a
+    // partial block column when cols is not a multiple of 16 - confirm the host
+    // only launches this kernel for suitable sizes.
+    int grp_idx = (grp_ind % (cols/16)) * 16;
+    int grp_idy = (grp_ind / (cols/16)) * 16;
+
+    int gidx = grp_idx + lidx;
+    int gidy = grp_idy + lidy;
+
+    // Re-split the pixel index over 18-wide rows so the work-items
+    // cooperatively fill the 18x18 tile: rows 0..13 first, then rows 14..17.
+    int tid = get_global_id(0) % 256;
+    int lx = tid % 18;
+    int ly = tid / 18;
+    if(ly < 14)
+    {
+        // The offset is applied here the same way calcMagnitude applies it when writing mag.
+        smem[ly][lx] = mag[grp_idx + lx + (grp_idy + ly) * mag_step + mag_offset];
+    }
+    // NOTE(review): the first load above is unguarded while this one is bounded by
+    // rows/cols; tile rows that fail this test stay uninitialised - confirm the
+    // bounds against the size of the mag buffer allocated by the host.
+    if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
+    {
+        smem[ly + 14][lx] = mag[grp_idx + lx + (grp_idy + ly + 14) * mag_step + mag_offset];
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if(gidy < rows && gidx < cols)
+    {
+        int x = dx[gidx + gidy * dx_step + dx_offset];
+        int y = dy[gidx + gidy * dy_step + dy_offset];
+        // s is -1 when x and y have opposite signs, +1 otherwise; it selects
+        // which diagonal pair of neighbours is compared in the last branch.
+        const int s = (x ^ y) < 0 ? -1 : 1;
+        const float m = smem[lidy + 1][lidx + 1];
+        x = abs(x);
+        y = abs(y);
+
+        // 0 - the pixel can not belong to an edge
+        // 1 - the pixel might belong to an edge
+        // 2 - the pixel does belong to an edge
+        int edge_type = 0;
+        if(m > low_thresh)
+        {
+            // Fixed-point slope bounds; TG22 and CANNY_SHIFT are defined elsewhere
+            // in the file (presumably tan(22.5 deg) scaled by 2^CANNY_SHIFT - verify).
+            const int tg22x = x * TG22;
+            const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
+            y <<= CANNY_SHIFT;
+            if(y < tg22x)
+            {
+                // Compare against the left and right neighbours.
+                if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+            else if (y > tg67x)
+            {
+                // Compare against the upper and lower neighbours.
+                if(m > smem[lidy][lidx + 1] && m >= smem[lidy + 2][lidx + 1])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+            else
+            {
+                // Compare against the two diagonal neighbours chosen by s.
+                if(m > smem[lidy][lidx + 1 - s] && m > smem[lidy + 2][lidx + 1 + s])
+                {
+                    edge_type = 1 + (int)(m > high_thresh);
+                }
+            }
+        }
+        // The map is written one column right and one row down of (gidx, gidy).
+        map[gidx + 1 + (gidy + 1) * map_step + map_offset] = edge_type;
+    }
}
#undef CANNY_SHIFT
// st the potential edge points found in this kernel call
// counter the number of potential edge points
__kernel
-void edgesHysteresisLocal
-(
- __global int * map,
- __global ushort2 * st,
- volatile __global unsigned int * counter,
- int rows,
- int cols,
- int map_step,
- int map_offset
-)
+ void edgesHysteresisLocal
+ (
+ __global int * map,
+ __global ushort2 * st,
+ volatile __global unsigned int * counter,
+ int rows,
+ int cols,
+ int map_step,
+ int map_offset
+ )
{
- map_step /= sizeof(*map);
- map_offset /= sizeof(*map);
-
- __local int smem[18][18];
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- int lidx = get_local_id(0);
- int lidy = get_local_id(1);
-
- int grp_idx = get_global_id(0) & 0xFFFFF0;
- int grp_idy = get_global_id(1) & 0xFFFFF0;
-
- int tid = lidx + lidy * 16;
- int lx = tid % 18;
- int ly = tid / 18;
- if(ly < 14)
- {
- smem[ly][lx] = map[grp_idx + lx + (grp_idy + ly) * map_step + map_offset];
- }
- if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
- {
- smem[ly + 14][lx] = map[grp_idx + lx + (grp_idy + ly + 14) * map_step + map_offset];
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if(gidy < rows && gidx < cols)
- {
- int n;
-
- #pragma unroll
- for (int k = 0; k < 16; ++k)
- {
- n = 0;
-
- if (smem[lidy + 1][lidx + 1] == 1)
- {
- n += smem[lidy ][lidx ] == 2;
- n += smem[lidy ][lidx + 1] == 2;
- n += smem[lidy ][lidx + 2] == 2;
-
- n += smem[lidy + 1][lidx ] == 2;
- n += smem[lidy + 1][lidx + 2] == 2;
-
- n += smem[lidy + 2][lidx ] == 2;
- n += smem[lidy + 2][lidx + 1] == 2;
- n += smem[lidy + 2][lidx + 2] == 2;
- }
-
- if (n > 0)
- smem[lidy + 1][lidx + 1] = 2;
- }
-
- const int e = smem[lidy + 1][lidx + 1];
- map[gidx + 1 + (gidy + 1) * map_step] = e;
-
- n = 0;
- if(e == 2)
- {
- n += smem[lidy ][lidx ] == 1;
- n += smem[lidy ][lidx + 1] == 1;
- n += smem[lidy ][lidx + 2] == 1;
-
- n += smem[lidy + 1][lidx ] == 1;
- n += smem[lidy + 1][lidx + 2] == 1;
-
- n += smem[lidy + 2][lidx ] == 1;
- n += smem[lidy + 2][lidx + 1] == 1;
- n += smem[lidy + 2][lidx + 2] == 1;
- }
-
- if(n > 0)
- {
- unsigned int ind = atomic_inc(counter);
- st[ind] = (ushort2)(gidx + 1, gidy + 1);
- }
- }
+    // Step and offset are divided by the element size so they can be used
+    // directly as indices into map.
+    map_step   /= sizeof(*map);
+    map_offset /= sizeof(*map);
+
+    // Local tile of edge types: a 16x16 block plus a one-element ring around it.
+    // The indexing below assumes a 16x16 work-group - TODO confirm against the host launch.
+    __local int smem[18][18];
+
+    int gidx = get_global_id(0);
+    int gidy = get_global_id(1);
+
+    int lidx = get_local_id(0);
+    int lidy = get_local_id(1);
+
+    // Clearing the low four bits rounds the global id down to a multiple of 16.
+    int grp_idx = get_global_id(0) & 0xFFFFF0;
+    int grp_idy = get_global_id(1) & 0xFFFFF0;
+
+    // Re-split the flattened local id over 18-wide rows so the work-items
+    // cooperatively fill the 18x18 tile: rows 0..13 first, then rows 14..17.
+    int tid = lidx + lidy * 16;
+    int lx = tid % 18;
+    int ly = tid / 18;
+    if(ly < 14)
+    {
+        smem[ly][lx] = map[grp_idx + lx + (grp_idy + ly) * map_step + map_offset];
+    }
+    if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
+    {
+        smem[ly + 14][lx] = map[grp_idx + lx + (grp_idy + ly + 14) * map_step + map_offset];
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if(gidy < rows && gidx < cols)
+    {
+        int n;
+
+        // Up to 16 rounds of promotion: a "might be an edge" pixel (1) becomes a
+        // definite edge (2) as soon as any of its 8 neighbours is a definite edge.
+        // NOTE(review): neighbouring tile entries are rewritten by other work-items
+        // inside this loop and there is no barrier between rounds, so the number of
+        // rounds a promotion needs may depend on scheduling - confirm this is acceptable.
+#pragma unroll
+        for (int k = 0; k < 16; ++k)
+        {
+            n = 0;
+
+            if (smem[lidy + 1][lidx + 1] == 1)
+            {
+                n += smem[lidy    ][lidx    ] == 2;
+                n += smem[lidy    ][lidx + 1] == 2;
+                n += smem[lidy    ][lidx + 2] == 2;
+
+                n += smem[lidy + 1][lidx    ] == 2;
+                n += smem[lidy + 1][lidx + 2] == 2;
+
+                n += smem[lidy + 2][lidx    ] == 2;
+                n += smem[lidy + 2][lidx + 1] == 2;
+                n += smem[lidy + 2][lidx + 2] == 2;
+            }
+
+            if (n > 0)
+                smem[lidy + 1][lidx + 1] = 2;
+        }
+
+        const int e = smem[lidy + 1][lidx + 1];
+        // Store through the same offset the tile was loaded with, so the load
+        // and the store address the same map element.
+        map[gidx + 1 + (gidy + 1) * map_step + map_offset] = e;
+
+        // A definite edge that still has a "might be an edge" neighbour is
+        // queued in st so a later pass can continue from it.
+        n = 0;
+        if(e == 2)
+        {
+            n += smem[lidy    ][lidx    ] == 1;
+            n += smem[lidy    ][lidx + 1] == 1;
+            n += smem[lidy    ][lidx + 2] == 1;
+
+            n += smem[lidy + 1][lidx    ] == 1;
+            n += smem[lidy + 1][lidx + 2] == 1;
+
+            n += smem[lidy + 2][lidx    ] == 1;
+            n += smem[lidy + 2][lidx + 1] == 1;
+            n += smem[lidy + 2][lidx + 2] == 1;
+        }
+
+        if(n > 0)
+        {
+            // atomic_inc returns the previous counter value, which is this entry's slot.
+            unsigned int ind = atomic_inc(counter);
+            st[ind] = (ushort2)(gidx + 1, gidy + 1);
+        }
+    }
}
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
#define stack_size 512
__kernel
-void edgesHysteresisGlobal
-(
- __global int * map,
- __global ushort2 * st1,
- __global ushort2 * st2,
- volatile __global int * counter,
- int rows,
- int cols,
- int count,
- int map_step,
- int map_offset
-)
+ void edgesHysteresisGlobal
+ (
+ __global int * map,
+ __global ushort2 * st1,
+ __global ushort2 * st2,
+ volatile __global int * counter,
+ int rows,
+ int cols,
+ int count,
+ int map_step,
+ int map_offset
+ )
{
- map_step /= sizeof(*map);
- map_offset /= sizeof(*map);
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- int lidx = get_local_id(0);
- int lidy = get_local_id(1);
-
- int grp_idx = get_group_id(0);
- int grp_idy = get_group_id(1);
-
- volatile __local unsigned int s_counter;
- __local unsigned int s_ind;
-
- __local ushort2 s_st[stack_size];
-
- if(lidx == 0)
- {
- s_counter = 0;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- int ind = grp_idy * get_num_groups(0) + grp_idx;
-
- if(ind < count)
- {
- ushort2 pos = st1[ind];
- if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
- {
- if (lidx < 8)
- {
- pos.x += c_dx[lidx];
- pos.y += c_dy[lidx];
-
- if (map[pos.x + pos.y * map_step] == 1)
- {
- map[pos.x + pos.y * map_step] = 2;
-
- ind = atomic_inc(&s_counter);
-
- s_st[ind] = pos;
- }
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- while (s_counter > 0 && s_counter <= stack_size - get_num_groups(0))
- {
- const int subTaskIdx = lidx >> 3;
- const int portion = min(s_counter, get_num_groups(0) >> 3);
-
- pos.x = pos.y = 0;
-
- if (subTaskIdx < portion)
- pos = s_st[s_counter - 1 - subTaskIdx];
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (lidx == 0)
- s_counter -= portion;
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
- {
- pos.x += c_dx[lidx & 7];
- pos.y += c_dy[lidx & 7];
-
- if (map[pos.x + map_offset + pos.y * map_step] == 1)
- {
- map[pos.x + map_offset + pos.y * map_step] = 2;
-
- ind = atomic_inc(&s_counter);
-
- s_st[ind] = pos;
- }
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
-
- if (s_counter > 0)
- {
- if (lidx == 0)
- {
- ind = atomic_add(counter, s_counter);
- s_ind = ind - s_counter;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- ind = s_ind;
-
- for (int i = lidx; i < s_counter; i += get_num_groups(0))
- {
- st2[ind + i] = s_st[i];
- }
- }
- }
- }
+    // Step and offset are divided by the element size so they can be used
+    // directly as indices into map.
+    map_step   /= sizeof(*map);
+    map_offset /= sizeof(*map);
+
+    int gidx = get_global_id(0);
+    int gidy = get_global_id(1);
+
+    int lidx = get_local_id(0);
+
+    int grp_idx = get_group_id(0);
+    int grp_idy = get_group_id(1);
+
+    // Number of work-items in this work-group along dimension 0. It bounds how
+    // many entries one round can push onto the local stack and is the stride
+    // with which the work-group walks that stack.
+    const unsigned int lsize = (unsigned int)get_local_size(0);
+
+    // Per-work-group stack of positions still to be expanded.
+    volatile __local unsigned int s_counter;
+    __local unsigned int s_ind;
+
+    __local ushort2 s_st[stack_size];
+
+    // NOTE(review): barrier() only synchronises the work-items of one work-group,
+    // so other work-groups may add to *counter before or after this reset.
+    // Clearing the counter on the host before the launch would avoid that - confirm
+    // with the host code before relying on this reset.
+    if(gidx + gidy == 0)
+    {
+        *counter = 0;
+    }
+    barrier(CLK_GLOBAL_MEM_FENCE);
+
+    if(lidx == 0)
+    {
+        s_counter = 0;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // One work-group per queued position: the linear group index selects the st1 entry.
+    int ind = grp_idy * get_num_groups(0) + grp_idx;
+
+    if(ind < count)
+    {
+        ushort2 pos = st1[ind];
+        if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
+        {
+            // The first 8 work-items each probe one of the 8 neighbours of the seed.
+            // c_dx / c_dy are the neighbour offset tables declared at file scope.
+            if (lidx < 8)
+            {
+                pos.x += c_dx[lidx];
+                pos.y += c_dy[lidx];
+
+                // map_offset is applied here exactly as in the loop below.
+                if (map[pos.x + map_offset + pos.y * map_step] == 1)
+                {
+                    map[pos.x + map_offset + pos.y * map_step] = 2;
+
+                    ind = atomic_inc(&s_counter);
+
+                    s_st[ind] = pos;
+                }
+            }
+            barrier(CLK_LOCAL_MEM_FENCE);
+
+            // Keep expanding while there is work and the stack still has room for
+            // the at most lsize entries that one round can push.
+            while (s_counter > 0 && s_counter <= stack_size - lsize)
+            {
+                // Each run of 8 consecutive work-items handles one stack entry,
+                // so at most lsize / 8 entries are popped per round.
+                const int subTaskIdx = lidx >> 3;
+                const int portion = min(s_counter, lsize >> 3);
+
+                pos.x = pos.y = 0;
+
+                if (subTaskIdx < portion)
+                    pos = s_st[s_counter - 1 - subTaskIdx];
+                barrier(CLK_LOCAL_MEM_FENCE);
+
+                if (lidx == 0)
+                    s_counter -= portion;
+                barrier(CLK_LOCAL_MEM_FENCE);
+
+                if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
+                {
+                    pos.x += c_dx[lidx & 7];
+                    pos.y += c_dy[lidx & 7];
+
+                    if (map[pos.x + map_offset + pos.y * map_step] == 1)
+                    {
+                        map[pos.x + map_offset + pos.y * map_step] = 2;
+
+                        ind = atomic_inc(&s_counter);
+
+                        s_st[ind] = pos;
+                    }
+                }
+                barrier(CLK_LOCAL_MEM_FENCE);
+            }
+
+            // Whatever is left on the local stack is handed to the next pass through st2.
+            if (s_counter > 0)
+            {
+                if (lidx == 0)
+                {
+                    // atomic_add returns the value held before the addition; that old
+                    // value is the first slot of the range reserved for this work-group.
+                    s_ind = atomic_add(counter, s_counter);
+                }
+                barrier(CLK_LOCAL_MEM_FENCE);
+
+                ind = s_ind;
+
+                // Stride by the work-group size so every stack entry is copied exactly once.
+                for (int i = lidx; i < s_counter; i += lsize)
+                {
+                    st2[ind + i] = s_st[i];
+                }
+            }
+        }
+    }
}
#undef stack_size
// map edge type mappings
// dst edge output
__kernel
-void getEdges
-(
- __global const int * map,
- __global uchar * dst,
- int rows,
- int cols,
- int map_step,
- int map_offset,
- int dst_step,
- int dst_offset
-)
+ void getEdges
+ (
+ __global const int * map,
+ __global uchar * dst,
+ int rows,
+ int cols,
+ int map_step,
+ int map_offset,
+ int dst_step,
+ int dst_offset
+ )
{
- map_step /= sizeof(*map);
- map_offset /= sizeof(*map);
- //dst_step /= sizeof(*dst);
- //dst_offset /= sizeof(*dst);
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- if(gidy < rows && gidx < cols)
- {
- //dst[gidx + gidy * dst_step] = map[gidx + 1 + (gidy + 1) * map_step] == 2 ? 255: 0;
- dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] / 2));
- }
+    // Step and offset of map are divided by the element size so they can be
+    // used directly as indices. dst is uchar, so its step and offset need no
+    // conversion (sizeof(*dst) is 1).
+    map_step   /= sizeof(*map);
+    map_offset /= sizeof(*map);
+
+    int gidx = get_global_id(0);
+    int gidy = get_global_id(1);
+
+    if(gidy < rows && gidx < cols)
+    {
+        // Edge type 2 becomes 255 and types 0 and 1 become 0: integer division
+        // by 2 gives 1 or 0, and the negated value wraps to 255 or 0 as uchar.
+        // map is read one column right and one row down of (gidx, gidy). Both
+        // buffers are addressed through their offsets so a destination that
+        // does not start at the beginning of its buffer is written correctly.
+        dst[gidx + gidy * dst_step + dst_offset] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] / 2));
+    }
}
// Image read mode
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+// OpenCL C predefines FLT_EPSILON; drop that definition first so this
+// file-local tolerance does not trigger a macro redefinition, and use a float
+// literal so no double-precision constant is introduced.
+#undef FLT_EPSILON
+#define FLT_EPSILON (1e-15f)
#define CV_PI_F 3.14159265f
-// print greyscale image to show image layout
-__kernel void printImage(image2d_t img)
-{
- printf("(%d, %d) - %3d \n",
- get_global_id(0),
- get_global_id(1),
- read_imageui(img, (int2)(get_global_id(0), get_global_id(1))).x
- );
-}
// Use integral image to calculate haar wavelets.
// N = 2
float val0 = N9[localLin];
if (val0 > c_hessianThreshold)
{
- //printf(\"(%3d, %3d) N9[%3d]=%7.1f val0=%7.1f\\n\", l_x, l_y, localLin - zoff, N9[localLin], val0);
// Coordinates for the start of the wavelet in the sum image. There
// is some integer division involved, so don't try to simplify this
// (cancel out sampleStep) without checking the result is the same
__global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
+
volatile __local float s_X[128];
volatile __local float s_Y[128];
volatile __local float s_angle[128];
and building the keypoint descriptor are defined relative to 's' */
const float s = featureSize[get_group_id(0)] * 1.2f / 9.0f;
+
/* To find the dominant orientation, the gradients in x and y are
sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x);
angle = atan2(Y, X);
+
if (angle < 0)
angle += 2.0f * CV_PI_F;
angle *= 180.0f / CV_PI_F;
+
}
}
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
barrier(CLK_LOCAL_MEM_FENCE);
-
+
float bestx = 0, besty = 0, best_mod = 0;
#pragma unroll
sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96];
}
-
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
bestx = sumx;
besty = sumy;
}
-
barrier(CLK_LOCAL_MEM_FENCE);
}
-
if (get_local_id(0) == 0)
{
s_X[get_local_id(1)] = bestx;
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
+ kp_dir = 360.0f - kp_dir;
+ if (fabs(kp_dir - 360.f) < FLT_EPSILON)
+ kp_dir = 0.f;
+
featureDir[get_group_id(0)] = kp_dir;
}
}
const float centerX = featureX[get_group_id(0)];
const float centerY = featureY[get_group_id(0)];
const float size = featureSize[get_group_id(0)];
- const float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f);
+ float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
+ if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
+ descriptor_dir = 0.f;
+ descriptor_dir *= (float)(CV_PI_F / 180.0f);
/* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang;
}
}
+
+//----------------------------------------------------------------------------
+// Resize
+
+// Nearest-neighbour resize of a 4-channel 8-bit image: each work-item copies
+// one uchar4 pixel from src to the destination position given by its global id.
+// ifx / ify scale a destination coordinate to a source coordinate.
+__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src,
+ int dst_offset, int src_offset, int dst_step, int src_step,
+ int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
+{
+    const int col = get_global_id(0);
+    const int row = get_global_id(1);
+
+    // Work-items that fall outside the destination image write nothing.
+    if(col >= dst_cols || row >= dst_rows)
+        return;
+
+    // Scale to source coordinates, round via floor(v + 0.5) and clamp to the
+    // last source column / row.
+    const int src_col = min((int)floor(col*ifx+0.5f), src_cols-1);
+    const int src_row = min((int)floor(row*ify+0.5f), src_rows-1);
+
+    // Offsets and steps are divided by 4 (>> 2) before indexing the uchar4 buffers.
+    const int dst_index = (dst_offset>>2) + row * (dst_step>>2) + col;
+    const int src_index = (src_offset>>2) + src_row * (src_step>>2) + src_col;
+
+    dst[dst_index] = src[src_index];
+}
+
+// Nearest-neighbour resize of a single-channel 8-bit image: each work-item
+// copies one uchar pixel from src to the destination position given by its
+// global id. ifx / ify scale a destination coordinate to a source coordinate.
+__kernel void resize_8UC1_kernel(__global uchar * dst, __global const uchar * src,
+ int dst_offset, int src_offset, int dst_step, int src_step,
+ int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
+{
+    const int col = get_global_id(0);
+    const int row = get_global_id(1);
+
+    // Work-items that fall outside the destination image write nothing.
+    if(col >= dst_cols || row >= dst_rows)
+        return;
+
+    // Scale to source coordinates, round via floor(v + 0.5) and clamp to the
+    // last source column / row.
+    const int src_col = min((int)floor(col*ifx+0.5f), src_cols-1);
+    const int src_row = min((int)floor(row*ify+0.5f), src_rows-1);
+
+    // Offsets and steps are used unscaled to index the uchar buffers.
+    const int dst_index = dst_offset + row * dst_step + col;
+    const int src_index = src_offset + src_row * src_step + src_col;
+
+    dst[dst_index] = src[src_index];
+}
\ No newline at end of file
using namespace cv::ocl;
using namespace std;
-#define EXT_FP64 0
-
#if !defined (HAVE_OPENCL)
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
#else
//helper routines
namespace cv
{
- namespace ocl
- {
- ///////////////////////////OpenCL kernel strings///////////////////////////
- extern const char *match_template;
- }
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *match_template;
+ }
}
namespace cv { namespace ocl
{
- void matchTemplate_SQDIFF(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
-
- void matchTemplate_SQDIFF_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
-
- void matchTemplate_CCORR(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
-
- void matchTemplate_CCORR_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
-
- void matchTemplate_CCOFF(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
-
- void matchTemplate_CCOFF_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
-
-
- void matchTemplateNaive_SQDIFF(
- const oclMat& image, const oclMat& templ, oclMat& result, int cn);
-
- void matchTemplateNaive_CCORR(
- const oclMat& image, const oclMat& templ, oclMat& result, int cn);
-
- // Evaluates optimal template's area threshold. If
- // template's area is less than the threshold, we use naive match
- // template version, otherwise FFT-based (if available)
- int getTemplateThreshold(int method, int depth)
- {
- switch (method)
- {
- case CV_TM_CCORR:
- if (depth == CV_32F) return 250;
- if (depth == CV_8U) return 300;
- break;
- case CV_TM_SQDIFF:
- if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
- if (depth == CV_8U) return 300;
- break;
- }
- CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
- return 0;
- }
-
-
- //////////////////////////////////////////////////////////////////////
- // SQDIFF
- void matchTemplate_SQDIFF(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
- if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
- {
- matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
- return;
- }
- else
- {
- // TODO
- CV_Error(CV_StsBadArg, "Not supported yet for this size template");
- }
- }
-
- void matchTemplate_SQDIFF_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- matchTemplate_CCORR(image,templ,result,buf);
- buf.image_sums.resize(1);
- buf.image_sqsums.resize(1);
-
- integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
-
-#if EXT_FP64 && SQRSUM_FIXED
- unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
-#else
- Mat sqr_mat = templ.reshape(1);
- unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
-#endif
+ void matchTemplate_SQDIFF(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+
+ void matchTemplate_SQDIFF_NORMED(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+
+ void matchTemplate_CCORR(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+
+ void matchTemplate_CCORR_NORMED(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+
+ void matchTemplate_CCOFF(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+
+ void matchTemplate_CCOFF_NORMED(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+
+
+ void matchTemplateNaive_SQDIFF(
+ const oclMat& image, const oclMat& templ, oclMat& result, int cn);
+
+ void matchTemplateNaive_CCORR(
+ const oclMat& image, const oclMat& templ, oclMat& result, int cn);
+
+ // Evaluates optimal template's area threshold. If
+ // template's area is less than the threshold, we use naive match
+ // template version, otherwise FFT-based (if available)
+ int getTemplateThreshold(int method, int depth) // returns the template-area threshold for a (method, depth) pair; any other pair is reported through CV_Error
+ {
+ switch (method)
+ {
+ case CV_TM_CCORR:
+ if (depth == CV_32F) return 250;
+ if (depth == CV_8U) return 300;
+ break; // any other depth ends up at the CV_Error below
+ case CV_TM_SQDIFF:
+ if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
+ if (depth == CV_8U) return 300;
+ break; // any other depth ends up at the CV_Error below
+ }
+ CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
+ return 0; // only reached if CV_Error returns; presumably present to satisfy the non-void return type
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // SQDIFF
+ void matchTemplate_SQDIFF( // TM_SQDIFF entry point: sizes the result, then dispatches on template area
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) // buf is not read in this function
+ {
+ result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); // one CV_32F value per template placement inside image
+ if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
+ {
+ matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
+ return;
+ }
+ else
+ {
+ // TODO: templates at or above the threshold area have no implementation yet
+ CV_Error(CV_StsBadArg, "Not supported yet for this size template");
+ }
+ }
+
+ void matchTemplate_SQDIFF_NORMED(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
+ {
+ matchTemplate_CCORR(image,templ,result,buf);
+ buf.image_sums.resize(1);
+
+
+ integral(image.reshape(1), buf.image_sums[0]);
- Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
- }
-
- void matchTemplateNaive_SQDIFF(
- const oclMat& image, const oclMat& templ, oclMat& result, int cn)
- {
- CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
- || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
- CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
- CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
-
- Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Naive_SQDIFF";
-
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
- }
-
- //////////////////////////////////////////////////////////////////////
- // CCORR
- void matchTemplate_CCORR(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
- if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
- {
- matchTemplateNaive_CCORR(image, templ, result, image.channels());
- return;
- }
- else
- {
- CV_Error(CV_StsBadArg, "Not supported yet for this size template");
- if(image.depth() == CV_8U && templ.depth() == CV_8U)
- {
- image.convertTo(buf.imagef, CV_32F);
- templ.convertTo(buf.templf, CV_32F);
- }
- CV_Assert(image.channels() == 1);
- oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
- filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
- result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
- }
- }
-
- void matchTemplate_CCORR_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- matchTemplate_CCORR(image,templ,result,buf);
- buf.image_sums.resize(1);
- buf.image_sqsums.resize(1);
-
- integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
-#if EXT_FP64 && SQRSUM_FIXED
- unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
-#elif EXT_FP64
- oclMat templ_c1 = templ.reshape(1);
- multiply(templ_c1, templ_c1, templ_c1);
- unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
+#if SQRSUM_FIXED
+ unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
#else
- Mat m_templ_c1 = templ.reshape(1);
- multiply(m_templ_c1, m_templ_c1, m_templ_c1);
- unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0];
+ Mat sqr_mat = templ.reshape(1);
+ unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
#endif
- Context *clCxt = image.clCxt;
- string kernelName = "normalizeKernel";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
- }
-
- void matchTemplateNaive_CCORR(
- const oclMat& image, const oclMat& templ, oclMat& result, int cn)
- {
- CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
- || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
- CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
- CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
-
- Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Naive_CCORR";
-
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
- }
- //////////////////////////////////////////////////////////////////////
- // CCOFF
- void matchTemplate_CCOFF(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
-
- matchTemplate_CCORR(image,templ,result,buf);
-
- Context *clCxt = image.clCxt;
- string kernelName;
-
- kernelName = "matchTemplate_Prepared_CCOFF";
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
-
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
- // to be continued in the following section
- if(image.channels() == 1)
- {
- buf.image_sums.resize(1);
- // FIXME: temp fix for incorrect integral kernel
- oclMat tmp_oclmat;
- integral(image, buf.image_sums[0], tmp_oclmat);
-
- float templ_sum = 0;
-#if EXT_FP64
- templ_sum = (float)sum(templ)[0] / templ.size().area();
+
+ Context *clCxt = image.clCxt;
+ string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
+ vector< pair<size_t, const void *> > args;
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
+ }
+
+ void matchTemplateNaive_SQDIFF( // validates the inputs and launches the "matchTemplate_Naive_SQDIFF" kernel with one work-item per result element
+ const oclMat& image, const oclMat& templ, oclMat& result, int cn) // cn is not read here; image.channels() is used instead
+ {
+ CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
+ || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); // NOTE(review): && binds tighter than ||, so the result.depth() check only guards the CV_32F case
+ CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
+ CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); // result must already be sized by the caller
+ // The kernel is looked up by name in the match_template program source.
+ Context *clCxt = image.clCxt;
+ string kernelName = "matchTemplate_Naive_SQDIFF";
+ // Arguments: the three buffers, then rows/cols, offsets and steps, each in image/templ/result order.
+ vector< pair<size_t, const void *> > args;
+ // NOTE(review): the push order presumably has to match the kernel's parameter list - confirm against the .cl source.
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ // Global size covers the result; the local size is fixed at 32x8.
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); // channels/depth presumably select the kernel variant - confirm
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // CCORR
+ void matchTemplate_CCORR( // TM_CCORR entry point: sizes the result, then dispatches on template area
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) // buf.imagef / buf.templf are only touched in the else branch
+ {
+ result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); // one CV_32F value per template placement inside image
+ if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) // NOTE(review): threshold is looked up with CV_TM_SQDIFF, not CV_TM_CCORR - confirm this is intended
+ {
+ matchTemplateNaive_CCORR(image, templ, result, image.channels());
+ return;
+ }
+ else
+ {
+ CV_Error(CV_StsBadArg, "Not supported yet for this size template"); // NOTE(review): the statements below only run if CV_Error returns; presumably it throws, making them dead code
+ if(image.depth() == CV_8U && templ.depth() == CV_8U)
+ {
+ image.convertTo(buf.imagef, CV_32F);
+ templ.convertTo(buf.templf, CV_32F);
+ }
+ CV_Assert(image.channels() == 1);
+ oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
+ filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0)); // reads buf.imagef/buf.templf, which are only filled above for CV_8U inputs
+ result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1)); // NOTE(review): cv::Rect takes (x, y, width, height); the row-based extent is passed as width here - confirm
+ }
+ }
+
+ void matchTemplate_CCORR_NORMED(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
+ {
+ matchTemplate_CCORR(image,templ,result,buf);
+ buf.image_sums.resize(1);
+ buf.image_sqsums.resize(1);
+
+ integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
+#if SQRSUM_FIXED
+ unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
#else
- Mat o_templ = templ;
- templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine
-#endif
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
- }
- else
- {
- Vec4f templ_sum = Vec4f::all(0);
-#if EXT_FP64
- split(image,buf.images);
- templ_sum = sum(templ) / templ.size().area();
-#else
- // temp fix for non-double supported machine
- Mat o_templ = templ, o_image = image;
- vector<Mat> o_mat_vector;
- o_mat_vector.resize(image.channels());
- buf.images.resize(image.channels());
- split(o_image, o_mat_vector);
- for(int i = 0; i < o_mat_vector.size(); i ++)
- {
- buf.images[i] = oclMat(o_mat_vector[i]);
- }
- templ_sum = sum(o_templ) / templ.size().area();
+ oclMat templ_c1 = templ.reshape(1);
+ multiply(templ_c1, templ_c1, templ_c1);
+ unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
#endif
- buf.image_sums.resize(buf.images.size());
-
- for(int i = 0; i < image.channels(); i ++)
- {
- // FIXME: temp fix for incorrect integral kernel
- oclMat omat_temp;
- integral(buf.images[i], buf.image_sums[i], omat_temp);
- }
- switch(image.channels())
- {
- case 4:
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
- break;
- default:
- CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
- break;
- }
- }
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
- }
-
- void matchTemplate_CCOFF_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- image.convertTo(buf.imagef, CV_32F);
- templ.convertTo(buf.templf, CV_32F);
-
- matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
- float scale = 1.f/templ.size().area();
-
- Context *clCxt = image.clCxt;
- string kernelName;
-
- kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
-
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
- args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
- // to be continued in the following section
- if(image.channels() == 1)
- {
- buf.image_sums.resize(1);
- buf.image_sqsums.resize(1);
- integral(image, buf.image_sums[0], buf.image_sqsums[0]);
- float templ_sum = 0;
- float templ_sqsum = 0;
-#if EXT_FP64
- templ_sum = (float)sum(templ)[0];
+ Context *clCxt = image.clCxt;
+ string kernelName = "normalizeKernel";
+ vector< pair<size_t, const void *> > args;
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
+ }
+
+ void matchTemplateNaive_CCORR( // validates the inputs and launches the "matchTemplate_Naive_CCORR" kernel with one work-item per result element
+ const oclMat& image, const oclMat& templ, oclMat& result, int cn) // cn is not read here; image.channels() is used instead
+ {
+ CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
+ || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); // NOTE(review): && binds tighter than ||, so the result.depth() check only guards the CV_32F case
+ CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
+ CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); // result must already be sized by the caller
+ // The kernel is looked up by name in the match_template program source.
+ Context *clCxt = image.clCxt;
+ string kernelName = "matchTemplate_Naive_CCORR";
+ // Arguments: the three buffers, then rows/cols, offsets and steps, each in image/templ/result order.
+ vector< pair<size_t, const void *> > args;
+ // NOTE(review): the push order presumably has to match the kernel's parameter list - confirm against the .cl source.
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ // Global size covers the result; the local size is fixed at 32x8.
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); // channels/depth presumably select the kernel variant - confirm
+ }
+ //////////////////////////////////////////////////////////////////////
+ // CCOFF
+ void matchTemplate_CCOFF( // TM_CCOFF entry point: runs CCORR into result, then launches "matchTemplate_Prepared_CCOFF" with image integral sums and template means
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) // buf holds the split channels and their integral images
+ {
+ CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
+ // result is created and filled by the CCORR step before the CCOFF kernel runs on it.
+ matchTemplate_CCORR(image,templ,result,buf);
+ // The kernel is looked up by name in the match_template program source.
+ Context *clCxt = image.clCxt;
+ string kernelName;
+ // Global size covers the result; the local size is fixed at 32x8.
+ kernelName = "matchTemplate_Prepared_CCOFF";
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ // Common arguments: result buffer, image/templ/result dimensions, result offset and step.
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ // to be continued in the following section: channel-dependent arguments are appended below
+ if(image.channels() == 1)
+ {
+ buf.image_sums.resize(1);
+ integral(image, buf.image_sums[0]);
+ // templ_sum ends up holding the sum of the template divided by its area.
+ float templ_sum = 0;
+ templ_sum = (float)sum(templ)[0] / templ.size().area();
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
+ }
+ else
+ {
+ Vec4f templ_sum = Vec4f::all(0);
+ split(image,buf.images); // one plane per channel
+ templ_sum = sum(templ) / templ.size().area(); // per-channel sum divided by the template area
+ buf.image_sums.resize(buf.images.size());
+
+ for(int i = 0; i < image.channels(); i ++)
+ {
+ integral(buf.images[i], buf.image_sums[i]); // one integral image per channel
+ }
+ switch(image.channels())
+ {
+ case 4:
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); // only channel 0's offset/step are passed; presumably all four share one layout - confirm
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
+ break;
+ default: // channel counts other than 1 and 4 are rejected
+ CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
+ break;
+ }
+ }
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); // channels/depth presumably select the kernel variant - confirm
+ }
+
+ void matchTemplate_CCOFF_NORMED(
+ const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
+ {
+ image.convertTo(buf.imagef, CV_32F);
+ templ.convertTo(buf.templf, CV_32F);
+
+ matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
+ float scale = 1.f/templ.size().area();
+
+ Context *clCxt = image.clCxt;
+ string kernelName;
+
+ kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
+ // to be continued in the following section
+ if(image.channels() == 1)
+ {
+ buf.image_sums.resize(1);
+ buf.image_sqsums.resize(1);
+ integral(image, buf.image_sums[0], buf.image_sqsums[0]);
+ float templ_sum = 0;
+ float templ_sqsum = 0;
+ templ_sum = (float)sum(templ)[0];
#if SQRSUM_FIXED
- templ_sqsum = sqrSum(templ);
+ templ_sqsum = sqrSum(templ)[0];
#else
- oclMat templ_sqr = templ;
- multiply(templ,templ, templ_sqr);
- templ_sqsum = sum(templ_sqr)[0];
+ oclMat templ_sqr = templ;
+ multiply(templ,templ, templ_sqr);
+ templ_sqsum = sum(templ_sqr)[0];
#endif //SQRSUM_FIXED
- templ_sqsum -= scale * templ_sum * templ_sum;
- templ_sum *= scale;
-#else
- // temp fix for non-double supported machine
- Mat o_templ = templ;
- templ_sum = (float)sum(o_templ)[0];
- templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum;
- templ_sum *= scale;
-#endif
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
- }
- else
- {
- Vec4f templ_sum = Vec4f::all(0);
- Vec4f templ_sqsum = Vec4f::all(0);
-#if EXT_FP64
- split(image,buf.images);
- templ_sum = sum(templ);
+ templ_sqsum -= scale * templ_sum * templ_sum;
+ templ_sum *= scale;
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
+ }
+ else
+ {
+ Vec4f templ_sum = Vec4f::all(0);
+ Vec4f templ_sqsum = Vec4f::all(0);
+
+ split(image,buf.images);
+ templ_sum = sum(templ);
#if SQRSUM_FIXED
- templ_sqsum = sqrSum(templ);
+ templ_sqsum = sqrSum(templ);
#else
- oclMat templ_sqr = templ;
- multiply(templ,templ, templ_sqr);
- templ_sqsum = sum(templ_sqr);
+ oclMat templ_sqr = templ;
+ multiply(templ,templ, templ_sqr);
+ templ_sqsum = sum(templ_sqr);
#endif //SQRSUM_FIXED
- templ_sqsum -= scale * templ_sum * templ_sum;
-
-#else
- // temp fix for non-double supported machine
- Mat o_templ = templ, o_image = image;
-
- vector<Mat> o_mat_vector;
- o_mat_vector.resize(image.channels());
- buf.images.resize(image.channels());
- split(o_image, o_mat_vector);
- for(int i = 0; i < o_mat_vector.size(); i ++)
- {
- buf.images[i] = oclMat(o_mat_vector[i]);
- }
- templ_sum = sum(o_templ);
- templ_sqsum = sum(o_templ.mul(o_templ));
-#endif
- float templ_sqsum_sum = 0;
- for(int i = 0; i < image.channels(); i ++)
- {
- templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
- }
- templ_sum *= scale;
- buf.image_sums.resize(buf.images.size());
- buf.image_sqsums.resize(buf.images.size());
-
- for(int i = 0; i < image.channels(); i ++)
- {
- integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
- }
-
- switch(image.channels())
- {
- case 4:
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
- break;
- default:
- CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
- break;
- }
- }
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
- }
+ templ_sqsum -= scale * templ_sum * templ_sum;
+
+ float templ_sqsum_sum = 0;
+ for(int i = 0; i < image.channels(); i ++)
+ {
+ templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
+ }
+ templ_sum *= scale;
+ buf.image_sums.resize(buf.images.size());
+ buf.image_sqsums.resize(buf.images.size());
+
+ for(int i = 0; i < image.channels(); i ++)
+ {
+ integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
+ }
+
+ switch(image.channels())
+ {
+ case 4:
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
+ args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
+ break;
+ default:
+ CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
+ break;
+ }
+ }
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
+ }
}/*ocl*/} /*cv*/
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
{
- MatchTemplateBuf buf;
- matchTemplate(image,templ, result, method,buf);
+ MatchTemplateBuf buf;
+ matchTemplate(image,templ, result, method,buf);
}
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf)
{
- CV_Assert(image.type() == templ.type());
- CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
+ CV_Assert(image.type() == templ.type());
+ CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
- typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
+ typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
- const Caller callers[] = {
- ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
- ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
- ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
- };
+ const Caller callers[] = {
+ ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
+ ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
+ ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
+ };
- Caller caller = callers[method];
- CV_Assert(caller);
- caller(image, templ, result, buf);
+ Caller caller = callers[method];
+ CV_Assert(caller);
+ caller(image, templ, result, buf);
}
#endif //
-
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Dachuan Zhao, dachuan@multicorewareinc.com
+// Yao Wang, yao@multicorewareinc.com
+//
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
#include "precomp.hpp"
using namespace cv;
template<typename T>
void pyrdown_run(const oclMat &src, const oclMat &dst)
{
- CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows);
CV_Assert(src.type() == dst.type());
CV_Assert(src.depth() != CV_8S);
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
- //dst.step = dst.rows;
+ dst.download_channels = src.download_channels;
pyrdown_run(src, dst);
}
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
+// Yao Wang, yao@multicorewareinc.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
{
extern const char *pyr_up;
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
- {
+ {
dst.create(src.rows * 2, src.cols * 2, src.type());
+ dst.download_channels=src.download_channels;
Context *clCxt = src.clCxt;
const std::string kernelName = "pyrUp";
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
bindImgTex(img);
- oclMat integral_sqsum;
- integral(img, surf_.sum, integral_sqsum); // the two argumented integral version is incorrect
+ integral(img, surf_.sum); // NOTE(review): this comment previously said the two-argument integral() is incorrect, but this call now uses that overload -- confirm it is correct
bindSumTex(surf_.sum);
maskSumTex = 0;
TEST_P(ColumnSum, Accuracy)
{
cv::Mat src = randomMat(size, CV_32FC1);
- //cv::Mat src(size,CV_32FC1);
+ cv::ocl::oclMat d_dst;
+ cv::ocl::oclMat d_src(src);
- //cv::ocl::oclMat d_dst = ::createMat(size,src.type(),useRoi);
- cv::ocl::oclMat d_dst = loadMat(src,useRoi);
-
- cv::ocl::columnSum(loadMat(src,useRoi),d_dst);
+ cv::ocl::columnSum(d_src,d_dst);
cv::Mat dst(d_dst);
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
+// Yao Wang, yao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//M*/
-//#define PRINT_CPU_TIME 1000
-//#define PRINT_TIME
-
#include "precomp.hpp"
#include <iomanip>
using namespace testing;
using namespace std;
-PARAM_TEST_CASE(PyrDown, MatType, bool)
+PARAM_TEST_CASE(PyrDown, MatType, int)
{
- int type;
- cv::Scalar val;
-
- //src mat
- cv::Mat mat1;
- cv::Mat mat2;
- cv::Mat mask;
- cv::Mat dst;
- cv::Mat dst1; //bak, for two outputs
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int src2x;
- int src2y;
- int dstx;
- int dsty;
- int maskx;
- int masky;
-
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat mat2_roi;
- cv::Mat mask_roi;
- cv::Mat dst_roi;
- cv::Mat dst1_roi; //bak
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
- cv::ocl::oclMat gdst1_whole; //bak
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gmat2;
- cv::ocl::oclMat gdst;
- cv::ocl::oclMat gdst1; //bak
- cv::ocl::oclMat gmask;
+ int type;
+ int channels;
virtual void SetUp()
{
type = GET_PARAM(0);
-
- cv::RNG &rng = TS::ptr()->get_rng();
-
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- mat2 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- dst1 = randomMat(rng, size, type, 5, 16, false);
- mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
-
- cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
-
- val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+ channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
void Cleanup()
{
- mat1.release();
- mat2.release();
- mask.release();
- dst.release();
- dst1.release();
- mat1_roi.release();
- mat2_roi.release();
- mask_roi.release();
- dst_roi.release();
- dst1_roi.release();
-
- gdst_whole.release();
- gdst1_whole.release();
- gmat1.release();
- gmat2.release();
- gdst.release();
- gdst1.release();
- gmask.release();
}
- void random_roi()
- {
- cv::RNG &rng = TS::ptr()->get_rng();
-
-#ifdef RANDOMROI
- //randomize ROI
- roicols = rng.uniform(1, mat1.cols);
- roirows = rng.uniform(1, mat1.rows);
- src1x = rng.uniform(0, mat1.cols - roicols);
- src1y = rng.uniform(0, mat1.rows - roirows);
- dstx = rng.uniform(0, dst.cols - roicols);
- dsty = rng.uniform(0, dst.rows - roirows);
-#else
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
-#endif
- maskx = rng.uniform(0, mask.cols - roicols);
- masky = rng.uniform(0, mask.rows - roirows);
- src2x = rng.uniform(0, mat2.cols - roicols);
- src2y = rng.uniform(0, mat2.rows - roirows);
- mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
- mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
- mask_roi = mask(Rect(maskx, masky, roicols, roirows));
- dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
- dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
-
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
-
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi; //end
- }
-
};
-#define VARNAME(A) string(#A);
-
-
-void PrePrint()
-{
- //for(int i = 0; i < MHEIGHT; i++)
- //{
- // printf("(%d) ", i);
- // for(int k = 0; k < MWIDTH; k++)
- // {
- // printf("%d ", mat1_roi.data[i * MHEIGHT + k]);
- // }
- // printf("\n");
- //}
-}
-
-void PostPrint()
-{
- //dst_roi.convertTo(dst_roi,CV_32S);
- //cpu_dst.convertTo(cpu_dst,CV_32S);
- //dst_roi -= cpu_dst;
- //cpu_dst -= dst_roi;
- //for(int i = 0; i < MHEIGHT / 2; i++)
- //{
- // printf("(%d) ", i);
- // for(int k = 0; k < MWIDTH / 2; k++)
- // {
- // if(gmat1.depth() == 0)
- // {
- // if(gmat1.channels() == 1)
- // {
- // printf("%d ", dst_roi.data[i * MHEIGHT / 2 + k]);
- // }
- // else
- // {
- // printf("%d ", ((unsigned*)dst_roi.data)[i * MHEIGHT / 2 + k]);
- // }
- // }
- // else if(gmat1.depth() == 5)
- // {
- // printf("%.6f ", ((float*)dst_roi.data)[i * MHEIGHT / 2 + k]);
- // }
- // }
- // printf("\n");
- //}
- //for(int i = 0; i < MHEIGHT / 2; i++)
- //{
- // printf("(%d) ", i);
- // for(int k = 0; k < MWIDTH / 2; k++)
- // {
- // if(gmat1.depth() == 0)
- // {
- // if(gmat1.channels() == 1)
- // {
- // printf("%d ", cpu_dst.data[i * MHEIGHT / 2 + k]);
- // }
- // else
- // {
- // printf("%d ", ((unsigned*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
- // }
- // }
- // else if(gmat1.depth() == 5)
- // {
- // printf("%.6f ", ((float*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
- // }
- // }
- // printf("\n");
- //}
-}
-
-////////////////////////////////PyrDown/////////////////////////////////////////////////
-//struct PyrDown : ArithmTestBase {};
TEST_P(PyrDown, Mat)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
- random_roi();
+ cv::Size size(MWIDTH, MHEIGHT);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Mat src=randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
- cv::pyrDown(mat1_roi, dst_roi);
- cv::ocl::pyrDown(gmat1, gdst);
+ cv::ocl::oclMat gsrc(src), gdst;
+ cv::Mat dst_cpu;
+ cv::pyrDown(src, dst_cpu);
+ cv::ocl::pyrDown(gsrc, gdst);
- cv::Mat cpu_dst;
- gdst.download(cpu_dst);
- char s[1024];
- sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y);
+ cv::Mat dst;
+ gdst.download(dst);
+ char s[1024]={0};
- EXPECT_MAT_NEAR(dst_roi, cpu_dst, dst_roi.depth() == CV_32F ? 1e-5f : 1.0f, s);
+ EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s);
Cleanup();
}
}
-
-
-
-//********test****************
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8U, CV_32F), Values(1, 3, 4)));
#endif // HAVE_OPENCL
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
+// Yao Wang, yao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#ifdef HAVE_OPENCL
+using namespace cv;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
-PARAM_TEST_CASE(PyrUp,cv::Size,int)
+PARAM_TEST_CASE(PyrUp, MatType, int)
{
- cv::Size size;
int type;
+ int channels;
//std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
//int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
//CV_Assert(devnums > 0);
- size = GET_PARAM(0);
- type = GET_PARAM(1);
+ type = GET_PARAM(0);
+ channels = GET_PARAM(1);
}
};
TEST_P(PyrUp,Accuracy)
{
- cv::Mat src = randomMat(size,type);
-
-
- cv::Mat dst_gold;
- cv::pyrUp(src,dst_gold);
-
- cv::ocl::oclMat dst;
- cv::ocl::oclMat srcMat(src);
- cv::ocl::pyrUp(srcMat,dst);
- char s[100]={0};
+ for(int j = 0; j < LOOP_TIMES; j++)
+ {
+ Size size(MWIDTH, MHEIGHT);
+ Mat src = randomMat(size,CV_MAKETYPE(type, channels));
+ Mat dst_gold;
+ pyrUp(src,dst_gold);
+ ocl::oclMat dst;
+ ocl::oclMat srcMat(src);
+ ocl::pyrUp(srcMat,dst);
+ Mat cpu_dst;
+ dst.download(cpu_dst);
+ char s[100]={0};
- EXPECT_MAT_NEAR(dst_gold, dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
+ EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
+ }
}
-#if 1
+
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine(
- testing::Values(cv::Size(32, 32)),
- testing::Values(MatType(CV_8UC1),MatType(CV_16UC1),MatType(CV_32FC1),MatType(CV_8UC4),
- MatType(CV_16UC4),MatType(CV_32FC4))));
-#endif
+ Values(CV_8U, CV_32F), Values(1, 3, 4)));
+
#endif // HAVE_OPENCL
\ No newline at end of file