set(OPENCL_FOUND YES)
set(OPENCL_LIBRARIES "-framework OpenCL")
else()
- #find_package(OpenCL QUIET)
+ find_package(OpenCL QUIET)
if(WITH_OPENCLAMDFFT)
find_path(CLAMDFFT_INCLUDE_DIR
NAMES clAmdFft.h)
//! assignment operator. Perfom blocking upload to device.
oclMat &operator = (const Mat &m);
- /* Fixme! To be supported in OpenCL later. */
-#if 0
- //! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
- // Contains just image size, data ptr and step.
- template <class T> operator DevMem2D_<T>() const;
- template <class T> operator PtrStep_<T>() const;
-#endif
//! pefroms blocking upload data to oclMat.
void upload(const cv::Mat &m);
- /* Fixme! To be supported in OpenCL later. */
-#if 0
- //! upload async
- void upload(const CudaMem &m, Stream &stream);
-#endif
//! downloads data from device to host memory. Blocking calls.
operator Mat() const;
void download(cv::Mat &m) const;
- /* Fixme! To be supported in OpenCL later. */
-#if 0
- //! download async
- void download(CudaMem &m, Stream &stream) const;
-#endif
//! returns a new oclMatrix header for the specified row
oclMat row(int y) const;
int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
};
- ///////////////////////////////////////////////////////jhp_benchmark////////////////////////////////////////////////////
- void benchmark_copy_vectorize(const oclMat &src, oclMat &dst);
- void benchmark_copy_offset_stride(const oclMat &src, oclMat &dst);
- void benchmark_ILP();
//! computes vertical sum, supports only CV_32FC1 images
CV_EXPORTS void columnSum(const oclMat& src, oclMat& sum);
}
-#if PERF_TEST_OCL
-int main(int argc, char** argv)
-{
-
- static std::vector<Info> ocl_info;
- ocl::getDevice(ocl_info);
-
- run_perf_test();
- return 0;
-}
-#else
int main(int argc, char** argv)
{
+ std::vector<cv::ocl::Info> oclinfo; // device list handed to getDevice() below
TS::ptr()->init("ocl");
InitGoogleTest(&argc, argv);
print_info();
-
+ int devnums = getDevice(oclinfo); // result is checked below as a device count
+ if(devnums<1) // no usable device: report it and exit with a failure status instead of running the tests
+ {
+ std::cout << "no device found\n";
+ return -1;
+ }
+ // To use a non-default device, select it here:
+ //setDevice(oclinfo[0]);
+ setBinpath(CLBINPATH); // NOTE(review): presumably the directory for compiled kernel binaries -- confirm against setBinpath's declaration
return RUN_ALL_TESTS();
}
-#endif // PERF_TEST_OCL
-#else // HAVE_OPENC
+#else // DON'T HAVE_OPENCL
int main()
{
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicore Ware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include <ctime>
-
-#if PERF_TEST_OCL
-
-#ifdef HAVE_OPENCL
-
-#define SHOW_CPU false
-#define REPEAT 1000
-#define COUNT_U 0 // count the uploading execution time for ocl mat structures
-#define COUNT_D 0
-
-
-// the following macro section tests the target function (kernel) performance
-// upload is the code snippet for converting cv::mat to cv::ocl::oclMat
-// downloading is the code snippet for converting cv::ocl::oclMat back to cv::mat
-// change COUNT_U and COUNT_D to take downloading and uploading time into account
-#define P_TEST_FULL( upload, kernel_call, download ) \
-{ \
- std::cout<< "\n" #kernel_call "\n----------------------"; \
- {upload;} \
- R_TEST( kernel_call, 15 ); \
- double t = (double)cvGetTickCount(); \
- R_T( { \
- if( COUNT_U ) {upload;} \
- kernel_call; \
- if( COUNT_D ) {download;} \
- } ); \
- t = (double)cvGetTickCount() - t; \
- std::cout << "runtime is " << t/((double)cvGetTickFrequency()* 1000.) << "ms" << std::endl; \
-}
-
-
-#define R_T2( test ) \
-{ \
- std::cout<< "\n" #test "\n----------------------"; \
- R_TEST( test, 15 ) \
- clock_t st = clock(); \
- R_T( test ) \
- std::cout<< clock() - st << "ms\n"; \
-}
-#define R_T( test ) \
- R_TEST( test, REPEAT )
-#define R_TEST( test, repeat ) \
- try{ \
- for( int i = 0; i < repeat; i ++ ) { test; } \
- } catch( ... ) { std::cout << "||||| Exception catched! |||||\n"; return; }
-
-#define FILTER_TEST_IMAGE "C:/Windows/Web/Wallpaper/Landscapes/img9.jpg"
-#define WARN_NRUN( name ) \
- std::cout << "Warning: " #name " is not runnable!\n";
-
-
-void print_info();
-
-// performance base class
-struct PerfTest
-{
- virtual void Run() = 0;
- protected:
- virtual void SetUp() = 0;
-};
-///////////////////////////////////////
-// Arithm
-struct ArithmTestP : PerfTest
-{
- int type;
- cv::Scalar val;
-
- cv::Size size;
- cv::Mat mat1, mat2;
- cv::Mat mask;
- cv::Mat dst;
- cv::ocl::oclMat oclRes, oclmat1, oclmat2;
- cv::ocl::oclMat oclmask;
- std::vector<cv::Mat> dstv;
- protected:
- ArithmTestP() : type( CV_8UC4 ) {}
- virtual void SetUp()
- {
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
- size = cv::Size( 3000, 3000 ); // big input image
- mat1 = cvtest::randomMat(rng, size, type, 1, 255, false);
- mat2 = cvtest::randomMat(rng, size, type, 1, 255, false);
- mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false);
-
- cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
-
- val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
-
- oclmat1 = cv::ocl::oclMat(mat1);
- oclmat2 = cv::ocl::oclMat(mat2);
- oclmask = cv::ocl::oclMat(mask);
- }
-};
-
-struct AddArrayP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::add(oclmat1, oclmat2, oclRes),
- oclRes.download(dst);
- );
- }
-};
-
-struct SubtractArrayP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::subtract(oclmat1, oclmat2, oclRes),
- oclRes.download(dst);
- );
- }
-};
-
-struct MultiplyArrayP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- clock_t start = clock();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::multiply(oclmat1, oclmat2, oclRes),
- oclRes.download(dst);
- );
- }
-};
-
-struct DivideArrayP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::divide(oclmat1, oclmat2, oclRes),
- oclRes.download(dst);
- );
- }
-};
-
-struct ExpP : ArithmTestP
-{
- void Run()
- {
- type = CV_32FC1;
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::exp(oclmat1, oclRes),
- oclRes.download(dst);
- );
- }
-};
-
-struct LogP : ArithmTestP
-{
- void Run()
- {
- type = CV_32FC1;
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::log(oclmat1, oclRes),
- oclRes.download(dst);
- );
- }
-};
-
-struct CompareP : ArithmTestP
-{
- virtual void Run()
- {
- type = CV_32FC1;
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::compare(oclmat1, oclmat2, oclRes, cv::CMP_EQ),
- oclRes.download(dst);
- );
- }
-};
-
-struct FlipP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::flip(oclmat1, oclRes, 0),
- oclRes.download(dst);
- );
- }
- protected:
- virtual void SetUp()
- {
- type = CV_8UC4;
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
- size = cv::Size(3000, 3000);
- mat1 = cvtest::randomMat(rng, size, type, 1, 255, false);
- oclmat1 = cv::ocl::oclMat(mat1);
- }
-};
-
-struct MagnitudeP : ArithmTestP
-{
- virtual void Run()
- {
- type = CV_32F;
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::magnitude(oclmat1, oclmat1, oclRes),
- oclRes.download(dst);
- );
- }
-};
-
-struct LUTP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);ocllut = cv::ocl::oclMat(lut),
- cv::ocl::LUT(oclmat1, ocllut, oclRes),
- oclRes.download(dst);
- );
- }
- protected:
- cv::Mat lut;
- cv::ocl::oclMat ocllut;
- virtual void SetUp()
- {
- type = CV_8UC1;
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
- size = cv::Size(3000, 3000);
- mat1 = cvtest::randomMat(rng, size, type, 1, 255, false);
- lut = cvtest::randomMat(rng, cv::Size(256, 1), CV_8UC1, 100, 200, false);
- oclmat1 = cv::ocl::oclMat(mat1);
- ocllut = cv::ocl::oclMat(lut);
- }
-};
-
-struct MinMaxP : ArithmTestP
-{
- double minVal_gold, minVal;
- double maxVal_gold, maxVal;
-
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::minMax(oclmat1, &minVal, &maxVal, oclmat2),
- {};
- );
- }
-
- protected:
- virtual void SetUp()
- {
- type = CV_64F;
-
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
- size = cv::Size(3000, 3000);
-
- mat1 = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
- mat2 = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false);
-
- oclmat1 = cv::ocl::oclMat(mat1);
- oclmat2 = cv::ocl::oclMat(mat2);
- }
-};
-
-struct MinMaxLocP : MinMaxP
-{
- cv::Point minLoc_gold;
- cv::Point maxLoc_gold;
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::minMaxLoc(oclmat1, &minVal, &maxVal, &minLoc_gold, &maxLoc_gold, oclmat2),
- {}
- );
- }
-};
-
-struct CountNonZeroP : ArithmTestP
-{
- int n;
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- n = cv::ocl::countNonZero(oclmat1),
- {}
- );
- }
- protected:
- virtual void SetUp()
- {
- type = 6;
-
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
- size = cv::Size( 3000, 3000 );
-
- cv::Mat matBase = cvtest::randomMat(rng, size, CV_8U, 0.0, 1.0, false);
- matBase.convertTo(mat1, type);
-
- oclmat1 = cv::ocl::oclMat(mat1);
- }
-};
-
-struct SumP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- cv::Scalar n;
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- n = cv::ocl::sum(oclmat1),
- {}
- );
- }
-};
-
-struct BitwiseP : ArithmTestP
-{
- protected:
- virtual void SetUp()
- {
- type = CV_8UC4;
-
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
- size = cv::Size( 3000, 3000 );
-
- mat1.create(size, type);
- mat2.create(size, type);
-
- for (int i = 0; i < mat1.rows; ++i)
- {
- cv::Mat row1(1, static_cast<int>(mat1.cols * mat1.elemSize()), CV_8U, (void*)mat1.ptr(i));
- rng.fill(row1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
-
- cv::Mat row2(1, static_cast<int>(mat2.cols * mat2.elemSize()), CV_8U, (void*)mat2.ptr(i));
- rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
- }
- oclmat1 = cv::ocl::oclMat(mat1);
- oclmat2 = cv::ocl::oclMat(mat2);
- }
-};
-
-struct BitwiseNotP : BitwiseP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::bitwise_not(oclmat1, oclRes),
- oclRes.download(dst)
- );
- }
-};
-
-struct BitwiseAndP : BitwiseP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::bitwise_and(oclmat1, oclmat2, oclRes),
- oclRes.download(dst)
- );
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::bitwise_and(oclmat1, val, oclRes),
- oclRes.download(dst)
- );
- }
-};
-
-struct BitwiseXorP : BitwiseP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::bitwise_xor(oclmat1, oclmat2, oclRes),
- oclRes.download(dst)
- );
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::bitwise_xor(oclmat1, val, oclRes),
- oclRes.download(dst)
- );
-
- }
-};
-
-struct BitwiseOrP : BitwiseP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::bitwise_or(oclmat1, oclmat2, oclRes),
- oclRes.download(dst)
- );
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::bitwise_or(oclmat1, val, oclRes),
- oclRes.download(dst)
- );
- }
-};
-
-struct TransposeP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::transpose(oclmat1, oclRes),
- oclRes.download(dst)
- );
- }
-};
-
-struct AbsdiffArrayP : ArithmTestP
-{
- virtual void Run()
- {
- type = CV_32FC1;
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::absdiff(oclmat1, oclmat2, oclRes),
- oclRes.download(dst)
- );
- }
-};
-
-struct PhaseP : ArithmTestP
-{
- virtual void Run()
- {
- type = CV_32F;
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
- cv::ocl::phase(oclmat1,oclmat2,oclRes,1),
- oclRes.download(dst)
- );
- }
-};
-
-struct CartToPolarP : ArithmTestP
-{
- cv::ocl::oclMat oclRes1;
- virtual void Run()
- {
- type = CV_64FC4;
- SetUp();
- clock_t start = clock();
- R_TEST(
- cv::ocl::cartToPolar(oclmat1,oclmat2,oclRes, oclRes1, 1);
- if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);}
- , 5);
- std::cout<< "ocl::CartToPolar -- " << clock() - start << "ms\n";
- }
-};
-
-struct PolarToCartP : ArithmTestP
-{
- cv::ocl::oclMat oclRes1;
- virtual void Run()
- {
- type = CV_64FC4;
- SetUp();
- clock_t start = clock();
- R_TEST(
- cv::ocl::polarToCart(oclmat1,oclmat2,oclRes, oclRes1, 1);
- if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);}
- , 2);
- std::cout<< "ocl::polarToCart -- " << clock() - start << "ms\n";
- }
-};
-
-///////////////////////////////////////
-// split & merge
-struct SplitP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::split(oclmat1, dev_dst),
- {
- dstv.resize(dev_dst.size());
- for (size_t i = 0; i < dev_dst.size(); ++i)
- {
- dev_dst[i].download(dstv[i]);
- }
- }
- );
- }
- protected:
- std::vector<cv::ocl::oclMat> dev_dst;
- virtual void SetUp()
- {
- size = cv::Size( 3000, 3000 );
-
- mat1.create(size, type);
- mat1.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0));
-
- oclmat1 = cv::ocl::oclMat(mat1);
- }
-};
-
-struct MergeP : SplitP
-{
- virtual void Run()
- {
- SetUp();
- cv::ocl::split(oclmat1, dev_dst);
- cv::split(mat1, dstv);
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- cv::ocl::merge(dev_dst, oclmat2),
- oclmat2.download(dst)
- );
- }
-};
-
-struct SetToP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- static cv::Scalar s = cv::Scalar(1, 2, 3, 4);
- P_TEST_FULL(
- oclmat2 = cv::ocl::oclMat(mat2),
- oclmat1.setTo( s, oclmat2 ),
- oclmat1.download(dst);
- );
- }
- protected:
- virtual void SetUp()
- {
- type = CV_32FC4;
- size = cv::Size(3000, 3000);
-
- mat1.create(size, type);
- oclmat1.create(size, type);
-
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
- mat2 = cvtest::randomMat(rng, size, CV_8UC1, 0.0, 1.5, false);
- oclmat2 = cv::ocl::oclMat(mat2);
- }
-};
-
-struct CopyToP : SetToP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- oclmat1.copyTo( oclRes, oclmat2 ),
- oclRes.download(dst)
- );
- }
-};
-
-struct ConvertToP : ArithmTestP
-{
- virtual void Run()
- {
- type = CV_32FC1;;
- SetUp();
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
- const double a = rng.uniform(0.0, 1.0);
- const double b = rng.uniform(-10.0, 10.0);
-
- int type2 = CV_32FC4;
-
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat(mat1),
- oclmat1.convertTo( oclRes, type2 /*, a, b */ ), // fails when scaling factors a and b are specified
- oclRes.download(dst)
- );
- }
-};
-
-////////////////////////////////////////////
-// Filters
-
-struct FilterTestP : PerfTest
-{
- protected:
- int ksize;
- int dx, dy;
-
- cv::Mat img_rgba;
- cv::Mat img_gray;
-
- cv::ocl::oclMat ocl_img_rgba;
- cv::ocl::oclMat ocl_img_gray;
-
- cv::ocl::oclMat dev_dst_rgba;
- cv::ocl::oclMat dev_dst_gray;
-
- cv::Mat dst_rgba;
- cv::Mat dst_gray;
-
- cv::Mat kernel;
-
- int bordertype;
-
- virtual void SetUp()
- {
- bordertype = (int)cv::BORDER_DEFAULT;
- ksize = 7;
- dx = ksize/2; dy = ksize/2;
-
- kernel = cv::Mat::ones(ksize, ksize, CV_8U);
-
- cv::Mat img = readImage(FILTER_TEST_IMAGE);
- ASSERT_FALSE(img.empty());
-
- cv::cvtColor(img, img_rgba, CV_BGR2BGRA);
- cv::cvtColor(img, img_gray, CV_BGR2GRAY);
-
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- }
-};
-
-struct BlurP : FilterTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- {
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::blur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype);
- cv::ocl::blur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype);
- },
- {
- dev_dst_rgba.download(dst_rgba);
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-struct SobelP : FilterTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- {
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::Sobel(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, ksize, 1, 0, bordertype);
- cv::ocl::Sobel(ocl_img_gray, dev_dst_gray, -1, dx, dy, ksize, 1, 0, bordertype);
- },
- {
- dev_dst_rgba.download(dst_rgba);
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-struct ScharrP : FilterTestP
-{
- virtual void Run()
- {
- SetUp();
- dx = 0; dy = 1;
- P_TEST_FULL(
- {
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::Scharr(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, 1, 0, bordertype);
- cv::ocl::Scharr(ocl_img_gray, dev_dst_gray, -1, dx, dy, 1, 0, bordertype);
- },
- {
- dev_dst_rgba.download(dst_rgba);
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-struct GaussianBlurP : FilterTestP
-{
- virtual void Run()
- {
- double sigma1 = 3, sigma2 = 3;
- SetUp();
- P_TEST_FULL(
- {
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::GaussianBlur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), sigma1, sigma2);
- cv::ocl::GaussianBlur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), sigma1, sigma2);
- },
- {
- dev_dst_rgba.download(dst_rgba);
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-struct DilateP : FilterTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- {
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::dilate(ocl_img_rgba, dev_dst_rgba, kernel);
- cv::ocl::dilate(ocl_img_gray, dev_dst_gray, kernel);
- },
- {
- dev_dst_rgba.download(dst_rgba);
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-struct ErodeP : FilterTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- {
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::erode(ocl_img_rgba, dev_dst_rgba, kernel);
- cv::ocl::erode(ocl_img_gray, dev_dst_gray, kernel);
- },
- {
- dev_dst_rgba.download(dst_rgba);
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-struct MorphExP : FilterTestP
-{
- virtual void Run()
- {
- SetUp();
- cv::ocl::oclMat okernel;
- P_TEST_FULL(
- {
- okernel = cv::ocl::oclMat(kernel);
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::morphologyEx(ocl_img_rgba, dev_dst_rgba, 3, okernel);
- cv::ocl::morphologyEx(ocl_img_gray, dev_dst_gray, 3, okernel);
- },
- {
- dev_dst_rgba.download(dst_rgba);
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-struct LaplacianP : FilterTestP
-{
- void Run()
- {
- SetUp();
- P_TEST_FULL(
- {
- ocl_img_rgba = cv::ocl::oclMat(img_rgba);
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::Laplacian(ocl_img_rgba, dev_dst_rgba, -1, 3 );
- cv::ocl::Laplacian(ocl_img_gray, dev_dst_gray, -1, 3 );
- },
- {
- dev_dst_rgba.download(dst_rgba);
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-////////////////////
-// histograms
-struct CalcHistP : PerfTest
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat = cv::ocl::oclMat( src ),
- cv::ocl::calcHist(oclmat, oclRes),
- oclRes.download(hist)
- );
- }
- protected:
- cv::Size size;
- cv::Mat src, hist;
-
- cv::ocl::oclMat oclmat;
- cv::ocl::oclMat oclRes;
-
- virtual void SetUp()
- {
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
- size = cv::Size(3000, 3000);
- src = cvtest::randomMat(rng, size, CV_8UC1, 0, 255, false);
- oclmat = cv::ocl::oclMat( src );
- }
-};
-
-struct EqualizeHistP : CalcHistP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat = cv::ocl::oclMat( src ),
- cv::ocl::equalizeHist(oclmat, oclRes),
- oclRes.download(hist)
- );
- }
-};
-
-struct ThresholdP : CalcHistP
-{
- virtual void Run()
- {
- SetUp();
- int threshOp = (int)cv::THRESH_TOZERO_INV;;
- double maxVal = 200;
- double thresh = 125;
-
- clock_t start = clock();
-
- P_TEST_FULL(
- oclmat = cv::ocl::oclMat( src ),
- cv::ocl::threshold(oclmat, oclRes, thresh, maxVal, threshOp ),
- oclRes.download(hist)
- );
- }
-};
-
-struct ResizeP : ArithmTestP
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat( mat1 ),
- cv::ocl::resize(oclmat1, oclRes, cv::Size(), 2.0, 2.0),
- oclRes.download(dst)
- );
- }
-};
-
-struct CvtColorP : PerfTest
-{
- virtual void Run()
- {
- SetUp();
- P_TEST_FULL(
- oclmat = cv::ocl::oclMat( img ),
- cv::ocl::cvtColor(oclmat, ocldst, cvtcode),
- ocldst.download(dst)
- );
- }
- protected:
- int type;
- int cvtcode;
-
- cv::Mat img, dst;
- cv::ocl::oclMat oclmat, ocldst;
- virtual void SetUp()
- {
- type = CV_8U;
- cvtcode = CV_BGR2GRAY;
- cv::Mat imgBase = readImage(FILTER_TEST_IMAGE);
- ASSERT_FALSE(imgBase.empty());
-
- imgBase.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
- oclmat = cv::ocl::oclMat( img );
- };
-};
-
-
-struct WarpAffineP : ArithmTestP
-{
- void Run()
- {
- SetUp();
- const double aplha = CV_PI / 4;
- double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), mat1.cols / 2},
- {std::sin(aplha), std::cos(aplha), 0}};
- cv::Mat M(2, 3, CV_64F, (void*) mat);
-
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat( mat1 ),
- cv::ocl::warpAffine( oclmat1, oclRes, M, cv::Size(1500, 1500) ),
- oclRes.download(dst)
- );
- }
-};
-
-struct WarpPerspectiveP : ArithmTestP
-{
- void Run()
- {
- SetUp();
- const double aplha = CV_PI / 4;
- double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), mat1.cols / 2},
- {std::sin(aplha), std::cos(aplha), 0},
- {0.0, 0.0, 1.0}};
- cv::Mat M(3, 3, CV_64F, (void*) mat);
-
- P_TEST_FULL(
- oclmat1 = cv::ocl::oclMat( mat1 ),
- cv::ocl::warpPerspective( oclmat1, oclRes, M, cv::Size(1500, 1500) ),
- oclRes.download(dst)
- );
- }
-};
-
-
-struct CornerHarrisP : FilterTestP
-{
- void Run()
- {
- SetUp();
- bordertype = 2;
- P_TEST_FULL(
- {
- ocl_img_gray = cv::ocl::oclMat(img_gray);
- },
- {
- cv::ocl::cornerHarris(ocl_img_gray, dev_dst_gray, 3, ksize, 0.5, bordertype );
- },
- {
- dev_dst_gray.download(dst_gray);
- });
- }
-};
-
-void test()
-{
- clock_t start = clock();
- std::cout << ">>>>>>>> Performance test started <<<<<<<<\n";
- /*
- {
- AddArrayP AddArrayP;
- AddArrayP.Run();
- SubtractArrayP subarray;
- subarray.Run();
- MultiplyArrayP MultiplyArrayP;
- MultiplyArrayP.Run();
- DivideArrayP DivideArrayP;
- DivideArrayP.Run();
- }
- std::cout.flush();
- {
- CompareP comp;
- comp.Run();
- MagnitudeP magnitude;
- magnitude.Run();
- LUTP lut;
- lut.Run();
- FlipP FlipP;
- FlipP.Run();
- MinMaxP minmax;
- minmax.Run();
- MinMaxLocP minmaxloc;
- minmaxloc.Run();
- CountNonZeroP cnz;
- cnz.Run();
- SumP sum;
- sum.Run();
- }*/
- /* std::cout.flush();
- {
- BitwiseNotP bn;
- bn.Run();
- BitwiseOrP bo;
- bo.Run();
- BitwiseAndP ba;
- ba.Run();
- BitwiseXorP bx;
- bx.Run();
- }*/
-
- std::cout.flush();
- {
- // TransposeP transpose;
- // transpose.Run();
- // AbsdiffArrayP absdiff;
- // absdiff.Run();
- // SplitP split;
- // split.Run();
- // MergeP merge;
- // merge.Run();
- /*
- SetToP setto;
- setto.Run();
- CopyToP copyto;
- copyto.Run();
- ConvertToP convertto;
- convertto.Run();
- */
- }
- /*
- std::cout.flush();
- {
- BlurP blur;
- blur.Run();
- SobelP sobel;
- sobel.Run();
- ScharrP scharr;
- scharr.Run();
- GaussianBlurP gblur;
- gblur.Run();
- DilateP dilate;
- dilate.Run();
- ErodeP erode;
- erode.Run();
- }
- std::cout.flush();
- {
- MorphExP morphex;
- morphex.Run();
- CalcHistP calchist;
- calchist.Run();
- EqualizeHistP eqhist;
- eqhist.Run();
- ThresholdP threshold;
- threshold.Run();
- ResizeP resize;
- resize.Run();
- CvtColorP cvtcolor;
- cvtcolor.Run();
- }
-
- {
- LogP log;
- log.Run();
- ExpP exp;
- exp.Run();
- }
-
- std::cout.flush();
- {
- //PhaseP phase;
- //phase.Run();
- }
- std::cout.flush();
- {
- CartToPolarP ctop;
- ctop.Run();
- }
- std::cout.flush();
- {
- PolarToCartP ptoc;
- ptoc.Run();
- }
- {
- WarpAffineP warpA;
- warpA.Run();
- WarpPerspectiveP warpP;
- warpP.Run();
- }
-
- {
- CornerHarrisP ch;
- ch.Run();
- }
-
- {
- LaplacianP laplacian;
- laplacian.Run();
- }
-
-
- */
- std::cout << ">>>>>>>> Performance test ended <<<<<<<<\ntotal - " << clock() - start << "ms\n";
- std::cout.flush();
-}
-
-void run_perf_test()
-{
- print_info();
- cvtest::TS::ptr()->init("ocl");
- test();
-}
-
-#endif // WITH_OPENCL
-
-#endif // PREF_TEST_OCL
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::ocl::oclMat gdst1_whole; //bak
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums>0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
// src2x = rng.uniform( 0,mat2.cols - 256);
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
cv::RNG& rng = TS::ptr()->get_rng();
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
cv::RNG& rng = TS::ptr()->get_rng();
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat1 = mat1_roi;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat1 = mat1_roi;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat1 = mat1_roi;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat1 = mat1_roi;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat1 = mat1_roi;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat1 = mat1_roi;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::ocl::oclMat gdst1_whole; //bak
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums>0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
}
void Has_roi(int b)
}
int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE};
- //const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
+ const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
int cmp_num = sizeof(cmp_codes) / sizeof(int);
for (int i = 0; i < cmp_num; ++i)
{
double t0=0;
double t1=0;
double t2=0;
- for(int k=1;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
totalgputick_kernel=t2+totalgputick_kernel;
}
+ cout<<cmp_str[i] <<endl;
if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
double p=4.5;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
double alpha=2.0,beta=1.0,gama=3.0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
#else
- //for(int j = 0; j < 2; j ++)
+ //for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
// {
double alpha=2.0,beta=1.0,gama=3.0;
cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::ocl::oclMat gdst1_whole; //bak
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
mat = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
dst = randomMat(rng, size, type, 5, 16, false);
// rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
kernel = randomMat(rng, Size(3,3), CV_8UC1, 0, 3, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false)));
-INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(1, 2), Values(0, 1), Values(3, 5, 7), Values((MatType)cv::BORDER_CONSTANT,
- (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_32FC1),
+ Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
+ (MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1),
- Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+ Values(CV_8UC1, CV_32FC1), Values(0, 1), Values(0, 1),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
- Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+ Values(CV_8UC1, CV_32FC1),
+ Values(cv::Size(3, 3), cv::Size(5, 5)),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
#endif // HAVE_OPENCL
PARAM_TEST_CASE(HaarTestBase, int, int)
{
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
cv::CascadeClassifier cpucascade, cpunestedCascade;
// Mat img;
return;
}
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums>0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath("E:\\");
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath("E:\\");
}
};
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl mat
cv::ocl::oclMat clmat1;
cv::ocl::oclMat clmat2;
cv::RNG& rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
double min = 1,max = 20;
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums>0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
if(type1!=nulltype)
{
mat1 = randomMat(rng, size, type1, min, max, false);
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
if(type1!=nulltype)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
if(type1!=nulltype)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
if(type1!=nulltype)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
int blockSize = 7, apertureSize= 1 + 2 * (rand() % 4);
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
double kk = 2;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
if(type1!=nulltype)
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, dsize, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
double maxVal = randomDouble(20.0, 127.0);
cv::ocl::oclMat gdst;
cv::ocl::oclMat gdstCoor;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl mat with roi
cv::ocl::oclMat gsrc_roi;
cv::ocl::oclMat gdst_roi;
dst = randomMat(rng, size, type, 5, 16, false);
dstCoor = randomMat(rng, size, typeCoor, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
// NULL_TYPE,
// Values(false))); // Values(false) is the reserved parameter
-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
-// Values(CV_8UC1,CV_32FC1),
-// NULL_TYPE,
-// ONE_TYPE(CV_32FC1),
-// NULL_TYPE,
-// NULL_TYPE,
-// Values(false))); // Values(false) is the reserved parameter
-//
-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
-// Values(CV_8UC1,CV_32FC1),
-// NULL_TYPE,
-// ONE_TYPE(CV_32FC1),
-// NULL_TYPE,
-// NULL_TYPE,
-// Values(false))); // Values(false) is the reserved parameter
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
+ Values(CV_8UC1,CV_32FC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
+ Values(CV_8UC1,CV_32FC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
//src mat with roi
cv::Mat mat_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
cv::Mat mat_roi;
cv::Mat mask_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gdst_whole = dst;
//src mat with roi
cv::Mat mat_roi;
cv::Mat mask_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gmat_whole;
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat_whole = mat;
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat_whole = mat;
//dst mat with roi
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
gmat1 = mat1_roi;
cv::Mat dst2_roi;
cv::Mat dst3_roi;
cv::Mat dst4_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst1_whole;
cv::ocl::oclMat gdst2_whole;
dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- setBinpath(CLBINPATH);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
}
void Has_roi(int b)
double t0=0;
double t1=0;
double t2=0;
- for(int k=0;k<2;k++){
+ for(int k=LOOPROISTART;k<LOOPROIEND;k++){
totalcputick=0;
totalgputick=0;
totalgputick_kernel=0;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
#else
#define LOOP_TIMES 1
#endif
-#define MWIDTH 2557
-#define MHEIGHT 2579
+#define MWIDTH 256
+#define MHEIGHT 256
#define CLBINPATH ".\\"
+#define LOOPROISTART 0
+#define LOOPROIEND 1
int randomInt(int minVal, int maxVal);
double randomDouble(double minVal, double maxVal);
{
if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src.clCxt -> impl -> double_support ==0)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F)
{
- CV_Error(-217,"select device don't support double");
+ CV_Error(CV_GpuNotSupported,"select device don't support double");
}
static sumFunc functab[2] =
{
CV_Assert(src.channels() == 1);
if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F)
{
- CV_Error(-217,"select device don't support double");
+ CV_Error(CV_GpuNotSupported,"select device don't support double");
}
static minMaxFunc functab[8] =
{
{
if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
Context *clCxt = src.clCxt;
if(clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
//int channels = dst.channels();
{
if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
{
if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F)
{
- CV_Error(-217,"select device don't support double");
+ CV_Error(CV_GpuNotSupported,"select device don't support double");
}
static minMaxLocFunc functab[2] =
{
size_t groupnum = src.clCxt->impl->maxComputeUnits;
if(src.clCxt->impl->double_support == 0 && src.depth()==CV_64F)
{
- CV_Error(-217,"select device don't support double");
+ CV_Error(CV_GpuNotSupported,"select device don't support double");
}
CV_Assert(groupnum != 0);
groupnum = groupnum * 2;
{
if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float4");
break;
default:
- CV_Error(-217,"unsupported type");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported type");
}
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1],s);
sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float4");
break;
default:
- CV_Error(-217,"unsupported type");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported type");
}
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1],s);
{
if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
cl_mem nodebuffer;
cl_mem candidatebuffer;
cl_mem scaleinfobuffer;
- cl_kernel kernel;
- kernel = openCLGetKernelFromSource(gimg.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade");
+ //cl_kernel kernel;
+ //kernel = openCLGetKernelFromSource(gimg.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade");
cv::Rect roi, roi2;
cv::Mat imgroi, imgroisq;
cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
//openCLVerifyCall(status);
//openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,cascadebuffer,1,0,sizeof(GpuHidHaarClassifierCascade),gcascade,0,NULL,NULL));
- stagebuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count, NULL, &status);
- openCLVerifyCall(status);
+ stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
+ //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
//classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status);
//status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL);
- nodebuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_ONLY,
- nodenum * sizeof(GpuHidHaarTreeNode), NULL, &status);
- openCLVerifyCall(status);
+ nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,nodenum * sizeof(GpuHidHaarTreeNode));
+ //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL));
- candidatebuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz, NULL, &status);
- openCLVerifyCall(status);
- scaleinfobuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount, NULL, &status);
- openCLVerifyCall(status);
+ candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz);
+ //openCLVerifyCall(status);
+ scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
+ //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
//flag = 1;
//}
int argcount = 0;
//int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
//int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
- openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
+ // openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_mem),(void*)&cascadebuffer));
+
+ vector<pair<size_t,const void *> > args;
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&pixelstep ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
+ args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
+ args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
+ args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
+ /*
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n));
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline));
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp));
- openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
+ // openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
- openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
- //t = (double)cvGetTickCount() - t;
+ // openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
+ //t = (double)cvGetTickCount() - t;
//printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
//t = (double)cvGetTickCount();
- openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL));
-
+ //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL));
+ openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
for(int i = 0; i < outputsz; i++)
if(candidate[4*i+2] != 0)
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
openCLSafeCall(clReleaseMemObject(nodebuffer));
openCLSafeCall(clReleaseMemObject(candidatebuffer));
- openCLSafeCall(clReleaseKernel(kernel));
+ // openCLSafeCall(clReleaseKernel(kernel));
//t = (double)cvGetTickCount() - t;
//printf( "release time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
}
int outputsz = 256 * globalThreads[0] / localThreads[0];
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
- nodebuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_ONLY,
- nodenum * sizeof(GpuHidHaarTreeNode), NULL, &status);
- openCLVerifyCall(status);
+ nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
+ nodenum * sizeof(GpuHidHaarTreeNode));
+ //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL));
- cl_mem newnodebuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_WRITE,
- loopcount * nodenum * sizeof(GpuHidHaarTreeNode), NULL, &status);
+ cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
+ loopcount * nodenum * sizeof(GpuHidHaarTreeNode));
int startstage = 0;
int endstage = gcascade->count;
- cl_kernel kernel;
- kernel = openCLGetKernelFromSource(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2");
- cl_kernel kernel2 = openCLGetKernelFromSource(gimg.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier");
+ //cl_kernel kernel;
+ //kernel = openCLGetKernelFromSource(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2");
+ //cl_kernel kernel2 = openCLGetKernelFromSource(gimg.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier");
for(int i = 0; i < loopcount; i++)
{
sz = sizev[i];
int startnodenum = nodenum * i;
int argcounts = 0;
float factor2 = (float)factor;
+ /*
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i]));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum));
- size_t globalThreads2[1] = {nodenum};
- clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel2, 1, NULL, globalThreads2, 0, 0, NULL, NULL);
- clFinish(gsum.clCxt->impl->clCmdQueue);
+ */
+
+ vector<pair<size_t,const void *> > args1;
+ args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
+ args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
+ args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
+ args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
+ args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
+
+ // One work-item per tree node. nodenum is an int, so convert it explicitly:
+ // an implicit int -> size_t conversion inside a brace initializer is a
+ // narrowing conversion and is rejected by C++11 (and later) compilers.
+ size_t globalThreads2[3] = {static_cast<size_t>(nodenum),1,1};
+ // Kept for the commented-out alternative below; the launch currently passes NULL.
+ size_t localThreads2[3] = {256,1,1};
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
+
+ //clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel2, 1, NULL, globalThreads2, 0, 0, NULL, NULL);
+ //clFinish(gsum.clCxt->impl->clCmdQueue);
}
- clReleaseKernel(kernel2);
+ //clReleaseKernel(kernel2);
int step = gsum.step / 4;
int startnode = 0;
int splitstage = 3;
int splitnode = stage[0].count + stage[1].count + stage[2].count;
- stagebuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count, NULL, &status);
- openCLVerifyCall(status);
+ stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
+ //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
- candidatebuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz, NULL, &status);
- openCLVerifyCall(status);
- scaleinfobuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount, NULL, &status);
- openCLVerifyCall(status);
+ candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz);
+ //openCLVerifyCall(status);
+ scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
+ //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
- pbuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount, NULL, &status);
+ pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
- correctionbuffer = clCreateBuffer(gsum.clCxt->impl->clContext, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount, NULL, &status);
+ correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
- int argcount = 0;
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
+ //int argcount = 0;
+ /*openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&newnodebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&pbuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));
- openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
-
- openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/
+
+ vector<pair<size_t,const void *> > args;
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
+ args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
+ args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
+
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
+ //openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
+ //openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
break;
}
default:
- CV_Error(-217, "Unsupported source type");
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported source type");
}
}
CV_Assert(src.type() == CV_8UC1);
if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F)
{
- CV_Error(-217,"select device don't support double");
+ CV_Error(CV_GpuNotSupported,"select device don't support double");
}
int vlen = 4;
int offset = src.offset / vlen;
{
if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F)
{
- CV_Error(-217,"select device don't support double");
+ CV_Error(CV_GpuNotSupported,"select device don't support double");
}
oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
{
if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F)
{
- CV_Error(-217,"select device don't support double");
+ CV_Error(CV_GpuNotSupported,"select device don't support double");
}
oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
_devicetype = CL_DEVICE_TYPE_ALL;
break;
default:
- CV_Error(-217,"Unkown device type");
+ CV_Error(CV_GpuApiCallError,"Unkown device type");
}
int devcienums = 0;
// Platform info
char **binaries = (char **)malloc( sizeof(char *) * numDevices );
if(binaries == NULL)
{
- CV_Error(-217,"Failed to allocate host memory.(binaries)\r\n");
+ CV_Error(CV_StsNoMem,"Failed to allocate host memory.(binaries)\r\n");
}
for(i = 0; i < numDevices; i++)
binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]);
if(binaries[i] == NULL)
{
- CV_Error(-217,"Failed to allocate host memory.(binaries[i])\r\n");
+ CV_Error(CV_StsNoMem,"Failed to allocate host memory.(binaries[i])\r\n");
}
}
else
{
- char *temp;
- sprintf(temp, "Failed to load kernel file : %s\r\n", fileName);
- CV_Error(-217, temp);
+ // The message needs real storage: formatting through an uninitialized
+ // pointer is undefined behavior. The %.900s precision bounds how many
+ // characters of fileName are copied, so the text always fits in temp.
+ char temp[1024];
+ sprintf(temp, "Failed to load kernel file : %.900s\r\n", fileName);
+ CV_Error(CV_GpuApiCallError, temp);
}
else
{
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
-
- globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
- globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
- globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
-
- size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
- cv::ocl::openCLVerifyKernel(clCxt, kernel, &blockSize, globalThreads, localThreads);
-
+
+ if ( localThreads != NULL)
+ {
+ globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
+ globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
+ globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
+
+ size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
+ cv::ocl::openCLVerifyKernel(clCxt, kernel, &blockSize, globalThreads, localThreads);
+ }
for(int i = 0; i < args.size(); i ++)
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
//
//
//#pragma OPENCL EXTENSION cl_amd_printf : enable
+#define WORKGROUPSIZE 256
__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
int dstStep_in_piexl,int pixel_end)
{
int id = get_global_id(0);
+
+ //read data from source
//int pixel_end = mul24(cols -1 , rows -1);
int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2));
pixelid = clamp(pixelid,0,pixel_end);
outpix2 = (GENTYPE4)(pixel1.z,pixel1.w,pixel2.x,0);
outpix3 = (GENTYPE4)(pixel2.y,pixel2.z,pixel2.w,0);
- int4 outy = (id<<2)/cols;
- int4 outx = (id<<2)%cols;
- outx.y++;
- outx.z+=2;
- outx.w+=3;
- outy = select(outy,outy+1,outx>=cols);
- outx = select(outx,outx-cols,outx>=cols);
- //outpix3 = select(outpix3, outpix0, (uchar4)(outy.w>=rows));
- //outpix2 = select(outpix2, outpix0, (uchar4)(outy.z>=rows));
- //outpix1 = select(outpix1, outpix0, (uchar4)(outy.y>=rows));
- //outx = select(outx,(int4)outx.x,outy>=rows);
- //outy = select(outy,(int4)outy.x,outy>=rows);
+ //permute the data in LDS to avoid global memory conflicts
+ __local GENTYPE4 rearrange[WORKGROUPSIZE*4];
+ int lid = get_local_id(0)<<2;
+
+ rearrange[lid++] = outpix0;
+ rearrange[lid++] = outpix1;
+ rearrange[lid++] = outpix2;
+ rearrange[lid] = outpix3;
+
+ lid = get_local_id(0);
+ barrier(CLK_LOCAL_MEM_FENCE);
+ outpix0 = rearrange[lid];
+ lid+=WORKGROUPSIZE;
+ outpix1 = rearrange[lid];
+ lid+=WORKGROUPSIZE;
+ outpix2 = rearrange[lid];
+ lid+=WORKGROUPSIZE;
+ outpix3 = rearrange[lid];
+
+ //calculate output index
+ int4 outx, outy;
+ int4 startid = mad24((int)get_group_id(0),WORKGROUPSIZE*4,(int)get_local_id(0));
+ startid.y+=WORKGROUPSIZE;
+ startid.z+=WORKGROUPSIZE*2;
+ startid.w+=WORKGROUPSIZE*3;
+ outx = startid%(int4)cols;
+ outy = startid/(int4)cols;
+
+
int4 addr = mad24(outy,dstStep_in_piexl,outx);
if(outx.w<cols && outy.w<rows)
{
x4 = select(x4,x4-(int4)cols,x4>=(int4)cols);
int4 addr = mad24(y4,(int4)srcStep_in_pixel,x4);
GENTYPE4 pixel0,pixel1,pixel2,pixel3, outpixel1, outpixel2;
+ //read data from src
pixel0 = src[addr.x];
pixel1 = src[addr.y];
pixel2 = src[addr.z];
outpixel2.y = pixel3.x;
outpixel2.z = pixel3.y;
outpixel2.w = pixel3.z;
- int4 outaddr = mul24(id>>2 , 3);
- outaddr.y++;
- outaddr.z+=2;
- //printf("%d ",outaddr.z);
- if(outaddr.z <= pixel_end)
+
+ //permute the data in LDS to avoid global memory conflicts
+ __local GENTYPE4 rearrange[WORKGROUPSIZE*3];
+ int lid = mul24((int)get_local_id(0),3);
+ rearrange[lid++] = pixel0;
+ rearrange[lid++] = outpixel1;
+ rearrange[lid] = outpixel2;
+ barrier(CLK_LOCAL_MEM_FENCE);
+ lid = get_local_id(0);
+ pixel0 = rearrange[lid];
+ lid+=WORKGROUPSIZE;
+ outpixel1 = rearrange[lid];
+ lid+=WORKGROUPSIZE;
+ outpixel2 = rearrange[lid];
+
+ //calculate output index
+ int3 startid = mad24((int)get_group_id(0),WORKGROUPSIZE*3,(int)get_local_id(0));
+ startid.y+=WORKGROUPSIZE;
+ startid.z+=WORKGROUPSIZE*2;
+ //id = mul24(id>>2 , 3);
+
+ if(startid.z <= pixel_end)
{
- dst[outaddr.x] = pixel0;
- dst[outaddr.y] = outpixel1;
- dst[outaddr.z] = outpixel2;
+ dst[startid.x] = pixel0;
+ dst[startid.y] = outpixel1;
+ dst[startid.z] = outpixel2;
}
- else if(outaddr.y <= pixel_end)
+ else if(startid.y <= pixel_end)
{
- dst[outaddr.x] = pixel0;
- dst[outaddr.y] = outpixel1;
+ dst[startid.x] = pixel0;
+ dst[startid.y] = outpixel1;
}
- else if(outaddr.x <= pixel_end)
+ else if(startid.x <= pixel_end)
{
- dst[outaddr.x] = pixel0;
- }
+ dst[startid.x] = pixel0;
+ }
}
src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid]) : 0);
src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid + 1]) : 0);
- sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
- sqsum_t[0] = (i == 0 ? 0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+ sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
+ sqsum_t[0] = (i == 0 ? 0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? 0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
}
offset <<= 1;
}
+ barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
}
}
- if(lid > 0 & (i+lid) <= rows){
- int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
+ barrier(CLK_LOCAL_MEM_FENCE);
+ int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
+ if(lid > 0 && (i+lid) <= rows){
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0];
}
offset <<= 1;
}
+ barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
}
}
-
+ barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows)
{
sum[sum_offset + i + lid] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step / 4] = 0;
}
}
-
- if(lid > 0 & (i+lid) <= rows){
- int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
- int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
+ int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
+ int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
+ if(lid > 0 && (i+lid) <= rows){
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0];
}
offset <<= 1;
}
+ barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
}
}
- if(lid > 0 & (i+lid) <= rows){
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if(lid > 0 && (i+lid) <= rows){
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
}
offset <<= 1;
}
+ barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
}
}
-
+ barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows)
{
sum[sum_offset + i + lid] = 0;
}
}
- if(lid > 0 & (i+lid) <= rows){
+ if(lid > 0 && (i+lid) <= rows){
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
- lm_sum[0][bf_loc] += sum_t[0];
- lm_sum[1][bf_loc] += sum_t[1];
+ lm_sum[0][bf_loc] += sum_t[0];
+ lm_sum[1][bf_loc] += sum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 8 + k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
- }
+ }
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
sprintf(compile_option, "-D GENTYPE4=double4");
break;
default:
- CV_Error(-217,"unknown depth");
+ CV_Error(CV_StsUnsupportedFormat,"unknown depth");
}
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src));
sprintf(compile_option, "-D GENTYPE4=double4");
break;
default:
- CV_Error(-217,"unknown depth");
+ CV_Error(CV_StsUnsupportedFormat,"unknown depth");
}
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 1:
args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 2:
args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 3:
args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 4:
args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 5:
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 6:
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
default:
- CV_Error(-217,"unknown depth");
+ CV_Error(CV_StsUnsupportedFormat,"unknown depth");
}
+#if CL_VERSION_1_2
+ if(dst.offset==0 && dst.cols==dst.wholecols)
+ {
+ clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue,(cl_mem)dst.data,args[0].second,args[0].first,0,dst.step*dst.rows,0,NULL,NULL);
+ }
+ else
+ {
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
+ openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
+ localThreads, args, -1, -1,compile_option);
+ }
+#else
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
localThreads, args, -1, -1,compile_option);
+#endif
}
void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName)
args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 1:
args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 2:
args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 3:
args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 4:
args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 5:
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
case 6:
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
break;
default:
- CV_Error(-217,"unsupported channels");
+ CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
}
break;
default:
- CV_Error(-217,"unknown depth");
+ CV_Error(CV_StsUnsupportedFormat,"unknown depth");
}
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
{
- if(mat_dst.clCxt -> impl -> double_support ==0 && mat_dst.type() == CV_64F)
+ // Reject double-precision data on devices without double support.
+ // The test is on depth(), matching the other double-support guards in this
+ // change: type() also carries the channel count, so comparing it with
+ // CV_64F would only ever match single-channel matrices.
+ if(mat_dst.clCxt -> impl -> double_support ==0 && mat_dst.depth() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
- if(mat_src.clCxt -> impl -> double_support ==0 && mat_src.type() == CV_64F)
+ // Reject double-precision data on devices without double support.
+ // The test is on depth(), matching the other double-support guards in this
+ // change: type() also carries the channel count, so comparing it with
+ // CV_64F would only ever match single-channel matrices.
+ if(mat_src.clCxt -> impl -> double_support ==0 && mat_src.depth() == CV_64F)
{
- CV_Error(-217,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
return;
}
}
-#if PERF_TEST_OCL
-int main(int argc, char **argv)
-{
- run_perf_test();
- return 0;
-}
-#else
int main(int argc, char **argv)
{
TS::ptr()->init("ocl");
print_info();
std::vector<cv::ocl::Info> oclinfo;
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- if(devnums<1){
+ int devnums = getDevice(oclinfo);
+ if(devnums<1)
+ {
std::cout << "no device found\n";
return -1;
}
return RUN_ALL_TESTS();
}
-#endif // PERF_TEST_OCL
-#else // HAVE_OPENC
+#else // DON'T HAVE_OPENCL
int main()
{
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::ocl::oclMat gdst1_whole; //bak
cv::Size size(MWIDTH, MHEIGHT);
mat1 = randomMat(rng, size, type, 5, 16, false);
+ //mat2 = randomMat(rng, size, type, 5, 16, false);
mat2 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
dst1 = randomMat(rng, size, type, 5, 16, false);
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/)
{
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
cv::Size size;
int type;
bool useRoi;
type = GET_PARAM(1);
/*useRoi = GET_PARAM(3);*/
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
}
};
bool useL2gradient;
cv::Mat edges_gold;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
}
};
cv::Size size;
cv::Mat src;
bool useRoi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
size = GET_PARAM(0);
useRoi = GET_PARAM(1);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
}
};
{
cv::Size dft_size;
bool dft_rows;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
+ //int devnums = getDevice(oclinfo);
+ // CV_Assert(devnums > 0);
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
}
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::ocl::oclMat gdst1_whole; //bak
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
//src mat with roi
cv::Mat mat_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
// rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
kernel = randomMat(rng, Size(3, 3), CV_8UC1, 0, 3, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(1, 2), Values(0, 1), Values(3, 5, 7), Values((MatType)cv::BORDER_CONSTANT,
- (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+ Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
+ (MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1),
- Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
- Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+ Values(cv::Size(3, 3), cv::Size(5, 5)),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
int type;
cv::Size mat_size;
int flags;
- vector<cv::ocl::Info> info;
+ //vector<cv::ocl::Info> info;
virtual void SetUp()
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
- cv::ocl::getDevice(info);
+ //cv::ocl::getDevice(info);
}
};
PARAM_TEST_CASE(HaarTestBase, int, int)
{
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
cv::CascadeClassifier cpucascade, cpunestedCascade;
// Mat img;
return;
}
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- cv::ocl::setBinpath("E:\\");
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath("E:\\");
}
};
{
cv::Size winSize;
int type;
- vector<cv::ocl::Info> info;
+ //vector<cv::ocl::Info> info;
virtual void SetUp()
{
winSize = GET_PARAM(0);
type = GET_PARAM(1);
- cv::ocl::getDevice(info);
+ //cv::ocl::getDevice(info);
}
};
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl mat
cv::ocl::oclMat clmat1;
cv::ocl::oclMat clmat2;
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size(MWIDTH, MHEIGHT);
double min = 1, max = 20;
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
if(type1 != nulltype)
{
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
cv::Mat map1;
cv::Mat map2;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
int src_roicols;
int src_roirows;
bordertype = GET_PARAM(4);
// borderValue = GET_PARAM(6);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
cv::RNG& rng = TS::ptr()->get_rng();
//cv::Size size = cv::Size(20, 20);
int dstx;
int dsty;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, dsize, type, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat1 = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
cv::ocl::oclMat gdst;
cv::ocl::oclMat gdstCoor;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl mat with roi
cv::ocl::oclMat gsrc_roi;
cv::ocl::oclMat gdst_roi;
dst = randomMat(rng, size, type, 5, 16, false);
dstCoor = randomMat(rng, size, typeCoor, 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
// NULL_TYPE,
// Values(false))); // Values(false) is the reserved parameter
//
-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
-// Values(CV_8UC1,CV_32FC1),
-// NULL_TYPE,
-// ONE_TYPE(CV_32FC1),
-// NULL_TYPE,
-// NULL_TYPE,
-// Values(false))); // Values(false) is the reserved parameter
-//
-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
-// Values(CV_8UC1,CV_32FC1),
-// NULL_TYPE,
-// ONE_TYPE(CV_32FC1),
-// NULL_TYPE,
-// NULL_TYPE,
-// Values(false))); // Values(false) is the reserved parameter
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
+ Values(CV_8UC1,CV_32FC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
+ Values(CV_8UC1,CV_32FC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
cv::Size templ_size;
int cn;
int method;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
templ_size = GET_PARAM(1);
cn = GET_PARAM(2);
method = GET_PARAM(3);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
}
};
cv::Size templ_size;
int cn;
int method;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
templ_size = GET_PARAM(1);
cn = GET_PARAM(2);
method = GET_PARAM(3);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
}
};
//src mat with roi
cv::Mat mat_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat = randomMat(rng, size, type, 5, 16, false);
dst = randomMat(rng, size, type, 5, 16, false);
//std::vector<cv::ocl::Info> oclinfo;
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
cv::Mat mat_roi;
cv::Mat mask_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
//src mat with roi
cv::Mat mat_roi;
cv::Mat mask_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gmat_whole;
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
//src mat with roi
cv::Mat mat1_roi;
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
//dst = randomMat(rng, size, type, 5, 16, false);
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[1]);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[1]);
}
void random_roi()
cv::Mat mask_roi;
cv::Mat dst_roi;
cv::Mat dst1_roi; //bak
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
cv::ocl::oclMat gdst1_whole; //bak
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- int devnums = getDevice(oclinfo);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
}
void Cleanup()
{
cv::Size size;
int type;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
- int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
+ //int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
size = GET_PARAM(0);
type = GET_PARAM(1);
}
//dst mat with roi
cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst_whole;
mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()
cv::Mat dst2_roi;
cv::Mat dst3_roi;
cv::Mat dst4_roi;
- std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
cv::ocl::oclMat gdst1_whole;
cv::ocl::oclMat gdst2_whole;
dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ ////if you want to use a non-default device, set it here
+ ////setDevice(oclinfo[0]);
}
void random_roi()