From 5f20fce6fddf46d5f1665632015c444aebc7c570 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 17 May 2013 13:18:46 +0800 Subject: [PATCH] add accuracy tests while running perf --- modules/ocl/CMakeLists.txt | 2 +- modules/ocl/perf/perf_arithm.cpp | 430 +++++++++++++----------- modules/ocl/perf/perf_blend.cpp | 8 +- modules/ocl/perf/perf_brute_force_matcher.cpp | 18 +- modules/ocl/perf/perf_canny.cpp | 6 +- modules/ocl/perf/perf_color.cpp | 8 +- modules/ocl/perf/perf_columnsum.cpp | 15 +- modules/ocl/perf/perf_fft.cpp | 10 +- modules/ocl/perf/perf_filters.cpp | 46 ++- modules/ocl/perf/perf_gemm.cpp | 5 +- modules/ocl/perf/perf_haar.cpp | 17 +- modules/ocl/perf/perf_hog.cpp | 80 ++++- modules/ocl/perf/perf_imgproc.cpp | 293 ++++++++++++++--- modules/ocl/perf/perf_match_template.cpp | 9 +- modules/ocl/perf/perf_matrix_operation.cpp | 19 +- modules/ocl/perf/perf_norm.cpp | 6 +- modules/ocl/perf/perf_pyrdown.cpp | 7 +- modules/ocl/perf/perf_pyrlk.cpp | 10 +- modules/ocl/perf/perf_pyrup.cpp | 6 +- modules/ocl/perf/perf_split_merge.cpp | 24 +- modules/ocl/perf/precomp.cpp | 449 +++++++++++++++++++++----- modules/ocl/perf/precomp.hpp | 88 ++++- 22 files changed, 1184 insertions(+), 372 deletions(-) diff --git a/modules/ocl/CMakeLists.txt b/modules/ocl/CMakeLists.txt index a7cd3a0..05b28b8 100644 --- a/modules/ocl/CMakeLists.txt +++ b/modules/ocl/CMakeLists.txt @@ -3,5 +3,5 @@ if(NOT HAVE_OPENCL) endif() set(the_description "OpenCL-accelerated Computer Vision") -ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video) +ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow) diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index e6e9576..e69fecd 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, 
fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// Lut //////////////////////// -TEST(lut) +PERFTEST(lut) { Mat src, lut, dst; ocl::oclMat d_src, d_lut, d_dst; @@ -61,7 +62,7 @@ TEST(lut) gen(src, size, size, all_type[j], 0, 256); gen(lut, 1, 256, CV_8UC1, 0, 1); - gen(dst, size, size, all_type[j], 0, 256); + dst = src; LUT(src, lut, dst); @@ -76,9 +77,13 @@ TEST(lut) ocl::LUT(d_src, d_lut, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0)); + GPU_ON; ocl::LUT(d_src, d_lut, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -94,7 +99,7 @@ TEST(lut) } ///////////// Exp //////////////////////// -TEST(Exp) +PERFTEST(Exp) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -103,8 +108,7 @@ TEST(Exp) { SUBTEST << size << 'x' << size << "; CV_32FC1"; - gen(src, size, size, CV_32FC1, 0, 256); - gen(dst, size, size, CV_32FC1, 0, 256); + gen(src, size, size, CV_32FC1, 5, 16); exp(src, dst); @@ -117,9 +121,13 @@ TEST(Exp) ocl::exp(d_src, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 2)); + GPU_ON; ocl::exp(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -131,7 +139,7 @@ TEST(Exp) } ///////////// LOG //////////////////////// -TEST(Log) +PERFTEST(Log) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -153,9 +161,13 @@ TEST(Log) ocl::log(d_src, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1)); + GPU_ON; ocl::log(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -167,7 +179,7 @@ TEST(Log) } ///////////// Add //////////////////////// -TEST(Add) +PERFTEST(Add) { Mat src1, src2, dst; ocl::oclMat 
d_src1, d_src2, d_dst; @@ -196,9 +208,13 @@ TEST(Add) ocl::add(d_src1, d_src2, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); + GPU_ON; ocl::add(d_src1, d_src2, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -213,7 +229,7 @@ TEST(Add) } ///////////// Mul //////////////////////// -TEST(Mul) +PERFTEST(Mul) { Mat src1, src2, dst; ocl::oclMat d_src1, d_src2, d_dst; @@ -229,8 +245,8 @@ TEST(Mul) gen(src1, size, size, all_type[j], 0, 256); gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - + dst = src1; + dst.setTo(0); multiply(src1, src2, dst); @@ -244,9 +260,13 @@ TEST(Mul) ocl::multiply(d_src1, d_src2, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); + GPU_ON; ocl::multiply(d_src1, d_src2, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -261,7 +281,7 @@ TEST(Mul) } ///////////// Div //////////////////////// -TEST(Div) +PERFTEST(Div) { Mat src1, src2, dst; ocl::oclMat d_src1, d_src2, d_dst; @@ -276,8 +296,8 @@ TEST(Div) gen(src1, size, size, all_type[j], 0, 256); gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - + dst = src1; + dst.setTo(0); divide(src1, src2, dst); @@ -291,9 +311,13 @@ TEST(Div) ocl::divide(d_src1, d_src2, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1)); + GPU_ON; ocl::divide(d_src1, d_src2, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -308,7 +332,7 @@ TEST(Div) } ///////////// Absdiff //////////////////////// -TEST(Absdiff) +PERFTEST(Absdiff) { Mat src1, src2, dst; ocl::oclMat d_src1, d_src2, d_dst; @@ -326,7 +350,6 @@ TEST(Absdiff) gen(src2, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - absdiff(src1, src2, dst); CPU_ON; @@ -339,9 +362,13 @@ TEST(Absdiff) 
ocl::absdiff(d_src1, d_src2, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); + GPU_ON; ocl::absdiff(d_src1, d_src2, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -356,7 +383,7 @@ TEST(Absdiff) } ///////////// CartToPolar //////////////////////// -TEST(CartToPolar) +PERFTEST(CartToPolar) { Mat src1, src2, dst, dst1; ocl::oclMat d_src1, d_src2, d_dst, d_dst1; @@ -388,9 +415,16 @@ TEST(CartToPolar) ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + cv::Mat ocl_mat_dst1; + d_dst1.download(ocl_mat_dst1); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst1, dst1, 0.5)&&ExpectedMatNear(ocl_mat_dst, dst, 0.5)); + GPU_ON; ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); - ; GPU_OFF; GPU_FULL_ON; @@ -406,7 +440,7 @@ TEST(CartToPolar) } ///////////// PolarToCart //////////////////////// -TEST(PolarToCart) +PERFTEST(PolarToCart) { Mat src1, src2, dst, dst1; ocl::oclMat d_src1, d_src2, d_dst, d_dst1; @@ -438,9 +472,16 @@ TEST(PolarToCart) ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + cv::Mat ocl_mat_dst1; + d_dst1.download(ocl_mat_dst1); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst1, dst1, 0.5)&&ExpectedMatNear(ocl_mat_dst, dst, 0.5)); + GPU_ON; ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); - ; GPU_OFF; GPU_FULL_ON; @@ -456,7 +497,7 @@ TEST(PolarToCart) } ///////////// Magnitude //////////////////////// -TEST(magnitude) +PERFTEST(magnitude) { Mat x, y, mag; ocl::oclMat d_x, d_y, d_mag; @@ -485,9 +526,13 @@ TEST(magnitude) ocl::magnitude(d_x, d_y, d_mag); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_mag.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, mag, 1e-5)); + GPU_ON; ocl::magnitude(d_x, d_y, d_mag); - ; GPU_OFF; GPU_FULL_ON; @@ -502,7 +547,7 @@ 
TEST(magnitude) } ///////////// Transpose //////////////////////// -TEST(Transpose) +PERFTEST(Transpose) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -530,9 +575,13 @@ TEST(Transpose) ocl::transpose(d_src, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5)); + GPU_ON; ocl::transpose(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -546,7 +595,7 @@ TEST(Transpose) } ///////////// Flip //////////////////////// -TEST(Flip) +PERFTEST(Flip) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -574,9 +623,13 @@ TEST(Flip) ocl::flip(d_src, d_dst, 0); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5)); + GPU_ON; ocl::flip(d_src, d_dst, 0); - ; GPU_OFF; GPU_FULL_ON; @@ -590,12 +643,13 @@ TEST(Flip) } ///////////// minMax //////////////////////// -TEST(minMax) +PERFTEST(minMax) { Mat src; ocl::oclMat d_src; - double min_val, max_val; + double min_val = 0.0, max_val = 0.0; + double min_val_ = 0.0, max_val_ = 0.0; Point min_loc, max_loc; int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; @@ -614,12 +668,13 @@ TEST(minMax) d_src.upload(src); WARMUP_ON; - ocl::minMax(d_src, &min_val, &max_val); + ocl::minMax(d_src, &min_val_, &max_val_); WARMUP_OFF; + TestSystem::instance().setAccurate(EeceptDoubleEQ(max_val_, max_val)&&EeceptDoubleEQ(min_val_, min_val)); + GPU_ON; ocl::minMax(d_src, &min_val, &max_val); - ; GPU_OFF; GPU_FULL_ON; @@ -633,13 +688,15 @@ TEST(minMax) } ///////////// minMaxLoc //////////////////////// -TEST(minMaxLoc) +PERFTEST(minMaxLoc) { Mat src; ocl::oclMat d_src; - double min_val, max_val; + double min_val = 0.0, max_val = 0.0; + double min_val_ = 0.0, max_val_ = 0.0; Point min_loc, max_loc; + Point min_loc_, max_loc_; int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; @@ -657,12 +714,83 @@ TEST(minMaxLoc) 
d_src.upload(src); WARMUP_ON; - ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); + ocl::minMaxLoc(d_src, &min_val_, &max_val_, &min_loc_, &max_loc_); WARMUP_OFF; + double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.; + if(src.depth() == 0) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); + error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); + } + if(src.depth() == 1) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); + error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); + } + if(src.depth() == 2) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); + error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); + } + if(src.depth() == 3) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); + error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); + } + if(src.depth() == 4) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); + error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); + } + if(src.depth() == 5) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); + error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); + } + if(src.depth() == 6) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = 
src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); + error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); + } + + TestSystem::instance().setAccurate(EeceptDoubleEQ(error1, 0.0) + &&EeceptDoubleEQ(error0, 0.0) + &&EeceptDoubleEQ(maxlocVal_, maxlocVal) + &&EeceptDoubleEQ(minlocVal_, minlocVal) + &&EeceptDoubleEQ(max_val_, max_val) + &&EeceptDoubleEQ(min_val_, min_val)); + GPU_ON; ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - ; GPU_OFF; GPU_FULL_ON; @@ -675,7 +803,7 @@ TEST(minMaxLoc) } ///////////// Sum //////////////////////// -TEST(Sum) +PERFTEST(Sum) { Mat src; Scalar cpures, gpures; @@ -690,7 +818,7 @@ TEST(Sum) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 0, 256); + gen(src, size, size, all_type[j], 0, 60); cpures = sum(src); @@ -703,9 +831,14 @@ TEST(Sum) gpures = ocl::sum(d_src); WARMUP_OFF; + TestSystem::instance().setAccurate(ExceptDoubleNear(cpures[3], gpures[3], 0.1) + &&ExceptDoubleNear(cpures[2], gpures[2], 0.1) + &&ExceptDoubleNear(cpures[1], gpures[1], 0.1) + &&ExceptDoubleNear(cpures[0], gpures[0], 0.1)); + + GPU_ON; gpures = ocl::sum(d_src); - ; GPU_OFF; GPU_FULL_ON; @@ -718,7 +851,7 @@ TEST(Sum) } ///////////// countNonZero //////////////////////// -TEST(countNonZero) +PERFTEST(countNonZero) { Mat src; ocl::oclMat d_src; @@ -736,18 +869,20 @@ TEST(countNonZero) countNonZero(src); + int cpures = 0, gpures = 0; CPU_ON; - countNonZero(src); + cpures = countNonZero(src); CPU_OFF; d_src.upload(src); WARMUP_ON; - ocl::countNonZero(d_src); + gpures = ocl::countNonZero(d_src); WARMUP_OFF; + TestSystem::instance().setAccurate((EeceptDoubleEQ((double)cpures, (double)gpures))); + GPU_ON; ocl::countNonZero(d_src); - ; GPU_OFF; GPU_FULL_ON; @@ -760,7 +895,7 @@ TEST(countNonZero) } ///////////// Phase //////////////////////// -TEST(Phase) +PERFTEST(Phase) { Mat src1, src2, dst; ocl::oclMat d_src1, d_src2, d_dst; @@ -791,9 +926,13 @@ TEST(Phase) 
ocl::phase(d_src1, d_src2, d_dst, 1); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-2)); + GPU_ON; ocl::phase(d_src1, d_src2, d_dst, 1); - ; GPU_OFF; GPU_FULL_ON; @@ -808,7 +947,7 @@ TEST(Phase) } ///////////// bitwise_and//////////////////////// -TEST(bitwise_and) +PERFTEST(bitwise_and) { Mat src1, src2, dst; ocl::oclMat d_src1, d_src2, d_dst; @@ -839,111 +978,19 @@ TEST(bitwise_and) ocl::bitwise_and(d_src1, d_src2, d_dst); WARMUP_OFF; - GPU_ON; - ocl::bitwise_and(d_src1, d_src2, d_dst); - ; - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_and(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; - } - - } -} - -///////////// bitwise_or//////////////////////// -TEST(bitwise_or) -{ - Mat src1, src2, dst; - ocl::oclMat d_src1, d_src2, d_dst; - - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = Min_Size; size <= Max_Size; size *= Multiple) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_or(src1, src2, dst); - - CPU_ON; - bitwise_or(src1, src2, dst); - CPU_OFF; - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_or(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_or(d_src1, d_src2, d_dst); - ; - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_or(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; - } - - } -} - -///////////// bitwise_xor//////////////////////// -TEST(bitwise_xor) -{ - Mat src1, src2, dst; - ocl::oclMat d_src1, d_src2, d_dst; - - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int 
size = Min_Size; size <= Max_Size; size *= Multiple) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_xor(src1, src2, dst); + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); - CPU_ON; - bitwise_xor(src1, src2, dst); - CPU_OFF; - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_xor(d_src1, d_src2, d_dst); - WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); GPU_ON; - ocl::bitwise_xor(d_src1, d_src2, d_dst); - ; + ocl::bitwise_and(d_src1, d_src2, d_dst); GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); - ocl::bitwise_xor(d_src1, d_src2, d_dst); + ocl::bitwise_and(d_src1, d_src2, d_dst); d_dst.download(dst); GPU_FULL_OFF; } @@ -952,7 +999,7 @@ TEST(bitwise_xor) } ///////////// bitwise_not//////////////////////// -TEST(bitwise_not) +PERFTEST(bitwise_not) { Mat src1, dst; ocl::oclMat d_src1, d_dst; @@ -981,9 +1028,13 @@ TEST(bitwise_not) ocl::bitwise_not(d_src1, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); + GPU_ON; ocl::bitwise_not(d_src1, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -997,7 +1048,7 @@ TEST(bitwise_not) } ///////////// compare//////////////////////// -TEST(compare) +PERFTEST(compare) { Mat src1, src2, dst; ocl::oclMat d_src1, d_src2, d_dst; @@ -1029,9 +1080,13 @@ TEST(compare) ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); + GPU_ON; ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - ; GPU_OFF; GPU_FULL_ON; @@ -1046,7 +1101,7 @@ TEST(compare) } ///////////// pow //////////////////////// -TEST(pow) 
+PERFTEST(pow) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -1060,8 +1115,7 @@ TEST(pow) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 0, 100); - gen(dst, size, size, all_type[j], 0, 100); + gen(src, size, size, all_type[j], 5, 16); pow(src, -2.0, dst); @@ -1075,9 +1129,13 @@ TEST(pow) ocl::pow(d_src, -2.0, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1.0)); + GPU_ON; ocl::pow(d_src, -2.0, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -1091,7 +1149,7 @@ TEST(pow) } ///////////// MagnitudeSqr//////////////////////// -TEST(MagnitudeSqr) +PERFTEST(MagnitudeSqr) { Mat src1, src2, dst; ocl::oclMat d_src1, d_src2, d_dst; @@ -1121,44 +1179,48 @@ TEST(MagnitudeSqr) } - CPU_ON; + CPU_ON; - for (int i = 0; i < src1.rows; ++i) - for (int j = 0; j < src1.cols; ++j) - { - float val1 = src1.at(i, j); - float val2 = src2.at(i, j); + for (int i = 0; i < src1.rows; ++i) + for (int j = 0; j < src1.cols; ++j) + { + float val1 = src1.at(i, j); + float val2 = src2.at(i, j); - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; + ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; - } + } - CPU_OFF; - d_src1.upload(src1); - d_src2.upload(src2); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); - WARMUP_ON; - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - WARMUP_OFF; + WARMUP_ON; + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + WARMUP_OFF; - GPU_ON; - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - ; - GPU_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1.0)); + + GPU_ON; + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); 
+ ocl::magnitudeSqr(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } } } ///////////// AddWeighted//////////////////////// -TEST(AddWeighted) +PERFTEST(AddWeighted) { Mat src1, src2, dst; ocl::oclMat d_src1, d_src2, d_dst; @@ -1190,9 +1252,13 @@ TEST(AddWeighted) ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); WARMUP_OFF; + cv::Mat ocl_mat_dst; + d_dst.download(ocl_mat_dst); + + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5)); + GPU_ON; ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp index 0003470..6dda464 100644 --- a/modules/ocl/perf/perf_blend.cpp +++ b/modules/ocl/perf/perf_blend.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -68,7 +69,7 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we } } } -TEST(blend) +PERFTEST(blend) { Mat src1, src2, weights1, weights2, dst; ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst; @@ -102,9 +103,12 @@ TEST(blend) ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); WARMUP_OFF; + cv::Mat ocl_mat; + d_dst.download(ocl_mat); + TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 1.f)); + GPU_ON; ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp index 6562f91..ba87bd8 100644 --- a/modules/ocl/perf/perf_brute_force_matcher.cpp +++ b/modules/ocl/perf/perf_brute_force_matcher.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in 
source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" //////////////////// BruteForceMatch ///////////////// -TEST(BruteForceMatcher) +PERFTEST(BruteForceMatcher) { Mat trainIdx_cpu; Mat distance_cpu; @@ -66,6 +67,7 @@ TEST(BruteForceMatcher) gen(train, size, desc_len, CV_32F, 0, 1); // Output vector< vector > matches(2); + vector< vector > d_matches(2); // Init GPU matcher ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist); @@ -86,9 +88,11 @@ TEST(BruteForceMatcher) d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); WARMUP_OFF; + d_matcher.match(d_query, d_train, d_matches[0]); + TestSystem::instance().setAccurate(AssertEQ(d_matches[0].size(), matches[0].size())); + GPU_ON; d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); - ; GPU_OFF; GPU_FULL_ON; @@ -111,15 +115,16 @@ TEST(BruteForceMatcher) GPU_ON; d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); - ; GPU_OFF; GPU_FULL_ON; d_query.upload(query); d_train.upload(train); - d_matcher.knnMatch(d_query, d_train, matches, 2); + d_matcher.knnMatch(d_query, d_train, d_matches, 2); GPU_FULL_OFF; + TestSystem::instance().setAccurate(AssertEQ(d_matches[0].size(), matches[0].size())); + SUBTEST << size << "; radiusMatch"; float max_distance = 2.0f; @@ -138,13 +143,14 @@ TEST(BruteForceMatcher) GPU_ON; d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); - ; GPU_OFF; GPU_FULL_ON; d_query.upload(query); d_train.upload(train); - d_matcher.radiusMatch(d_query, d_train, matches, max_distance); + d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance); GPU_FULL_OFF; + + TestSystem::instance().setAccurate(AssertEQ(d_matches[0].size(), matches[0].size())); } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_canny.cpp b/modules/ocl/perf/perf_canny.cpp index 
428e036..2acb2f6 100644 --- a/modules/ocl/perf/perf_canny.cpp +++ b/modules/ocl/perf/perf_canny.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// Canny //////////////////////// -TEST(Canny) +PERFTEST(Canny) { Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); @@ -70,9 +71,10 @@ TEST(Canny) ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); WARMUP_OFF; + TestSystem::instance().setAccurate(ExceptedMatSimilar(edges, d_edges, 2e-2)); + GPU_ON; ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp index e32a183..3ebd32e 100644 --- a/modules/ocl/perf/perf_color.cpp +++ b/modules/ocl/perf/perf_color.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// cvtColor//////////////////////// -TEST(cvtColor) +PERFTEST(cvtColor) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -72,9 +73,12 @@ TEST(cvtColor) ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); WARMUP_OFF; + cv::Mat ocl_mat; + d_dst.download(ocl_mat); + TestSystem::instance().setAccurate(ExceptedMatSimilar(dst, ocl_mat, 1e-5)); + GPU_ON; ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_columnsum.cpp index d2e3b45..a07af17 100644 --- a/modules/ocl/perf/perf_columnsum.cpp +++ b/modules/ocl/perf/perf_columnsum.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, 
fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// columnSum//////////////////////// -TEST(columnSum) +PERFTEST(columnSum) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -58,12 +59,13 @@ TEST(columnSum) CPU_ON; dst.create(src.size(), src.type()); + for (int j = 0; j < src.cols; j++) + dst.at(0, j) = src.at(0, j); for (int i = 1; i < src.rows; ++i) - { - for (int j = 0; j < src.cols; ++j) + {for (int j = 0; j < src.cols; ++j) { - dst.at(i, j) = src.at(i, j) += src.at(i - 1, j); + dst.at(i, j) = dst.at(i - 1 , j) + src.at(i , j); } } @@ -74,9 +76,12 @@ TEST(columnSum) ocl::columnSum(d_src, d_dst); WARMUP_OFF; + cv::Mat ocl_mat; + d_dst.download(ocl_mat); + TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 5e-1)); + GPU_ON; ocl::columnSum(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp index 50be254..49c8882 100644 --- a/modules/ocl/perf/perf_fft.cpp +++ b/modules/ocl/perf/perf_fft.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,13 +46,13 @@ #include "precomp.hpp" ///////////// dft //////////////////////// -TEST(dft) +PERFTEST(dft) { Mat src, dst; ocl::oclMat d_src, d_dst; - int all_type[] = {CV_32FC1, CV_32FC2}; - std::string type_name[] = {"CV_32FC1", "CV_32FC2"}; + int all_type[] = {CV_32FC2}; + std::string type_name[] = {"CV_32FC2"}; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { @@ -73,9 +74,10 @@ TEST(dft) ocl::dft(d_src, d_dst, Size(size, size)); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, 
cv::Mat(d_dst), src.size().area() * 1e-4)); + GPU_ON; ocl::dft(d_src, d_dst, Size(size, size)); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index e9646c7..c1cf19e 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// Blur//////////////////////// -TEST(Blur) +PERFTEST(Blur) { Mat src1, dst; ocl::oclMat d_src1, d_dst; @@ -77,9 +78,10 @@ TEST(Blur) ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0)); + GPU_ON; ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - ; GPU_OFF; GPU_FULL_ON; @@ -92,7 +94,7 @@ TEST(Blur) } } ///////////// Laplacian//////////////////////// -TEST(Laplacian) +PERFTEST(Laplacian) { Mat src1, dst; ocl::oclMat d_src1, d_dst; @@ -123,9 +125,10 @@ TEST(Laplacian) ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5)); + GPU_ON; ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - ; GPU_OFF; GPU_FULL_ON; @@ -139,7 +142,7 @@ TEST(Laplacian) } ///////////// Erode //////////////////// -TEST(Erode) +PERFTEST(Erode) { Mat src, dst, ker; ocl::oclMat d_src, d_dst; @@ -168,9 +171,10 @@ TEST(Erode) ocl::erode(d_src, d_dst, ker); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5)); + GPU_ON; ocl::erode(d_src, d_dst, ker); - ; GPU_OFF; GPU_FULL_ON; @@ -184,7 +188,7 @@ TEST(Erode) } ///////////// Sobel //////////////////////// -TEST(Sobel) +PERFTEST(Sobel) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -214,9 +218,10 @@ 
TEST(Sobel) ocl::Sobel(d_src, d_dst, -1, dx, dy); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1)); + GPU_ON; ocl::Sobel(d_src, d_dst, -1, dx, dy); - ; GPU_OFF; GPU_FULL_ON; @@ -229,7 +234,7 @@ TEST(Sobel) } } ///////////// Scharr //////////////////////// -TEST(Scharr) +PERFTEST(Scharr) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -259,9 +264,10 @@ TEST(Scharr) ocl::Scharr(d_src, d_dst, -1, dx, dy); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1)); + GPU_ON; ocl::Scharr(d_src, d_dst, -1, dx, dy); - ; GPU_OFF; GPU_FULL_ON; @@ -275,7 +281,7 @@ TEST(Scharr) } ///////////// GaussianBlur //////////////////////// -TEST(GaussianBlur) +PERFTEST(GaussianBlur) { Mat src, dst; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; @@ -288,6 +294,8 @@ TEST(GaussianBlur) SUBTEST << size << 'x' << size << "; " << type_name[j] ; gen(src, size, size, all_type[j], 0, 256); + dst = src; + dst.setTo(0); GaussianBlur(src, dst, Size(9, 9), 0); @@ -303,9 +311,11 @@ TEST(GaussianBlur) ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0)); + + GPU_ON; ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - ; GPU_OFF; GPU_FULL_ON; @@ -319,7 +329,7 @@ TEST(GaussianBlur) } ///////////// filter2D//////////////////////// -TEST(filter2D) +PERFTEST(filter2D) { Mat src; @@ -339,7 +349,8 @@ TEST(filter2D) Mat kernel; gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); - Mat dst; + Mat dst(src); + dst.setTo(0); cv::filter2D(src, dst, -1, kernel); CPU_ON; @@ -347,15 +358,18 @@ TEST(filter2D) CPU_OFF; ocl::oclMat d_src(src); - ocl::oclMat d_dst; + ocl::oclMat d_dst(d_src); + d_dst.setTo(0); WARMUP_ON; ocl::filter2D(d_src, d_dst, -1, kernel); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5)); + + GPU_ON; ocl::filter2D(d_src, d_dst, -1, kernel); - ; GPU_OFF; GPU_FULL_ON; diff --git 
a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp index 930ecb0..280a039 100644 --- a/modules/ocl/perf/perf_gemm.cpp +++ b/modules/ocl/perf/perf_gemm.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// gemm //////////////////////// -TEST(gemm) +PERFTEST(gemm) { Mat src1, src2, src3, dst; ocl::oclMat d_src1, d_src2, d_src3, d_dst; @@ -71,10 +72,10 @@ TEST(gemm) WARMUP_ON; ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, src1.cols * src1.rows * 1e-4)); GPU_ON; ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp index 5a909ac..792ead1 100644 --- a/modules/ocl/perf/perf_haar.cpp +++ b/modules/ocl/perf/perf_haar.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -82,7 +83,7 @@ public: } } -TEST(Haar) +PERFTEST(Haar) { Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE); @@ -106,6 +107,8 @@ TEST(Haar) 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); CPU_OFF; + + vector oclfaces; ocl::CascadeClassifier_GPU faceCascade; if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml"))) @@ -115,24 +118,24 @@ TEST(Haar) ocl::oclMat d_img(img); - faces.clear(); - WARMUP_ON; - faceCascade.detectMultiScale(d_img, faces, + faceCascade.detectMultiScale(d_img, oclfaces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); WARMUP_OFF; + //Testing whether the expected is 
equal to the actual. + TestSystem::instance().setAccurate(ExpectedEQ::size_type, vector::size_type>(faces.size(), oclfaces.size())); + faces.clear(); GPU_ON; - faceCascade.detectMultiScale(d_img, faces, + faceCascade.detectMultiScale(d_img, oclfaces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - ; GPU_OFF; GPU_FULL_ON; d_img.upload(img); - faceCascade.detectMultiScale(d_img, faces, + faceCascade.detectMultiScale(d_img, oclfaces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); GPU_FULL_OFF; } \ No newline at end of file diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index b74077f..c425ef4 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,13 @@ #include "precomp.hpp" ///////////// HOG//////////////////////// -TEST(HOG) +bool match_rect(cv::Rect r1, cv::Rect r2, int threshold) +{ + return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) && + (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold)); +} + +PERFTEST(HOG) { Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE); @@ -58,6 +65,7 @@ TEST(HOG) cv::HOGDescriptor hog; hog.setSVMDetector(hog.getDefaultPeopleDetector()); std::vector found_locations; + std::vector d_found_locations; SUBTEST << 768 << 'x' << 576 << "; road.png"; @@ -73,12 +81,78 @@ TEST(HOG) d_src.upload(src); WARMUP_ON; - ocl_hog.detectMultiScale(d_src, found_locations); + ocl_hog.detectMultiScale(d_src, d_found_locations); WARMUP_OFF; + + // Ground-truth rectangular people window + cv::Rect win1_64x128(231, 190, 72, 144); + cv::Rect win2_64x128(621, 156, 97, 194); + cv::Rect win1_48x96(238, 198, 63, 126); + cv::Rect win2_48x96(619, 161, 92, 185); + cv::Rect 
win3_48x96(488, 136, 56, 112); + + // Compare whether ground-truth windows are detected and compare the number of windows detected. + std::vector d_comp(4); + std::vector comp(4); + for(int i = 0; i < (int)d_comp.size(); i++) + { + d_comp[i] = 0; + comp[i] = 0; + } + + int threshold = 10; + int val = 32; + d_comp[0] = (int)d_found_locations.size(); + comp[0] = (int)found_locations.size(); + + cv::Size winSize = hog.winSize; + + if (winSize == cv::Size(48, 96)) + { + for(int i = 0; i < (int)d_found_locations.size(); i++) + { + if (match_rect(d_found_locations[i], win1_48x96, threshold)) + d_comp[1] = val; + if (match_rect(d_found_locations[i], win2_48x96, threshold)) + d_comp[2] = val; + if (match_rect(d_found_locations[i], win3_48x96, threshold)) + d_comp[3] = val; + } + for(int i = 0; i < (int)found_locations.size(); i++) + { + if (match_rect(found_locations[i], win1_48x96, threshold)) + comp[1] = val; + if (match_rect(found_locations[i], win2_48x96, threshold)) + comp[2] = val; + if (match_rect(found_locations[i], win3_48x96, threshold)) + comp[3] = val; + } + } + else if (winSize == cv::Size(64, 128)) + { + for(int i = 0; i < (int)d_found_locations.size(); i++) + { + if (match_rect(d_found_locations[i], win1_64x128, threshold)) + d_comp[1] = val; + if (match_rect(d_found_locations[i], win2_64x128, threshold)) + d_comp[2] = val; + } + for(int i = 0; i < (int)found_locations.size(); i++) + { + if (match_rect(found_locations[i], win1_64x128, threshold)) + comp[1] = val; + if (match_rect(found_locations[i], win2_64x128, threshold)) + comp[2] = val; + } + } + + cv::Mat ocl_mat; + ocl_mat = cv::Mat(d_comp); + ocl_mat.convertTo(ocl_mat, cv::Mat(comp).type()); + TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat, cv::Mat(comp), 3)); GPU_ON; ocl_hog.detectMultiScale(d_src, found_locations); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 756f695..980d3be 100644 --- 
a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// equalizeHist //////////////////////// -TEST(equalizeHist) +PERFTEST(equalizeHist) { Mat src, dst; int all_type[] = {CV_8UC1}; @@ -74,9 +75,11 @@ TEST(equalizeHist) ocl::equalizeHist(d_src, d_dst); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.1)); + + GPU_ON; ocl::equalizeHist(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -89,7 +92,7 @@ TEST(equalizeHist) } } /////////// CopyMakeBorder ////////////////////// -TEST(CopyMakeBorder) +PERFTEST(CopyMakeBorder) { Mat src, dst; ocl::oclMat d_dst; @@ -119,9 +122,11 @@ TEST(CopyMakeBorder) ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0)); + + GPU_ON; ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - ; GPU_OFF; GPU_FULL_ON; @@ -134,7 +139,7 @@ TEST(CopyMakeBorder) } } ///////////// cornerMinEigenVal //////////////////////// -TEST(cornerMinEigenVal) +PERFTEST(cornerMinEigenVal) { Mat src, dst; ocl::oclMat d_dst; @@ -165,9 +170,11 @@ TEST(cornerMinEigenVal) ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + + GPU_ON; ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - ; GPU_OFF; GPU_FULL_ON; @@ -180,7 +187,7 @@ TEST(cornerMinEigenVal) } } ///////////// cornerHarris //////////////////////// -TEST(cornerHarris) +PERFTEST(cornerHarris) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -208,9 +215,10 @@ 
TEST(cornerHarris) ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + GPU_ON; ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - ; GPU_OFF; GPU_FULL_ON; @@ -224,7 +232,7 @@ TEST(cornerHarris) } } ///////////// integral //////////////////////// -TEST(integral) +PERFTEST(integral) { Mat src, sum; ocl::oclMat d_src, d_sum, d_buf; @@ -252,9 +260,14 @@ TEST(integral) ocl::integral(d_src, d_sum); WARMUP_OFF; + cv::Mat ocl_mat; + d_sum.download(ocl_mat); + if(sum.type() == ocl_mat.type()) //we won't test accuracy when cpu function overlow + TestSystem::instance().setAccurate(ExpectedMatNear(sum, ocl_mat, 0.0)); + + GPU_ON; ocl::integral(d_src, d_sum); - ; GPU_OFF; GPU_FULL_ON; @@ -267,15 +280,15 @@ TEST(integral) } } ///////////// WarpAffine //////////////////////// -TEST(WarpAffine) +PERFTEST(WarpAffine) { Mat src, dst; ocl::oclMat d_src, d_dst; static const double coeffs[2][3] = { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0} + {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0}, + {sin(CV_PI / 6), cos(CV_PI / 6), -100.0} }; Mat M(2, 3, CV_64F, (void *)coeffs); int interpolation = INTER_NEAREST; @@ -306,9 +319,10 @@ TEST(WarpAffine) ocl::warpAffine(d_src, d_dst, M, size1, interpolation); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + GPU_ON; ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - ; GPU_OFF; GPU_FULL_ON; @@ -321,19 +335,19 @@ TEST(WarpAffine) } } ///////////// WarpPerspective //////////////////////// -TEST(WarpPerspective) +PERFTEST(WarpPerspective) { Mat src, dst; ocl::oclMat d_src, d_dst; static const double coeffs[3][3] = { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0}, + {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0}, + {sin(CV_PI / 6), cos(CV_PI / 6), -100.0}, {0.0, 0.0, 1.0} }; Mat M(3, 3, CV_64F, (void *)coeffs); - int 
interpolation = INTER_NEAREST; + int interpolation = INTER_LINEAR; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; @@ -360,9 +374,10 @@ TEST(WarpPerspective) ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + GPU_ON; ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - ; GPU_OFF; GPU_FULL_ON; @@ -376,7 +391,7 @@ TEST(WarpPerspective) } ///////////// resize //////////////////////// -TEST(resize) +PERFTEST(resize) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -405,9 +420,11 @@ TEST(resize) ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + + GPU_ON; ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - ; GPU_OFF; GPU_FULL_ON; @@ -439,9 +456,10 @@ TEST(resize) ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + GPU_ON; ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - ; GPU_OFF; GPU_FULL_ON; @@ -454,7 +472,7 @@ TEST(resize) } } ///////////// threshold//////////////////////// -TEST(threshold) +PERFTEST(threshold) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -478,9 +496,11 @@ TEST(threshold) ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + + GPU_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - ; GPU_OFF; GPU_FULL_ON; @@ -509,9 +529,10 @@ TEST(threshold) ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + GPU_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - ; GPU_OFF; GPU_FULL_ON; @@ -522,9 +543,189 @@ TEST(threshold) } } ///////////// meanShiftFiltering//////////////////////// -TEST(meanShiftFiltering) +COOR do_meanShift(int x0, int 
y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab) { - int sp = 10, sr = 10; + + int isr2 = sr * sr; + int c0, c1, c2, c3; + int iter; + uchar *ptr = NULL; + uchar *pstart = NULL; + int revx = 0, revy = 0; + c0 = sptr[0]; + c1 = sptr[1]; + c2 = sptr[2]; + c3 = sptr[3]; + // iterate meanshift procedure + for(iter = 0; iter < maxIter; iter++ ) + { + int count = 0; + int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; + + //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) + int minx = x0 - sp; + int miny = y0 - sp; + int maxx = x0 + sp; + int maxy = y0 + sp; + + //deal with the image boundary + if(minx < 0) minx = 0; + if(miny < 0) miny = 0; + if(maxx >= size.width) maxx = size.width - 1; + if(maxy >= size.height) maxy = size.height - 1; + if(iter == 0) + { + pstart = sptr; + } + else + { + pstart = pstart + revy * sstep + (revx << 2); //point to the new position + } + ptr = pstart; + ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row + + for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) + { + int rowCount = 0; + int x = minx; +#if CV_ENABLE_UNROLLED + for( ; x + 4 <= maxx; x += 4, ptr += 16) + { + int t0, t1, t2; + t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x; + rowCount++; + } + t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 1; + rowCount++; + } + t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 2; + rowCount++; + } + t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 3; + 
rowCount++; + } + } +#endif + for(; x <= maxx; x++, ptr += 4) + { + int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x; + rowCount++; + } + } + if(rowCount == 0) + continue; + count += rowCount; + sy += y * rowCount; + } + + if( count == 0 ) + break; + + int x1 = sx / count; + int y1 = sy / count; + s0 = s0 / count; + s1 = s1 / count; + s2 = s2 / count; + + bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + + tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); + + //revise the pointer corresponding to the new (y0,x0) + revx = x1 - x0; + revy = y1 - y0; + + x0 = x1; + y0 = y1; + c0 = s0; + c1 = s1; + c2 = s2; + + if( stopFlag ) + break; + } //for iter + + dptr[0] = (uchar)c0; + dptr[1] = (uchar)c1; + dptr[2] = (uchar)c2; + dptr[3] = (uchar)c3; + + COOR coor; + coor.x = static_cast(x0); + coor.y = static_cast(y0); + return coor; +} +void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit); +void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit) +{ + if( src_roi.empty() ) + CV_Error( CV_StsBadArg, "The input image is empty" ); + + if( src_roi.depth() != CV_8U || src_roi.channels() != 4 ) + CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); + + CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) ); + CV_Assert( !(dst_roi.step & 0x3) ); + + if( !(crit.type & cv::TermCriteria::MAX_ITER) ) + crit.maxCount = 5; + int maxIter = std::min(std::max(crit.maxCount, 1), 100); + float eps; + if( !(crit.type & cv::TermCriteria::EPS) ) + eps = 1.f; + eps = (float)std::max(crit.epsilon, 0.0); + + int tab[512]; + for(int i = 0; i < 512; i++) + tab[i] = (i - 255) * (i - 255); + uchar *sptr = src_roi.data; + uchar *dptr = dst_roi.data; + int sstep = (int)src_roi.step; + int dstep = 
(int)dst_roi.step; + cv::Size size = src_roi.size(); + + for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), + dptr += dstep - (size.width << 2)) + { + for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4) + { + do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); + } + } +} + +PERFTEST(meanShiftFiltering) +{ + int sp = 5, sr = 6; Mat src, dst; ocl::oclMat d_src, d_dst; @@ -533,25 +734,32 @@ TEST(meanShiftFiltering) { SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; - gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); + gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + //gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + dst = src; + dst.setTo(0); + + cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1); - pyrMeanShiftFiltering(src, dst, sp, sr); + meanShiftFiltering_(src, dst, sp, sr, crit); CPU_ON; - pyrMeanShiftFiltering(src, dst, sp, sr); + meanShiftFiltering_(src, dst, sp, sr, crit); CPU_OFF; - gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - d_src.upload(src); WARMUP_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); WARMUP_OFF; + cv::Mat ocl_mat; + d_dst.download(ocl_mat); + + TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 0.0)); + GPU_ON; ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - ; GPU_OFF; GPU_FULL_ON; @@ -562,6 +770,7 @@ TEST(meanShiftFiltering) } } ///////////// meanShiftProc//////////////////////// +#if 0 COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab) { @@ -740,6 +949,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size coor.y = static_cast(y0); return coor; } +#endif void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit) { @@ -798,7 +1008,7 @@ void 
meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, } } -TEST(meanShiftProc) +PERFTEST(meanShiftProc) { Mat src, dst, dstCoor_roi; ocl::oclMat d_src, d_dst, d_dstCoor_roi; @@ -825,9 +1035,11 @@ TEST(meanShiftProc) ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dstCoor_roi, cv::Mat(d_dstCoor_roi), 0.0) + &&ExpectedMatNear(dst, cv::Mat(d_dst), 0.0)); + GPU_ON; ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - ; GPU_OFF; GPU_FULL_ON; @@ -841,7 +1053,7 @@ TEST(meanShiftProc) } ///////////// remap//////////////////////// -TEST(remap) +PERFTEST(remap) { Mat src, dst, xmap, ymap; ocl::oclMat d_src, d_dst, d_xmap, d_ymap; @@ -892,9 +1104,14 @@ TEST(remap) ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); WARMUP_OFF; + if(interpolation == 0) + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); + else + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 2.0)); + + GPU_ON; ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp index 2828efe..f9f0f6a 100644 --- a/modules/ocl/perf/perf_match_template.cpp +++ b/modules/ocl/perf/perf_match_template.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -52,7 +53,7 @@ // ocl::oclMat d_src(src), d_templ(templ), d_dst; // ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); //} -TEST(matchTemplate) +PERFTEST(matchTemplate) { //InitMatchTemplate(); @@ -89,9 +90,10 @@ TEST(matchTemplate) ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); WARMUP_OFF; + 
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1)); + GPU_ON; ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - ; GPU_OFF; GPU_FULL_ON; @@ -129,9 +131,10 @@ TEST(matchTemplate) ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1)); + GPU_ON; ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp index 495b2b8..4b364b0 100644 --- a/modules/ocl/perf/perf_matrix_operation.cpp +++ b/modules/ocl/perf/perf_matrix_operation.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// ConvertTo//////////////////////// -TEST(ConvertTo) +PERFTEST(ConvertTo) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -76,9 +77,11 @@ TEST(ConvertTo) d_src.convertTo(d_dst, CV_32FC1); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0)); + + GPU_ON; d_src.convertTo(d_dst, CV_32FC1); - ; GPU_OFF; GPU_FULL_ON; @@ -91,7 +94,7 @@ TEST(ConvertTo) } } ///////////// copyTo//////////////////////// -TEST(copyTo) +PERFTEST(copyTo) { Mat src, dst; ocl::oclMat d_src, d_dst; @@ -122,9 +125,11 @@ TEST(copyTo) d_src.copyTo(d_dst); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0)); + + GPU_ON; d_src.copyTo(d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -137,7 +142,7 @@ TEST(copyTo) } } ///////////// setTo//////////////////////// -TEST(setTo) +PERFTEST(setTo) { Mat src, dst; Scalar val(1, 2, 3, 4); @@ -166,9 +171,11 @@ TEST(setTo) d_src.setTo(val); WARMUP_OFF; 
+ TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_src), 1.0)); + + GPU_ON; d_src.setTo(val); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp index 8b7118a..78ff001 100644 --- a/modules/ocl/perf/perf_norm.cpp +++ b/modules/ocl/perf/perf_norm.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// norm//////////////////////// -TEST(norm) +PERFTEST(norm) { Mat src, buf; ocl::oclMat d_src, d_buf; @@ -71,9 +72,10 @@ TEST(norm) ocl::norm(d_src, d_buf, NORM_INF); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_buf), .5)); + GPU_ON; ocl::norm(d_src, d_buf, NORM_INF); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_pyrdown.cpp b/modules/ocl/perf/perf_pyrdown.cpp index 1d1d2de..36d2e7e 100644 --- a/modules/ocl/perf/perf_pyrdown.cpp +++ b/modules/ocl/perf/perf_pyrdown.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// pyrDown ////////////////////// -TEST(pyrDown) +PERFTEST(pyrDown) { Mat src, dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -72,9 +73,11 @@ TEST(pyrDown) ocl::pyrDown(d_src, d_dst); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), dst.depth() == CV_32F ? 
1e-4f : 1.0f)); + + GPU_ON; ocl::pyrDown(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_pyrlk.cpp b/modules/ocl/perf/perf_pyrlk.cpp index f7fc22b..32bf145 100644 --- a/modules/ocl/perf/perf_pyrlk.cpp +++ b/modules/ocl/perf/perf_pyrlk.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// PyrLKOpticalFlow //////////////////////// -TEST(PyrLKOpticalFlow) +PERFTEST(PyrLKOpticalFlow) { std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"}; std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"}; @@ -115,9 +116,14 @@ TEST(PyrLKOpticalFlow) d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); WARMUP_OFF; + std::vector ocl_nextPts(d_nextPts.cols); + std::vector ocl_status(d_status.cols); + TestSystem::instance().setAccurate(AssertEQ(nextPts.size(), ocl_nextPts.size())); + TestSystem::instance().setAccurate(AssertEQ(status.size(), ocl_status.size())); + + GPU_ON; d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_pyrup.cpp b/modules/ocl/perf/perf_pyrup.cpp index d3b3003..3b2022e 100644 --- a/modules/ocl/perf/perf_pyrup.cpp +++ b/modules/ocl/perf/perf_pyrup.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// pyrUp //////////////////////// -TEST(pyrUp) +PERFTEST(pyrUp) { Mat src, dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -72,9 +73,10 @@ TEST(pyrUp) ocl::pyrUp(d_src, d_dst); WARMUP_OFF; + 
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), (src.depth() == CV_32F ? 1e-4f : 1.0))); + GPU_ON; ocl::pyrUp(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp index 48ff1ff..629fbfc 100644 --- a/modules/ocl/perf/perf_split_merge.cpp +++ b/modules/ocl/perf/perf_split_merge.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// Merge//////////////////////// -TEST(Merge) +PERFTEST(Merge) { Mat dst; ocl::oclMat d_dst; @@ -84,9 +85,10 @@ TEST(Merge) ocl::merge(d_src, d_dst); WARMUP_OFF; + TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(dst), cv::Mat(d_dst), 0.0)); + GPU_ON; ocl::merge(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -105,7 +107,7 @@ TEST(Merge) } ///////////// Split//////////////////////// -TEST(Split) +PERFTEST(Split) { //int channels = 4; int all_type[] = {CV_8UC1, CV_32FC1}; @@ -135,9 +137,23 @@ TEST(Split) ocl::split(d_src, d_dst); WARMUP_OFF; + if(d_dst.size() == dst.size()) + { + TestSystem::instance().setAccurate(1); + for(int i = 0; i < dst.size(); i++) + { + if(ExpectedMatNear(dst[i], cv::Mat(d_dst[i]), 0.0) == 0) + { + TestSystem::instance().setAccurate(0); + break; + } + } + }else + TestSystem::instance().setAccurate(0); + + GPU_ON; ocl::split(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp index e35a071..476c73e 100644 --- a/modules/ocl/perf/precomp.cpp +++ b/modules/ocl/perf/precomp.cpp @@ -41,6 +41,10 @@ //M*/ #include "precomp.hpp" +#if GTEST_OS_WINDOWS +#define NOMINMAX +# include +#endif // This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files 
// All images needed in this test are in samples/gpu folder. @@ -110,6 +114,7 @@ void TestSystem::finishCurrentSubtest() return; } + int is_accurate = is_accurate_; double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0; @@ -166,8 +171,8 @@ void TestSystem::finishCurrentSubtest() deviation = std::sqrt(sum / gpu_times_.size()); } - printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); - writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); + printMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); + writeMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); num_subtests_called_++; resetCurrentSubtest(); @@ -184,10 +189,19 @@ double TestSystem::meanTime(const vector &samples) void TestSystem::printHeading() { cout << endl; - cout << setiosflags(ios_base::left); - cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" - << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP" - << "DESCRIPTION\n"; + cout<< setiosflags(ios_base::left); + +#if 0 + cout< 0&&n <= cols * rows); + assert(type == CV_8UC1||type == CV_8UC3||type == CV_8UC4 + ||type == CV_32FC1||type == CV_32FC3||type == CV_32FC4); + + RNG rng; + //generate random position without duplication + std::vector pos; + for(int i = 0; i < cols * rows; i++) + { + pos.push_back(i); + } + + for(int i = 0; i < cols * rows; i++) + { + int temp = i + rng.uniform(0, cols * rows - 1 - i); + int temp1 = pos[temp]; + pos[temp]= pos[i]; + pos[i] = temp1; + } + + std::vector selected_pos; + for(int i = 0; i < n; i++) + { + selected_pos.push_back(pos[i]); + } + + pos.clear(); + //end of generating random y without duplication + + if(type == CV_8UC1) + { + typedef struct coorStruct_ + { + int x; + int y; + uchar xy; + 
}coorStruct; + + coorStruct coor_struct; + + std::vector coor; + + for(int i = 0; i < n; i++) + { + coor_struct.x = -1; + coor_struct.y = -1; + coor_struct.xy = (uchar)rng.uniform(low, high); + coor.push_back(coor_struct); + } + for(int i = 0; i < n; i++) + { + coor[i].y = selected_pos[i]/cols; + coor[i].x = selected_pos[i]%cols; + } + selected_pos.clear(); + + mat.create(rows, cols, type); + mat.setTo(0); + + for(int i = 0; i < n; i++) + { + mat.at(coor[i].y, coor[i].x) = coor[i].xy; + } + } + + if(type == CV_8UC4 || type == CV_8UC3) + { + mat.create(rows, cols, type); + mat.setTo(0); + + typedef struct Coor + { + int x; + int y; + + uchar r; + uchar g; + uchar b; + uchar alpha; + }coor; + + std::vector coor_vect; + + coor xy_coor; + + for(int i = 0; i < n; i++) + { + xy_coor.r = (uchar)rng.uniform(low, high); + xy_coor.g = (uchar)rng.uniform(low, high); + xy_coor.b = (uchar)rng.uniform(low, high); + if(type == CV_8UC4) + xy_coor.alpha = (uchar)rng.uniform(low, high); + + coor_vect.push_back(xy_coor); + } + + for(int i = 0; i < n; i++) + { + coor_vect[i].y = selected_pos[i]/((int)mat.step1()/mat.elemSize()); + coor_vect[i].x = selected_pos[i]%((int)mat.step1()/mat.elemSize()); + //printf("coor_vect[%d] = (%d, %d)\n", i, coor_vect[i].y, coor_vect[i].x); + } + + if(type == CV_8UC4) + { + for(int i = 0; i < n; i++) + { + mat.at(coor_vect[i].y, 4 * coor_vect[i].x) = coor_vect[i].r; + mat.at(coor_vect[i].y, 4 * coor_vect[i].x + 1) = coor_vect[i].g; + mat.at(coor_vect[i].y, 4 * coor_vect[i].x + 2) = coor_vect[i].b; + mat.at(coor_vect[i].y, 4 * coor_vect[i].x + 3) = coor_vect[i].alpha; + } + }else if(type == CV_8UC3) + { + for(int i = 0; i < n; i++) + { + mat.at(coor_vect[i].y, 3 * coor_vect[i].x) = coor_vect[i].r; + mat.at(coor_vect[i].y, 3 * coor_vect[i].x + 1) = coor_vect[i].g; + mat.at(coor_vect[i].y, 3 * coor_vect[i].x + 2) = coor_vect[i].b; + } + } + } +} +#endif string abspath(const string &relpath) { @@ -352,11 +605,57 @@ string abspath(const string &relpath) int 
CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/,
-                             const char *err_msg, const char * /*file_name*/,
-                             int /*line*/, void * /*userdata*/)
+    const char *err_msg, const char * /*file_name*/,
+    int /*line*/, void * /*userdata*/)
 {
     TestSystem::instance().printError(err_msg);
     return 0;
 }
+double checkNorm(const Mat &m) // infinity norm of m
+{
+    return norm(m, NORM_INF);
+}
+
+double checkNorm(const Mat &m1, const Mat &m2) // infinity norm of the difference between m1 and m2
+{
+    return norm(m1, m2, NORM_INF);
+}
+
+double checkSimilarity(const Mat &m1, const Mat &m2) // |normalized cross-correlation - 1|; 0 means identical up to scale
+{
+    Mat diff;
+    matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
+    return std::abs(diff.at<float>(0, 0) - 1.f); // matchTemplate writes a 32-bit float result
+}
+
+
+int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps) // 1 if the infinity norm of the difference is <= eps, else 0
+{
+    assert(dst.type() == cpu_dst.type());
+    assert(dst.size() == cpu_dst.size());
+    if(checkNorm(dst, cpu_dst) <= eps) // one norm evaluation instead of separate '<' and '==' passes
+        return 1;
+    return 0;
+}
+
+int ExceptDoubleNear(double val1, double val2, double abs_error) // 1 if |val1 - val2| <= abs_error, else 0
+{
+    const double diff = fabs(val1 - val2);
+    if (diff <= abs_error)
+        return 1;
+
+    return 0;
+}
+
+int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps) // 1 if checkSimilarity(cpu_dst, dst) <= eps, else 0
+{
+    assert(dst.type() == cpu_dst.type());
+    assert(dst.size() == cpu_dst.size());
+    if(checkSimilarity(cpu_dst, dst) <= eps)
+        return 1;
+    return 0;
+}
+
+
diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp
index c2cf123..b025703 100644
--- a/modules/ocl/perf/precomp.hpp
+++ b/modules/ocl/perf/precomp.hpp
@@ -50,10 +50,15 @@
 #include "opencv2/core/core.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/highgui/highgui.hpp"
+#include "opencv2/calib3d/calib3d.hpp"
 #include "opencv2/video/video.hpp"
 #include "opencv2/objdetect/objdetect.hpp"
 #include "opencv2/features2d/features2d.hpp"
 #include "opencv2/ocl/ocl.hpp"
+#include "opencv2/ts/ts.hpp"
+#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/ts/ts_gtest.h"
+
 #define Min_Size 1000
 #define Max_Size 4000
@@ -64,6 +69,8 @@ using namespace std;
 using namespace cv;
 void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high);
+void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n);
+
 string abspath(const string &relpath);
 int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *);
 typedef struct
@@ -76,6 +83,50 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep,
 void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
                     int sp, int sr, cv::TermCriteria crit);
+
+template<typename T1, typename T2>
+int ExpectedEQ(T1 expected, T2 actual) // 1 if expected == actual, else 0
+{
+    if(expected == actual)
+        return 1;
+
+    return 0;
+}
+
+template<typename T1>
+int EeceptDoubleEQ(T1 expected, T1 actual) // 1 if gtest's Double::AlmostEquals accepts the pair, else 0
+{
+    testing::internal::Double lhs(expected);
+    testing::internal::Double rhs(actual);
+
+    if (lhs.AlmostEquals(rhs))
+    {
+        return 1;
+    }
+
+    return 0;
+}
+
+template<typename T>
+int AssertEQ(T expected, T actual) // 1 if expected == actual, else 0
+{
+    if(expected == actual)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+int ExceptDoubleNear(double val1, double val2, double abs_error);
+bool match_rect(cv::Rect r1, cv::Rect r2, int threshold);
+
+double checkNorm(const cv::Mat &m);
+double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
+double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
+
+int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps);
+int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps);
+
 class Runnable
 {
 public:
@@ -171,6 +222,16 @@ public:
         return cur_iter_idx_ >= cpu_num_iters_;
     }
+    int get_cur_iter_idx()
+    {
+        return cur_iter_idx_;
+    }
+
+    int get_cpu_num_iters()
+    {
+        return cpu_num_iters_;
+    }
+
     bool warmupStop()
     {
         return cur_warmup_idx_++ >= gpu_warmup_iters_;
     }
@@ -252,6 +313,16 @@ public:
         itname_changed_ = true;
     }
+    void setAccurate(int is_accurate = -1) // stores the 0/1 result of an accuracy helper; -1 is the unset default
+    {
+        is_accurate_ = is_accurate;
+    }
+
+    std::stringstream &getCurSubtestDescription()
+    {
+        return cur_subtest_description_;
+    }
+
 private:
     TestSystem():
         cur_subtest_is_empty_(true), cpu_elapsed_(0),
@@ -261,7 +332,8 @@ private:
         speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0),
         is_list_mode_(false), num_iters_(10), cpu_num_iters_(2),
         gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
-        record_(0), recordname_("performance"), itname_changed_(true)
+        record_(0), recordname_("performance"), itname_changed_(true),
+        is_accurate_(-1)
     {
         cpu_times_.reserve(num_iters_);
         gpu_times_.reserve(num_iters_);
@@ -277,20 +349,22 @@ private:
         cur_subtest_description_.str("");
         cur_subtest_is_empty_ = true;
         cur_iter_idx_ = 0;
+        cur_warmup_idx_ = 0;
         cpu_times_.clear();
         gpu_times_.clear();
         gpu_full_times_.clear();
+        is_accurate_ = -1;
     }
     double meanTime(const std::vector<double> &samples);
     void printHeading();
     void printSummary();
-    void printMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
+    void printMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
     void writeHeading();
     void writeSummary();
-    void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
+    void writeMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
                       double speedup = 0.0f, double fullspeedup = 0.0f,
                       double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f);
@@ -340,6 +414,8 @@ private:
     std::string recordname_;
     std::string itname_;
     bool itname_changed_;
+
+    int is_accurate_;
 };
@@ -353,7 +429,7 @@ struct name##_init: Runnable { \
 void name##_init::run()
-#define TEST(name) \
+#define PERFTEST(name) \
 struct name##_test: Runnable { \
     name##_test(): Runnable(#name) { \
         TestSystem::instance().addTest(this); \
@@ -375,7 +451,7 @@ struct name##_test: Runnable { \
     while (!TestSystem::instance().stop()) { \
         TestSystem::instance().gpuOn()
 #define GPU_OFF \
-    ocl::finish(); \
+    ocl::finish();\
         TestSystem::instance().gpuOff(); \
     } TestSystem::instance().gpuComplete()
@@ -389,5 +465,5 @@ struct name##_test: Runnable { \
 #define WARMUP_ON \
     while (!TestSystem::instance().warmupStop()) {
 #define WARMUP_OFF \
-    ocl::finish(); \
+    ocl::finish();\
     } TestSystem::instance().warmupComplete()
-- 
2.7.4