Merge pull request #532 from jet47:super-resolution
author Vadim Pisarevsky <vadim.pisarevsky@gmail.com>
Sun, 17 Mar 2013 15:13:37 +0000 (19:13 +0400)
committer OpenCV Buildbot <buildbot@opencv.org>
Sun, 17 Mar 2013 15:13:37 +0000 (19:13 +0400)
122 files changed:
.gitattributes
CMakeLists.txt
android/libinfo/info.c
android/package/AndroidManifest.xml
android/service/engine/jni/BinderComponent/OpenCVEngine.cpp
android/service/engine/jni/NativeService/PackageInfo.cpp
android/service/engine/jni/Tests/OpenCVEngineTest.cpp
android/service/engine/jni/Tests/PackageInfoTest.cpp
android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java
cmake/OpenCVCompilerOptions.cmake
cmake/OpenCVDetectCUDA.cmake
cmake/OpenCVDetectCXXCompiler.cmake
cmake/OpenCVDetectCudaArch.cu [new file with mode: 0644]
cmake/OpenCVDetectPython.cmake
cmake/OpenCVGenConfig.cmake
cmake/templates/OpenCV.mk.in
cmake/templates/OpenCVConfig.cmake.in
data/haarcascades/haarcascade_smile.xml [new file with mode: 0644]
doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
doc/tutorials/introduction/android_binary_package/android_dev_intro.rst
doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst
doc/tutorials/introduction/android_binary_package/images/eclipse_10_crystal_clean.png
doc/tutorials/introduction/android_binary_package/images/eclipse_7_select_projects.png
doc/tutorials/introduction/android_binary_package/images/eclipse_cdt_cfg4.png
doc/tutorials/introduction/android_binary_package/images/emulator_canny.png
doc/tutorials/introduction/desktop_java/images/eclipse_run.png
doc/tutorials/introduction/desktop_java/java_dev_intro.rst
modules/contrib/src/spinimages.cpp
modules/core/include/opencv2/core/core.hpp
modules/core/src/algorithm.cpp
modules/core/src/lapack.cpp
modules/core/test/test_dxt.cpp
modules/features2d/include/opencv2/features2d/features2d.hpp
modules/features2d/src/features2d_init.cpp
modules/gpu/include/opencv2/gpu/gpu.hpp
modules/gpu/perf/perf_calib3d.cpp
modules/gpu/perf/perf_core.cpp
modules/gpu/perf/perf_denoising.cpp
modules/gpu/perf/perf_features2d.cpp
modules/gpu/perf/perf_filters.cpp
modules/gpu/perf/perf_imgproc.cpp
modules/gpu/perf/perf_labeling.cpp
modules/gpu/perf/perf_main.cpp
modules/gpu/perf/perf_matop.cpp
modules/gpu/perf/perf_objdetect.cpp
modules/gpu/perf/perf_video.cpp
modules/gpu/perf/utility.cpp
modules/gpu/perf/utility.hpp
modules/gpu/src/color.cpp
modules/gpu/src/cuda/bgfg_mog.cu
modules/gpu/src/cuda/ccomponetns.cu
modules/gpu/src/cuda/debayer.cu
modules/gpu/src/cuda/element_operations.cu
modules/gpu/src/element_operations.cpp
modules/gpu/src/graphcuts.cpp
modules/gpu/src/imgproc.cpp
modules/gpu/src/mssegmentation.cpp
modules/gpu/src/pyrlk.cpp
modules/gpu/src/warp.cpp
modules/gpu/test/test_bgfg.cpp
modules/gpu/test/test_color.cpp
modules/gpu/test/test_core.cpp
modules/gpu/test/test_hough.cpp
modules/gpu/test/test_warp_affine.cpp
modules/gpu/test/test_warp_perspective.cpp
modules/highgui/CMakeLists.txt
modules/imgproc/include/opencv2/imgproc/types_c.h
modules/imgproc/perf/perf_cvt_color.cpp
modules/imgproc/src/color.cpp
modules/imgproc/test/test_cvtyuv.cpp
modules/java/CMakeLists.txt
modules/java/android_lib/lint.xml [new file with mode: 0644]
modules/java/generator/gen_java.py
modules/java/generator/src/java/android+CameraBridgeViewBase.java
modules/java/generator/src/java/android+JavaCameraView.java
modules/java/generator/src/java/android+NativeCameraView.java
modules/ocl/CMakeLists.txt
modules/ocl/doc/introduction.rst
modules/ocl/include/opencv2/ocl/ocl.hpp
modules/ocl/perf/perf_arithm.cpp
modules/ocl/src/arithm.cpp
modules/ocl/src/blend.cpp
modules/ocl/src/brute_force_matcher.cpp
modules/ocl/src/fft.cpp
modules/ocl/src/filtering.cpp
modules/ocl/src/imgproc.cpp
modules/ocl/src/initialization.cpp
modules/ocl/src/interpolate_frames.cpp
modules/ocl/src/kernels/brute_force_match.cl
modules/ocl/src/kernels/cvt_color.cl
modules/ocl/src/kernels/filtering_morph.cl
modules/ocl/src/kernels/moments.cl [new file with mode: 0644]
modules/ocl/src/kernels/nonfree_surf.cl
modules/ocl/src/kernels/objdetect_hog.cl
modules/ocl/src/kernels/stereobm.cl [new file with mode: 0644]
modules/ocl/src/matrix_operations.cpp
modules/ocl/src/mcwutil.cpp
modules/ocl/src/mcwutil.hpp
modules/ocl/src/moments.cpp [new file with mode: 0644]
modules/ocl/src/precomp.hpp
modules/ocl/src/pyrlk.cpp
modules/ocl/src/stereobm.cpp [new file with mode: 0644]
modules/ocl/src/surf.cpp
modules/ocl/test/precomp.hpp
modules/ocl/test/test_calib3d.cpp [new file with mode: 0644]
modules/ocl/test/test_moments.cpp [new file with mode: 0644]
modules/ocl/test/test_surf.cpp [new file with mode: 0644]
modules/python/src2/cv2.cpp
modules/python/src2/cv2.cv.hpp
modules/ts/misc/summary.py
modules/ts/misc/table_formatter.py
samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java
samples/android/tutorial-3-cameracontrol/res/layout/tutorial3_surface_view.xml
samples/c/smiledetect.cpp [new file with mode: 0644]
samples/java/ant/build.xml
samples/java/ant/src/SimpleSample.java
samples/java/eclipse/HelloCV/src/Main.java
samples/java/sbt/src/main/scala/Main.scala
samples/ocl/aloe-L.png [new file with mode: 0644]
samples/ocl/aloe-R.png [new file with mode: 0644]
samples/ocl/aloe-disp.png [new file with mode: 0644]
samples/ocl/performance.cpp [new file with mode: 0644]

index cd56bd6..af704cd 100644 (file)
@@ -1,27 +1,36 @@
 .git*       export-ignore
 
-*           text=auto
-*           whitespace=!indent,trail,space
-
-*.py        text whitespace=tab-in-indent,trail,space,fix
-*.cpp       text whitespace=tab-in-indent,trail,space,fix
-*.hpp       text whitespace=tab-in-indent,trail,space,fix
-*.cxx       text whitespace=tab-in-indent,trail,space,fix
-*.hxx       text whitespace=tab-in-indent,trail,space,fix
-*.mm        text whitespace=tab-in-indent,trail,space,fix
-*.c         text whitespace=tab-in-indent,trail,space,fix
-*.h         text whitespace=tab-in-indent,trail,space,fix
-*.i         text whitespace=tab-in-indent,trail,space,fix
-*.java      text whitespace=tab-in-indent,trail,space,fix
-*.cu        text whitespace=tab-in-indent,trail,space,fix
-*.cl        text whitespace=tab-in-indent,trail,space,fix
-
-*.cmake     text whitespace=tab-in-indent,trail,space,fix
-*.cmakein   text whitespace=tab-in-indent,trail,space,fix
-*.in        text whitespace=tab-in-indent,trail,space,fix
-CMakeLists.txt  text whitespace=tab-in-indent,trail,space,fix
-
-*.rst       text whitespace=tab-in-indent,trail,space,fix
+*           text=auto whitespace=trailing-space,space-before-tab,-indent-with-non-tab,tab-in-indent,tabwidth=4
+
+*.py        text
+*.cpp       text
+*.hpp       text
+*.cxx       text
+*.hxx       text
+*.mm        text
+*.c         text
+*.h         text
+*.i         text
+*.js        text
+*.java      text
+*.scala     text
+*.cu        text
+*.cl        text
+*.css_t     text
+*.qrc       text
+*.qss       text
+*.S         text
+*.rst       text
+*.tex       text
+*.sty       text
+
+*.aidl      text
+*.mk        text
+
+*.cmake         text whitespace=tabwidth=2
+*.cmakein       text whitespace=tabwidth=2
+*.in            text whitespace=tabwidth=2
+CMakeLists.txt  text whitespace=tabwidth=2
 
 *.png       binary
 *.jepg      binary
@@ -32,22 +41,21 @@ CMakeLists.txt  text whitespace=tab-in-indent,trail,space,fix
 *.a         binary
 *.so        binary
 *.dll       binary
+*.jar       binary
 
 *.pdf       binary
 *.pbxproj   binary
 *.vec       binary
 *.doc       binary
 
-*.css_t     text
-*.qrc       text
-*.qss       text
-*.S         text
-
-*.xml       -text
-*.yml       -text
+*.xml                      -text whitespace=cr-at-eol
+*.yml                      -text whitespace=cr-at-eol
+.project                   -text whitespace=cr-at-eol merge=union
+.classpath                 -text whitespace=cr-at-eol merge=union
+.cproject                  -text whitespace=cr-at-eol merge=union
+org.eclipse.jdt.core.prefs -text whitespace=cr-at-eol merge=union
 
 *.vcproj    text eol=crlf merge=union
-*.cproject  text eol=crlf merge=union
 *.bat       text eol=crlf
 *.cmd       text eol=crlf
 *.cmd.tmpl  text eol=crlf
index 6a649f9..579312d 100644 (file)
@@ -184,7 +184,7 @@ OCV_OPTION(INSTALL_TO_MANGLED_PATHS "Enables mangled install paths, that help wi
 OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers"                                  ON   IF (NOT IOS) )
 OCV_OPTION(ENABLE_SOLUTION_FOLDERS    "Solution folder in Visual Studio or in other IDEs"        (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) IF (CMAKE_VERSION VERSION_GREATER "2.8.0") )
 OCV_OPTION(ENABLE_PROFILING           "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF  IF CMAKE_COMPILER_IS_GNUCXX )
-OCV_OPTION(ENABLE_OMIT_FRAME_POINTER  "Enable -fomit-frame-pointer for GCC"                      ON   IF CMAKE_COMPILER_IS_GNUCXX )
+OCV_OPTION(ENABLE_OMIT_FRAME_POINTER  "Enable -fomit-frame-pointer for GCC"                      ON   IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
 OCV_OPTION(ENABLE_POWERPC             "Enable PowerPC for GCC"                                   ON   IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
 OCV_OPTION(ENABLE_FAST_MATH           "Enable -ffast-math (not recommended for GCC 4.6.x)"       OFF  IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE                 "Enable SSE instructions"                                  ON   IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
index 225cc10..cfdc881 100644 (file)
@@ -7,7 +7,7 @@ const char* GetLibraryList(void);
 JNIEXPORT jstring JNICALL Java_org_opencv_android_StaticHelper_getLibraryList(JNIEnv *, jclass);
 
 #define PACKAGE_NAME "org.opencv.lib_v" CVAUX_STR(CV_VERSION_EPOCH) CVAUX_STR(CV_VERSION_MAJOR) "_" ANDROID_PACKAGE_PLATFORM
-#define PACKAGE_REVISION CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(ANDROID_PACKAGE_RELEASE)
+#define PACKAGE_REVISION CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) "." CVAUX_STR(ANDROID_PACKAGE_RELEASE)
 
 const char* GetPackageName(void)
 {
index 3b6bc7d..8997b16 100644 (file)
@@ -1,8 +1,8 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
     package="org.opencv.lib_v@OPENCV_VERSION_MAJOR@@OPENCV_VERSION_MINOR@_@ANDROID_PACKAGE_PLATFORM@"
-    android:versionCode="@OPENCV_VERSION_PATCH@@ANDROID_PACKAGE_RELEASE@"
-    android:versionName="@OPENCV_VERSION_PATCH@.@ANDROID_PACKAGE_RELEASE@" >
+    android:versionCode="@OPENCV_VERSION_PATCH@@OPENCV_VERSION_TWEAK@@ANDROID_PACKAGE_RELEASE@"
+    android:versionName="@OPENCV_VERSION_PATCH@.@OPENCV_VERSION_TWEAK@.@ANDROID_PACKAGE_RELEASE@" >
 
     <uses-sdk android:minSdkVersion="@ANDROID_SDK_VERSION@" />
     <uses-feature android:name="android.hardware.touchscreen" android:required="false"/>
index 1a02b8a..c10ab54 100644 (file)
@@ -15,7 +15,7 @@ using namespace android;
 
 const int OpenCVEngine::Platform = DetectKnownPlatforms();
 const int OpenCVEngine::CpuID = GetCpuID();
-const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302};
+const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302, 2040400};
 
 bool OpenCVEngine::ValidateVersion(int version)
 {
index 7428de0..2eb8230 100644 (file)
@@ -136,7 +136,17 @@ inline int SplitVersion(const vector<string>& features, const string& package_ve
 
         // Taking release and build number from package revision
         vector<string> tmp2 = SplitStringVector(package_version, '.');
-        result += atoi(tmp2[0].c_str())*100 + atoi(tmp2[1].c_str());
+        if (tmp2.size() == 2)
+        {
+            // the 2nd digit is revision
+            result += atoi(tmp2[0].c_str())*100 + 00;
+        }
+        else
+        {
+            // the 2nd digit is part of library version
+            // the 3rd digit is revision
+            result += atoi(tmp2[0].c_str())*100 + atoi(tmp2[1].c_str());
+        }
     }
     else
     {
@@ -194,10 +204,10 @@ inline int SplitPlatfrom(const vector<string>& features)
  * Example: armv7_neon
  */
 PackageInfo::PackageInfo(int version, int platform, int cpu_id, std::string install_path):
-Version(version),
-Platform(platform),
-CpuID(cpu_id),
-InstallPath("")
+    Version(version),
+    Platform(platform),
+    CpuID(cpu_id),
+    InstallPath("")
 {
     #ifndef __SUPPORT_TEGRA3
     Platform = PLATFORM_UNKNOWN;
index a42277f..4e39038 100644 (file)
@@ -218,6 +218,66 @@ TEST(OpenCVEngineTest, InstallAndGetVersion)
     #endif
     #endif
 }
+
+TEST(OpenCVEngineTest, GetPathFor2_4_2)  // Library path lookup for version string "2.4.2".
+{
+    sp<IOpenCVEngine> Engine = InitConnect();
+    Starter.PackageManager->InstalledPackages.clear();  // start from an empty installed-package list
+    Starter.PackageManager->InstallVersion(2040200, PLATFORM_UNKNOWN, ARCH_ARMv7);  // 2040200 presumably encodes 2.4.2 - confirm against the version scheme
+    EXPECT_FALSE(NULL == Engine.get());  // the connection must have yielded an engine object
+    String16 result = Engine->GetLibPathByVersion(String16("2.4.2"));
+    EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a/lib", String8(result).string());  // path of the v24 ARMv7a package's lib directory
+}
+
+TEST(OpenCVEngineTest, GetPathFor2_4_3)  // Library path lookup for version string "2.4.3".
+{
+    sp<IOpenCVEngine> Engine = InitConnect();
+    Starter.PackageManager->InstalledPackages.clear();  // start from an empty installed-package list
+    Starter.PackageManager->InstallVersion(2040300, PLATFORM_UNKNOWN, ARCH_ARMv7);  // 2040300 presumably encodes 2.4.3 - confirm against the version scheme
+    EXPECT_FALSE(NULL == Engine.get());  // the connection must have yielded an engine object
+    String16 result = Engine->GetLibPathByVersion(String16("2.4.3"));
+    EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a/lib", String8(result).string());  // path of the v24 ARMv7a package's lib directory
+}
+
+TEST(OpenCVEngineTest, GetPathFor2_4_3_1)  // Library path lookup for the four-component version string "2.4.3.1".
+{
+    sp<IOpenCVEngine> Engine = InitConnect();
+    Starter.PackageManager->InstalledPackages.clear();  // start from an empty installed-package list
+    Starter.PackageManager->InstallVersion(2040301, PLATFORM_UNKNOWN, ARCH_ARMv7);  // 2040301 presumably encodes 2.4.3.1 - confirm against the version scheme
+    EXPECT_FALSE(NULL == Engine.get());  // the connection must have yielded an engine object
+    String16 result = Engine->GetLibPathByVersion(String16("2.4.3.1"));
+    EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a/lib", String8(result).string());  // path of the v24 ARMv7a package's lib directory
+}
+
+TEST(OpenCVEngineTest, GetPathFor2_4_3_2)  // Library path lookup for the four-component version string "2.4.3.2".
+{
+    sp<IOpenCVEngine> Engine = InitConnect();
+    Starter.PackageManager->InstalledPackages.clear();  // start from an empty installed-package list
+    Starter.PackageManager->InstallVersion(2040302, PLATFORM_UNKNOWN, ARCH_ARMv7);  // 2040302 presumably encodes 2.4.3.2 - confirm against the version scheme
+    EXPECT_FALSE(NULL == Engine.get());  // the connection must have yielded an engine object
+    String16 result = Engine->GetLibPathByVersion(String16("2.4.3.2"));
+    EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a/lib", String8(result).string());  // path of the v24 ARMv7a package's lib directory
+}
+
+TEST(OpenCVEngineTest, GetPathFor2_4_4)  // Library path lookup for version string "2.4.4".
+{
+    sp<IOpenCVEngine> Engine = InitConnect();
+    Starter.PackageManager->InstalledPackages.clear();  // start from an empty installed-package list
+    Starter.PackageManager->InstallVersion(2040400, PLATFORM_UNKNOWN, ARCH_ARMv7);  // 2040400 presumably encodes 2.4.4 - confirm against the version scheme
+    EXPECT_FALSE(NULL == Engine.get());  // the connection must have yielded an engine object
+    String16 result = Engine->GetLibPathByVersion(String16("2.4.4"));
+    EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a/lib", String8(result).string());  // path of the v24 ARMv7a package's lib directory
+}
+
+TEST(OpenCVEngineTest, GetPathFor2_4_5)  // Negative case: a lookup for "2.4.5" must return an empty path.
+{
+    sp<IOpenCVEngine> Engine = InitConnect();
+    Starter.PackageManager->InstalledPackages.clear();  // start from an empty installed-package list
+    Starter.PackageManager->InstallVersion(2040500, PLATFORM_UNKNOWN, ARCH_ARMv7);  // a package is installed, yet the lookup below is still expected to fail
+    EXPECT_FALSE(NULL == Engine.get());  // the connection must have yielded an engine object
+    String16 result = Engine->GetLibPathByVersion(String16("2.4.5"));
+    EXPECT_EQ(0, result.size()); // 2.4.5 is not published yet; presumably rejected because 2040500 is not in OpenCVEngine::KnownVersions - confirm
+}
 #endif
 
 #ifndef __i386__
index 2e747c3..6bc8485 100644 (file)
@@ -157,6 +157,20 @@ TEST(PackageInfo, MipsFromFullName)
 }
 #endif
 
+TEST(PackageInfo, Check2DigitRevision)  // Two-component package version ("4.1"): only the first component feeds the library version.
+{
+    PackageInfo info("org.opencv.lib_v23_armv7a_neon", "/data/data/org.opencv.lib_v23_armv7_neon", "4.1");  // arguments: package name, install path, package version string
+    EXPECT_EQ(2030400, info.GetVersion());  // "4" contributes 4*100; the trailing "1" is the package revision and is not added
+    EXPECT_EQ(ARCH_ARMv7 | FEATURES_HAS_NEON, info.GetCpuID());  // CPU id is expected to carry both the ARMv7 and NEON flags
+}
+
+TEST(PackageInfo, Check3DigitRevision)  // Three-component package version ("4.1.5"): the first two components feed the library version.
+{
+    PackageInfo info("org.opencv.lib_v23_armv7a_neon", "/data/data/org.opencv.lib_v23_armv7_neon", "4.1.5");  // arguments: package name, install path, package version string
+    EXPECT_EQ(2030401, info.GetVersion());  // "4" contributes 4*100 and "1" contributes 1; the trailing "5" is the package revision and is not added
+    EXPECT_EQ(ARCH_ARMv7 | FEATURES_HAS_NEON, info.GetCpuID());  // CPU id is expected to carry both the ARMv7 and NEON flags
+}
+
 TEST(PackageInfo, Comparator1)
 {
     PackageInfo info1(2040000, PLATFORM_UNKNOWN, ARCH_X86);
index 6f604fb..fad2797 100644 (file)
@@ -299,10 +299,9 @@ public class ManagerActivity extends Activity
                 else
                     NativeLibDir = "/data/data/" + mInstalledPackageInfo[i].packageName + "/lib";
 
-                OpenCVLibraryInfo NativeInfo = new OpenCVLibraryInfo(NativeLibDir);
-
                 if (PackageName.equals("org.opencv.engine"))
                 {
+                    OpenCVLibraryInfo NativeInfo = new OpenCVLibraryInfo(NativeLibDir);
                     if (NativeInfo.status())
                     {
                         PublicName = "Built-in OpenCV library";
@@ -348,9 +347,7 @@ public class ManagerActivity extends Activity
 
                 if (null != ActivePackagePath)
                 {
-                    int start = ActivePackagePath.indexOf(mInstalledPackageInfo[i].packageName);
-                    int stop = start + mInstalledPackageInfo[i].packageName.length();
-                    if (start >= 0 && ActivePackagePath.charAt(stop) == '/')
+                    if (ActivePackagePath.equals(NativeLibDir))
                     {
                         temp.put("Activity", "y");
                         Tags = "active";
@@ -405,13 +402,22 @@ public class ManagerActivity extends Activity
         if (OpenCVersion == null || PackageVersion == null)
             return "unknown";
 
-        int dot = PackageVersion.indexOf(".");
-        if (dot == -1 || OpenCVersion.length() == 0)
+        String[] revisions = PackageVersion.split("\\.");
+
+        if (revisions.length <= 1 || OpenCVersion.length() == 0)
             return "unknown";
         else
-            return OpenCVersion.substring(0,  OpenCVersion.length()-1) + "." +
-                OpenCVersion.toCharArray()[OpenCVersion.length()-1] + "." +
-                PackageVersion.substring(0, dot) + " rev " + PackageVersion.substring(dot+1);
+            if (revisions.length == 2)
+                // the 2nd digit is revision
+                return OpenCVersion.substring(0,  OpenCVersion.length()-1) + "." +
+                    OpenCVersion.toCharArray()[OpenCVersion.length()-1] + "." +
+                    revisions[0] + " rev " + revisions[1];
+            else
+                // the 2nd digit is part of library version
+                // the 3rd digit is revision
+                return OpenCVersion.substring(0,  OpenCVersion.length()-1) + "." +
+                    OpenCVersion.toCharArray()[OpenCVersion.length()-1] + "." +
+                    revisions[0] + "." + revisions[1] + " rev " + revisions[2];
     }
 
     protected String ConvertPackageName(String Name, String Version)
index 8981f4b..aeed112 100644 (file)
@@ -97,7 +97,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   endif()
 
   # We need pthread's
-  if(UNIX AND NOT ANDROID)
+  if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX))
     add_extra_compiler_option(-pthread)
   endif()
 
index 4eaaf9c..e853a8d 100644 (file)
@@ -33,8 +33,48 @@ if(CUDA_FOUND)
 
   message(STATUS "CUDA detected: " ${CUDA_VERSION})
 
-  set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
-  set(CUDA_ARCH_PTX "2.0 3.0" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
+  set(_generations "Fermi" "Kepler")
+  if(NOT CMAKE_CROSSCOMPILING)
+    list(APPEND _generations "Auto")
+  endif()
+  set(CUDA_GENERATION "" CACHE STRING "Build CUDA device code only for specific GPU architecture. Leave empty to build for all architectures.")
+  if( CMAKE_VERSION VERSION_GREATER "2.8" )
+    set_property( CACHE CUDA_GENERATION PROPERTY STRINGS "" ${_generations} )
+  endif()
+
+  if(CUDA_GENERATION)
+    if(NOT ";${_generations};" MATCHES ";${CUDA_GENERATION};")
+      string(REPLACE ";" ", " _generations "${_generations}")
+      message(FATAL_ERROR "ERROR: ${_generations} Generations are suppered.")
+    endif()
+    unset(CUDA_ARCH_BIN CACHE)
+    unset(CUDA_ARCH_PTX CACHE)
+  endif()
+
+  set(__cuda_arch_ptx "")
+  if(CUDA_GENERATION STREQUAL "Fermi")
+    set(__cuda_arch_bin "2.0 2.1(2.0)")
+  elseif(CUDA_GENERATION STREQUAL "Kepler")
+    set(__cuda_arch_bin "3.0")
+  elseif(CUDA_GENERATION STREQUAL "Auto")
+    execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCudaArch.cu" "--run"
+                     WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
+                     RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
+                     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(NOT _nvcc_res EQUAL 0)
+      message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
+    else()
+      set(__cuda_arch_bin "${_nvcc_out}")
+    endif()
+  endif()
+
+  if(NOT DEFINED __cuda_arch_bin)
+    set(__cuda_arch_bin "1.1 1.2 1.3 2.0 2.1(2.0) 3.0")
+    set(__cuda_arch_ptx "2.0 3.0")
+  endif()
+
+  set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
+  set(CUDA_ARCH_PTX ${__cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
 
   string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
   string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
index 7b6ff5e..6e02780 100644 (file)
@@ -5,17 +5,17 @@ if(CMAKE_CL_64)
     set(MSVC64 1)
 endif()
 
-if(NOT APPLE)
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    set(CMAKE_COMPILER_IS_GNUCXX 1)
-    set(CMAKE_COMPILER_IS_CLANGCXX 1)
-    set(ENABLE_PRECOMPILED_HEADERS OFF CACHE BOOL "" FORCE)
-  endif()
-  if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
-    set(CMAKE_COMPILER_IS_GNUCC 1)
-    set(CMAKE_COMPILER_IS_CLANGCC 1)
-    set(ENABLE_PRECOMPILED_HEADERS OFF CACHE BOOL "" FORCE)
-  endif()
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  set(CMAKE_COMPILER_IS_GNUCXX 1)
+  set(CMAKE_COMPILER_IS_CLANGCXX 1)
+endif()
+if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
+  set(CMAKE_COMPILER_IS_GNUCC 1)
+  set(CMAKE_COMPILER_IS_CLANGCC 1)
+endif()
+
+if((CMAKE_COMPILER_IS_CLANGCXX OR CMAKE_COMPILER_IS_CLANGCC) AND NOT CMAKE_GENERATOR MATCHES "Xcode")
+  set(ENABLE_PRECOMPILED_HEADERS OFF CACHE BOOL "" FORCE)
 endif()
 
 # ----------------------------------------------------------------------------
diff --git a/cmake/OpenCVDetectCudaArch.cu b/cmake/OpenCVDetectCudaArch.cu
new file mode 100644 (file)
index 0000000..008f8ba
--- /dev/null
@@ -0,0 +1,14 @@
+#include <stdio.h>
+int main()  // Prints "major.minor " for every CUDA device whose properties can be queried; returns 0, or -1 when no device is usable.
+{
+    int count = 0;
+    if (cudaSuccess != cudaGetDeviceCount(&count)){return -1;}  // runtime could not report a device count: signal failure to the caller
+    if (count == 0) {return -1;}  // no devices at all is also reported as failure
+    for (int device = 0; device < count; ++device)
+    {
+        cudaDeviceProp prop;
+        if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue;}  // skip devices whose properties cannot be read
+        printf("%d.%d ", prop.major, prop.minor);  // space-separated list; OpenCVDetectCUDA.cmake captures this output as the CUDA arch list
+    }
+    return 0;  // NOTE(review): also reached with empty output if every property query failed
+}
\ No newline at end of file
index 6b98bc0..73143b7 100644 (file)
@@ -19,18 +19,25 @@ unset(HAVE_SPHINX CACHE)
 if(PYTHON_EXECUTABLE)
   if(PYTHON_VERSION_STRING)
     set(PYTHON_VERSION_MAJOR_MINOR "${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}")
-    string(REGEX MATCH "[0-9]+.[0-9]+.[0-9]+" PYTHON_VERSION_FULL "${PYTHON_VERSION_STRING}")
+    set(PYTHON_VERSION_FULL "${PYTHON_VERSION_STRING}")
   else()
     execute_process(COMMAND ${PYTHON_EXECUTABLE} --version
       ERROR_VARIABLE PYTHON_VERSION_FULL
       ERROR_STRIP_TRAILING_WHITESPACE)
 
     string(REGEX MATCH "[0-9]+.[0-9]+" PYTHON_VERSION_MAJOR_MINOR "${PYTHON_VERSION_FULL}")
-    string(REGEX MATCH "[0-9]+.[0-9]+.[0-9]+" PYTHON_VERSION_FULL "${PYTHON_VERSION_FULL}")
+  endif()
+
+  if("${PYTHON_VERSION_FULL}" MATCHES "[0-9]+.[0-9]+.[0-9]+")
+    set(PYTHON_VERSION_FULL "${CMAKE_MATCH_0}")
+  elseif("${PYTHON_VERSION_FULL}" MATCHES "[0-9]+.[0-9]+")
+    set(PYTHON_VERSION_FULL "${CMAKE_MATCH_0}")
+  else()
+    unset(PYTHON_VERSION_FULL)
   endif()
 
   if(NOT ANDROID AND NOT IOS)
-    if(CMAKE_VERSION VERSION_GREATER 2.8.8)
+    if(CMAKE_VERSION VERSION_GREATER 2.8.8 AND PYTHON_VERSION_FULL)
       find_host_package(PythonLibs ${PYTHON_VERSION_FULL} EXACT)
     else()
       find_host_package(PythonLibs ${PYTHON_VERSION_FULL})
index f8d9e27..705ccc8 100644 (file)
@@ -53,6 +53,10 @@ if(OpenCV_LIB_COMPONENTS)
   list(REMOVE_ITEM OPENCV_MODULES_CONFIGCMAKE ${OpenCV_LIB_COMPONENTS})
 endif()
 
+if(BUILD_FAT_JAVA_LIB AND HAVE_opencv_java)
+  list(APPEND OPENCV_MODULES_CONFIGCMAKE opencv_java)
+endif()
+
 macro(ocv_generate_dependencies_map_configcmake suffix configuration)
   set(OPENCV_DEPENDENCIES_MAP_${suffix} "")
   set(OPENCV_PROCESSED_LIBS "")
@@ -126,8 +130,13 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig-version.cmake.
 set(OpenCV_INCLUDE_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_INCLUDE_INSTALL_PATH}/opencv" "\${OpenCV_INSTALL_PATH}/${OPENCV_INCLUDE_INSTALL_PATH}\"")
 
 set(OpenCV2_INCLUDE_DIRS_CONFIGCMAKE "\"\"")
-set(OpenCV_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_LIB_INSTALL_PATH}\"")
-set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_3P_LIB_INSTALL_PATH}\"")
+if(ANDROID)
+  set(OpenCV_LIB_DIRS_CONFIGCMAKE          "\"\${OpenCV_INSTALL_PATH}/sdk/native/libs/\${ANDROID_NDK_ABI_NAME}\"")
+  set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/sdk/native/3rdparty/libs/\${ANDROID_NDK_ABI_NAME}\"")
+else()
+  set(OpenCV_LIB_DIRS_CONFIGCMAKE          "\"\${OpenCV_INSTALL_PATH}/${OPENCV_LIB_INSTALL_PATH}\"")
+  set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_3P_LIB_INSTALL_PATH}\"")
+endif()
 if(INSTALL_TO_MANGLED_PATHS)
   string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "${OPENCV_3P_LIB_INSTALL_PATH}")
   set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE}\"")
index 8c1b855..078e020 100644 (file)
@@ -42,7 +42,7 @@ else
         OPENCV_EXTRA_COMPONENTS:=@OPENCV_EXTRA_COMPONENTS_CONFIGMAKE@
     endif
     ifeq ($(TARGET_ARCH_ABI),mips)
-        OPENCV_3RDPARTY_COMPONENTS:=@OPENCV_3RDPARTY_COMPONENTS_CONFIGMAKE_NO_TBB@
+        OPENCV_3RDPARTY_COMPONENTS:=@OPENCV_3RDPARTY_COMPONENTS_CONFIGMAKE@
         OPENCV_EXTRA_COMPONENTS:=@OPENCV_EXTRA_COMPONENTS_CONFIGMAKE@
     endif
 endif
@@ -92,7 +92,7 @@ define add_opencv_camera_module
     include $(PREBUILT_SHARED_LIBRARY)
 endef
 
-ifeq ($(OPENCV_MK_ALREADY_INCLUDED),)
+ifeq ($(OPENCV_MK_$(OPENCV_TARGET_ARCH_ABI)_ALREADY_INCLUDED),)
     ifeq ($(OPENCV_INSTALL_MODULES),on)
         $(foreach module,$(OPENCV_LIBS),$(eval $(call add_opencv_module,$(module))))
     endif
@@ -105,7 +105,7 @@ ifeq ($(OPENCV_MK_ALREADY_INCLUDED),)
     endif
 
     #turn off module installation to prevent their redefinition
-    OPENCV_MK_ALREADY_INCLUDED:=on
+    OPENCV_MK_$(OPENCV_TARGET_ARCH_ABI)_ALREADY_INCLUDED:=on
 endif
 
 ifeq ($(OPENCV_LOCAL_CFLAGS),)
index 235c72b..7441b59 100644 (file)
@@ -151,6 +151,7 @@ endif()
 # ==============================================================
 if(NOT OpenCV_FIND_COMPONENTS)
   set(OpenCV_FIND_COMPONENTS ${OpenCV_LIB_COMPONENTS})
+  list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_java)
   if(GTest_FOUND OR GTEST_FOUND)
     list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_ts)
   endif()
@@ -201,7 +202,7 @@ foreach(__opttype OPT DBG)
         #indicate that this module is also found
         string(TOUPPER "${__cvdep}" __cvdep)
         set(${__cvdep}_FOUND 1)
-      else()
+      elseif(EXISTS "${OpenCV_3RDPARTY_LIB_DIR_${__opttype}}/${OpenCV_${__cvdep}_LIBNAME_${__opttype}}")
         list(APPEND OpenCV_LIBS_${__opttype} "${OpenCV_3RDPARTY_LIB_DIR_${__opttype}}/${OpenCV_${__cvdep}_LIBNAME_${__opttype}}")
       endif()
     endforeach()
@@ -221,7 +222,7 @@ foreach(__opttype OPT DBG)
   endif()
 
   # CUDA
-  if(OpenCV_CUDA_VERSION AND WIN32 AND NOT OpenCV_SHARED)
+  if(OpenCV_CUDA_VERSION AND (CMAKE_CROSSCOMPILING OR (WIN32 AND NOT OpenCV_SHARED)))
     if(NOT CUDA_FOUND)
       find_package(CUDA ${OpenCV_CUDA_VERSION} EXACT REQUIRED)
     else()
@@ -304,3 +305,11 @@ else()
   SET(OpenCV_LIB_DIR ${OpenCV_LIB_DIR_OPT} ${OpenCV_3RDPARTY_LIB_DIR_OPT})
 endif()
 set(OpenCV_LIBRARIES ${OpenCV_LIBS})
+
+if(CMAKE_CROSSCOMPILING AND OpenCV_SHARED AND (CMAKE_SYSTEM_NAME MATCHES "Linux"))
+  foreach(dir ${OpenCV_LIB_DIR})
+    set(CMAKE_EXE_LINKER_FLAGS    "${CMAKE_EXE_LINKER_FLAGS}    -Wl,-rpath-link,${dir}")
+    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-rpath-link,${dir}")
+    set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-rpath-link,${dir}")
+  endforeach()
+endif()
diff --git a/data/haarcascades/haarcascade_smile.xml b/data/haarcascades/haarcascade_smile.xml
new file mode 100644 (file)
index 0000000..b7a6a3a
--- /dev/null
@@ -0,0 +1,8353 @@
+<?xml version="1.0"?>\r
+<!----------------------------------------------------------------------------\r
+  Smile detector\r
+  Contributed by Oscar Deniz Suarez\r
+  More information can be found at http://visilab.etsii.uclm.es/personas/oscar/oscar.html\r
+  \r
+//////////////////////////////////////////////////////////////////////////\r
+| Contributors License Agreement\r
+| IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+|   By downloading, copying, installing or using the software you agree \r
+|   to this license.\r
+|   If you do not agree to this license, do not download, install,\r
+|   copy or use the software.\r
+|\r
+| Copyright (c) 2011, Modesto Castrillon-Santana (IUSIANI, Universidad de\r
+| Las Palmas de Gran Canaria, Spain).\r
+|  All rights reserved.\r
+|\r
+| Redistribution and use in source and binary forms, with or without\r
+| modification, are permitted provided that the following conditions are\r
+| met:\r
+|\r
+|    * Redistributions of source code must retain the above copyright\r
+|       notice, this list of conditions and the following disclaimer.\r
+|    * Redistributions in binary form must reproduce the above\r
+|      copyright notice, this list of conditions and the following\r
+|      disclaimer in the documentation and/or other materials provided\r
+|      with the distribution.  \r
+|    * The name of Contributor may not be used to endorse or promote products \r
+|      derived from this software without specific prior written permission.\r
+|\r
+| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\r
+| CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+| EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+| PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+| PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+| LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+| NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+|\r
+//////////////////////////////////////////////////////////////////////////\r
+\r
+------------------------------------------------------------------------>\r
+<opencv_storage>\r
+<!-- Automatically converted from data/classifier, window size = 36x18 -->\r
+<SmileDetector type_id="opencv-haar-classifier">\r
+  <size>\r
+    36 18</size>\r
+  <stages>\r
+    <_>\r
+      <!-- stage 0 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 2 4 -1.</_>\r
+                <_>\r
+                  0 2 2 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.8783610691316426e-004</threshold>\r
+            <left_val>0.5921934843063355</left_val>\r
+            <right_val>-0.4416360855102539</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 10 2 8 -1.</_>\r
+                <_>\r
+                  34 14 2 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.2209611274302006e-004</threshold>\r
+            <left_val>0.3031865060329437</left_val>\r
+            <right_val>-0.3291291892528534</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 2 8 -1.</_>\r
+                <_>\r
+                  0 14 2 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.9940118333324790e-004</threshold>\r
+            <left_val>0.4856331050395966</left_val>\r
+            <right_val>-0.4292306005954742</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 0 18 10 -1.</_>\r
+                <_>\r
+                  24 0 9 5 2.</_>\r
+                <_>\r
+                  15 5 9 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0372891984879971</threshold>\r
+            <left_val>-0.2866730093955994</left_val>\r
+            <right_val>0.5997999906539917</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 0 4 4 -1.</_>\r
+                <_>\r
+                  7 0 2 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>1.4334049774333835e-003</threshold>\r
+            <left_val>-0.3489313125610352</left_val>\r
+            <right_val>0.4048275053501129</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 5 6 4 -1.</_>\r
+                <_>\r
+                  15 6 6 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.7213020995259285e-003</threshold>\r
+            <left_val>0.7571418881416321</left_val>\r
+            <right_val>-0.1222594976425171</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 6 8 3 -1.</_>\r
+                <_>\r
+                  13 7 8 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.1067271530628204e-003</threshold>\r
+            <left_val>-0.1665772050619125</left_val>\r
+            <right_val>0.7509614825248718</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 6 8 4 -1.</_>\r
+                <_>\r
+                  14 7 8 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.7238711528480053e-003</threshold>\r
+            <left_val>0.6266279220581055</left_val>\r
+            <right_val>-0.1912745982408524</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 2 8 -1.</_>\r
+                <_>\r
+                  0 14 2 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.4225031160749495e-004</threshold>\r
+            <left_val>-0.2394447028636932</left_val>\r
+            <right_val>0.4484061896800995</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 0 2 16 -1.</_>\r
+                <_>\r
+                  35 0 1 8 2.</_>\r
+                <_>\r
+                  34 8 1 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.6867710510268807e-003</threshold>\r
+            <left_val>-0.1843906939029694</left_val>\r
+            <right_val>0.0917824134230614</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 0 4 7 -1.</_>\r
+                <_>\r
+                  3 0 2 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0146256200969219</threshold>\r
+            <left_val>0.1616805940866470</left_val>\r
+            <right_val>-0.8150117993354797</right_val></_></_></trees>\r
+      <stage_threshold>-1.2678639888763428</stage_threshold>\r
+      <parent>-1</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 1 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 7 28 3 -1.</_>\r
+                <_>\r
+                  11 7 14 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0381411388516426</threshold>\r
+            <left_val>-0.3327588140964508</left_val>\r
+            <right_val>0.7783334255218506</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 0 2 2 -1.</_>\r
+                <_>\r
+                  34 1 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.3136120105627924e-004</threshold>\r
+            <left_val>0.3635309040546417</left_val>\r
+            <right_val>-0.3204346895217896</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 12 4 6 -1.</_>\r
+                <_>\r
+                  0 15 4 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.8757019210606813e-003</threshold>\r
+            <left_val>0.7135239243507385</left_val>\r
+            <right_val>-0.3518598973751068</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 0 2 2 -1.</_>\r
+                <_>\r
+                  34 1 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.4266290236264467e-003</threshold>\r
+            <left_val>0.0681008473038673</left_val>\r
+            <right_val>-0.6172732710838318</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 2 2 -1.</_>\r
+                <_>\r
+                  0 1 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.4605958606116474e-004</threshold>\r
+            <left_val>0.5727149844169617</left_val>\r
+            <right_val>-0.3786099851131439</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 5 9 12 -1.</_>\r
+                <_>\r
+                  20 5 3 12 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0318226404488087</threshold>\r
+            <left_val>-0.6348456144332886</left_val>\r
+            <right_val>0.1164183989167213</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 5 9 12 -1.</_>\r
+                <_>\r
+                  13 5 3 12 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0171309504657984</threshold>\r
+            <left_val>-0.6279314756393433</left_val>\r
+            <right_val>0.3247947096824646</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 0 32 1 -1.</_>\r
+                <_>\r
+                  4 0 16 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-9.3903783708810806e-003</threshold>\r
+            <left_val>-0.2757895886898041</left_val>\r
+            <right_val>0.2233072966337204</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 3 3 -1.</_>\r
+                <_>\r
+                  1 0 1 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.2802520543336868e-003</threshold>\r
+            <left_val>0.1897764056921005</left_val>\r
+            <right_val>-0.6881762146949768</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  32 7 4 7 -1.</_>\r
+                <_>\r
+                  33 8 2 7 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>2.6840099599212408e-003</threshold>\r
+            <left_val>-0.2235050052404404</left_val>\r
+            <right_val>0.1372579932212830</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 0 8 6 -1.</_>\r
+                <_>\r
+                  7 0 4 3 2.</_>\r
+                <_>\r
+                  11 3 4 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0106046395376325</threshold>\r
+            <left_val>-0.2142623066902161</left_val>\r
+            <right_val>0.5620787143707275</right_val></_></_></trees>\r
+      <stage_threshold>-1.5844069719314575</stage_threshold>\r
+      <parent>0</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 2 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 2 2 -1.</_>\r
+                <_>\r
+                  0 1 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.1677199876867235e-004</threshold>\r
+            <left_val>0.4659548103809357</left_val>\r
+            <right_val>-0.3742581903934479</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 1 8 9 -1.</_>\r
+                <_>\r
+                  29 3 4 9 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0551206283271313</threshold>\r
+            <left_val>0.5417978763580322</left_val>\r
+            <right_val>-0.2265765070915222</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 10 1 8 -1.</_>\r
+                <_>\r
+                  1 14 1 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-6.4742640824988484e-004</threshold>\r
+            <left_val>0.3770307004451752</left_val>\r
+            <right_val>-0.3348644077777863</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 6 30 9 -1.</_>\r
+                <_>\r
+                  13 9 10 3 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3950783908367157</threshold>\r
+            <left_val>-0.1814441978931427</left_val>\r
+            <right_val>0.8132591843605042</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 5 8 6 -1.</_>\r
+                <_>\r
+                  12 7 8 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0405094102025032</threshold>\r
+            <left_val>-0.0953694134950638</left_val>\r
+            <right_val>0.8059561848640442</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 4 6 3 -1.</_>\r
+                <_>\r
+                  16 5 6 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.8735421150922775e-003</threshold>\r
+            <left_val>-0.1402366012334824</left_val>\r
+            <right_val>0.6164302825927734</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 2 18 -1.</_>\r
+                <_>\r
+                  0 0 1 9 2.</_>\r
+                <_>\r
+                  1 9 1 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0105780400335789</threshold>\r
+            <left_val>0.1293267011642456</left_val>\r
+            <right_val>-0.7482334971427918</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 2 2 14 -1.</_>\r
+                <_>\r
+                  35 2 1 7 2.</_>\r
+                <_>\r
+                  34 9 1 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>9.2986393719911575e-003</threshold>\r
+            <left_val>0.0589406006038189</left_val>\r
+            <right_val>-0.4410730004310608</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 2 2 14 -1.</_>\r
+                <_>\r
+                  0 2 1 7 2.</_>\r
+                <_>\r
+                  1 9 1 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.0301607698202133e-003</threshold>\r
+            <left_val>-0.6630973219871521</left_val>\r
+            <right_val>0.1810476928949356</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 0 1 4 -1.</_>\r
+                <_>\r
+                  35 2 1 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.0947990085696802e-004</threshold>\r
+            <left_val>0.2211259007453919</left_val>\r
+            <right_val>-0.2730903923511505</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 0 24 18 -1.</_>\r
+                <_>\r
+                  5 0 12 9 2.</_>\r
+                <_>\r
+                  17 9 12 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.1168550997972488</threshold>\r
+            <left_val>-0.7720596790313721</left_val>\r
+            <right_val>0.1248165965080261</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 16 1 2 -1.</_>\r
+                <_>\r
+                  35 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.3603649828583002e-005</threshold>\r
+            <left_val>0.1367060989141464</left_val>\r
+            <right_val>-0.1612793952226639</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 16 1 2 -1.</_>\r
+                <_>\r
+                  0 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.5056360280141234e-004</threshold>\r
+            <left_val>0.4486046135425568</left_val>\r
+            <right_val>-0.2171128988265991</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 6 8 12 -1.</_>\r
+                <_>\r
+                  19 6 4 12 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0163946095854044</threshold>\r
+            <left_val>-0.6582735180854797</left_val>\r
+            <right_val>0.1674550026655197</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 5 8 13 -1.</_>\r
+                <_>\r
+                  13 5 4 13 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0144828604534268</threshold>\r
+            <left_val>-0.6834514737129211</left_val>\r
+            <right_val>0.1345615983009338</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 16 1 2 -1.</_>\r
+                <_>\r
+                  35 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.9269471017178148e-005</threshold>\r
+            <left_val>-0.1499813944101334</left_val>\r
+            <right_val>0.1601772010326386</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 9 12 3 -1.</_>\r
+                <_>\r
+                  10 10 12 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>7.4323131702840328e-003</threshold>\r
+            <left_val>-0.1684845983982086</left_val>\r
+            <right_val>0.5396398901939392</right_val></_></_></trees>\r
+      <stage_threshold>-1.3820559978485107</stage_threshold>\r
+      <parent>1</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 3 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 1 8 -1.</_>\r
+                <_>\r
+                  0 14 1 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.3472499237395823e-004</threshold>\r
+            <left_val>0.4394924044609070</left_val>\r
+            <right_val>-0.4224875867366791</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  20 0 10 10 -1.</_>\r
+                <_>\r
+                  25 0 5 5 2.</_>\r
+                <_>\r
+                  20 5 5 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0329953208565712</threshold>\r
+            <left_val>-0.1979825049638748</left_val>\r
+            <right_val>0.5953487157821655</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 1 4 -1.</_>\r
+                <_>\r
+                  0 2 1 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.1011828579939902e-004</threshold>\r
+            <left_val>0.4440306127071381</left_val>\r
+            <right_val>-0.3074846863746643</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  19 0 13 18 -1.</_>\r
+                <_>\r
+                  19 9 13 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0819697380065918</threshold>\r
+            <left_val>-0.5333436727523804</left_val>\r
+            <right_val>0.1671810001134872</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 0 14 6 -1.</_>\r
+                <_>\r
+                  4 0 7 3 2.</_>\r
+                <_>\r
+                  11 3 7 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0177787002176046</threshold>\r
+            <left_val>-0.2045017927885056</left_val>\r
+            <right_val>0.5144413113594055</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 5 6 6 -1.</_>\r
+                <_>\r
+                  16 7 6 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0228346996009350</threshold>\r
+            <left_val>-0.1484607011079788</left_val>\r
+            <right_val>0.5624278783798218</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 7 7 8 -1.</_>\r
+                <_>\r
+                  13 9 7 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0386043414473534</threshold>\r
+            <left_val>-0.1273147016763687</left_val>\r
+            <right_val>0.8149448037147522</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  33 0 3 1 -1.</_>\r
+                <_>\r
+                  34 0 1 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.3286908445879817e-004</threshold>\r
+            <left_val>-0.3719344139099121</left_val>\r
+            <right_val>0.0676164999604225</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 1 10 4 -1.</_>\r
+                <_>\r
+                  6 2 10 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0232290402054787</threshold>\r
+            <left_val>0.7123206257820129</left_val>\r
+            <right_val>-0.1158939003944397</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 2 6 16 -1.</_>\r
+                <_>\r
+                  18 2 3 8 2.</_>\r
+                <_>\r
+                  15 10 3 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0195753592997789</threshold>\r
+            <left_val>-0.6899073123931885</left_val>\r
+            <right_val>0.1399950981140137</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 1 8 -1.</_>\r
+                <_>\r
+                  0 14 1 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.1991271427832544e-004</threshold>\r
+            <left_val>-0.1835464984178543</left_val>\r
+            <right_val>0.4943555891513825</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 4 6 6 -1.</_>\r
+                <_>\r
+                  29 6 2 6 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0570897497236729</threshold>\r
+            <left_val>0.6260784864425659</left_val>\r
+            <right_val>-0.0785768479108810</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 5 8 8 -1.</_>\r
+                <_>\r
+                  16 5 4 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0256996992975473</threshold>\r
+            <left_val>0.1155714020133019</left_val>\r
+            <right_val>-0.8193519115447998</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 5 6 6 -1.</_>\r
+                <_>\r
+                  29 7 2 6 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0325796194374561</threshold>\r
+            <left_val>-0.1176773980259895</left_val>\r
+            <right_val>0.4277622103691101</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 5 6 6 -1.</_>\r
+                <_>\r
+                  7 7 6 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0205922499299049</threshold>\r
+            <left_val>0.4868524074554443</left_val>\r
+            <right_val>-0.2131853997707367</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 5 12 9 -1.</_>\r
+                <_>\r
+                  15 5 6 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0174852795898914</threshold>\r
+            <left_val>-0.5228734016418457</left_val>\r
+            <right_val>0.1339704990386963</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 3 1 -1.</_>\r
+                <_>\r
+                  1 0 1 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.9153228327631950e-004</threshold>\r
+            <left_val>0.0963044911623001</left_val>\r
+            <right_val>-0.6886307001113892</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 4 18 6 -1.</_>\r
+                <_>\r
+                  15 6 18 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0575339011847973</threshold>\r
+            <left_val>-0.0870805233716965</left_val>\r
+            <right_val>0.4048064947128296</right_val></_></_></trees>\r
+      <stage_threshold>-1.3879380226135254</stage_threshold>\r
+      <parent>2</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 4 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 1 6 -1.</_>\r
+                <_>\r
+                  0 13 1 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.6606198884546757e-004</threshold>\r
+            <left_val>0.4277374148368835</left_val>\r
+            <right_val>-0.3542076945304871</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 6 30 6 -1.</_>\r
+                <_>\r
+                  13 8 10 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3055455982685089</threshold>\r
+            <left_val>-0.1639281064271927</left_val>\r
+            <right_val>0.8606523275375366</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 7 12 4 -1.</_>\r
+                <_>\r
+                  11 8 12 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0114494003355503</threshold>\r
+            <left_val>0.5972732901573181</left_val>\r
+            <right_val>-0.2323434054851532</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 8 9 3 -1.</_>\r
+                <_>\r
+                  14 9 9 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>6.3891541212797165e-003</threshold>\r
+            <left_val>-0.1291541010141373</left_val>\r
+            <right_val>0.6105204224586487</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 8 7 4 -1.</_>\r
+                <_>\r
+                  14 9 7 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-8.4334248676896095e-003</threshold>\r
+            <left_val>0.4792853891849518</left_val>\r
+            <right_val>-0.1900272965431213</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 7 18 6 -1.</_>\r
+                <_>\r
+                  12 9 18 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0538089312613010</threshold>\r
+            <left_val>-0.1149377003312111</left_val>\r
+            <right_val>0.5339453816413879</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 8 3 10 -1.</_>\r
+                <_>\r
+                  7 13 3 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.7580219688825309e-004</threshold>\r
+            <left_val>-0.3459854125976563</left_val>\r
+            <right_val>0.2548804879188538</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 10 1 6 -1.</_>\r
+                <_>\r
+                  35 13 1 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.3450840197037905e-004</threshold>\r
+            <left_val>0.2241459041833878</left_val>\r
+            <right_val>-0.1955007016658783</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 1 6 -1.</_>\r
+                <_>\r
+                  0 13 1 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.0016911700367928e-004</threshold>\r
+            <left_val>-0.1972054988145828</left_val>\r
+            <right_val>0.4967764019966126</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 13 9 5 -1.</_>\r
+                <_>\r
+                  21 13 3 5 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0150632699951530</threshold>\r
+            <left_val>0.1063077002763748</left_val>\r
+            <right_val>-0.4113821089267731</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 9 6 4 -1.</_>\r
+                <_>\r
+                  15 10 6 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>7.7588870190083981e-003</threshold>\r
+            <left_val>-0.1537311971187592</left_val>\r
+            <right_val>0.4893161952495575</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 4 18 8 -1.</_>\r
+                <_>\r
+                  16 6 18 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0454101189970970</threshold>\r
+            <left_val>-0.0735593065619469</left_val>\r
+            <right_val>0.2773792147636414</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 14 9 3 -1.</_>\r
+                <_>\r
+                  12 14 3 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0145996697247028</threshold>\r
+            <left_val>-0.7096682786941528</left_val>\r
+            <right_val>0.0975155606865883</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  32 0 4 6 -1.</_>\r
+                <_>\r
+                  32 0 2 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0172360707074404</threshold>\r
+            <left_val>0.0168695393949747</left_val>\r
+            <right_val>-0.5738832950592041</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 4 6 -1.</_>\r
+                <_>\r
+                  2 0 2 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0142307104542851</threshold>\r
+            <left_val>0.0947145000100136</left_val>\r
+            <right_val>-0.7839525938034058</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 0 6 7 -1.</_>\r
+                <_>\r
+                  29 2 2 7 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0437068603932858</threshold>\r
+            <left_val>0.6097965240478516</left_val>\r
+            <right_val>-0.1560188978910446</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 1 4 -1.</_>\r
+                <_>\r
+                  0 2 1 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-6.2343222089111805e-004</threshold>\r
+            <left_val>0.3485119044780731</left_val>\r
+            <right_val>-0.2170491069555283</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 8 6 4 -1.</_>\r
+                <_>\r
+                  29 10 2 4 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0192450508475304</threshold>\r
+            <left_val>-0.1171097978949547</left_val>\r
+            <right_val>0.3070116043090820</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 9 27 6 -1.</_>\r
+                <_>\r
+                  13 11 9 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2703577876091003</threshold>\r
+            <left_val>-0.0900964364409447</left_val>\r
+            <right_val>0.7665696144104004</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 14 2 3 -1.</_>\r
+                <_>\r
+                  31 14 1 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.5394480801187456e-004</threshold>\r
+            <left_val>-0.2002478986978531</left_val>\r
+            <right_val>0.1249336004257202</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 0 5 6 -1.</_>\r
+                <_>\r
+                  8 2 5 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0360139608383179</threshold>\r
+            <left_val>0.6702855825424194</left_val>\r
+            <right_val>-0.1057187989354134</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 7 11 3 -1.</_>\r
+                <_>\r
+                  14 8 11 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>9.2952791601419449e-003</threshold>\r
+            <left_val>-0.1057471036911011</left_val>\r
+            <right_val>0.4509387910366058</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 12 2 6 -1.</_>\r
+                <_>\r
+                  0 15 2 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.3304709359072149e-004</threshold>\r
+            <left_val>0.2793382108211517</left_val>\r
+            <right_val>-0.2457676976919174</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 13 2 4 -1.</_>\r
+                <_>\r
+                  34 15 2 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.9147620807634667e-005</threshold>\r
+            <left_val>0.0858138129115105</left_val>\r
+            <right_val>-0.0954695865511894</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 13 2 4 -1.</_>\r
+                <_>\r
+                  0 15 2 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.4382669148035347e-004</threshold>\r
+            <left_val>-0.2022008001804352</left_val>\r
+            <right_val>0.5454357862472534</right_val></_></_></trees>\r
+      <stage_threshold>-1.3538850545883179</stage_threshold>\r
+      <parent>3</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 5 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 6 4 12 -1.</_>\r
+                <_>\r
+                  3 10 4 4 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>7.9610757529735565e-003</threshold>\r
+            <left_val>-0.3672207891941071</left_val>\r
+            <right_val>0.4315434992313385</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 0 22 12 -1.</_>\r
+                <_>\r
+                  25 0 11 6 2.</_>\r
+                <_>\r
+                  14 6 11 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0633948296308517</threshold>\r
+            <left_val>-0.2073971033096314</left_val>\r
+            <right_val>0.5742601752281189</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 1 7 6 -1.</_>\r
+                <_>\r
+                  6 3 7 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0531933493912220</threshold>\r
+            <left_val>0.7255092263221741</left_val>\r
+            <right_val>-0.1434202045202255</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 5 14 3 -1.</_>\r
+                <_>\r
+                  12 6 14 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0154607696458697</threshold>\r
+            <left_val>-0.0960538163781166</left_val>\r
+            <right_val>0.7578523755073547</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 6 7 4 -1.</_>\r
+                <_>\r
+                  6 7 7 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0176431406289339</threshold>\r
+            <left_val>0.6681562066078186</left_val>\r
+            <right_val>-0.1417672932147980</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 3 6 4 -1.</_>\r
+                <_>\r
+                  18 4 6 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>9.5065636560320854e-003</threshold>\r
+            <left_val>-0.0962597429752350</left_val>\r
+            <right_val>0.4699633121490479</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 5 5 6 -1.</_>\r
+                <_>\r
+                  4 7 5 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.0446049533784389e-003</threshold>\r
+            <left_val>-0.1973251998424530</left_val>\r
+            <right_val>0.4283801019191742</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  33 0 3 4 -1.</_>\r
+                <_>\r
+                  34 0 1 4 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.2312041148543358e-003</threshold>\r
+            <left_val>0.1186169013381004</left_val>\r
+            <right_val>-0.6103963255882263</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 0 6 18 -1.</_>\r
+                <_>\r
+                  9 9 6 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0401590503752232</threshold>\r
+            <left_val>-0.4166434109210968</left_val>\r
+            <right_val>0.2167232930660248</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 6 24 6 -1.</_>\r
+                <_>\r
+                  14 8 8 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2852425873279572</threshold>\r
+            <left_val>-0.1043575033545494</left_val>\r
+            <right_val>0.8573396801948547</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 8 4 4 -1.</_>\r
+                <_>\r
+                  16 9 4 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.9264221452176571e-003</threshold>\r
+            <left_val>0.4706046879291534</left_val>\r
+            <right_val>-0.1399745941162109</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 8 13 4 -1.</_>\r
+                <_>\r
+                  13 9 13 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0137817002832890</threshold>\r
+            <left_val>-0.1271356940269470</left_val>\r
+            <right_val>0.4461891949176788</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 16 2 2 -1.</_>\r
+                <_>\r
+                  0 17 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.9873598618432879e-004</threshold>\r
+            <left_val>0.4702663123607636</left_val>\r
+            <right_val>-0.1548373997211456</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 14 1 4 -1.</_>\r
+                <_>\r
+                  35 15 1 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.5621389320585877e-004</threshold>\r
+            <left_val>0.1885481029748917</left_val>\r
+            <right_val>-0.0778397768735886</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 14 1 4 -1.</_>\r
+                <_>\r
+                  0 15 1 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.7597760092467070e-004</threshold>\r
+            <left_val>0.5769770145416260</left_val>\r
+            <right_val>-0.1335622072219849</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 6 9 7 -1.</_>\r
+                <_>\r
+                  18 6 3 7 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0106659103184938</threshold>\r
+            <left_val>-0.4106529951095581</left_val>\r
+            <right_val>0.1556212007999420</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 3 4 -1.</_>\r
+                <_>\r
+                  1 0 1 4 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.4135230816900730e-003</threshold>\r
+            <left_val>-0.7636343240737915</left_val>\r
+            <right_val>0.1020964980125427</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 16 2 2 -1.</_>\r
+                <_>\r
+                  35 16 1 1 2.</_>\r
+                <_>\r
+                  34 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.6471868447260931e-005</threshold>\r
+            <left_val>-0.1644393056631088</left_val>\r
+            <right_val>0.2290841937065125</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 16 2 2 -1.</_>\r
+                <_>\r
+                  0 16 1 1 2.</_>\r
+                <_>\r
+                  1 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.1611599368043244e-004</threshold>\r
+            <left_val>-0.1629032939672470</left_val>\r
+            <right_val>0.4575636088848114</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  22 0 10 4 -1.</_>\r
+                <_>\r
+                  22 0 5 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0108227198943496</threshold>\r
+            <left_val>-0.2446253001689911</left_val>\r
+            <right_val>0.1388894021511078</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 4 6 14 -1.</_>\r
+                <_>\r
+                  15 4 3 7 2.</_>\r
+                <_>\r
+                  18 11 3 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0150849102064967</threshold>\r
+            <left_val>-0.5781347751617432</left_val>\r
+            <right_val>0.1156411990523338</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 3 8 10 -1.</_>\r
+                <_>\r
+                  17 3 4 10 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0257159601897001</threshold>\r
+            <left_val>0.0396311990916729</left_val>\r
+            <right_val>-0.6527001261711121</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 2 5 -1.</_>\r
+                <_>\r
+                  1 0 1 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.6093570049852133e-003</threshold>\r
+            <left_val>0.1142188981175423</left_val>\r
+            <right_val>-0.5680108070373535</right_val></_></_></trees>\r
+      <stage_threshold>-1.3707510232925415</stage_threshold>\r
+      <parent>4</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 6 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 1 8 6 -1.</_>\r
+                <_>\r
+                  5 3 8 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0518619008362293</threshold>\r
+            <left_val>0.7043117284774780</left_val>\r
+            <right_val>-0.2214370071887970</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  19 0 11 18 -1.</_>\r
+                <_>\r
+                  19 9 11 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0503416284918785</threshold>\r
+            <left_val>-0.4639782905578613</left_val>\r
+            <right_val>0.2804746031761169</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 8 24 6 -1.</_>\r
+                <_>\r
+                  14 10 8 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2570973038673401</threshold>\r
+            <left_val>-0.1312427967786789</left_val>\r
+            <right_val>0.8239594101905823</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 6 10 3 -1.</_>\r
+                <_>\r
+                  14 7 10 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0110318996012211</threshold>\r
+            <left_val>-0.1425814032554627</left_val>\r
+            <right_val>0.6382390260696411</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 7 11 4 -1.</_>\r
+                <_>\r
+                  12 8 11 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0185650903731585</threshold>\r
+            <left_val>-0.1512387990951538</left_val>\r
+            <right_val>0.5988119244575501</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 0 16 6 -1.</_>\r
+                <_>\r
+                  26 0 8 3 2.</_>\r
+                <_>\r
+                  18 3 8 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0175023507326841</threshold>\r
+            <left_val>-0.1261979937553406</left_val>\r
+            <right_val>0.3817803859710693</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 3 7 3 -1.</_>\r
+                <_>\r
+                  4 4 7 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>7.2723729535937309e-003</threshold>\r
+            <left_val>-0.1510328948497772</left_val>\r
+            <right_val>0.5812842249870300</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 4 4 4 -1.</_>\r
+                <_>\r
+                  18 5 4 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.1504750996828079e-003</threshold>\r
+            <left_val>-0.0654647573828697</left_val>\r
+            <right_val>0.5639755129814148</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 3 10 4 -1.</_>\r
+                <_>\r
+                  4 4 10 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0185527391731739</threshold>\r
+            <left_val>0.5315709710121155</left_val>\r
+            <right_val>-0.1252657026052475</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 8 8 10 -1.</_>\r
+                <_>\r
+                  18 8 4 5 2.</_>\r
+                <_>\r
+                  14 13 4 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0231014806777239</threshold>\r
+            <left_val>-0.6794939041137695</left_val>\r
+            <right_val>0.1104625985026360</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 0 4 1 -1.</_>\r
+                <_>\r
+                  5 0 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.8539339362177998e-004</threshold>\r
+            <left_val>0.3010003864765167</left_val>\r
+            <right_val>-0.2120669931173325</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  20 0 10 8 -1.</_>\r
+                <_>\r
+                  25 0 5 4 2.</_>\r
+                <_>\r
+                  20 4 5 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0173191204667091</threshold>\r
+            <left_val>-0.0937381312251091</left_val>\r
+            <right_val>0.2100856006145477</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 0 10 8 -1.</_>\r
+                <_>\r
+                  13 0 5 4 2.</_>\r
+                <_>\r
+                  18 4 5 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0143056204542518</threshold>\r
+            <left_val>0.1800594925880432</left_val>\r
+            <right_val>-0.3977671861648560</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  21 5 6 13 -1.</_>\r
+                <_>\r
+                  23 5 2 13 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0257633402943611</threshold>\r
+            <left_val>8.7056998163461685e-003</left_val>\r
+            <right_val>-0.6289495229721069</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 5 6 13 -1.</_>\r
+                <_>\r
+                  11 5 2 13 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0153833404183388</threshold>\r
+            <left_val>-0.5341547131538391</left_val>\r
+            <right_val>0.1038073003292084</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 5 5 3 -1.</_>\r
+                <_>\r
+                  27 6 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.0605469578877091e-003</threshold>\r
+            <left_val>-0.0901285186409950</left_val>\r
+            <right_val>0.1679212003946304</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 0 3 6 -1.</_>\r
+                <_>\r
+                  10 2 3 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.5230729263275862e-003</threshold>\r
+            <left_val>-0.1711069047451019</left_val>\r
+            <right_val>0.3259654045104981</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  26 6 3 6 -1.</_>\r
+                <_>\r
+                  26 8 3 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0107892798259854</threshold>\r
+            <left_val>0.3610992133617401</left_val>\r
+            <right_val>-0.0663391500711441</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 11 36 7 -1.</_>\r
+                <_>\r
+                  18 11 18 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2795093953609467</threshold>\r
+            <left_val>-0.0746058970689774</left_val>\r
+            <right_val>0.7336987853050232</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 5 5 3 -1.</_>\r
+                <_>\r
+                  27 6 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.8369540125131607e-003</threshold>\r
+            <left_val>0.0448735393583775</left_val>\r
+            <right_val>-0.1860270053148270</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 5 5 3 -1.</_>\r
+                <_>\r
+                  4 6 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.6195949865505099e-003</threshold>\r
+            <left_val>-0.1392249017953873</left_val>\r
+            <right_val>0.4343700110912323</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 6 4 4 -1.</_>\r
+                <_>\r
+                  29 7 2 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0116479499265552</threshold>\r
+            <left_val>-0.0743575915694237</left_val>\r
+            <right_val>0.5420144200325012</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 15 8 2 -1.</_>\r
+                <_>\r
+                  16 15 4 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.9066400863230228e-003</threshold>\r
+            <left_val>-0.7055758833885193</left_val>\r
+            <right_val>0.0864336192607880</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 5 30 6 -1.</_>\r
+                <_>\r
+                  13 7 10 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3968684077262878</threshold>\r
+            <left_val>-0.0748983696103096</left_val>\r
+            <right_val>0.9406285881996155</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 7 16 6 -1.</_>\r
+                <_>\r
+                  6 9 16 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0576637797057629</threshold>\r
+            <left_val>-0.0965584069490433</left_val>\r
+            <right_val>0.5418242812156677</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 10 12 6 -1.</_>\r
+                <_>\r
+                  14 12 12 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0603195689618587</threshold>\r
+            <left_val>-0.0665010735392571</left_val>\r
+            <right_val>0.6402354836463928</right_val></_></_></trees>\r
+      <stage_threshold>-1.3303329944610596</stage_threshold>\r
+      <parent>5</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 7 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 0 12 10 -1.</_>\r
+                <_>\r
+                  6 0 6 5 2.</_>\r
+                <_>\r
+                  12 5 6 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0190502498298883</threshold>\r
+            <left_val>-0.4443340897560120</left_val>\r
+            <right_val>0.4394856989383698</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  25 2 7 16 -1.</_>\r
+                <_>\r
+                  25 10 7 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0201983004808426</threshold>\r
+            <left_val>-0.3170621991157532</left_val>\r
+            <right_val>0.1043293029069901</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 6 18 7 -1.</_>\r
+                <_>\r
+                  15 6 6 7 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0214780308306217</threshold>\r
+            <left_val>-0.3502483963966370</left_val>\r
+            <right_val>0.2635537087917328</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 0 26 18 -1.</_>\r
+                <_>\r
+                  18 0 13 9 2.</_>\r
+                <_>\r
+                  5 9 13 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.1018775999546051</threshold>\r
+            <left_val>-0.5988957881927490</left_val>\r
+            <right_val>0.1768579930067062</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 6 10 3 -1.</_>\r
+                <_>\r
+                  10 7 10 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0109741603955626</threshold>\r
+            <left_val>-0.1489523947238922</left_val>\r
+            <right_val>0.6011521816253662</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 6 6 4 -1.</_>\r
+                <_>\r
+                  17 7 6 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0114767104387283</threshold>\r
+            <left_val>0.4066570997238159</left_val>\r
+            <right_val>-0.1240468993782997</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 6 6 7 -1.</_>\r
+                <_>\r
+                  18 6 3 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0234311502426863</threshold>\r
+            <left_val>-0.7148783206939697</left_val>\r
+            <right_val>0.1427811980247498</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  26 6 5 4 -1.</_>\r
+                <_>\r
+                  26 7 5 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.4963559806346893e-003</threshold>\r
+            <left_val>-0.1704585999250412</left_val>\r
+            <right_val>0.1719308048486710</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 12 1 6 -1.</_>\r
+                <_>\r
+                  0 15 1 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.4855772759765387e-004</threshold>\r
+            <left_val>0.3155323863029480</left_val>\r
+            <right_val>-0.2144445031881332</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 4 18 14 -1.</_>\r
+                <_>\r
+                  18 4 9 7 2.</_>\r
+                <_>\r
+                  9 11 9 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0749126300215721</threshold>\r
+            <left_val>0.0912405624985695</left_val>\r
+            <right_val>-0.6395121216773987</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 5 6 3 -1.</_>\r
+                <_>\r
+                  6 6 6 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>6.8816398270428181e-003</threshold>\r
+            <left_val>-0.1490440964698792</left_val>\r
+            <right_val>0.4795236885547638</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 5 6 3 -1.</_>\r
+                <_>\r
+                  29 7 2 3 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0382125787436962</threshold>\r
+            <left_val>0.5288773775100708</left_val>\r
+            <right_val>-0.0618947297334671</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 8 3 3 -1.</_>\r
+                <_>\r
+                  6 9 3 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.4051730073988438e-003</threshold>\r
+            <left_val>-0.1193412989377976</left_val>\r
+            <right_val>0.5061342120170593</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 5 6 5 -1.</_>\r
+                <_>\r
+                  30 7 2 5 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0239668991416693</threshold>\r
+            <left_val>-0.0897205099463463</left_val>\r
+            <right_val>0.3315277993679047</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 5 5 6 -1.</_>\r
+                <_>\r
+                  6 7 5 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0341629907488823</threshold>\r
+            <left_val>0.5313478112220764</left_val>\r
+            <right_val>-0.1466650068759918</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 0 4 1 -1.</_>\r
+                <_>\r
+                  31 0 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.9642219413071871e-003</threshold>\r
+            <left_val>0.0907835885882378</left_val>\r
+            <right_val>-0.4303255975246429</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 0 4 1 -1.</_>\r
+                <_>\r
+                  3 0 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>9.6757910796441138e-005</threshold>\r
+            <left_val>0.2255253940820694</left_val>\r
+            <right_val>-0.2822071015834808</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 11 4 3 -1.</_>\r
+                <_>\r
+                  17 12 4 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.2862399239093065e-003</threshold>\r
+            <left_val>0.4051502048969269</left_val>\r
+            <right_val>-0.1177619993686676</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 3 7 4 -1.</_>\r
+                <_>\r
+                  12 4 7 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0116883097216487</threshold>\r
+            <left_val>-0.0918571278452873</left_val>\r
+            <right_val>0.6283488869667053</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 9 9 3 -1.</_>\r
+                <_>\r
+                  14 10 9 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-6.0287420637905598e-003</threshold>\r
+            <left_val>0.3926180899143219</left_val>\r
+            <right_val>-0.1228715032339096</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 17 21 1 -1.</_>\r
+                <_>\r
+                  8 17 7 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0137213403359056</threshold>\r
+            <left_val>-0.5529879927635193</left_val>\r
+            <right_val>0.0910412818193436</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 9 20 4 -1.</_>\r
+                <_>\r
+                  12 9 10 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0756266415119171</threshold>\r
+            <left_val>-0.0449295900762081</left_val>\r
+            <right_val>0.1744275987148285</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 9 22 4 -1.</_>\r
+                <_>\r
+                  14 9 11 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0934344828128815</threshold>\r
+            <left_val>-0.0845939517021179</left_val>\r
+            <right_val>0.6013116240501404</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  25 0 3 3 -1.</_>\r
+                <_>\r
+                  26 1 1 3 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>5.8748829178512096e-003</threshold>\r
+            <left_val>-0.0441314987838268</left_val>\r
+            <right_val>0.3956570923328400</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 9 4 3 -1.</_>\r
+                <_>\r
+                  14 10 4 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.0064537897706032e-003</threshold>\r
+            <left_val>-0.1141439974308014</left_val>\r
+            <right_val>0.3792538046836853</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  19 4 9 3 -1.</_>\r
+                <_>\r
+                  22 4 3 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0229454599320889</threshold>\r
+            <left_val>0.0246731899678707</left_val>\r
+            <right_val>-0.4152199923992157</right_val></_></_>\r
+        <_>\r
+          <!-- tree 26 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 4 9 3 -1.</_>\r
+                <_>\r
+                  11 4 3 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0128104602918029</threshold>\r
+            <left_val>-0.5155742764472961</left_val>\r
+            <right_val>0.0913196131587029</right_val></_></_>\r
+        <_>\r
+          <!-- tree 27 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 15 36 3 -1.</_>\r
+                <_>\r
+                  12 16 12 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2042552977800369</threshold>\r
+            <left_val>-0.0659275427460670</left_val>\r
+            <right_val>0.7594249248504639</right_val></_></_>\r
+        <_>\r
+          <!-- tree 28 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 0 4 2 -1.</_>\r
+                <_>\r
+                  2 0 4 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.9796327948570251e-003</threshold>\r
+            <left_val>0.1080627962946892</left_val>\r
+            <right_val>-0.5001627206802368</right_val></_></_>\r
+        <_>\r
+          <!-- tree 29 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  19 9 2 9 -1.</_>\r
+                <_>\r
+                  19 12 2 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0283976309001446</threshold>\r
+            <left_val>-0.0371529608964920</left_val>\r
+            <right_val>0.5401064753532410</right_val></_></_>\r
+        <_>\r
+          <!-- tree 30 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 7 8 3 -1.</_>\r
+                <_>\r
+                  13 8 8 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>6.0867150314152241e-003</threshold>\r
+            <left_val>-0.1197860985994339</left_val>\r
+            <right_val>0.3569226861000061</right_val></_></_>\r
+        <_>\r
+          <!-- tree 31 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 4 2 2 -1.</_>\r
+                <_>\r
+                  31 4 1 1 2.</_>\r
+                <_>\r
+                  30 5 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.1456899412441999e-004</threshold>\r
+            <left_val>0.1874015033245087</left_val>\r
+            <right_val>-0.0884172022342682</right_val></_></_>\r
+        <_>\r
+          <!-- tree 32 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 4 2 2 -1.</_>\r
+                <_>\r
+                  4 4 1 1 2.</_>\r
+                <_>\r
+                  5 5 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.8941858909092844e-004</threshold>\r
+            <left_val>-0.1259797960519791</left_val>\r
+            <right_val>0.3998227119445801</right_val></_></_>\r
+        <_>\r
+          <!-- tree 33 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 7 4 3 -1.</_>\r
+                <_>\r
+                  18 8 4 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.3047619722783566e-003</threshold>\r
+            <left_val>0.1549997031688690</left_val>\r
+            <right_val>-0.0753860473632813</right_val></_></_>\r
+        <_>\r
+          <!-- tree 34 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 0 1 8 -1.</_>\r
+                <_>\r
+                  9 0 1 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0129750100895762</threshold>\r
+            <left_val>-0.5534411072731018</left_val>\r
+            <right_val>0.0823542475700378</right_val></_></_>\r
+        <_>\r
+          <!-- tree 35 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  25 6 10 3 -1.</_>\r
+                <_>\r
+                  25 7 10 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>7.7442410401999950e-003</threshold>\r
+            <left_val>0.0276998002082109</left_val>\r
+            <right_val>-0.3483599126338959</right_val></_></_>\r
+        <_>\r
+          <!-- tree 36 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 6 10 3 -1.</_>\r
+                <_>\r
+                  1 7 10 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.4850629270076752e-003</threshold>\r
+            <left_val>-0.1297612935304642</left_val>\r
+            <right_val>0.3790883123874664</right_val></_></_></trees>\r
+      <stage_threshold>-1.5300060510635376</stage_threshold>\r
+      <parent>6</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 8 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 6 14 12 -1.</_>\r
+                <_>\r
+                  6 6 7 6 2.</_>\r
+                <_>\r
+                  13 12 7 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0403868816792965</threshold>\r
+            <left_val>0.5960354804992676</left_val>\r
+            <right_val>-0.3574176132678986</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 14 3 4 -1.</_>\r
+                <_>\r
+                  31 16 3 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-6.6068649175576866e-005</threshold>\r
+            <left_val>0.4462898075580597</left_val>\r
+            <right_val>-0.3595947027206421</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 12 2 4 -1.</_>\r
+                <_>\r
+                  1 14 2 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.7622239906340837e-003</threshold>\r
+            <left_val>0.1794701963663101</left_val>\r
+            <right_val>-0.7563151121139526</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 0 12 5 -1.</_>\r
+                <_>\r
+                  19 0 4 5 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0309677198529243</threshold>\r
+            <left_val>-0.2884705066680908</left_val>\r
+            <right_val>0.0768705308437347</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 0 8 14 -1.</_>\r
+                <_>\r
+                  12 0 4 14 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0305665601044893</threshold>\r
+            <left_val>0.1400360018014908</left_val>\r
+            <right_val>-0.7175536751747131</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 1 8 7 -1.</_>\r
+                <_>\r
+                  30 3 4 7 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>9.9054910242557526e-004</threshold>\r
+            <left_val>0.0829155892133713</left_val>\r
+            <right_val>-0.2919717133045197</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 14 20 4 -1.</_>\r
+                <_>\r
+                  8 14 10 2 2.</_>\r
+                <_>\r
+                  18 16 10 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0125777004286647</threshold>\r
+            <left_val>0.1538071930408478</left_val>\r
+            <right_val>-0.4688293039798737</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 11 24 3 -1.</_>\r
+                <_>\r
+                  14 12 8 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1239292025566101</threshold>\r
+            <left_val>-0.0908238589763641</left_val>\r
+            <right_val>0.7383757233619690</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 5 27 6 -1.</_>\r
+                <_>\r
+                  13 7 9 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3773748874664307</threshold>\r
+            <left_val>-0.0542329512536526</left_val>\r
+            <right_val>0.9229121804237366</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 0 22 18 -1.</_>\r
+                <_>\r
+                  18 0 11 9 2.</_>\r
+                <_>\r
+                  7 9 11 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1099637001752853</threshold>\r
+            <left_val>0.0915962681174278</left_val>\r
+            <right_val>-0.6597716808319092</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 0 3 2 -1.</_>\r
+                <_>\r
+                  16 1 3 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.2721329694613814e-003</threshold>\r
+            <left_val>0.3347575068473816</left_val>\r
+            <right_val>-0.1829068958759308</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 17 36 1 -1.</_>\r
+                <_>\r
+                  9 17 18 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0469062514603138</threshold>\r
+            <left_val>-0.0839710533618927</left_val>\r
+            <right_val>0.6984758973121643</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 5 12 1 -1.</_>\r
+                <_>\r
+                  5 5 6 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>3.2869930146262050e-004</threshold>\r
+            <left_val>0.1879463046789169</left_val>\r
+            <right_val>-0.2929005920886993</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 15 2 1 -1.</_>\r
+                <_>\r
+                  34 15 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>1.7333080177195370e-004</threshold>\r
+            <left_val>-0.2696416079998016</left_val>\r
+            <right_val>0.3494757115840912</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 8 16 4 -1.</_>\r
+                <_>\r
+                  7 9 16 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0198009591549635</threshold>\r
+            <left_val>-0.1467922925949097</left_val>\r
+            <right_val>0.4399561882019043</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 10 1 6 -1.</_>\r
+                <_>\r
+                  35 12 1 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.0056760695297271e-004</threshold>\r
+            <left_val>-0.1372741013765335</left_val>\r
+            <right_val>0.2221331000328064</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 8 3 4 -1.</_>\r
+                <_>\r
+                  13 9 3 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.4923149719834328e-003</threshold>\r
+            <left_val>0.3473525941371918</left_val>\r
+            <right_val>-0.1594821065664291</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 10 1 6 -1.</_>\r
+                <_>\r
+                  35 12 1 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.2736999603221193e-005</threshold>\r
+            <left_val>0.3152787089347839</left_val>\r
+            <right_val>-0.2306694984436035</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 0 1 4 -1.</_>\r
+                <_>\r
+                  11 1 1 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>6.6625140607357025e-004</threshold>\r
+            <left_val>-0.2013110071420670</left_val>\r
+            <right_val>0.2869189083576202</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 10 1 6 -1.</_>\r
+                <_>\r
+                  35 12 1 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.3850460163666867e-005</threshold>\r
+            <left_val>-0.2021923959255219</left_val>\r
+            <right_val>0.2307330965995789</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 0 1 14 -1.</_>\r
+                <_>\r
+                  18 0 1 7 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0409726314246655</threshold>\r
+            <left_val>0.0795431807637215</left_val>\r
+            <right_val>-0.8079563975334168</right_val></_></_></trees>\r
+      <stage_threshold>-1.4114329814910889</stage_threshold>\r
+      <parent>7</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 9 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 6 16 12 -1.</_>\r
+                <_>\r
+                  5 6 8 6 2.</_>\r
+                <_>\r
+                  13 12 8 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0469829291105270</threshold>\r
+            <left_val>0.7082253098487854</left_val>\r
+            <right_val>-0.3703424036502838</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 1 7 8 -1.</_>\r
+                <_>\r
+                  16 3 7 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-7.5753079727292061e-004</threshold>\r
+            <left_val>-0.1255030930042267</left_val>\r
+            <right_val>0.1394442021846771</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 4 8 10 -1.</_>\r
+                <_>\r
+                  14 4 4 5 2.</_>\r
+                <_>\r
+                  18 9 4 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0153272999450564</threshold>\r
+            <left_val>0.2161353975534439</left_val>\r
+            <right_val>-0.5629395246505737</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  22 0 9 3 -1.</_>\r
+                <_>\r
+                  25 0 3 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0181470401585102</threshold>\r
+            <left_val>-0.0320796482264996</left_val>\r
+            <right_val>0.3234755992889404</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 26 8 -1.</_>\r
+                <_>\r
+                  0 10 13 4 2.</_>\r
+                <_>\r
+                  13 14 13 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0473471917212009</threshold>\r
+            <left_val>-0.1738158017396927</left_val>\r
+            <right_val>0.5758044719696045</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 10 16 8 -1.</_>\r
+                <_>\r
+                  23 10 8 4 2.</_>\r
+                <_>\r
+                  15 14 8 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0598379410803318</threshold>\r
+            <left_val>0.4779787063598633</left_val>\r
+            <right_val>-0.1026028022170067</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 0 24 18 -1.</_>\r
+                <_>\r
+                  6 0 12 9 2.</_>\r
+                <_>\r
+                  18 9 12 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0527967996895313</threshold>\r
+            <left_val>-0.4798848927021027</left_val>\r
+            <right_val>0.1878775954246521</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 0 9 6 -1.</_>\r
+                <_>\r
+                  21 0 3 6 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0243854299187660</threshold>\r
+            <left_val>-0.3084166944026947</left_val>\r
+            <right_val>8.7605630978941917e-003</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 0 9 6 -1.</_>\r
+                <_>\r
+                  12 0 3 6 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0252883005887270</threshold>\r
+            <left_val>0.1391403973102570</left_val>\r
+            <right_val>-0.7109494209289551</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 1 5 14 -1.</_>\r
+                <_>\r
+                  30 8 5 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0216124504804611</threshold>\r
+            <left_val>-0.2328253984451294</left_val>\r
+            <right_val>0.0809946805238724</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 1 5 14 -1.</_>\r
+                <_>\r
+                  1 8 5 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.4023479092866182e-003</threshold>\r
+            <left_val>-0.2298990041017532</left_val>\r
+            <right_val>0.3788951039314270</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 8 26 6 -1.</_>\r
+                <_>\r
+                  23 8 13 3 2.</_>\r
+                <_>\r
+                  10 11 13 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1127460002899170</threshold>\r
+            <left_val>-0.0154747096821666</left_val>\r
+            <right_val>0.5703054070472717</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 8 28 6 -1.</_>\r
+                <_>\r
+                  0 8 14 3 2.</_>\r
+                <_>\r
+                  14 11 14 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0345168709754944</threshold>\r
+            <left_val>-0.1230008006095886</left_val>\r
+            <right_val>0.5677536725997925</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 0 24 12 -1.</_>\r
+                <_>\r
+                  24 0 12 6 2.</_>\r
+                <_>\r
+                  12 6 12 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0789848119020462</threshold>\r
+            <left_val>-0.1424216926097870</left_val>\r
+            <right_val>0.4694185853004456</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 1 14 2 -1.</_>\r
+                <_>\r
+                  3 1 14 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0153778595849872</threshold>\r
+            <left_val>0.6394686102867127</left_val>\r
+            <right_val>-0.1123619005084038</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  33 16 3 2 -1.</_>\r
+                <_>\r
+                  33 17 3 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.2373620595317334e-004</threshold>\r
+            <left_val>0.5558329820632935</left_val>\r
+            <right_val>-0.2724758088588715</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 0 9 14 -1.</_>\r
+                <_>\r
+                  15 0 3 14 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0247623901814222</threshold>\r
+            <left_val>-0.5040485858917236</left_val>\r
+            <right_val>0.1407779008150101</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 16 8 2 -1.</_>\r
+                <_>\r
+                  32 16 4 1 2.</_>\r
+                <_>\r
+                  28 17 4 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-9.4061157142277807e-005</threshold>\r
+            <left_val>0.3719528019428253</left_val>\r
+            <right_val>-0.2250299006700516</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 8 6 6 -1.</_>\r
+                <_>\r
+                  15 10 6 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0202563591301441</threshold>\r
+            <left_val>0.5105100870132446</left_val>\r
+            <right_val>-0.1429875940084457</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 6 22 6 -1.</_>\r
+                <_>\r
+                  24 6 11 3 2.</_>\r
+                <_>\r
+                  13 9 11 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0481228791177273</threshold>\r
+            <left_val>-0.0669795125722885</left_val>\r
+            <right_val>0.3662230968475342</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 26 4 -1.</_>\r
+                <_>\r
+                  0 10 13 2 2.</_>\r
+                <_>\r
+                  13 12 13 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0237878002226353</threshold>\r
+            <left_val>0.5081325173377991</left_val>\r
+            <right_val>-0.1290815025568008</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  24 16 4 2 -1.</_>\r
+                <_>\r
+                  24 17 4 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.0520319920033216e-003</threshold>\r
+            <left_val>-0.1560467034578323</left_val>\r
+            <right_val>0.0662133172154427</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 16 3 2 -1.</_>\r
+                <_>\r
+                  9 17 3 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.6640200521796942e-003</threshold>\r
+            <left_val>-0.7254558205604553</left_val>\r
+            <right_val>0.0823654532432556</right_val></_></_></trees>\r
+      <stage_threshold>-1.3777890205383301</stage_threshold>\r
+      <parent>8</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 10 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 7 18 8 -1.</_>\r
+                <_>\r
+                  3 7 9 4 2.</_>\r
+                <_>\r
+                  12 11 9 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0502246208488941</threshold>\r
+            <left_val>0.7084565758705139</left_val>\r
+            <right_val>-0.2558549940586090</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  23 0 8 4 -1.</_>\r
+                <_>\r
+                  23 0 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0140728699043393</threshold>\r
+            <left_val>0.0630331784486771</left_val>\r
+            <right_val>-0.0598385296761990</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 0 8 4 -1.</_>\r
+                <_>\r
+                  9 0 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0178040098398924</threshold>\r
+            <left_val>0.1941471993923187</left_val>\r
+            <right_val>-0.5844426751136780</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 10 24 3 -1.</_>\r
+                <_>\r
+                  14 11 8 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1304673999547958</threshold>\r
+            <left_val>-0.1151698008179665</left_val>\r
+            <right_val>0.8504030108451843</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 5 5 6 -1.</_>\r
+                <_>\r
+                  5 7 5 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0175068005919456</threshold>\r
+            <left_val>-0.2071896940469742</left_val>\r
+            <right_val>0.4643828868865967</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 16 26 2 -1.</_>\r
+                <_>\r
+                  18 16 13 1 2.</_>\r
+                <_>\r
+                  5 17 13 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.4240020476281643e-003</threshold>\r
+            <left_val>-0.6656516790390015</left_val>\r
+            <right_val>0.1403498947620392</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 7 24 4 -1.</_>\r
+                <_>\r
+                  0 7 12 2 2.</_>\r
+                <_>\r
+                  12 9 12 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0345711186528206</threshold>\r
+            <left_val>0.6511297821998596</left_val>\r
+            <right_val>-0.1490191966295242</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  23 14 13 4 -1.</_>\r
+                <_>\r
+                  23 15 13 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.2270249687135220e-003</threshold>\r
+            <left_val>-1.6027219826355577e-003</left_val>\r
+            <right_val>0.3895606100559235</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 10 18 8 -1.</_>\r
+                <_>\r
+                  2 10 9 4 2.</_>\r
+                <_>\r
+                  11 14 9 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0506620407104492</threshold>\r
+            <left_val>0.5803576707839966</left_val>\r
+            <right_val>-0.1514143943786621</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 10 6 4 -1.</_>\r
+                <_>\r
+                  15 11 6 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.0715770125389099e-003</threshold>\r
+            <left_val>0.5300896763801575</left_val>\r
+            <right_val>-0.1449830979108810</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 6 24 2 -1.</_>\r
+                <_>\r
+                  0 6 12 1 2.</_>\r
+                <_>\r
+                  12 7 12 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0118635101243854</threshold>\r
+            <left_val>0.6729742288589478</left_val>\r
+            <right_val>-0.1106354966759682</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 0 18 18 -1.</_>\r
+                <_>\r
+                  17 9 18 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0605200305581093</threshold>\r
+            <left_val>-0.3316448926925659</left_val>\r
+            <right_val>0.2119556069374085</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 0 11 2 -1.</_>\r
+                <_>\r
+                  1 1 11 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.7340779826045036e-003</threshold>\r
+            <left_val>-0.6941440105438232</left_val>\r
+            <right_val>0.0727053135633469</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 6 8 12 -1.</_>\r
+                <_>\r
+                  19 6 4 6 2.</_>\r
+                <_>\r
+                  15 12 4 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0324861407279968</threshold>\r
+            <left_val>-0.5185081958770752</left_val>\r
+            <right_val>0.0592126213014126</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 1 32 12 -1.</_>\r
+                <_>\r
+                  2 1 16 6 2.</_>\r
+                <_>\r
+                  18 7 16 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0832797065377235</threshold>\r
+            <left_val>0.1206794008612633</left_val>\r
+            <right_val>-0.5309563279151917</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  29 10 7 8 -1.</_>\r
+                <_>\r
+                  29 12 7 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>7.8782817581668496e-004</threshold>\r
+            <left_val>-0.2737655937671661</left_val>\r
+            <right_val>0.2716251909732819</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 2 8 10 -1.</_>\r
+                <_>\r
+                  12 2 4 5 2.</_>\r
+                <_>\r
+                  16 7 4 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0175391808152199</threshold>\r
+            <left_val>-0.5690230131149292</left_val>\r
+            <right_val>0.1228737011551857</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 12 6 4 -1.</_>\r
+                <_>\r
+                  15 13 6 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.8226347900927067e-003</threshold>\r
+            <left_val>0.4386585950851440</left_val>\r
+            <right_val>-0.1493742018938065</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 12 8 6 -1.</_>\r
+                <_>\r
+                  0 14 8 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0100575601682067</threshold>\r
+            <left_val>-0.6616886258125305</left_val>\r
+            <right_val>0.1144542992115021</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 9 26 8 -1.</_>\r
+                <_>\r
+                  23 9 13 4 2.</_>\r
+                <_>\r
+                  10 13 13 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0903454273939133</threshold>\r
+            <left_val>-0.0666652470827103</left_val>\r
+            <right_val>0.2870647907257080</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 8 22 10 -1.</_>\r
+                <_>\r
+                  7 8 11 5 2.</_>\r
+                <_>\r
+                  18 13 11 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0675872936844826</threshold>\r
+            <left_val>-0.5363761186599731</left_val>\r
+            <right_val>0.1123751997947693</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 9 8 3 -1.</_>\r
+                <_>\r
+                  14 10 8 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-8.1747528165578842e-003</threshold>\r
+            <left_val>0.4434241950511932</left_val>\r
+            <right_val>-0.1297765970230103</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 3 4 9 -1.</_>\r
+                <_>\r
+                  11 6 4 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0115505503490567</threshold>\r
+            <left_val>0.3273158073425293</left_val>\r
+            <right_val>-0.1700761020183563</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  29 14 2 2 -1.</_>\r
+                <_>\r
+                  29 14 2 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-1.7406829283572733e-004</threshold>\r
+            <left_val>0.1327867954969406</left_val>\r
+            <right_val>-0.1081293970346451</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 13 8 3 -1.</_>\r
+                <_>\r
+                  14 14 8 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.6040047891438007e-003</threshold>\r
+            <left_val>-0.1226582005620003</left_val>\r
+            <right_val>0.4412580132484436</right_val></_></_></trees>\r
+      <stage_threshold>-1.3266400098800659</stage_threshold>\r
+      <parent>9</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 11 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 3 7 8 -1.</_>\r
+                <_>\r
+                  9 5 7 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0469432808458805</threshold>\r
+            <left_val>0.6094344258308411</left_val>\r
+            <right_val>-0.2637800872325897</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 13 1 4 -1.</_>\r
+                <_>\r
+                  28 13 1 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-1.6899159527383745e-004</threshold>\r
+            <left_val>0.1665875017642975</left_val>\r
+            <right_val>-0.1254196017980576</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 13 4 1 -1.</_>\r
+                <_>\r
+                  8 13 2 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>2.7983370237052441e-003</threshold>\r
+            <left_val>0.1905744969844818</left_val>\r
+            <right_val>-0.6568077206611633</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 9 4 3 -1.</_>\r
+                <_>\r
+                  16 10 4 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.0413960814476013e-003</threshold>\r
+            <left_val>-0.1731746941804886</left_val>\r
+            <right_val>0.6362075209617615</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 8 10 4 -1.</_>\r
+                <_>\r
+                  13 9 10 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-8.6033362895250320e-003</threshold>\r
+            <left_val>0.6025841832160950</left_val>\r
+            <right_val>-0.2316936999559403</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 8 8 3 -1.</_>\r
+                <_>\r
+                  14 9 8 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.8247945532202721e-003</threshold>\r
+            <left_val>-0.1756583005189896</left_val>\r
+            <right_val>0.7104166746139526</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 10 6 2 -1.</_>\r
+                <_>\r
+                  4 12 2 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-9.2786159366369247e-003</threshold>\r
+            <left_val>-0.6890857219696045</left_val>\r
+            <right_val>0.1789650022983551</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 10 6 3 -1.</_>\r
+                <_>\r
+                  16 11 6 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>6.0826768167316914e-003</threshold>\r
+            <left_val>-0.1706372052431107</left_val>\r
+            <right_val>0.5375748276710510</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 5 8 13 -1.</_>\r
+                <_>\r
+                  12 5 4 13 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0390073694288731</threshold>\r
+            <left_val>-0.6834635734558106</left_val>\r
+            <right_val>0.1441708058118820</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 36 8 -1.</_>\r
+                <_>\r
+                  18 0 18 4 2.</_>\r
+                <_>\r
+                  0 4 18 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0703379511833191</threshold>\r
+            <left_val>-0.6508566737174988</left_val>\r
+            <right_val>0.1008547991514206</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 5 8 12 -1.</_>\r
+                <_>\r
+                  1 5 4 6 2.</_>\r
+                <_>\r
+                  5 11 4 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0331666991114616</threshold>\r
+            <left_val>-0.1932571977376938</left_val>\r
+            <right_val>0.4779865145683289</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 8 18 10 -1.</_>\r
+                <_>\r
+                  27 8 9 5 2.</_>\r
+                <_>\r
+                  18 13 9 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0752889066934586</threshold>\r
+            <left_val>-0.0695677325129509</left_val>\r
+            <right_val>0.4125064909458160</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 8 18 10 -1.</_>\r
+                <_>\r
+                  0 8 9 5 2.</_>\r
+                <_>\r
+                  9 13 9 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0705017298460007</threshold>\r
+            <left_val>0.7157300710678101</left_val>\r
+            <right_val>-0.1022270023822784</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 5 14 3 -1.</_>\r
+                <_>\r
+                  11 6 14 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0122494902461767</threshold>\r
+            <left_val>-0.1061242967844009</left_val>\r
+            <right_val>0.6295958161354065</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 6 16 6 -1.</_>\r
+                <_>\r
+                  10 8 16 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0706446766853333</threshold>\r
+            <left_val>-0.0973746329545975</left_val>\r
+            <right_val>0.6762204170227051</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 2 24 16 -1.</_>\r
+                <_>\r
+                  19 2 12 8 2.</_>\r
+                <_>\r
+                  7 10 12 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1624888032674789</threshold>\r
+            <left_val>0.0527133606374264</left_val>\r
+            <right_val>-0.8494657278060913</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 1 18 15 -1.</_>\r
+                <_>\r
+                  6 6 6 5 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1380825042724609</threshold>\r
+            <left_val>0.1406479030847549</left_val>\r
+            <right_val>-0.4764721095561981</right_val></_></_></trees>\r
+      <stage_threshold>-1.4497200250625610</stage_threshold>\r
+      <parent>10</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 12 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 5 16 6 -1.</_>\r
+                <_>\r
+                  12 5 8 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0418823398649693</threshold>\r
+            <left_val>-0.8077452778816223</left_val>\r
+            <right_val>0.2640967071056366</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  29 0 6 11 -1.</_>\r
+                <_>\r
+                  31 2 2 11 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0536229908466339</threshold>\r
+            <left_val>0.5580704212188721</left_val>\r
+            <right_val>-0.2498968988656998</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 8 9 1 -1.</_>\r
+                <_>\r
+                  5 11 3 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>9.3709938228130341e-003</threshold>\r
+            <left_val>0.2650170028209686</left_val>\r
+            <right_val>-0.5990694761276245</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 6 17 3 -1.</_>\r
+                <_>\r
+                  10 7 17 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0139097301289439</threshold>\r
+            <left_val>-0.1470918059349060</left_val>\r
+            <right_val>0.7354667186737061</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 6 6 2 -1.</_>\r
+                <_>\r
+                  20 8 2 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0190035700798035</threshold>\r
+            <left_val>-0.1887511014938355</left_val>\r
+            <right_val>0.7487422227859497</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 11 12 3 -1.</_>\r
+                <_>\r
+                  13 12 12 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.9199850074946880e-003</threshold>\r
+            <left_val>-0.1599563956260681</left_val>\r
+            <right_val>0.5673577785491943</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 3 8 8 -1.</_>\r
+                <_>\r
+                  2 3 4 4 2.</_>\r
+                <_>\r
+                  6 7 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0247051399201155</threshold>\r
+            <left_val>0.7556992173194885</left_val>\r
+            <right_val>-0.1235088035464287</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 12 18 4 -1.</_>\r
+                <_>\r
+                  27 12 9 2 2.</_>\r
+                <_>\r
+                  18 14 9 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0160583592951298</threshold>\r
+            <left_val>-0.1282460987567902</left_val>\r
+            <right_val>0.5129454731941223</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 5 11 3 -1.</_>\r
+                <_>\r
+                  11 6 11 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.8288700208067894e-003</threshold>\r
+            <left_val>-0.1686663925647736</left_val>\r
+            <right_val>0.6152185201644898</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 7 14 4 -1.</_>\r
+                <_>\r
+                  14 8 14 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0175563395023346</threshold>\r
+            <left_val>-0.1090169996023178</left_val>\r
+            <right_val>0.5803176164627075</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 8 16 10 -1.</_>\r
+                <_>\r
+                  9 8 8 5 2.</_>\r
+                <_>\r
+                  17 13 8 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0421881191432476</threshold>\r
+            <left_val>0.1486624032258987</left_val>\r
+            <right_val>-0.6922233104705811</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 17 2 1 -1.</_>\r
+                <_>\r
+                  18 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.0687207840383053e-004</threshold>\r
+            <left_val>0.0315808691084385</left_val>\r
+            <right_val>-0.3700995147228241</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 10 5 3 -1.</_>\r
+                <_>\r
+                  13 11 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.7651190757751465e-003</threshold>\r
+            <left_val>-0.2133754044771195</left_val>\r
+            <right_val>0.4704301059246063</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 17 2 1 -1.</_>\r
+                <_>\r
+                  18 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.2231520377099514e-003</threshold>\r
+            <left_val>-0.7818967103958130</left_val>\r
+            <right_val>0.0209542606025934</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 5 8 3 -1.</_>\r
+                <_>\r
+                  6 6 8 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>8.5432287305593491e-003</threshold>\r
+            <left_val>-0.1455352008342743</left_val>\r
+            <right_val>0.6789504289627075</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 17 2 1 -1.</_>\r
+                <_>\r
+                  18 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.0657219283748418e-004</threshold>\r
+            <left_val>0.2437624037265778</left_val>\r
+            <right_val>-0.0675588026642799</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 5 5 3 -1.</_>\r
+                <_>\r
+                  10 6 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.6798270195722580e-003</threshold>\r
+            <left_val>0.6684169769287109</left_val>\r
+            <right_val>-0.1388788074254990</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 5 34 10 -1.</_>\r
+                <_>\r
+                  19 5 17 5 2.</_>\r
+                <_>\r
+                  2 10 17 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1220175996422768</threshold>\r
+            <left_val>0.1102816015481949</left_val>\r
+            <right_val>-0.7530742287635803</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 2 12 3 -1.</_>\r
+                <_>\r
+                  6 5 6 3 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0204043406993151</threshold>\r
+            <left_val>0.1645383983850479</left_val>\r
+            <right_val>-0.5223162174224854</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 6 1 6 -1.</_>\r
+                <_>\r
+                  35 8 1 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.0343370791524649e-004</threshold>\r
+            <left_val>-0.1301285028457642</left_val>\r
+            <right_val>0.2635852992534638</right_val></_></_></trees>\r
+      <stage_threshold>-1.4622910022735596</stage_threshold>\r
+      <parent>11</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 13 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 6 13 6 -1.</_>\r
+                <_>\r
+                  10 8 13 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0727917104959488</threshold>\r
+            <left_val>-0.1372790038585663</left_val>\r
+            <right_val>0.8291574716567993</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 5 6 4 -1.</_>\r
+                <_>\r
+                  15 6 6 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>7.5939209200441837e-003</threshold>\r
+            <left_val>-0.1678012013435364</left_val>\r
+            <right_val>0.5683972239494324</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 2 11 4 -1.</_>\r
+                <_>\r
+                  4 3 11 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0235623903572559</threshold>\r
+            <left_val>0.6500560045242310</left_val>\r
+            <right_val>-0.1424535065889359</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  26 6 10 6 -1.</_>\r
+                <_>\r
+                  31 6 5 3 2.</_>\r
+                <_>\r
+                  26 9 5 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0173929501324892</threshold>\r
+            <left_val>-0.1529144942760468</left_val>\r
+            <right_val>0.3425354063510895</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 7 11 8 -1.</_>\r
+                <_>\r
+                  10 9 11 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0718258023262024</threshold>\r
+            <left_val>-0.0991311371326447</left_val>\r
+            <right_val>0.8279678821563721</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 2 4 9 -1.</_>\r
+                <_>\r
+                  29 3 2 9 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0136738000437617</threshold>\r
+            <left_val>-0.0417872704565525</left_val>\r
+            <right_val>0.5078148245811462</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 2 10 4 -1.</_>\r
+                <_>\r
+                  7 3 10 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0285859592258930</threshold>\r
+            <left_val>0.7011532187461853</left_val>\r
+            <right_val>-0.1314471065998077</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 0 5 2 -1.</_>\r
+                <_>\r
+                  31 1 5 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.1845720261335373e-004</threshold>\r
+            <left_val>0.2845467031002045</left_val>\r
+            <right_val>-0.3123202919960022</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 6 16 12 -1.</_>\r
+                <_>\r
+                  10 10 16 4 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0520956814289093</threshold>\r
+            <left_val>0.4181294143199921</left_val>\r
+            <right_val>-0.1699313074350357</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 4 4 3 -1.</_>\r
+                <_>\r
+                  18 5 4 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.2256329432129860e-003</threshold>\r
+            <left_val>-0.0904662087559700</left_val>\r
+            <right_val>0.3008623123168945</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 10 6 6 -1.</_>\r
+                <_>\r
+                  11 12 6 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0347716398537159</threshold>\r
+            <left_val>-0.0842167884111404</left_val>\r
+            <right_val>0.7801663875579834</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 8 1 10 -1.</_>\r
+                <_>\r
+                  35 13 1 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.3356630224734545e-003</threshold>\r
+            <left_val>0.3316453099250794</left_val>\r
+            <right_val>-0.1696092039346695</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 36 8 -1.</_>\r
+                <_>\r
+                  18 10 18 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2510198056697846</threshold>\r
+            <left_val>-0.1392046958208084</left_val>\r
+            <right_val>0.6633893251419067</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 7 6 8 -1.</_>\r
+                <_>\r
+                  19 7 3 4 2.</_>\r
+                <_>\r
+                  16 11 3 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-9.9689997732639313e-003</threshold>\r
+            <left_val>-0.3713817000389099</left_val>\r
+            <right_val>0.1290012001991272</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 6 8 4 -1.</_>\r
+                <_>\r
+                  7 6 4 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0143037298694253</threshold>\r
+            <left_val>0.1572919934988022</left_val>\r
+            <right_val>-0.5093821287155151</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  21 11 4 3 -1.</_>\r
+                <_>\r
+                  21 12 4 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.0856059901416302e-003</threshold>\r
+            <left_val>0.4656791090965271</left_val>\r
+            <right_val>-0.0662708207964897</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 9 1 8 -1.</_>\r
+                <_>\r
+                  0 13 1 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.6260809176601470e-004</threshold>\r
+            <left_val>0.2933731079101563</left_val>\r
+            <right_val>-0.2333986014127731</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 7 6 4 -1.</_>\r
+                <_>\r
+                  29 9 2 4 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0344354808330536</threshold>\r
+            <left_val>0.7002474069595337</left_val>\r
+            <right_val>-0.1013351008296013</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 14 8 4 -1.</_>\r
+                <_>\r
+                  12 14 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.2570890188217163e-003</threshold>\r
+            <left_val>-0.5628641247749329</left_val>\r
+            <right_val>0.1314862072467804</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 17 2 1 -1.</_>\r
+                <_>\r
+                  18 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.8352940939366817e-004</threshold>\r
+            <left_val>0.0262274891138077</left_val>\r
+            <right_val>-0.2605080008506775</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 4 11 4 -1.</_>\r
+                <_>\r
+                  10 5 11 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0129999397322536</threshold>\r
+            <left_val>0.5311700105667114</left_val>\r
+            <right_val>-0.1202305033802986</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 12 2 4 -1.</_>\r
+                <_>\r
+                  17 13 2 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.0009329998865724e-003</threshold>\r
+            <left_val>0.3964129984378815</left_val>\r
+            <right_val>-0.1599515974521637</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 4 5 3 -1.</_>\r
+                <_>\r
+                  13 5 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.1314200498163700e-003</threshold>\r
+            <left_val>-0.1492992043495178</left_val>\r
+            <right_val>0.4295912086963654</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 12 11 2 -1.</_>\r
+                <_>\r
+                  13 13 11 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.7364455685019493e-003</threshold>\r
+            <left_val>-0.1127102002501488</left_val>\r
+            <right_val>0.4945647120475769</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 16 2 2 -1.</_>\r
+                <_>\r
+                  1 16 1 1 2.</_>\r
+                <_>\r
+                  2 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.6352869463153183e-004</threshold>\r
+            <left_val>-0.1212491989135742</left_val>\r
+            <right_val>0.4943937957286835</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 7 6 4 -1.</_>\r
+                <_>\r
+                  29 9 2 4 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0538859590888023</threshold>\r
+            <left_val>0.7035598754882813</left_val>\r
+            <right_val>-0.0132305501028895</right_val></_></_>\r
+        <_>\r
+          <!-- tree 26 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 7 6 6 -1.</_>\r
+                <_>\r
+                  4 9 6 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.2885672301054001e-003</threshold>\r
+            <left_val>-0.1754055023193359</left_val>\r
+            <right_val>0.3567946851253510</right_val></_></_>\r
+        <_>\r
+          <!-- tree 27 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 6 4 5 -1.</_>\r
+                <_>\r
+                  31 7 2 5 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>7.9539399594068527e-003</threshold>\r
+            <left_val>-0.0998840034008026</left_val>\r
+            <right_val>0.3137167096138001</right_val></_></_></trees>\r
+      <stage_threshold>-1.3885619640350342</stage_threshold>\r
+      <parent>12</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 14 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 5 20 7 -1.</_>\r
+                <_>\r
+                  13 5 10 7 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0567523688077927</threshold>\r
+            <left_val>-0.3257648050785065</left_val>\r
+            <right_val>0.3737593889236450</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 2 3 12 -1.</_>\r
+                <_>\r
+                  30 8 3 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>7.0906039327383041e-003</threshold>\r
+            <left_val>-0.1391862928867340</left_val>\r
+            <right_val>0.1503984034061432</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 2 12 4 -1.</_>\r
+                <_>\r
+                  4 2 12 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0412988215684891</threshold>\r
+            <left_val>0.4702607989311218</left_val>\r
+            <right_val>-0.1617936044931412</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 8 36 6 -1.</_>\r
+                <_>\r
+                  12 10 12 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.4775018990039825</threshold>\r
+            <left_val>-0.1006157994270325</left_val>\r
+            <right_val>0.7635074257850647</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 5 30 6 -1.</_>\r
+                <_>\r
+                  13 7 10 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.4226649105548859</threshold>\r
+            <left_val>-0.0351909101009369</left_val>\r
+            <right_val>0.8303126096725464</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 4 12 9 -1.</_>\r
+                <_>\r
+                  18 4 4 9 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0330318994820118</threshold>\r
+            <left_val>-0.3750554919242859</left_val>\r
+            <right_val>0.0489026196300983</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 17 6 1 -1.</_>\r
+                <_>\r
+                  3 17 3 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.1923770216526464e-004</threshold>\r
+            <left_val>-0.2661466896533966</left_val>\r
+            <right_val>0.2234652042388916</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 0 1 2 -1.</_>\r
+                <_>\r
+                  34 0 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.2101400904357433e-003</threshold>\r
+            <left_val>8.7575968354940414e-003</left_val>\r
+            <right_val>-0.5938351750373840</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 0 2 1 -1.</_>\r
+                <_>\r
+                  2 0 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>3.3337279455736279e-004</threshold>\r
+            <left_val>-0.2122765928506851</left_val>\r
+            <right_val>0.2473503947257996</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 3 3 8 -1.</_>\r
+                <_>\r
+                  32 4 1 8 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0117938900366426</threshold>\r
+            <left_val>-0.0689979493618011</left_val>\r
+            <right_val>0.5898082852363586</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 6 26 12 -1.</_>\r
+                <_>\r
+                  5 6 13 6 2.</_>\r
+                <_>\r
+                  18 12 13 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.1143207997083664</threshold>\r
+            <left_val>-0.7733368277549744</left_val>\r
+            <right_val>0.0628622919321060</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 4 12 9 -1.</_>\r
+                <_>\r
+                  18 4 4 9 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0824010074138641</threshold>\r
+            <left_val>0.0168252792209387</left_val>\r
+            <right_val>-0.6170011758804321</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 7 10 10 -1.</_>\r
+                <_>\r
+                  13 7 5 5 2.</_>\r
+                <_>\r
+                  18 12 5 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0181261505931616</threshold>\r
+            <left_val>0.0995334684848785</left_val>\r
+            <right_val>-0.3830915987491608</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 5 4 6 -1.</_>\r
+                <_>\r
+                  31 6 2 6 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>8.9282449334859848e-003</threshold>\r
+            <left_val>-0.1010973975062370</left_val>\r
+            <right_val>0.2948305010795593</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 5 6 4 -1.</_>\r
+                <_>\r
+                  5 6 6 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0174371004104614</threshold>\r
+            <left_val>0.4614987075328827</left_val>\r
+            <right_val>-0.1050636023283005</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  29 5 4 5 -1.</_>\r
+                <_>\r
+                  30 6 2 5 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0112803103402257</threshold>\r
+            <left_val>0.4561164975166321</left_val>\r
+            <right_val>-0.1013116016983986</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 5 5 4 -1.</_>\r
+                <_>\r
+                  6 6 5 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>7.0190089754760265e-003</threshold>\r
+            <left_val>-0.1368626952171326</left_val>\r
+            <right_val>0.4173265993595123</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 36 1 -1.</_>\r
+                <_>\r
+                  12 0 12 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.2439709175378084e-003</threshold>\r
+            <left_val>0.2321648001670837</left_val>\r
+            <right_val>-0.1791536957025528</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 3 24 6 -1.</_>\r
+                <_>\r
+                  14 5 8 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3561589121818543</threshold>\r
+            <left_val>-0.0486268103122711</left_val>\r
+            <right_val>0.9537345767021179</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 12 6 3 -1.</_>\r
+                <_>\r
+                  15 13 6 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.8440749049186707e-003</threshold>\r
+            <left_val>-0.1028828024864197</left_val>\r
+            <right_val>0.3671778142452240</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 1 9 17 -1.</_>\r
+                <_>\r
+                  14 1 3 17 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0609500296413898</threshold>\r
+            <left_val>0.0561417415738106</left_val>\r
+            <right_val>-0.6458569765090942</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  18 1 18 10 -1.</_>\r
+                <_>\r
+                  18 1 9 10 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1814922988414764</threshold>\r
+            <left_val>0.0308063905686140</left_val>\r
+            <right_val>-0.4604896008968353</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 1 18 10 -1.</_>\r
+                <_>\r
+                  9 1 9 10 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0923592597246170</threshold>\r
+            <left_val>-0.4524821043014526</left_val>\r
+            <right_val>0.0881522372364998</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 7 4 5 -1.</_>\r
+                <_>\r
+                  31 8 2 5 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>7.6072998344898224e-003</threshold>\r
+            <left_val>-0.0971223264932632</left_val>\r
+            <right_val>0.2155224978923798</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 10 1 3 -1.</_>\r
+                <_>\r
+                  0 11 1 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.6946710790507495e-004</threshold>\r
+            <left_val>-0.4089371860027313</left_val>\r
+            <right_val>0.0800421908497810</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  33 16 2 2 -1.</_>\r
+                <_>\r
+                  34 16 1 1 2.</_>\r
+                <_>\r
+                  33 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.0301820293534547e-004</threshold>\r
+            <left_val>-0.1153035983443260</left_val>\r
+            <right_val>0.2795535027980804</right_val></_></_>\r
+        <_>\r
+          <!-- tree 26 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 16 2 2 -1.</_>\r
+                <_>\r
+                  1 16 1 1 2.</_>\r
+                <_>\r
+                  2 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.7936851256527007e-004</threshold>\r
+            <left_val>-0.1139610037207604</left_val>\r
+            <right_val>0.2931660115718842</right_val></_></_>\r
+        <_>\r
+          <!-- tree 27 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 8 36 3 -1.</_>\r
+                <_>\r
+                  12 9 12 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2467595934867859</threshold>\r
+            <left_val>-0.0385956317186356</left_val>\r
+            <right_val>0.8264998197555542</right_val></_></_>\r
+        <_>\r
+          <!-- tree 28 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 7 8 4 -1.</_>\r
+                <_>\r
+                  14 8 8 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-8.4232958033680916e-003</threshold>\r
+            <left_val>0.3299596905708313</left_val>\r
+            <right_val>-0.1164536997675896</right_val></_></_>\r
+        <_>\r
+          <!-- tree 29 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 9 5 3 -1.</_>\r
+                <_>\r
+                  17 10 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.2311567813158035e-003</threshold>\r
+            <left_val>0.2714211940765381</left_val>\r
+            <right_val>-0.1081148013472557</right_val></_></_>\r
+        <_>\r
+          <!-- tree 30 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 0 1 2 -1.</_>\r
+                <_>\r
+                  4 0 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>1.5653009759262204e-003</threshold>\r
+            <left_val>0.0782537832856178</left_val>\r
+            <right_val>-0.5209766030311585</right_val></_></_>\r
+        <_>\r
+          <!-- tree 31 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 0 3 2 -1.</_>\r
+                <_>\r
+                  31 0 3 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-5.0341398455202579e-003</threshold>\r
+            <left_val>0.2948805987834930</left_val>\r
+            <right_val>-0.0469605103135109</right_val></_></_>\r
+        <_>\r
+          <!-- tree 32 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 0 2 3 -1.</_>\r
+                <_>\r
+                  5 0 1 3 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>1.4283140189945698e-003</threshold>\r
+            <left_val>-0.1379459947347641</left_val>\r
+            <right_val>0.2432370930910111</right_val></_></_>\r
+        <_>\r
+          <!-- tree 33 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 13 36 5 -1.</_>\r
+                <_>\r
+                  0 13 18 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1903136968612671</threshold>\r
+            <left_val>-0.0520935095846653</left_val>\r
+            <right_val>0.6870803236961365</right_val></_></_>\r
+        <_>\r
+          <!-- tree 34 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 3 4 3 -1.</_>\r
+                <_>\r
+                  5 4 4 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>8.1368777900934219e-003</threshold>\r
+            <left_val>-0.0533115193247795</left_val>\r
+            <right_val>0.5827271938323975</right_val></_></_>\r
+        <_>\r
+          <!-- tree 35 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 7 6 3 -1.</_>\r
+                <_>\r
+                  30 9 2 3 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0467283688485622</threshold>\r
+            <left_val>0.3552536070346832</left_val>\r
+            <right_val>-0.0178062599152327</right_val></_></_>\r
+        <_>\r
+          <!-- tree 36 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 7 3 6 -1.</_>\r
+                <_>\r
+                  6 9 3 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0143171697854996</threshold>\r
+            <left_val>-0.1262664049863815</left_val>\r
+            <right_val>0.2696101069450378</right_val></_></_>\r
+        <_>\r
+          <!-- tree 37 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 5 18 10 -1.</_>\r
+                <_>\r
+                  23 5 9 5 2.</_>\r
+                <_>\r
+                  14 10 9 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0961097329854965</threshold>\r
+            <left_val>0.3411748111248016</left_val>\r
+            <right_val>-0.0392176099121571</right_val></_></_>\r
+        <_>\r
+          <!-- tree 38 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 5 18 10 -1.</_>\r
+                <_>\r
+                  4 5 9 5 2.</_>\r
+                <_>\r
+                  13 10 9 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0748788118362427</threshold>\r
+            <left_val>-0.0648199021816254</left_val>\r
+            <right_val>0.5671138167381287</right_val></_></_>\r
+        <_>\r
+          <!-- tree 39 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  32 17 3 1 -1.</_>\r
+                <_>\r
+                  33 17 1 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.1972299843328074e-005</threshold>\r
+            <left_val>0.2874209880828857</left_val>\r
+            <right_val>-0.1642889976501465</right_val></_></_>\r
+        <_>\r
+          <!-- tree 40 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 17 3 1 -1.</_>\r
+                <_>\r
+                  2 17 1 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.0099039829801768e-004</threshold>\r
+            <left_val>0.2659021019935608</left_val>\r
+            <right_val>-0.1299035996198654</right_val></_></_>\r
+        <_>\r
+          <!-- tree 41 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 0 26 2 -1.</_>\r
+                <_>\r
+                  18 0 13 1 2.</_>\r
+                <_>\r
+                  5 1 13 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0155834900215268</threshold>\r
+            <left_val>0.0363226197659969</left_val>\r
+            <right_val>-0.8874331712722778</right_val></_></_>\r
+        <_>\r
+          <!-- tree 42 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 3 27 9 -1.</_>\r
+                <_>\r
+                  9 6 9 3 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>6.7313341423869133e-003</threshold>\r
+            <left_val>0.1628185957670212</left_val>\r
+            <right_val>-0.1971620023250580</right_val></_></_>\r
+        <_>\r
+          <!-- tree 43 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 0 18 12 -1.</_>\r
+                <_>\r
+                  13 6 18 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0452514104545116</threshold>\r
+            <left_val>-0.2031500935554504</left_val>\r
+            <right_val>0.1573408991098404</right_val></_></_>\r
+        <_>\r
+          <!-- tree 44 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 17 4 1 -1.</_>\r
+                <_>\r
+                  1 17 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.8729529003612697e-004</threshold>\r
+            <left_val>-0.1244959011673927</left_val>\r
+            <right_val>0.2565822899341583</right_val></_></_>\r
+        <_>\r
+          <!-- tree 45 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  29 13 1 3 -1.</_>\r
+                <_>\r
+                  28 14 1 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-2.1028579212725163e-003</threshold>\r
+            <left_val>-0.5088729262351990</left_val>\r
+            <right_val>0.0340831801295280</right_val></_></_>\r
+        <_>\r
+          <!-- tree 46 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 12 8 6 -1.</_>\r
+                <_>\r
+                  0 14 8 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.9328099228441715e-003</threshold>\r
+            <left_val>-0.3393375873565674</left_val>\r
+            <right_val>0.0930555686354637</right_val></_></_>\r
+        <_>\r
+          <!-- tree 47 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  23 7 3 3 -1.</_>\r
+                <_>\r
+                  24 7 1 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.1205590348690748e-003</threshold>\r
+            <left_val>-0.0227940604090691</left_val>\r
+            <right_val>0.2379353046417236</right_val></_></_>\r
+        <_>\r
+          <!-- tree 48 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 1 12 6 -1.</_>\r
+                <_>\r
+                  11 3 12 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0780286788940430</threshold>\r
+            <left_val>-0.0445036217570305</left_val>\r
+            <right_val>0.6776394248008728</right_val></_></_>\r
+        <_>\r
+          <!-- tree 49 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 10 26 8 -1.</_>\r
+                <_>\r
+                  18 10 13 4 2.</_>\r
+                <_>\r
+                  5 14 13 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0424769781529903</threshold>\r
+            <left_val>0.0925821065902710</left_val>\r
+            <right_val>-0.3536301851272583</right_val></_></_>\r
+        <_>\r
+          <!-- tree 50 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 12 9 6 -1.</_>\r
+                <_>\r
+                  14 12 3 6 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0257683005183935</threshold>\r
+            <left_val>-0.9091991186141968</left_val>\r
+            <right_val>0.0266928393393755</right_val></_></_>\r
+        <_>\r
+          <!-- tree 51 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 12 12 3 -1.</_>\r
+                <_>\r
+                  18 13 4 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0614446699619293</threshold>\r
+            <left_val>-0.0249543990939856</left_val>\r
+            <right_val>0.7212049961090088</right_val></_></_>\r
+        <_>\r
+          <!-- tree 52 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 12 12 3 -1.</_>\r
+                <_>\r
+                  14 13 4 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.5776318982243538e-003</threshold>\r
+            <left_val>0.1772899031639099</left_val>\r
+            <right_val>-0.1972344964742661</right_val></_></_></trees>\r
+      <stage_threshold>-1.2766569852828979</stage_threshold>\r
+      <parent>13</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 15 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 6 27 6 -1.</_>\r
+                <_>\r
+                  13 8 9 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2858596146106720</threshold>\r
+            <left_val>-0.1539604961872101</left_val>\r
+            <right_val>0.6624677181243897</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 9 5 4 -1.</_>\r
+                <_>\r
+                  17 10 5 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>9.2271259054541588e-003</threshold>\r
+            <left_val>-0.1074633970856667</left_val>\r
+            <right_val>0.4311806857585907</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 16 2 -1.</_>\r
+                <_>\r
+                  0 0 8 1 2.</_>\r
+                <_>\r
+                  8 1 8 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.2924109362065792e-003</threshold>\r
+            <left_val>-0.1983013004064560</left_val>\r
+            <right_val>0.3842228949069977</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  22 0 8 8 -1.</_>\r
+                <_>\r
+                  26 0 4 4 2.</_>\r
+                <_>\r
+                  22 4 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0140045098960400</threshold>\r
+            <left_val>-0.1924948990345001</left_val>\r
+            <right_val>0.3442491888999939</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 0 32 12 -1.</_>\r
+                <_>\r
+                  1 0 16 6 2.</_>\r
+                <_>\r
+                  17 6 16 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0960232019424438</threshold>\r
+            <left_val>0.1299059987068176</left_val>\r
+            <right_val>-0.6065304875373840</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 7 6 10 -1.</_>\r
+                <_>\r
+                  31 7 3 5 2.</_>\r
+                <_>\r
+                  28 12 3 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>6.1803720891475677e-003</threshold>\r
+            <left_val>-0.1904646009206772</left_val>\r
+            <right_val>0.1891862004995346</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 7 6 10 -1.</_>\r
+                <_>\r
+                  2 7 3 5 2.</_>\r
+                <_>\r
+                  5 12 3 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.2172285765409470e-003</threshold>\r
+            <left_val>-0.2518267929553986</left_val>\r
+            <right_val>0.2664459049701691</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  20 10 3 3 -1.</_>\r
+                <_>\r
+                  20 11 3 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.4542760327458382e-003</threshold>\r
+            <left_val>0.2710269093513489</left_val>\r
+            <right_val>-0.1204148977994919</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 10 3 3 -1.</_>\r
+                <_>\r
+                  13 11 3 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.0185449868440628e-003</threshold>\r
+            <left_val>-0.1353860944509506</left_val>\r
+            <right_val>0.4733603000640869</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 16 6 2 -1.</_>\r
+                <_>\r
+                  19 16 2 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.4214779734611511e-003</threshold>\r
+            <left_val>-0.5049971938133240</left_val>\r
+            <right_val>0.1042480990290642</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 11 7 3 -1.</_>\r
+                <_>\r
+                  13 12 7 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>9.5980763435363770e-003</threshold>\r
+            <left_val>-0.1034729033708572</left_val>\r
+            <right_val>0.5837283730506897</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  25 13 3 2 -1.</_>\r
+                <_>\r
+                  25 13 3 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.1849957779049873e-003</threshold>\r
+            <left_val>0.0588967092335224</left_val>\r
+            <right_val>-0.4623228907585144</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 10 4 4 -1.</_>\r
+                <_>\r
+                  13 11 4 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.6107750385999680e-003</threshold>\r
+            <left_val>0.3783561885356903</left_val>\r
+            <right_val>-0.1259022951126099</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 16 18 2 -1.</_>\r
+                <_>\r
+                  26 16 9 1 2.</_>\r
+                <_>\r
+                  17 17 9 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.8978679329156876e-003</threshold>\r
+            <left_val>-0.1369954943656921</left_val>\r
+            <right_val>0.2595148086547852</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 13 4 1 -1.</_>\r
+                <_>\r
+                  9 13 2 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.2606070637702942e-003</threshold>\r
+            <left_val>0.0882339626550674</left_val>\r
+            <right_val>-0.6390284895896912</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 1 2 1 -1.</_>\r
+                <_>\r
+                  34 1 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-4.2996238917112350e-003</threshold>\r
+            <left_val>-0.7953972816467285</left_val>\r
+            <right_val>0.0170935597270727</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 4 24 6 -1.</_>\r
+                <_>\r
+                  13 6 8 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3542361855506897</threshold>\r
+            <left_val>-0.0593450404703617</left_val>\r
+            <right_val>0.8557919859886169</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  33 16 3 2 -1.</_>\r
+                <_>\r
+                  33 17 3 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.0245838570408523e-004</threshold>\r
+            <left_val>0.3147065043449402</left_val>\r
+            <right_val>-0.1448609977960587</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 17 36 1 -1.</_>\r
+                <_>\r
+                  18 17 18 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0271694902330637</threshold>\r
+            <left_val>-0.1249295026063919</left_val>\r
+            <right_val>0.4280903935432434</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 1 2 1 -1.</_>\r
+                <_>\r
+                  34 1 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>3.4571529831737280e-003</threshold>\r
+            <left_val>0.0397093296051025</left_val>\r
+            <right_val>-0.7089157104492188</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 1 1 2 -1.</_>\r
+                <_>\r
+                  2 1 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>2.1742798853665590e-003</threshold>\r
+            <left_val>0.0658724531531334</left_val>\r
+            <right_val>-0.6949694156646729</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  22 0 8 10 -1.</_>\r
+                <_>\r
+                  24 2 4 10 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0252638105303049</threshold>\r
+            <left_val>-0.1169395968317986</left_val>\r
+            <right_val>0.1904976963996887</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 4 8 12 -1.</_>\r
+                <_>\r
+                  12 4 4 6 2.</_>\r
+                <_>\r
+                  16 10 4 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0247209891676903</threshold>\r
+            <left_val>-0.4965795874595642</left_val>\r
+            <right_val>0.1017538011074066</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  26 6 6 6 -1.</_>\r
+                <_>\r
+                  29 6 3 3 2.</_>\r
+                <_>\r
+                  26 9 3 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0103848800063133</threshold>\r
+            <left_val>-0.1148673966526985</left_val>\r
+            <right_val>0.3374153077602387</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 6 4 6 -1.</_>\r
+                <_>\r
+                  5 6 2 3 2.</_>\r
+                <_>\r
+                  7 9 2 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.0045028328895569e-003</threshold>\r
+            <left_val>-0.1096355020999908</left_val>\r
+            <right_val>0.3925519883632660</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  29 5 2 4 -1.</_>\r
+                <_>\r
+                  29 5 1 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>7.1279620751738548e-003</threshold>\r
+            <left_val>-0.0649081915616989</left_val>\r
+            <right_val>0.4042040109634399</right_val></_></_>\r
+        <_>\r
+          <!-- tree 26 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 4 18 3 -1.</_>\r
+                <_>\r
+                  7 5 18 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0197004191577435</threshold>\r
+            <left_val>-0.0793758779764175</left_val>\r
+            <right_val>0.5308234095573425</right_val></_></_>\r
+        <_>\r
+          <!-- tree 27 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  29 13 2 3 -1.</_>\r
+                <_>\r
+                  28 14 2 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.2097331024706364e-003</threshold>\r
+            <left_val>0.0407970212399960</left_val>\r
+            <right_val>-0.6044098734855652</right_val></_></_>\r
+        <_>\r
+          <!-- tree 28 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 5 3 3 -1.</_>\r
+                <_>\r
+                  8 6 3 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.4459570199251175e-003</threshold>\r
+            <left_val>-0.1038623005151749</left_val>\r
+            <right_val>0.4093598127365112</right_val></_></_>\r
+        <_>\r
+          <!-- tree 29 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 16 22 2 -1.</_>\r
+                <_>\r
+                  18 16 11 1 2.</_>\r
+                <_>\r
+                  7 17 11 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.9610428288578987e-003</threshold>\r
+            <left_val>-0.5291494727134705</left_val>\r
+            <right_val>0.0805394500494003</right_val></_></_>\r
+        <_>\r
+          <!-- tree 30 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 2 1 3 -1.</_>\r
+                <_>\r
+                  0 3 1 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.7519221445545554e-004</threshold>\r
+            <left_val>0.0638044029474258</left_val>\r
+            <right_val>-0.5863661766052246</right_val></_></_>\r
+        <_>\r
+          <!-- tree 31 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 3 20 6 -1.</_>\r
+                <_>\r
+                  26 3 10 3 2.</_>\r
+                <_>\r
+                  16 6 10 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0605248510837555</threshold>\r
+            <left_val>-0.0337128005921841</left_val>\r
+            <right_val>0.2631115913391113</right_val></_></_>\r
+        <_>\r
+          <!-- tree 32 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 5 8 6 -1.</_>\r
+                <_>\r
+                  12 5 4 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0103538101539016</threshold>\r
+            <left_val>-0.4792002141475678</left_val>\r
+            <right_val>0.0800439566373825</right_val></_></_>\r
+        <_>\r
+          <!-- tree 33 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 8 34 8 -1.</_>\r
+                <_>\r
+                  18 8 17 4 2.</_>\r
+                <_>\r
+                  1 12 17 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0227775108069181</threshold>\r
+            <left_val>-0.3116275072097778</left_val>\r
+            <right_val>0.1189998015761375</right_val></_></_>\r
+        <_>\r
+          <!-- tree 34 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 9 8 8 -1.</_>\r
+                <_>\r
+                  14 9 4 4 2.</_>\r
+                <_>\r
+                  18 13 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0224688798189163</threshold>\r
+            <left_val>-0.6608346104621887</left_val>\r
+            <right_val>0.0522344894707203</right_val></_></_>\r
+        <_>\r
+          <!-- tree 35 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 0 1 3 -1.</_>\r
+                <_>\r
+                  35 1 1 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.8432162040844560e-004</threshold>\r
+            <left_val>0.0546303391456604</left_val>\r
+            <right_val>-0.4639565944671631</right_val></_></_>\r
+        <_>\r
+          <!-- tree 36 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 8 3 5 -1.</_>\r
+                <_>\r
+                  16 8 1 5 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.6177870351821184e-003</threshold>\r
+            <left_val>0.6744704246520996</left_val>\r
+            <right_val>-0.0587895289063454</right_val></_></_>\r
+        <_>\r
+          <!-- tree 37 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  19 0 10 1 -1.</_>\r
+                <_>\r
+                  19 0 5 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0300888605415821</threshold>\r
+            <left_val>0.0331335216760635</left_val>\r
+            <right_val>-0.4646137058734894</right_val></_></_></trees>\r
+      <stage_threshold>-1.4061349630355835</stage_threshold>\r
+      <parent>14</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 16 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 3 9 6 -1.</_>\r
+                <_>\r
+                  7 5 9 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0726009905338287</threshold>\r
+            <left_val>0.6390709280967712</left_val>\r
+            <right_val>-0.1512455046176910</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 6 24 6 -1.</_>\r
+                <_>\r
+                  14 8 8 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3471255898475647</threshold>\r
+            <left_val>-0.0790246576070786</left_val>\r
+            <right_val>0.7955042123794556</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 8 27 6 -1.</_>\r
+                <_>\r
+                  13 10 9 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3429723083972931</threshold>\r
+            <left_val>-0.1230095997452736</left_val>\r
+            <right_val>0.6572809815406799</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 4 27 6 -1.</_>\r
+                <_>\r
+                  14 6 9 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3561694025993347</threshold>\r
+            <left_val>-0.0537334382534027</left_val>\r
+            <right_val>0.8285108208656311</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 6 5 6 -1.</_>\r
+                <_>\r
+                  5 8 5 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>6.0840700753033161e-003</threshold>\r
+            <left_val>-0.1284721046686173</left_val>\r
+            <right_val>0.3382267951965332</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 0 1 2 -1.</_>\r
+                <_>\r
+                  35 1 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.6281309945043176e-004</threshold>\r
+            <left_val>0.3035660982131958</left_val>\r
+            <right_val>-0.2518202960491180</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 3 10 3 -1.</_>\r
+                <_>\r
+                  3 4 10 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0112819001078606</threshold>\r
+            <left_val>-0.0839143469929695</left_val>\r
+            <right_val>0.4347592890262604</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  29 5 2 4 -1.</_>\r
+                <_>\r
+                  29 5 1 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>7.4357059784233570e-003</threshold>\r
+            <left_val>-0.0670880377292633</left_val>\r
+            <right_val>0.3722797930240631</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 0 28 16 -1.</_>\r
+                <_>\r
+                  3 0 14 8 2.</_>\r
+                <_>\r
+                  17 8 14 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0905762165784836</threshold>\r
+            <left_val>-0.5831961035728455</left_val>\r
+            <right_val>0.0801467597484589</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 0 4 2 -1.</_>\r
+                <_>\r
+                  31 0 2 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>8.8247694075107574e-003</threshold>\r
+            <left_val>0.1290193051099777</left_val>\r
+            <right_val>-0.4760313034057617</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 9 3 9 -1.</_>\r
+                <_>\r
+                  4 12 3 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.6147770695388317e-003</threshold>\r
+            <left_val>-0.4000220894813538</left_val>\r
+            <right_val>0.1124631017446518</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  32 16 4 2 -1.</_>\r
+                <_>\r
+                  32 17 4 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.5541300419718027e-004</threshold>\r
+            <left_val>0.3238615989685059</left_val>\r
+            <right_val>-0.2333187013864517</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 0 1 10 -1.</_>\r
+                <_>\r
+                  17 0 1 5 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0265476293861866</threshold>\r
+            <left_val>0.0723338723182678</left_val>\r
+            <right_val>-0.5837839841842651</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 4 14 8 -1.</_>\r
+                <_>\r
+                  17 4 7 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0513831414282322</threshold>\r
+            <left_val>-0.2244618982076645</left_val>\r
+            <right_val>0.0409497395157814</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 0 11 4 -1.</_>\r
+                <_>\r
+                  6 2 11 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.3701129723340273e-003</threshold>\r
+            <left_val>-0.1671708971261978</left_val>\r
+            <right_val>0.2552697062492371</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 0 1 2 -1.</_>\r
+                <_>\r
+                  35 1 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.2581920493394136e-003</threshold>\r
+            <left_val>-0.9207922816276550</left_val>\r
+            <right_val>3.4371060319244862e-003</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 1 2 -1.</_>\r
+                <_>\r
+                  0 1 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.3282749569043517e-004</threshold>\r
+            <left_val>0.1857322007417679</left_val>\r
+            <right_val>-0.2249896973371506</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  33 0 2 1 -1.</_>\r
+                <_>\r
+                  33 0 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-2.8032590635120869e-003</threshold>\r
+            <left_val>-0.8589754104614258</left_val>\r
+            <right_val>0.0463845208287239</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 0 1 2 -1.</_>\r
+                <_>\r
+                  3 0 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>1.3141379458829761e-003</threshold>\r
+            <left_val>0.0796270668506622</left_val>\r
+            <right_val>-0.4610596895217896</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 17 36 1 -1.</_>\r
+                <_>\r
+                  9 17 18 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0638845413923264</threshold>\r
+            <left_val>-0.0534401498734951</left_val>\r
+            <right_val>0.8104500174522400</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 13 3 1 -1.</_>\r
+                <_>\r
+                  8 14 1 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-1.9811019301414490e-003</threshold>\r
+            <left_val>-0.6382514834403992</left_val>\r
+            <right_val>0.0766435563564301</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 4 14 8 -1.</_>\r
+                <_>\r
+                  17 4 7 8 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0133598595857620</threshold>\r
+            <left_val>-0.0950375497341156</left_val>\r
+            <right_val>0.0625333487987518</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 16 4 2 -1.</_>\r
+                <_>\r
+                  0 17 4 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.0935300088021904e-004</threshold>\r
+            <left_val>0.1747954040765762</left_val>\r
+            <right_val>-0.2287603020668030</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 12 10 3 -1.</_>\r
+                <_>\r
+                  13 13 10 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0119106303900480</threshold>\r
+            <left_val>-0.0770419836044312</left_val>\r
+            <right_val>0.5045837759971619</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 12 36 6 -1.</_>\r
+                <_>\r
+                  18 12 18 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2395170032978058</threshold>\r
+            <left_val>-0.0651228874921799</left_val>\r
+            <right_val>0.5042074918746948</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 3 27 6 -1.</_>\r
+                <_>\r
+                  14 5 9 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3983140885829926</threshold>\r
+            <left_val>-0.0299998205155134</left_val>\r
+            <right_val>0.7968547940254211</right_val></_></_>\r
+        <_>\r
+          <!-- tree 26 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 5 5 3 -1.</_>\r
+                <_>\r
+                  8 6 5 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>6.1875800602138042e-003</threshold>\r
+            <left_val>-0.0853391736745834</left_val>\r
+            <right_val>0.3945176899433136</right_val></_></_>\r
+        <_>\r
+          <!-- tree 27 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 7 12 4 -1.</_>\r
+                <_>\r
+                  15 7 6 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-9.4047123566269875e-003</threshold>\r
+            <left_val>-0.4344133138656616</left_val>\r
+            <right_val>0.0826191008090973</right_val></_></_>\r
+        <_>\r
+          <!-- tree 28 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 5 8 4 -1.</_>\r
+                <_>\r
+                  15 5 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0117366304621100</threshold>\r
+            <left_val>0.0694831609725952</left_val>\r
+            <right_val>-0.4870649874210358</right_val></_></_>\r
+        <_>\r
+          <!-- tree 29 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 14 6 4 -1.</_>\r
+                <_>\r
+                  16 14 3 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0151767702773213</threshold>\r
+            <left_val>-0.5854120850563049</left_val>\r
+            <right_val>0.0328795611858368</right_val></_></_>\r
+        <_>\r
+          <!-- tree 30 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 10 5 3 -1.</_>\r
+                <_>\r
+                  14 11 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.0744259711354971e-003</threshold>\r
+            <left_val>-0.1314608007669449</left_val>\r
+            <right_val>0.2546674013137817</right_val></_></_>\r
+        <_>\r
+          <!-- tree 31 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  25 3 6 4 -1.</_>\r
+                <_>\r
+                  25 4 6 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.9391339048743248e-003</threshold>\r
+            <left_val>-0.1086023002862930</left_val>\r
+            <right_val>0.2783496081829071</right_val></_></_>\r
+        <_>\r
+          <!-- tree 32 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 6 6 8 -1.</_>\r
+                <_>\r
+                  3 8 6 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.1510310471057892e-003</threshold>\r
+            <left_val>-0.1575057953596115</left_val>\r
+            <right_val>0.2087786048650742</right_val></_></_>\r
+        <_>\r
+          <!-- tree 33 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 4 5 6 -1.</_>\r
+                <_>\r
+                  27 6 5 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.3775361739099026e-003</threshold>\r
+            <left_val>-0.1320703029632568</left_val>\r
+            <right_val>0.3767293989658356</right_val></_></_>\r
+        <_>\r
+          <!-- tree 34 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 1 6 9 -1.</_>\r
+                <_>\r
+                  4 4 6 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0221741795539856</threshold>\r
+            <left_val>-0.0901802927255630</left_val>\r
+            <right_val>0.4157527089118958</right_val></_></_>\r
+        <_>\r
+          <!-- tree 35 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  21 9 2 4 -1.</_>\r
+                <_>\r
+                  21 10 2 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.9948610570281744e-003</threshold>\r
+            <left_val>0.2560858130455017</left_val>\r
+            <right_val>-0.0990849286317825</right_val></_></_>\r
+        <_>\r
+          <!-- tree 36 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 10 34 4 -1.</_>\r
+                <_>\r
+                  1 10 17 2 2.</_>\r
+                <_>\r
+                  18 12 17 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0315575599670410</threshold>\r
+            <left_val>0.0741889998316765</left_val>\r
+            <right_val>-0.5494022965431213</right_val></_></_>\r
+        <_>\r
+          <!-- tree 37 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 15 2 3 -1.</_>\r
+                <_>\r
+                  34 16 2 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.3111158447572961e-005</threshold>\r
+            <left_val>0.3032462894916534</left_val>\r
+            <right_val>-0.1778181046247482</right_val></_></_>\r
+        <_>\r
+          <!-- tree 38 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 0 2 2 -1.</_>\r
+                <_>\r
+                  3 0 2 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-3.2675920519977808e-003</threshold>\r
+            <left_val>-0.6721243262290955</left_val>\r
+            <right_val>0.0591883286833763</right_val></_></_>\r
+        <_>\r
+          <!-- tree 39 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  33 0 1 2 -1.</_>\r
+                <_>\r
+                  33 0 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.2293380829505622e-004</threshold>\r
+            <left_val>-0.1103409975767136</left_val>\r
+            <right_val>0.1257317960262299</right_val></_></_></trees>\r
+      <stage_threshold>-1.3384460210800171</stage_threshold>\r
+      <parent>15</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 17 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 0 10 8 -1.</_>\r
+                <_>\r
+                  6 2 10 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0425620190799236</threshold>\r
+            <left_val>0.3334665894508362</left_val>\r
+            <right_val>-0.2986198067665100</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 6 30 6 -1.</_>\r
+                <_>\r
+                  13 8 10 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.4182719886302948</threshold>\r
+            <left_val>-0.0951386988162994</left_val>\r
+            <right_val>0.7570992112159729</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 7 10 4 -1.</_>\r
+                <_>\r
+                  13 8 10 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0202563796192408</threshold>\r
+            <left_val>0.4778389036655426</left_val>\r
+            <right_val>-0.1459210067987442</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 5 6 12 -1.</_>\r
+                <_>\r
+                  19 5 3 6 2.</_>\r
+                <_>\r
+                  16 11 3 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0189483091235161</threshold>\r
+            <left_val>-0.3872750103473663</left_val>\r
+            <right_val>0.0524798892438412</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 1 4 6 -1.</_>\r
+                <_>\r
+                  8 3 4 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0405505895614624</threshold>\r
+            <left_val>0.5464624762535095</left_val>\r
+            <right_val>-0.0813998579978943</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 7 33 6 -1.</_>\r
+                <_>\r
+                  13 9 11 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.5187274813652039</threshold>\r
+            <left_val>-0.0279305391013622</left_val>\r
+            <right_val>0.8458098173141480</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 6 30 3 -1.</_>\r
+                <_>\r
+                  13 7 10 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2071361988782883</threshold>\r
+            <left_val>-0.0588508695363998</left_val>\r
+            <right_val>0.7960156202316284</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 11 6 3 -1.</_>\r
+                <_>\r
+                  15 12 6 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>8.1972572952508926e-003</threshold>\r
+            <left_val>-0.0999663695693016</left_val>\r
+            <right_val>0.4983156025409699</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 5 6 12 -1.</_>\r
+                <_>\r
+                  14 5 3 6 2.</_>\r
+                <_>\r
+                  17 11 3 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0174453891813755</threshold>\r
+            <left_val>0.0680409595370293</left_val>\r
+            <right_val>-0.5669981837272644</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 12 26 6 -1.</_>\r
+                <_>\r
+                  18 12 13 3 2.</_>\r
+                <_>\r
+                  5 15 13 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0563102811574936</threshold>\r
+            <left_val>-0.6862804293632507</left_val>\r
+            <right_val>0.0742225572466850</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 12 27 3 -1.</_>\r
+                <_>\r
+                  13 13 9 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1809556037187576</threshold>\r
+            <left_val>-0.0528081282973289</left_val>\r
+            <right_val>0.8448318243026733</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 11 4 3 -1.</_>\r
+                <_>\r
+                  16 12 4 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.3450690787285566e-003</threshold>\r
+            <left_val>0.2839694023132324</left_val>\r
+            <right_val>-0.1112336963415146</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 12 4 2 -1.</_>\r
+                <_>\r
+                  6 13 2 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>3.8937770295888186e-003</threshold>\r
+            <left_val>0.0654993131756783</left_val>\r
+            <right_val>-0.5792096257209778</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 17 2 1 -1.</_>\r
+                <_>\r
+                  34 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.9383721741614863e-005</threshold>\r
+            <left_val>-0.3093047142028809</left_val>\r
+            <right_val>0.4223710894584656</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 0 1 12 -1.</_>\r
+                <_>\r
+                  16 0 1 6 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0338991582393646</threshold>\r
+            <left_val>0.0307075399905443</left_val>\r
+            <right_val>-0.7229980826377869</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 17 34 1 -1.</_>\r
+                <_>\r
+                  2 17 17 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0336443893611431</threshold>\r
+            <left_val>0.4266444146633148</left_val>\r
+            <right_val>-0.0720057785511017</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 3 18 4 -1.</_>\r
+                <_>\r
+                  5 4 18 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0388077609241009</threshold>\r
+            <left_val>-0.0417135208845139</left_val>\r
+            <right_val>0.6599556803703308</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 17 2 1 -1.</_>\r
+                <_>\r
+                  34 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.9149548683781177e-005</threshold>\r
+            <left_val>0.4933550059795380</left_val>\r
+            <right_val>-0.2426010966300964</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 2 2 -1.</_>\r
+                <_>\r
+                  0 1 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.7580570895224810e-004</threshold>\r
+            <left_val>0.1791010946035385</left_val>\r
+            <right_val>-0.2192519009113312</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 5 16 3 -1.</_>\r
+                <_>\r
+                  15 6 16 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0126366596668959</threshold>\r
+            <left_val>-0.0712336227297783</left_val>\r
+            <right_val>0.2534261941909790</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 9 3 3 -1.</_>\r
+                <_>\r
+                  13 10 3 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.3681739587336779e-003</threshold>\r
+            <left_val>0.3310086131095886</left_val>\r
+            <right_val>-0.1020777970552445</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  20 4 8 14 -1.</_>\r
+                <_>\r
+                  22 4 4 14 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0411845296621323</threshold>\r
+            <left_val>-0.4787198901176453</left_val>\r
+            <right_val>0.0274448096752167</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 5 20 6 -1.</_>\r
+                <_>\r
+                  12 5 10 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0172852799296379</threshold>\r
+            <left_val>-0.2373382002115250</left_val>\r
+            <right_val>0.1541430056095123</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  26 3 6 6 -1.</_>\r
+                <_>\r
+                  28 5 2 6 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0583733208477497</threshold>\r
+            <left_val>0.3635525107383728</left_val>\r
+            <right_val>-0.0629119277000427</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 3 6 6 -1.</_>\r
+                <_>\r
+                  8 5 6 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0252293199300766</threshold>\r
+            <left_val>-0.0943458229303360</left_val>\r
+            <right_val>0.4322442114353180</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 0 2 3 -1.</_>\r
+                <_>\r
+                  34 0 1 3 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>4.7925519756972790e-003</threshold>\r
+            <left_val>0.0486642718315125</left_val>\r
+            <right_val>-0.4704689085483551</right_val></_></_>\r
+        <_>\r
+          <!-- tree 26 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 16 2 2 -1.</_>\r
+                <_>\r
+                  0 17 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.3549529830925167e-004</threshold>\r
+            <left_val>0.1936188042163849</left_val>\r
+            <right_val>-0.1933847069740295</right_val></_></_>\r
+        <_>\r
+          <!-- tree 27 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 6 4 8 -1.</_>\r
+                <_>\r
+                  31 7 2 8 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0179694108664989</threshold>\r
+            <left_val>0.2900086045265198</left_val>\r
+            <right_val>-0.0545452795922756</right_val></_></_>\r
+        <_>\r
+          <!-- tree 28 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 6 7 4 -1.</_>\r
+                <_>\r
+                  5 7 7 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0111410403624177</threshold>\r
+            <left_val>-0.1080225035548210</left_val>\r
+            <right_val>0.3332796096801758</right_val></_></_>\r
+        <_>\r
+          <!-- tree 29 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  20 4 8 14 -1.</_>\r
+                <_>\r
+                  22 4 4 14 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0397595092654228</threshold>\r
+            <left_val>0.0192408692091703</left_val>\r
+            <right_val>-0.4889996051788330</right_val></_></_>\r
+        <_>\r
+          <!-- tree 30 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 4 8 14 -1.</_>\r
+                <_>\r
+                  10 4 4 14 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0226527098566294</threshold>\r
+            <left_val>-0.5036928057670593</left_val>\r
+            <right_val>0.0807737335562706</right_val></_></_>\r
+        <_>\r
+          <!-- tree 31 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 17 6 1 -1.</_>\r
+                <_>\r
+                  19 17 2 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.0915650054812431e-003</threshold>\r
+            <left_val>0.0655540525913239</left_val>\r
+            <right_val>-0.2444387972354889</right_val></_></_>\r
+        <_>\r
+          <!-- tree 32 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 20 6 -1.</_>\r
+                <_>\r
+                  10 0 10 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0687547475099564</threshold>\r
+            <left_val>0.0891968086361885</left_val>\r
+            <right_val>-0.3565390110015869</right_val></_></_>\r
+        <_>\r
+          <!-- tree 33 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 0 22 18 -1.</_>\r
+                <_>\r
+                  8 0 11 18 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.3307105898857117</threshold>\r
+            <left_val>0.4649569988250732</left_val>\r
+            <right_val>-0.0581836998462677</right_val></_></_>\r
+        <_>\r
+          <!-- tree 34 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 2 8 12 -1.</_>\r
+                <_>\r
+                  13 2 4 6 2.</_>\r
+                <_>\r
+                  17 8 4 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0193072296679020</threshold>\r
+            <left_val>-0.4415718019008637</left_val>\r
+            <right_val>0.0830501168966293</right_val></_></_>\r
+        <_>\r
+          <!-- tree 35 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 10 14 8 -1.</_>\r
+                <_>\r
+                  18 10 7 4 2.</_>\r
+                <_>\r
+                  11 14 7 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0348087586462498</threshold>\r
+            <left_val>0.0534805804491043</left_val>\r
+            <right_val>-0.5037739872932434</right_val></_></_>\r
+        <_>\r
+          <!-- tree 36 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 16 2 2 -1.</_>\r
+                <_>\r
+                  1 16 1 1 2.</_>\r
+                <_>\r
+                  2 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.8908151327632368e-004</threshold>\r
+            <left_val>0.3427126109600067</left_val>\r
+            <right_val>-0.0899231806397438</right_val></_></_>\r
+        <_>\r
+          <!-- tree 37 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 0 2 1 -1.</_>\r
+                <_>\r
+                  34 0 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-2.1421869751065969e-003</threshold>\r
+            <left_val>-0.6064280271530151</left_val>\r
+            <right_val>0.0555892400443554</right_val></_></_>\r
+        <_>\r
+          <!-- tree 38 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 3 24 4 -1.</_>\r
+                <_>\r
+                  12 3 12 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1101581007242203</threshold>\r
+            <left_val>-0.0547747202217579</left_val>\r
+            <right_val>0.6878091096878052</right_val></_></_>\r
+        <_>\r
+          <!-- tree 39 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  19 1 2 3 -1.</_>\r
+                <_>\r
+                  19 2 2 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.0875208904035389e-004</threshold>\r
+            <left_val>-0.0558342188596725</left_val>\r
+            <right_val>0.0931682363152504</right_val></_></_>\r
+        <_>\r
+          <!-- tree 40 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 0 1 2 -1.</_>\r
+                <_>\r
+                  2 0 1 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>2.1960400044918060e-003</threshold>\r
+            <left_val>0.0539557486772537</left_val>\r
+            <right_val>-0.6050305962562561</right_val></_></_>\r
+        <_>\r
+          <!-- tree 41 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 3 6 8 -1.</_>\r
+                <_>\r
+                  18 3 3 4 2.</_>\r
+                <_>\r
+                  15 7 3 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0126062501221895</threshold>\r
+            <left_val>-0.4686402976512909</left_val>\r
+            <right_val>0.0599438697099686</right_val></_></_>\r
+        <_>\r
+          <!-- tree 42 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 5 4 2 -1.</_>\r
+                <_>\r
+                  14 6 4 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.7497899718582630e-003</threshold>\r
+            <left_val>0.2894253134727478</left_val>\r
+            <right_val>-0.1129785031080246</right_val></_></_>\r
+        <_>\r
+          <!-- tree 43 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 7 30 9 -1.</_>\r
+                <_>\r
+                  13 10 10 3 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.6096264123916626</threshold>\r
+            <left_val>-0.0478859916329384</left_val>\r
+            <right_val>0.5946549177169800</right_val></_></_>\r
+        <_>\r
+          <!-- tree 44 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 8 12 9 -1.</_>\r
+                <_>\r
+                  12 8 6 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0450232513248920</threshold>\r
+            <left_val>0.0638310685753822</left_val>\r
+            <right_val>-0.5295680165290833</right_val></_></_></trees>\r
+      <stage_threshold>-1.2722699642181396</stage_threshold>\r
+      <parent>16</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 18 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 8 16 5 -1.</_>\r
+                <_>\r
+                  14 8 8 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0159072801470757</threshold>\r
+            <left_val>-0.3819232881069183</left_val>\r
+            <right_val>0.2941176891326904</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 1 4 10 -1.</_>\r
+                <_>\r
+                  31 2 2 10 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0304830092936754</threshold>\r
+            <left_val>0.6401454806327820</left_val>\r
+            <right_val>-0.1133823990821838</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 0 10 8 -1.</_>\r
+                <_>\r
+                  11 2 10 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0258412398397923</threshold>\r
+            <left_val>-0.1765469014644623</left_val>\r
+            <right_val>0.2556340098381043</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  32 2 2 14 -1.</_>\r
+                <_>\r
+                  32 2 1 14 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0121606197208166</threshold>\r
+            <left_val>-0.0494619905948639</left_val>\r
+            <right_val>0.3473398983478546</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  4 2 14 2 -1.</_>\r
+                <_>\r
+                  4 2 14 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0159101597964764</threshold>\r
+            <left_val>0.4796676933765411</left_val>\r
+            <right_val>-0.1300950944423676</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 14 6 4 -1.</_>\r
+                <_>\r
+                  30 14 3 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.5282061435282230e-004</threshold>\r
+            <left_val>-0.3418492972850800</left_val>\r
+            <right_val>0.2309112995862961</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 13 1 4 -1.</_>\r
+                <_>\r
+                  11 15 1 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>6.7633582511916757e-004</threshold>\r
+            <left_val>-0.1543250977993012</left_val>\r
+            <right_val>0.2668730020523071</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 0 14 18 -1.</_>\r
+                <_>\r
+                  18 0 7 9 2.</_>\r
+                <_>\r
+                  11 9 7 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0599361397325993</threshold>\r
+            <left_val>-0.4880258142948151</left_val>\r
+            <right_val>0.0933274477720261</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 1 20 9 -1.</_>\r
+                <_>\r
+                  10 1 10 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.1134240999817848</threshold>\r
+            <left_val>-0.6577144265174866</left_val>\r
+            <right_val>0.0591668188571930</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  21 3 8 3 -1.</_>\r
+                <_>\r
+                  23 3 4 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.3361280113458633e-003</threshold>\r
+            <left_val>-0.1593652069568634</left_val>\r
+            <right_val>0.0502370409667492</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 9 2 4 -1.</_>\r
+                <_>\r
+                  13 10 2 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.8627740209922194e-003</threshold>\r
+            <left_val>0.3073025941848755</left_val>\r
+            <right_val>-0.1254066973924637</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 9 11 2 -1.</_>\r
+                <_>\r
+                  14 10 11 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0126530099660158</threshold>\r
+            <left_val>-0.1004493013024330</left_val>\r
+            <right_val>0.3749617934226990</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 2 36 9 -1.</_>\r
+                <_>\r
+                  12 5 12 3 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.6911857724189758</threshold>\r
+            <left_val>-0.0471464097499847</left_val>\r
+            <right_val>0.8321244120597839</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 12 2 6 -1.</_>\r
+                <_>\r
+                  34 15 2 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.6093868655152619e-004</threshold>\r
+            <left_val>0.3198773860931397</left_val>\r
+            <right_val>-0.2718330919742584</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 4 14 6 -1.</_>\r
+                <_>\r
+                  11 6 14 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0763450562953949</threshold>\r
+            <left_val>0.4309130012989044</left_val>\r
+            <right_val>-0.0908882692456245</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 0 4 1 -1.</_>\r
+                <_>\r
+                  31 0 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.8098300099372864e-003</threshold>\r
+            <left_val>0.0587311200797558</left_val>\r
+            <right_val>-0.6199675202369690</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 0 4 1 -1.</_>\r
+                <_>\r
+                  3 0 2 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.3322039740160108e-004</threshold>\r
+            <left_val>0.2000005990266800</left_val>\r
+            <right_val>-0.2012010961771011</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  19 14 6 4 -1.</_>\r
+                <_>\r
+                  21 14 2 4 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0137176299467683</threshold>\r
+            <left_val>-0.7309545278549194</left_val>\r
+            <right_val>0.0271785296499729</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 14 6 4 -1.</_>\r
+                <_>\r
+                  13 14 2 4 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-6.2303808517754078e-003</threshold>\r
+            <left_val>-0.5478098988533020</left_val>\r
+            <right_val>0.0687499493360519</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 14 36 1 -1.</_>\r
+                <_>\r
+                  9 14 18 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0499227195978165</threshold>\r
+            <left_val>-0.0473043099045753</left_val>\r
+            <right_val>0.8242310285568237</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 0 2 2 -1.</_>\r
+                <_>\r
+                  5 0 2 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-1.9126719562336802e-003</threshold>\r
+            <left_val>-0.5394017100334168</left_val>\r
+            <right_val>0.0774475932121277</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  26 3 5 3 -1.</_>\r
+                <_>\r
+                  26 4 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.1384560493752360e-003</threshold>\r
+            <left_val>-0.0965376868844032</left_val>\r
+            <right_val>0.1548569053411484</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 8 1 3 -1.</_>\r
+                <_>\r
+                  15 9 1 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-2.4732090532779694e-003</threshold>\r
+            <left_val>0.3559078872203827</left_val>\r
+            <right_val>-0.0931698307394981</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  21 11 2 3 -1.</_>\r
+                <_>\r
+                  21 12 2 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-7.1464257780462503e-004</threshold>\r
+            <left_val>0.1452019065618515</left_val>\r
+            <right_val>-0.0741942077875137</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 5 6 4 -1.</_>\r
+                <_>\r
+                  8 6 6 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-0.0204371493309736</threshold>\r
+            <left_val>0.4416376948356628</left_val>\r
+            <right_val>-0.0809424370527267</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  31 0 2 2 -1.</_>\r
+                <_>\r
+                  31 0 1 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-4.0483791381120682e-003</threshold>\r
+            <left_val>-0.5999277830123901</left_val>\r
+            <right_val>0.0330253802239895</right_val></_></_>\r
+        <_>\r
+          <!-- tree 26 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 4 3 9 -1.</_>\r
+                <_>\r
+                  6 7 3 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0111480504274368</threshold>\r
+            <left_val>-0.1135832965373993</left_val>\r
+            <right_val>0.3264499902725220</right_val></_></_>\r
+        <_>\r
+          <!-- tree 27 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  19 0 11 2 -1.</_>\r
+                <_>\r
+                  19 0 11 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>9.8842009902000427e-003</threshold>\r
+            <left_val>0.0554044805467129</left_val>\r
+            <right_val>-0.3273097872734070</right_val></_></_>\r
+        <_>\r
+          <!-- tree 28 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 0 2 2 -1.</_>\r
+                <_>\r
+                  5 0 2 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>3.1296359375119209e-003</threshold>\r
+            <left_val>0.0774086564779282</left_val>\r
+            <right_val>-0.4595307111740112</right_val></_></_>\r
+        <_>\r
+          <!-- tree 29 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  22 0 14 4 -1.</_>\r
+                <_>\r
+                  29 0 7 2 2.</_>\r
+                <_>\r
+                  22 2 7 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.9721839819103479e-003</threshold>\r
+            <left_val>-0.1291726976633072</left_val>\r
+            <right_val>0.1552311033010483</right_val></_></_>\r
+        <_>\r
+          <!-- tree 30 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 1 4 13 -1.</_>\r
+                <_>\r
+                  15 1 2 13 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0205544792115688</threshold>\r
+            <left_val>0.0876004695892334</left_val>\r
+            <right_val>-0.4577418863773346</right_val></_></_>\r
+        <_>\r
+          <!-- tree 31 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  21 3 8 4 -1.</_>\r
+                <_>\r
+                  23 3 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0230272803455591</threshold>\r
+            <left_val>0.3548808991909027</left_val>\r
+            <right_val>-0.0205669198185205</right_val></_></_>\r
+        <_>\r
+          <!-- tree 32 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 3 8 4 -1.</_>\r
+                <_>\r
+                  9 3 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-8.3903772756457329e-003</threshold>\r
+            <left_val>-0.4324072897434235</left_val>\r
+            <right_val>0.0920679792761803</right_val></_></_>\r
+        <_>\r
+          <!-- tree 33 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  32 14 2 2 -1.</_>\r
+                <_>\r
+                  33 14 1 1 2.</_>\r
+                <_>\r
+                  32 15 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.1431539896875620e-003</threshold>\r
+            <left_val>0.3959133923053742</left_val>\r
+            <right_val>-0.0231928899884224</right_val></_></_>\r
+        <_>\r
+          <!-- tree 34 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 14 2 2 -1.</_>\r
+                <_>\r
+                  2 14 1 1 2.</_>\r
+                <_>\r
+                  3 15 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-4.9133709399029613e-004</threshold>\r
+            <left_val>0.4274964034557343</left_val>\r
+            <right_val>-0.0855242162942886</right_val></_></_>\r
+        <_>\r
+          <!-- tree 35 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  35 5 1 12 -1.</_>\r
+                <_>\r
+                  35 9 1 4 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.1292928401380777e-004</threshold>\r
+            <left_val>-0.1619673967361450</left_val>\r
+            <right_val>0.1961497068405151</right_val></_></_>\r
+        <_>\r
+          <!-- tree 36 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 7 1 9 -1.</_>\r
+                <_>\r
+                  0 10 1 3 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.8478871360421181e-003</threshold>\r
+            <left_val>-0.5911636948585510</left_val>\r
+            <right_val>0.0624482408165932</right_val></_></_>\r
+        <_>\r
+          <!-- tree 37 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 2 15 6 -1.</_>\r
+                <_>\r
+                  12 4 15 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0941330492496490</threshold>\r
+            <left_val>0.4770160913467407</left_val>\r
+            <right_val>-0.0567101612687111</right_val></_></_>\r
+        <_>\r
+          <!-- tree 38 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 17 2 1 -1.</_>\r
+                <_>\r
+                  1 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.0079269850393757e-004</threshold>\r
+            <left_val>-0.1625709980726242</left_val>\r
+            <right_val>0.2140229046344757</right_val></_></_>\r
+        <_>\r
+          <!-- tree 39 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 17 2 1 -1.</_>\r
+                <_>\r
+                  34 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.2930231100181118e-005</threshold>\r
+            <left_val>-0.1859605014324188</left_val>\r
+            <right_val>0.1964769065380096</right_val></_></_>\r
+        <_>\r
+          <!-- tree 40 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 17 2 1 -1.</_>\r
+                <_>\r
+                  1 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.1743210052372888e-004</threshold>\r
+            <left_val>0.3182134926319122</left_val>\r
+            <right_val>-0.1328738033771515</right_val></_></_>\r
+        <_>\r
+          <!-- tree 41 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 0 16 10 -1.</_>\r
+                <_>\r
+                  15 0 8 10 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.1275181025266647</threshold>\r
+            <left_val>0.0301400795578957</left_val>\r
+            <right_val>-0.7411035895347595</right_val></_></_>\r
+        <_>\r
+          <!-- tree 42 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 10 24 8 -1.</_>\r
+                <_>\r
+                  5 10 12 4 2.</_>\r
+                <_>\r
+                  17 14 12 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0803262963891029</threshold>\r
+            <left_val>0.0415550395846367</left_val>\r
+            <right_val>-0.8263683915138245</right_val></_></_>\r
+        <_>\r
+          <!-- tree 43 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 4 3 3 -1.</_>\r
+                <_>\r
+                  27 5 3 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.6904190415516496e-003</threshold>\r
+            <left_val>-0.1029061973094940</left_val>\r
+            <right_val>0.2972418069839478</right_val></_></_></trees>\r
+      <stage_threshold>-1.3022350072860718</stage_threshold>\r
+      <parent>17</parent>\r
+      <next>-1</next></_>\r
+    <_>\r
+      <!-- stage 19 -->\r
+      <trees>\r
+        <_>\r
+          <!-- tree 0 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 6 14 12 -1.</_>\r
+                <_>\r
+                  6 6 7 6 2.</_>\r
+                <_>\r
+                  13 12 7 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0461227893829346</threshold>\r
+            <left_val>0.4425258934497833</left_val>\r
+            <right_val>-0.2991319894790649</right_val></_></_>\r
+        <_>\r
+          <!-- tree 1 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 5 24 6 -1.</_>\r
+                <_>\r
+                  14 7 8 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3672331869602203</threshold>\r
+            <left_val>-0.0630117505788803</left_val>\r
+            <right_val>0.7712538242340088</right_val></_></_>\r
+        <_>\r
+          <!-- tree 2 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 6 3 4 -1.</_>\r
+                <_>\r
+                  12 7 3 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.0962929595261812e-003</threshold>\r
+            <left_val>0.3514241874217987</left_val>\r
+            <right_val>-0.1730643957853317</right_val></_></_>\r
+        <_>\r
+          <!-- tree 3 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  30 7 6 10 -1.</_>\r
+                <_>\r
+                  33 7 3 5 2.</_>\r
+                <_>\r
+                  30 12 3 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>9.2647131532430649e-003</threshold>\r
+            <left_val>-0.1607280969619751</left_val>\r
+            <right_val>0.1853290945291519</right_val></_></_>\r
+        <_>\r
+          <!-- tree 4 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  3 12 6 6 -1.</_>\r
+                <_>\r
+                  3 12 3 3 2.</_>\r
+                <_>\r
+                  6 15 3 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.1748649198561907e-003</threshold>\r
+            <left_val>-0.1968899965286255</left_val>\r
+            <right_val>0.2409728020429611</right_val></_></_>\r
+        <_>\r
+          <!-- tree 5 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  20 0 13 2 -1.</_>\r
+                <_>\r
+                  20 0 13 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>8.0439839512109756e-003</threshold>\r
+            <left_val>0.0898629724979401</left_val>\r
+            <right_val>-0.3655225932598114</right_val></_></_>\r
+        <_>\r
+          <!-- tree 6 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  6 10 24 6 -1.</_>\r
+                <_>\r
+                  14 12 8 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.3275249004364014</threshold>\r
+            <left_val>-0.0568796806037426</left_val>\r
+            <right_val>0.7749336957931519</right_val></_></_>\r
+        <_>\r
+          <!-- tree 7 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  15 4 8 8 -1.</_>\r
+                <_>\r
+                  19 4 4 4 2.</_>\r
+                <_>\r
+                  15 8 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0190744306892157</threshold>\r
+            <left_val>-0.2895380854606628</left_val>\r
+            <right_val>0.0622916705906391</right_val></_></_>\r
+        <_>\r
+          <!-- tree 8 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 4 8 8 -1.</_>\r
+                <_>\r
+                  13 4 4 4 2.</_>\r
+                <_>\r
+                  17 8 4 4 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0205017495900393</threshold>\r
+            <left_val>-0.6262530088424683</left_val>\r
+            <right_val>0.0682769715785980</right_val></_></_>\r
+        <_>\r
+          <!-- tree 9 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 16 2 2 -1.</_>\r
+                <_>\r
+                  34 16 1 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.3187010053079575e-005</threshold>\r
+            <left_val>-0.2514955997467041</left_val>\r
+            <right_val>0.2613196074962616</right_val></_></_>\r
+        <_>\r
+          <!-- tree 10 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 6 3 3 -1.</_>\r
+                <_>\r
+                  12 7 3 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>3.3275580499321222e-003</threshold>\r
+            <left_val>-0.1199077963829041</left_val>\r
+            <right_val>0.3651930093765259</right_val></_></_>\r
+        <_>\r
+          <!-- tree 11 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  21 7 4 4 -1.</_>\r
+                <_>\r
+                  21 8 4 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>5.8408430777490139e-003</threshold>\r
+            <left_val>-0.0827485173940659</left_val>\r
+            <right_val>0.2365082055330277</right_val></_></_>\r
+        <_>\r
+          <!-- tree 12 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  2 8 30 4 -1.</_>\r
+                <_>\r
+                  2 8 15 2 2.</_>\r
+                <_>\r
+                  17 10 15 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0464623309671879</threshold>\r
+            <left_val>-0.6928564906120300</left_val>\r
+            <right_val>0.0781976729631424</right_val></_></_>\r
+        <_>\r
+          <!-- tree 13 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  27 4 3 4 -1.</_>\r
+                <_>\r
+                  27 5 3 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-3.7785700988024473e-003</threshold>\r
+            <left_val>0.3437257111072540</left_val>\r
+            <right_val>-0.1027545034885407</right_val></_></_>\r
+        <_>\r
+          <!-- tree 14 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 4 3 4 -1.</_>\r
+                <_>\r
+                  5 5 3 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>1.6655459767207503e-003</threshold>\r
+            <left_val>-0.1160527989268303</left_val>\r
+            <right_val>0.3716202974319458</right_val></_></_>\r
+        <_>\r
+          <!-- tree 15 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  34 16 2 2 -1.</_>\r
+                <_>\r
+                  34 16 1 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.7107670727418736e-005</threshold>\r
+            <left_val>0.4589366912841797</left_val>\r
+            <right_val>-0.2123643010854721</right_val></_></_>\r
+        <_>\r
+          <!-- tree 16 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 16 34 2 -1.</_>\r
+                <_>\r
+                  0 16 17 1 2.</_>\r
+                <_>\r
+                  17 17 17 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-9.0066380798816681e-003</threshold>\r
+            <left_val>-0.5953341126441956</left_val>\r
+            <right_val>0.0808764025568962</right_val></_></_>\r
+        <_>\r
+          <!-- tree 17 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 5 15 12 -1.</_>\r
+                <_>\r
+                  12 9 15 4 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.1378971040248871</threshold>\r
+            <left_val>0.3957067131996155</left_val>\r
+            <right_val>-0.0898853763937950</right_val></_></_>\r
+        <_>\r
+          <!-- tree 18 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 8 36 6 -1.</_>\r
+                <_>\r
+                  12 10 12 2 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.5759987235069275</threshold>\r
+            <left_val>-0.0538108199834824</left_val>\r
+            <right_val>0.8170394897460938</right_val></_></_>\r
+        <_>\r
+          <!-- tree 19 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  25 4 6 2 -1.</_>\r
+                <_>\r
+                  25 5 6 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-2.3918158840388060e-003</threshold>\r
+            <left_val>0.1393374055624008</left_val>\r
+            <right_val>-0.0421559289097786</right_val></_></_>\r
+        <_>\r
+          <!-- tree 20 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 17 2 1 -1.</_>\r
+                <_>\r
+                  1 17 1 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.4896071408875287e-004</threshold>\r
+            <left_val>-0.1485866010189056</left_val>\r
+            <right_val>0.2626332938671112</right_val></_></_>\r
+        <_>\r
+          <!-- tree 21 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  16 0 9 9 -1.</_>\r
+                <_>\r
+                  19 0 3 9 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0330624915659428</threshold>\r
+            <left_val>0.0306599102914333</left_val>\r
+            <right_val>-0.3231860101222992</right_val></_></_>\r
+        <_>\r
+          <!-- tree 22 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  11 0 9 9 -1.</_>\r
+                <_>\r
+                  14 0 3 9 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0443218797445297</threshold>\r
+            <left_val>0.0478538200259209</left_val>\r
+            <right_val>-0.7813590168952942</right_val></_></_>\r
+        <_>\r
+          <!-- tree 23 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  20 5 16 5 -1.</_>\r
+                <_>\r
+                  24 5 8 5 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0187181904911995</threshold>\r
+            <left_val>0.1201262027025223</left_val>\r
+            <right_val>-0.1121146976947784</right_val></_></_>\r
+        <_>\r
+          <!-- tree 24 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 3 16 9 -1.</_>\r
+                <_>\r
+                  4 3 8 9 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0923093706369400</threshold>\r
+            <left_val>0.0424630790948868</left_val>\r
+            <right_val>-0.8009700179100037</right_val></_></_>\r
+        <_>\r
+          <!-- tree 25 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  7 6 26 12 -1.</_>\r
+                <_>\r
+                  20 6 13 6 2.</_>\r
+                <_>\r
+                  7 12 13 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0906654372811317</threshold>\r
+            <left_val>-0.0223045293241739</left_val>\r
+            <right_val>0.1284797936677933</right_val></_></_>\r
+        <_>\r
+          <!-- tree 26 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 6 24 12 -1.</_>\r
+                <_>\r
+                  5 6 12 6 2.</_>\r
+                <_>\r
+                  17 12 12 6 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0582949295639992</threshold>\r
+            <left_val>-0.3936854004859924</left_val>\r
+            <right_val>0.0954821407794952</right_val></_></_>\r
+        <_>\r
+          <!-- tree 27 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  17 4 3 12 -1.</_>\r
+                <_>\r
+                  18 4 1 12 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>4.6649780124425888e-003</threshold>\r
+            <left_val>-0.0656419470906258</left_val>\r
+            <right_val>0.3640717864036560</right_val></_></_>\r
+        <_>\r
+          <!-- tree 28 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 11 6 1 -1.</_>\r
+                <_>\r
+                  3 13 2 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>5.2480432204902172e-003</threshold>\r
+            <left_val>0.0687657818198204</left_val>\r
+            <right_val>-0.5050830245018005</right_val></_></_>\r
+        <_>\r
+          <!-- tree 29 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  21 12 14 2 -1.</_>\r
+                <_>\r
+                  28 12 7 1 2.</_>\r
+                <_>\r
+                  21 13 7 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.5315659586340189e-003</threshold>\r
+            <left_val>-0.0933471694588661</left_val>\r
+            <right_val>0.1649612933397293</right_val></_></_>\r
+        <_>\r
+          <!-- tree 30 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  1 13 2 3 -1.</_>\r
+                <_>\r
+                  2 13 1 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.4391160695813596e-004</threshold>\r
+            <left_val>-0.1888543963432312</left_val>\r
+            <right_val>0.1695670038461685</right_val></_></_>\r
+        <_>\r
+          <!-- tree 31 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  26 8 3 2 -1.</_>\r
+                <_>\r
+                  27 9 1 2 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>-6.3037211075425148e-003</threshold>\r
+            <left_val>0.3826352953910828</left_val>\r
+            <right_val>-0.0590420998632908</right_val></_></_>\r
+        <_>\r
+          <!-- tree 32 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  10 8 2 3 -1.</_>\r
+                <_>\r
+                  9 9 2 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>2.2754059173166752e-003</threshold>\r
+            <left_val>-0.1224882006645203</left_val>\r
+            <right_val>0.2828365862369537</right_val></_></_>\r
+        <_>\r
+          <!-- tree 33 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  12 0 18 18 -1.</_>\r
+                <_>\r
+                  12 0 9 18 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.2769486904144287</threshold>\r
+            <left_val>0.4851497113704681</left_val>\r
+            <right_val>-0.0404825396835804</right_val></_></_>\r
+        <_>\r
+          <!-- tree 34 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 9 3 3 -1.</_>\r
+                <_>\r
+                  7 10 3 1 3.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>5.8051547966897488e-003</threshold>\r
+            <left_val>-0.0835584178566933</left_val>\r
+            <right_val>0.4215149879455566</right_val></_></_>\r
+        <_>\r
+          <!-- tree 35 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 5 5 6 -1.</_>\r
+                <_>\r
+                  28 7 5 2 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.4654529988765717e-003</threshold>\r
+            <left_val>-0.1281685978174210</left_val>\r
+            <right_val>0.2077662944793701</right_val></_></_>\r
+        <_>\r
+          <!-- tree 36 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  9 1 9 8 -1.</_>\r
+                <_>\r
+                  9 1 9 4 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>7.8863510861992836e-003</threshold>\r
+            <left_val>-0.1719754040241242</left_val>\r
+            <right_val>0.2079081982374191</right_val></_></_>\r
+        <_>\r
+          <!-- tree 37 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 0 36 2 -1.</_>\r
+                <_>\r
+                  18 0 18 1 2.</_>\r
+                <_>\r
+                  0 1 18 1 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0118171302601695</threshold>\r
+            <left_val>-0.5788066983222961</left_val>\r
+            <right_val>0.0589591413736343</right_val></_></_>\r
+        <_>\r
+          <!-- tree 38 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 0 26 6 -1.</_>\r
+                <_>\r
+                  5 0 13 3 2.</_>\r
+                <_>\r
+                  18 3 13 3 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0641399174928665</threshold>\r
+            <left_val>-0.6368926167488098</left_val>\r
+            <right_val>0.0417975001037121</right_val></_></_>\r
+        <_>\r
+          <!-- tree 39 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  28 3 3 3 -1.</_>\r
+                <_>\r
+                  28 4 3 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-1.2179970508441329e-003</threshold>\r
+            <left_val>0.2356870025396347</left_val>\r
+            <right_val>-0.0805152580142021</right_val></_></_>\r
+        <_>\r
+          <!-- tree 40 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  5 3 5 3 -1.</_>\r
+                <_>\r
+                  5 4 5 1 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>2.8652620967477560e-003</threshold>\r
+            <left_val>-0.0931371971964836</left_val>\r
+            <right_val>0.3902595043182373</right_val></_></_>\r
+        <_>\r
+          <!-- tree 41 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  14 12 8 2 -1.</_>\r
+                <_>\r
+                  16 12 4 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-5.7746102102100849e-003</threshold>\r
+            <left_val>-0.5753986835479736</left_val>\r
+            <right_val>0.0596776902675629</right_val></_></_>\r
+        <_>\r
+          <!-- tree 42 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  13 0 9 14 -1.</_>\r
+                <_>\r
+                  16 0 3 14 3.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.0653770864009857</threshold>\r
+            <left_val>0.0341660715639591</left_val>\r
+            <right_val>-0.7425342202186585</right_val></_></_>\r
+        <_>\r
+          <!-- tree 43 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  23 0 10 1 -1.</_>\r
+                <_>\r
+                  23 0 5 1 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>0.0162657108157873</threshold>\r
+            <left_val>0.0536542609333992</left_val>\r
+            <right_val>-0.2365860939025879</right_val></_></_>\r
+        <_>\r
+          <!-- tree 44 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  8 14 2 2 -1.</_>\r
+                <_>\r
+                  8 14 1 2 2.</_></rects>\r
+              <tilted>1</tilted></feature>\r
+            <threshold>2.2717609535902739e-003</threshold>\r
+            <left_val>0.0533591099083424</left_val>\r
+            <right_val>-0.5494074225425720</right_val></_></_>\r
+        <_>\r
+          <!-- tree 45 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 12 36 3 -1.</_>\r
+                <_>\r
+                  12 13 12 1 9.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>0.2262602001428604</threshold>\r
+            <left_val>-0.0420460589230061</left_val>\r
+            <right_val>0.7791252136230469</right_val></_></_>\r
+        <_>\r
+          <!-- tree 46 -->\r
+          <_>\r
+            <!-- root node -->\r
+            <feature>\r
+              <rects>\r
+                <_>\r
+                  0 13 34 4 -1.</_>\r
+                <_>\r
+                  0 13 17 2 2.</_>\r
+                <_>\r
+                  17 15 17 2 2.</_></rects>\r
+              <tilted>0</tilted></feature>\r
+            <threshold>-0.0293774604797363</threshold>\r
+            <left_val>-0.5947058796882629</left_val>\r
+            <right_val>0.0548178702592850</right_val></_></_></trees>\r
+      <stage_threshold>-1.1933319568634033</stage_threshold>\r
+      <parent>18</parent>\r
+      <next>-1</next></_></stages></SmileDetector>\r
+</opencv_storage>\r
index 7aed351..b37126f 100644 (file)
@@ -48,10 +48,10 @@ The structure of package contents looks as follows:
 
 ::
 
-    OpenCV-2.4.3-android-sdk
+    OpenCV-2.4.4-android-sdk
     |_ apk
-    |   |_ OpenCV_2.4.3_binary_pack_armv7a.apk
-    |   |_ OpenCV_2.4.3_Manager_2.0_XXX.apk
+    |   |_ OpenCV_2.4.4_binary_pack_armv7a.apk
+    |   |_ OpenCV_2.4.4_Manager_2.6_XXX.apk
     |
     |_ doc
     |_ samples
@@ -157,10 +157,10 @@ Get the OpenCV4Android SDK
 
    .. code-block:: bash
 
-      unzip ~/Downloads/OpenCV-2.4.3-android-sdk.zip
+      unzip ~/Downloads/OpenCV-2.4.4-android-sdk.zip
 
-.. |opencv_android_bin_pack| replace:: OpenCV-2.4.3.2-android-sdk.zip
-.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.3/OpenCV-2.4.3.2-android-sdk.zip/download
+.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.4-android-sdk.zip`
+.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.4/OpenCV-2.4.4-android-sdk.zip/download
 .. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack|
 .. |seven_zip| replace:: 7-Zip
 .. _seven_zip: http://www.7-zip.org/
@@ -184,7 +184,7 @@ Import OpenCV library and samples to the Eclipse
    You can simply reference it in your projects.
 
    Each sample included into the |opencv_android_bin_pack| is a regular Android project that already
-   references OpenCV library.Follow the steps below to import OpenCV and samples into the workspace:
+   references OpenCV library. Follow the steps below to import OpenCV and samples into the workspace:
 
    .. note:: OpenCV samples are indeed **dependent** on OpenCV library project so don't forget to import it to your workspace as well.
 
@@ -246,8 +246,8 @@ Running OpenCV Samples
 ----------------------
 
 At this point you should be able to build and run the samples. Keep in mind, that
-``face-detection``, ``Tutorial 3` and ``Tutorial 4`` include some native code and
-require Android NDK and CDT plugin for Eclipse to build working applications. If you haven't
+``face-detection`` and ``Tutorial 2 - Mixed Processing`` include some native code and
+require Android NDK and NDK/CDT plugin for Eclipse to build working applications. If you haven't
 installed these tools, see the corresponding section of :ref:`Android_Dev_Intro`.
 
 .. warning:: Please consider that some samples use Android Java Camera API, which is accessible
@@ -295,7 +295,7 @@ Well, running samples from Eclipse is very simple:
   .. code-block:: sh
     :linenos:
 
-    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.3_Manager_armv7a-neon.apk
+    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.4_Manager_2.6_armv7a-neon.apk
 
   .. note:: ``armeabi``, ``armv7a-neon``, ``arm7a-neon-android8``, ``mips`` and ``x86`` stand for
             platform targets:
@@ -326,15 +326,16 @@ Well, running samples from Eclipse is very simple:
 
   When done, you will be able to run OpenCV samples on your device/emulator seamlessly.
 
-* Here is ``Tutorial 2 - Use OpenCV Camera`` sample, running on top of stock camera-preview of the emulator.
+* Here is ``Sample - image-manipulations`` sample, running on top of stock camera-preview of the emulator.
 
   .. image:: images/emulator_canny.png
-     :height: 600px
-     :alt: Tutorial 1 Basic - 1. Add OpenCV - running Canny
+     :alt: 'Sample - image-manipulations' running Canny
      :align: center
 
 
 What's next
 ===========
 
-Now, when you have your instance of OpenCV4Adroid SDK set up and configured, you may want to proceed to using OpenCV in your own application. You can learn how to do that in a separate :ref:`dev_with_OCV_on_Android` tutorial.
\ No newline at end of file
+Now that you have your instance of the OpenCV4Android SDK set up and configured,
+you may want to proceed to using OpenCV in your own application.
+You can learn how to do that in a separate :ref:`dev_with_OCV_on_Android` tutorial.
index 021561e..9545bee 100644 (file)
@@ -103,8 +103,8 @@ You need the following software to be installed in order to develop for Android
 
    Here is Google's `install guide <http://developer.android.com/sdk/installing.html>`_ for the SDK.
 
-   .. note:: You can choose downloading ``ADT Bundle package`` that in addition to Android SDK Tools includes
-             Eclipse + ADT + CDT plugins, Android Platform-tools, the latest Android platform and the latest
+   .. note:: You can choose downloading **ADT Bundle package** that in addition to Android SDK Tools includes
+             Eclipse + ADT + NDK/CDT plugins, Android Platform-tools, the latest Android platform and the latest
              Android system image for the emulator - this is the best choice for those who is setting up Android
              development environment the first time!
 
@@ -112,15 +112,15 @@ You need the following software to be installed in order to develop for Android
              for use on amd64 and ia64 systems to be installed. You can install them with the
              following command:
 
-      .. code-block:: bash
+             .. code-block:: bash
 
-         sudo apt-get install ia32-libs
+                 sudo apt-get install ia32-libs
 
-      For Red Hat based systems the following command might be helpful:
+             For Red Hat based systems the following command might be helpful:
 
-      .. code-block:: bash
+             .. code-block:: bash
 
-         sudo yum install libXtst.i386
+                 sudo yum install libXtst.i386
 
 #. **Android SDK components**
 
@@ -148,7 +148,7 @@ You need the following software to be installed in order to develop for Android
 
    Check the `Android SDK System Requirements <http://developer.android.com/sdk/requirements.html>`_
    document for a list of Eclipse versions that are compatible with the Android SDK.
-   For OpenCV 2.4.x we recommend **Eclipse 3.7 (Indigo)** or later versions. They work well for
+   For OpenCV 2.4.x we recommend **Eclipse 3.7 (Indigo)** or **Eclipse 4.2 (Juno)**. They work well for
    OpenCV under both Windows and Linux.
 
    If you have no Eclipse installed, you can get it from the `official site <http://www.eclipse.org/downloads/>`_.
index 0bfc6ac..c9635aa 100644 (file)
@@ -55,14 +55,14 @@ Manager to access OpenCV libraries externally installed in the target system.
    :guilabel:`File -> Import -> Existing project in your workspace`.
 
    Press :guilabel:`Browse`  button and locate OpenCV4Android SDK 
-   (:file:`OpenCV-2.4.3-android-sdk/sdk`).
+   (:file:`OpenCV-2.4.4-android-sdk/sdk`).
 
    .. image:: images/eclipse_opencv_dependency0.png
         :alt: Add dependency from OpenCV library
         :align: center
 
 #. In application project add a reference to the OpenCV Java SDK in 
-   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.3``.
+   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.4``.
 
    .. image:: images/eclipse_opencv_dependency1.png
         :alt: Add dependency from OpenCV library
@@ -128,27 +128,27 @@ described above.
 #. Add the OpenCV library project to your workspace the same way as for the async initialization 
    above. Use menu :guilabel:`File -> Import -> Existing project in your workspace`, 
    press :guilabel:`Browse` button and select OpenCV SDK path 
-   (:file:`OpenCV-2.4.3-android-sdk/sdk`).
+   (:file:`OpenCV-2.4.4-android-sdk/sdk`).
 
    .. image:: images/eclipse_opencv_dependency0.png
         :alt: Add dependency from OpenCV library
         :align: center
 
 #. In the application project add a reference to the OpenCV4Android SDK in 
-   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.3``;
+   :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.4``;
 
    .. image:: images/eclipse_opencv_dependency1.png
        :alt: Add dependency from OpenCV library
        :align: center
 
 #. If your application project **doesn't have a JNI part**, just copy the corresponding OpenCV 
-   native libs from :file:`<OpenCV-2.4.3-android-sdk>/sdk/native/libs/<target_arch>` to your 
+   native libs from :file:`<OpenCV-2.4.4-android-sdk>/sdk/native/libs/<target_arch>` to your
    project directory to folder :file:`libs/<target_arch>`.
 
    In case of the application project **with a JNI part**, instead of manual libraries copying you 
    need to modify your ``Android.mk`` file:
    add the following two code lines after the ``"include $(CLEAR_VARS)"`` and before 
-   ``"include path_to_OpenCV-2.4.3-android-sdk/sdk/native/jni/OpenCV.mk"``
+   ``"include path_to_OpenCV-2.4.4-android-sdk/sdk/native/jni/OpenCV.mk"``
 
    .. code-block:: make
       :linenos:
@@ -221,7 +221,7 @@ taken:
 
    .. code-block:: make
 
-      include C:\Work\OpenCV4Android\OpenCV-2.4.3-android-sdk\sdk\native\jni\OpenCV.mk
+      include C:\Work\OpenCV4Android\OpenCV-2.4.4-android-sdk\sdk\native\jni\OpenCV.mk
 
    Should be inserted into the :file:`jni/Android.mk` file **after** this line:
 
@@ -382,7 +382,7 @@ result.
            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
        }
 
-#. Defines that your activity implements CvViewFrameListener interface and fix activity related
+#. Define that your activity implements the ``CvCameraViewListener2`` interface and fix activity related
    errors by defining missed methods. For this activity define ``onCreate``, ``onDestroy`` and
    ``onPause`` and implement them according code snippet bellow. Fix errors by adding requited
    imports.
@@ -423,8 +423,8 @@ result.
        public void onCameraViewStopped() {
        }
 
-       public Mat onCameraFrame(Mat inputFrame) {
-           return inputFrame;
+       public Mat onCameraFrame(CvCameraViewFrame inputFrame) {
+           return inputFrame.rgba();
        }
 
 #. Run your application on device or emulator.
@@ -432,7 +432,7 @@ result.
 Lets discuss some most important steps. Every Android application with UI must implement Activity
 and View. By the first steps we create blank activity and default view layout. The simplest
 OpenCV-centric application must implement OpenCV initialization, create its own view to show
-preview from camera and implements ``CvViewFrameListener`` interface to get frames from camera and
+preview from camera and implements ``CvCameraViewListener2`` interface to get frames from camera and
 process it.
 
 First of all we create our application view using xml layout. Our layout consists of the only
@@ -448,8 +448,13 @@ After creating layout we need to implement ``Activity`` class. OpenCV initializa
 been already discussed above. In this sample we use asynchronous initialization. Implementation of
 ``CvCameraViewListener`` interface allows you to add processing steps after frame grabbing from
 camera and before its rendering on screen. The most important function is ``onCameraFrame``. It is
-callback function and it is called on retrieving frame from camera. The callback input is frame
-from camera. RGBA format is used by default. You can change this behavior by ``SetCaptureFormat``
-method of ``View`` class. ``Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA`` and
-``Highgui.CV_CAP_ANDROID_GREY_FRAME`` are supported. It expects that function returns RGBA frame
-that will be drawn on the screen.
+callback function and it is called on retrieving a frame from the camera. The callback input is an object
+of the ``CvCameraViewFrame`` class that represents a frame from the camera.
+
+.. note::
+    Do not save or use the ``CvCameraViewFrame`` object outside of the ``onCameraFrame`` callback. This object
+    does not have its own state and its behavior outside of the callback is unpredictable!
+
+The ``CvCameraViewFrame`` object has ``rgba()`` and ``gray()`` methods that return the frame as an RGBA
+or a single-channel grayscale ``Mat`` respectively. The ``onCameraFrame`` function is expected to return
+an RGBA frame that will be drawn on the screen.
index 18e14d1..499247a 100644 (file)
Binary files a/doc/tutorials/introduction/android_binary_package/images/eclipse_10_crystal_clean.png and b/doc/tutorials/introduction/android_binary_package/images/eclipse_10_crystal_clean.png differ
index 0c5a4bb..e152bc4 100644 (file)
Binary files a/doc/tutorials/introduction/android_binary_package/images/eclipse_7_select_projects.png and b/doc/tutorials/introduction/android_binary_package/images/eclipse_7_select_projects.png differ
index 633e3d2..f8126b5 100644 (file)
Binary files a/doc/tutorials/introduction/android_binary_package/images/eclipse_cdt_cfg4.png and b/doc/tutorials/introduction/android_binary_package/images/eclipse_cdt_cfg4.png differ
index 1bc0511..d08340b 100644 (file)
Binary files a/doc/tutorials/introduction/android_binary_package/images/emulator_canny.png and b/doc/tutorials/introduction/android_binary_package/images/emulator_canny.png differ
index 2efc8e6..fee34af 100644 (file)
Binary files a/doc/tutorials/introduction/desktop_java/images/eclipse_run.png and b/doc/tutorials/introduction/desktop_java/images/eclipse_run.png differ
index 2bb50f0..1b20bec 100644 (file)
@@ -5,8 +5,6 @@
 Introduction to Java Development
 ********************************
 
-Last updated: 12 February, 2013.
-
 As of OpenCV 2.4.4, OpenCV supports desktop Java development using nearly the same interface as for
 Android development. This guide will help you to create your first Java (or Scala) application using OpenCV.
 We will use either `Eclipse <http://eclipse.org/>`_, `Apache Ant <http://ant.apache.org/>`_ or the
@@ -15,7 +13,7 @@ We will use either `Eclipse <http://eclipse.org/>`_, `Apache Ant <http://ant.apa
 For further reading after this guide, look at the :ref:`Android_Dev_Intro` tutorials.
 
 What we'll do in this guide
-***************************
+===========================
 
 In this guide, we will:
 
@@ -28,10 +26,14 @@ In this guide, we will:
 The same process was used to create the samples in the :file:`samples/java` folder of the OpenCV repository,
 so consult those files if you get lost.
 
-Get OpenCV with desktop Java support
-************************************
+Get proper OpenCV
+=================
 
 Starting from version 2.4.4 OpenCV includes desktop Java bindings.
+
+Download
+--------
+
 The most simple way to get it is downloading the appropriate package of **version 2.4.4 or higher** from the
 `OpenCV SourceForge repository <http://sourceforge.net/projects/opencvlibrary/files/>`_.
 
@@ -45,31 +47,31 @@ In order to build OpenCV with Java bindings you need :abbr:`JDK (Java Developmen
 (we recommend `Oracle/Sun JDK 6 or 7 <http://www.oracle.com/technetwork/java/javase/downloads/>`_),
 `Apache Ant <http://ant.apache.org/>`_ and `Python` v2.6 or higher to be installed.
 
-Build OpenCV
-############
+Build
+-----
 
 Let's build OpenCV:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           git clone git://github.com/Itseez/opencv.git
-           cd opencv
-           git checkout 2.4
-           mkdir build
-           cd build
+   git clone git://github.com/Itseez/opencv.git
+   cd opencv
+   git checkout 2.4
+   mkdir build
+   cd build
 
 Generate a Makefile or a MS Visual Studio* solution, or whatever you use for
 building executables in your system:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           cmake -DBUILD_SHARED_LIBS=OFF ..
+   cmake -DBUILD_SHARED_LIBS=OFF ..
 
 or
 
-        .. code-block:: bat
+.. code-block:: bat
 
-           cmake -DBUILD_SHARED_LIBS=OFF -G "Visual Studio 10" ..
+   cmake -DBUILD_SHARED_LIBS=OFF -G "Visual Studio 10" ..
 
 .. note:: When OpenCV is built as a set of **static** libraries (``-DBUILD_SHARED_LIBS=OFF`` option)
           the Java bindings dynamic library is all-sufficient,
@@ -79,39 +81,49 @@ Examine the output of CMake and ensure ``java`` is one of the modules "To be bui
 If not, it's likely you're missing a dependency. You should troubleshoot by looking
 through the CMake output for any Java-related tools that aren't found and installing them.
 
-     .. image:: images/cmake_output.png
-        :alt: CMake output
-        :align: center
+.. image:: images/cmake_output.png
+   :alt: CMake output
+   :align: center
+
+.. note:: If ``CMake`` can't find Java in your system, set the ``JAVA_HOME``
+          environment variable to the path of the installed JDK
+          before running it. E.g.:
+
+          .. code-block:: bash
+
+             export JAVA_HOME=/usr/lib/jvm/java-6-oracle
+             cmake -DBUILD_SHARED_LIBS=OFF ..
+
 
 Now start the build:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           make -j8
+   make -j8
 
 or
 
-        .. code-block:: bat
+.. code-block:: bat
 
-           msbuild /m OpenCV.sln /t:Build /p:Configuration=Release /v:m
+   msbuild /m OpenCV.sln /t:Build /p:Configuration=Release /v:m
 
+Besides, all this will create a ``jar`` containing the Java interface (:file:`bin/opencv-244.jar`)
+Besides all this will create a ``jar`` containing the Java interface (:file:`bin/opencv-244.jar`)
 and a native dynamic library containing Java bindings and all the OpenCV stuff
-(:file:`bin/Release/opencv_java244.dll` or :file:`bin/libopencv_java244.so` respectively).
+(:file:`lib/libopencv_java244.so` or :file:`bin/Release/opencv_java244.dll` respectively).
 We'll use these files later.
 
-Create a simple Java sample and an Ant build file for it
-********************************************************
+Java sample with Ant
+====================
 
 .. note::
     The described sample is provided with OpenCV library in the :file:`opencv/samples/java/ant` folder.
 
 * Create a folder where you'll develop this sample application.
 
-* In this folder create an XML file with the following content using any text editor:
+* In this folder create the :file:`build.xml` file with the following content using any text editor:
 
-    .. code-block:: xml
-        :linenos:
+  .. code-block:: xml
+     :linenos:
 
         <project name="SimpleSample" basedir="." default="rebuild-run">
 
@@ -135,7 +147,7 @@ Create a simple Java sample and an Ant build file for it
 
             <target name="compile">
                 <mkdir dir="${classes.dir}"/>
-                <javac srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath"/>
+                <javac includeantruntime="false" srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath"/>
             </target>
 
             <target name="jar" depends="compile">
@@ -163,33 +175,35 @@ Create a simple Java sample and an Ant build file for it
 
         </project>
 
-    .. note::
-        This XML file can be reused for building other Java applications.
-        It describes a common folder structure in the lines 3 - 12 and common targets
-        for compiling and running the application.
+  .. note::
+    This XML file can be reused for building other Java applications.
+    It describes a common folder structure in the lines 3 - 12 and common targets
+    for compiling and running the application.
 
-        When reusing this XML don't forget to modify the project name in the line 1,
-        that is also the name of the `main` class (line 14).
-        The paths to OpenCV `jar` and `jni lib` are expected as parameters
-        (``"${ocvJarDir}"`` in line 5 and ``"${ocvLibDir}"`` in line 37), but
-        you can hardcode these paths for your convenience.
-        See `Ant documentation <http://ant.apache.org/manual/>`_ for detailed description
-        of its build file format.
+    When reusing this XML don't forget to modify the project name in the line 1,
+    that is also the name of the `main` class (line 14).
+    The paths to OpenCV `jar` and `jni lib` are expected as parameters
+    (``"${ocvJarDir}"`` in line 5 and ``"${ocvLibDir}"`` in line 37), but
+    you can hardcode these paths for your convenience.
+    See `Ant documentation <http://ant.apache.org/manual/>`_ for detailed description
+    of its build file format.
 
 * Create an :file:`src` folder next to the :file:`build.xml` file and a :file:`SimpleSample.java` file in it.
 
 * Put the following Java code into the :file:`SimpleSample.java` file:
     .. code-block:: java
 
+        import org.opencv.core.Core;
         import org.opencv.core.Mat;
         import org.opencv.core.CvType;
         import org.opencv.core.Scalar;
 
         class SimpleSample {
 
-          static{ System.loadLibrary("opencv_java244"); }
+          static{ System.loadLibrary(Core.NATIVE_LIBRARY_NAME); }
 
           public static void main(String[] args) {
+            System.out.println("Welcome to OpenCV " + Core.VERSION);
             Mat m = new Mat(5, 10, CvType.CV_8UC1, new Scalar(0));
             System.out.println("OpenCV Mat: " + m);
             Mat mr1 = m.row(1);
@@ -219,99 +233,100 @@ Create a simple Java sample and an Ant build file for it
         :alt: run app with Ant
         :align: center
 
-Create a simple Java project in Eclipse
-***************************************
+Java project in Eclipse
+=======================
 
 Now let's look at the possibility of using OpenCV in Java when developing in the Eclipse IDE.
 
 * Create a new Eclipse workspace
 * Create a new Java project via :guilabel:`File --> New --> Java Project`
 
-    .. image:: images/eclipse_new_java_prj.png
-        :alt: Eclipse: new Java project
-        :align: center
+  .. image:: images/eclipse_new_java_prj.png
+     :alt: Eclipse: new Java project
+     :align: center
 
-    Call it say "HelloCV".
+  Call it, say, "HelloCV".
 
 * Open :guilabel:`Java Build Path` tab on :guilabel:`Project Properties` dialog
-    and configure additional library (OpenCV) reference (jar and native library location):
+  and configure additional library (OpenCV) reference (jar and native library location):
 
-    .. image:: images/eclipse_user_lib.png
-        :alt: Eclipse: external JAR
-        :align: center
+  .. image:: images/eclipse_user_lib.png
+     :alt: Eclipse: external JAR
+     :align: center
 
-   ` `
+  |
 
-    .. image:: images/eclipse_user_lib2.png
-        :alt: Eclipse: external JAR
-        :align: center
+  .. image:: images/eclipse_user_lib2.png
+     :alt: Eclipse: external JAR
+     :align: center
 
-   ` `
+  |
 
-    .. image:: images/eclipse_user_lib3.png
-        :alt: Eclipse: external JAR
-        :align: center
+  .. image:: images/eclipse_user_lib3.png
+     :alt: Eclipse: external JAR
+     :align: center
 
-   ` `
+  |
 
-    .. image:: images/eclipse_user_lib4.png
-        :alt: Eclipse: external JAR
-        :align: center
+  .. image:: images/eclipse_user_lib4.png
+     :alt: Eclipse: external JAR
+     :align: center
 
-   ` `
+  |
 
-    .. image:: images/eclipse_user_lib5.png
-        :alt: Eclipse: external JAR
-        :align: center
+  .. image:: images/eclipse_user_lib5.png
+     :alt: Eclipse: external JAR
+     :align: center
 
-   ` `
+  |
 
-    .. image:: images/eclipse_user_lib6.png
-        :alt: Eclipse: external JAR
-        :align: center
+  .. image:: images/eclipse_user_lib6.png
+     :alt: Eclipse: external JAR
+     :align: center
 
-   ` `
+  |
 
-    .. image:: images/eclipse_user_lib7.png
-        :alt: Eclipse: external JAR
-        :align: center
+  .. image:: images/eclipse_user_lib7.png
+     :alt: Eclipse: external JAR
+     :align: center
 
-   ` `
+  |
 
-    .. image:: images/eclipse_user_lib8.png
-        :alt: Eclipse: external JAR
-        :align: center
+  .. image:: images/eclipse_user_lib8.png
+     :alt: Eclipse: external JAR
+     :align: center
 
-   ` `
 
 * Add a new Java class (say ``Main``) containing the application entry:
 
-    .. image:: images/eclipse_main_class.png
-        :alt: Eclipse: Main class
-        :align: center
+  .. image:: images/eclipse_main_class.png
+     :alt: Eclipse: Main class
+     :align: center
 
 * Put some simple OpenCV calls there, e.g.:
-    .. code-block:: java
 
-        import org.opencv.core.CvType;
-        import org.opencv.core.Mat;
+  .. code-block:: java
 
-        public class Main {
-            public static void main(String[] args) {
-                System.loadLibrary("opencv_java244");
-                Mat m  = Mat.eye(3, 3, CvType.CV_8UC1);
-                System.out.println("m = " + m.dump());
-            }
+    import org.opencv.core.Core;
+    import org.opencv.core.CvType;
+    import org.opencv.core.Mat;
+
+    public class Main {
+        public static void main(String[] args) {
+            System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+            Mat m  = Mat.eye(3, 3, CvType.CV_8UC1);
+            System.out.println("m = " + m.dump());
         }
+    }
 
 * Press :guilabel:`Run` button and find the identity matrix content in the Eclipse ``Console`` window.
 
-    .. image:: images/eclipse_run.png
-        :alt: Eclipse: run
-        :align: center
+  .. image:: images/eclipse_run.png
+     :alt: Eclipse: run
+     :align: center
 
-Create an SBT project and samples in Java and Scala
-***************************************************
+SBT project for Java and Scala
+==============================
 
 Now we'll create a simple Java application using SBT. This serves as a brief introduction to
 those unfamiliar with this build tool. We're using SBT because it is particularly easy and powerful.
@@ -321,66 +336,66 @@ First, download and install `SBT <http://www.scala-sbt.org/>`_ using the instruc
 Next, navigate to a new directory where you'd like the application source to live (outside :file:`opencv` dir).
 Let's call it "JavaSample" and create a directory for it:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           cd <somewhere outside opencv>
-           mkdir JavaSample
+   cd <somewhere outside opencv>
+   mkdir JavaSample
 
 Now we will create the necessary folders and an SBT project:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           cd JavaSample
-           mkdir -p src/main/java # This is where SBT expects to find Java sources
-           mkdir project # This is where the build definitions live
+   cd JavaSample
+   mkdir -p src/main/java # This is where SBT expects to find Java sources
+   mkdir project # This is where the build definitions live
 
 Now open :file:`project/build.scala` in your favorite editor and paste the following.
 It defines your project:
 
-        .. code-block:: scala
+.. code-block:: scala
 
-            import sbt._
-            import Keys._
+   import sbt._
+   import Keys._
 
-           object JavaSampleBuild extends Build {
-             def scalaSettings = Seq(
-               scalaVersion := "2.10.0",
-               scalacOptions ++= Seq(
-                 "-optimize",
-                 "-unchecked",
-                 "-deprecation"
-               )
-             )
+   object JavaSampleBuild extends Build {
+     def scalaSettings = Seq(
+       scalaVersion := "2.10.0",
+       scalacOptions ++= Seq(
+         "-optimize",
+         "-unchecked",
+         "-deprecation"
+       )
+     )
 
-             def buildSettings =
-               Project.defaultSettings ++
-               scalaSettings
+     def buildSettings =
+       Project.defaultSettings ++
+       scalaSettings
 
-             lazy val root = {
-               val settings = buildSettings ++ Seq(name := "JavaSample")
-               Project(id = "JavaSample", base = file("."), settings = settings)
-             }
-           }
+     lazy val root = {
+       val settings = buildSettings ++ Seq(name := "JavaSample")
+       Project(id = "JavaSample", base = file("."), settings = settings)
+     }
+   }
 
 Now edit :file:`project/plugins.sbt` and paste the following.
 This will enable auto-generation of an Eclipse project:
 
-        .. code-block:: scala
+.. code-block:: scala
 
-           addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.1.0")
+   addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.1.0")
 
 Now run ``sbt`` from the :file:`JavaSample` root and from within SBT run ``eclipse`` to generate an Eclipse project:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           sbt # Starts the sbt console
-           > eclipse # Running "eclipse" from within the sbt console
+   sbt # Starts the sbt console
+   > eclipse # Running "eclipse" from within the sbt console
 
 You should see something like this:
 
-     .. image:: images/sbt_eclipse.png
-        :alt: SBT output
-        :align: center
+.. image:: images/sbt_eclipse.png
+   :alt: SBT output
+   :align: center
 
 You can now import the SBT project to Eclipse using :guilabel:`Import ... -> Existing projects into workspace`.
 Whether you actually do this is optional for the guide;
@@ -389,28 +404,28 @@ we'll be using SBT to build the project, so if you choose to use Eclipse it will
 To test that everything is working, create a simple "Hello OpenCV" application.
 Do this by creating a file :file:`src/main/java/HelloOpenCV.java` with the following contents:
 
-        .. code-block:: java
+.. code-block:: java
 
-            public class HelloOpenCV {
-              public static void main(String[] args) {
-                System.out.println("Hello, OpenCV");
-             }
-           }
+    public class HelloOpenCV {
+      public static void main(String[] args) {
+        System.out.println("Hello, OpenCV");
+     }
+   }
 
 Now execute ``run`` from the sbt console, or more concisely, run ``sbt run`` from the command line:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           sbt run
+   sbt run
 
 You should see something like this:
 
-     .. image:: images/sbt_run.png
-        :alt: SBT run
-        :align: center
+.. image:: images/sbt_run.png
+   :alt: SBT run
+   :align: center
 
-Copy the OpenCV jar and write a simple application
-********************************************************
+Running SBT samples
+-------------------
 
 Now we'll create a simple face detection application using OpenCV.
 
@@ -418,27 +433,27 @@ First, create a :file:`lib/` folder and copy the OpenCV jar into it.
 By default, SBT adds jars in the lib folder to the Java library search path.
 You can optionally rerun ``sbt eclipse`` to update your Eclipse project.
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           mkdir lib
-           cp <opencv_dir>/build/bin/opencv_<version>.jar lib/
-           sbt eclipse
+   mkdir lib
+   cp <opencv_dir>/build/bin/opencv_<version>.jar lib/
+   sbt eclipse
 
-Next, create the directory src/main/resources and download this Lena image into it:
+Next, create the directory :file:`src/main/resources` and download this Lena image into it:
 
-     .. image:: images/lena.png
-        :alt: Lena
-        :align: center
+.. image:: images/lena.png
+   :alt: Lena
+   :align: center
 
 Make sure it's called :file:`"lena.png"`.
 Items in the resources directory are available to the Java application at runtime.
 
-Next, copy :file:`lbpcascade_frontalface.xml` from :file:`opencv/data/` into the :file:`resources`
+Next, copy :file:`lbpcascade_frontalface.xml` from :file:`opencv/data/lbpcascades/` into the :file:`resources`
 directory:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           cp <opencv_dir>/data/lbpcascades/lbpcascade_frontalface.xml src/main/resources/
+   cp <opencv_dir>/data/lbpcascades/lbpcascade_frontalface.xml src/main/resources/
 
 Now modify src/main/java/HelloOpenCV.java so it contains the following Java code:
 
@@ -490,33 +505,33 @@ Now modify src/main/java/HelloOpenCV.java so it contains the following Java code
        System.out.println("Hello, OpenCV");
 
        // Load the native library.
-       System.loadLibrary("opencv_java244");
+       System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        new DetectFaceDemo().run();
      }
    }
 
-Note the call to ``System.loadLibrary("opencv_java244")``.
+Note the call to ``System.loadLibrary(Core.NATIVE_LIBRARY_NAME)``.
 This command must be executed exactly once per Java process prior to using any native OpenCV methods.
 If you don't call it, you will get ``UnsatisfiedLinkError`` errors.
 You will also get errors if you try to load OpenCV when it has already been loaded.
 
 Now run the face detection app using ``sbt run``:
 
-        .. code-block:: bash
+.. code-block:: bash
 
-           sbt run
+   sbt run
 
 You should see something like this:
 
-     .. image:: images/sbt_run_face.png
-        :alt: SBT run
-        :align: center
+.. image:: images/sbt_run_face.png
+   :alt: SBT run
+   :align: center
 
 It should also write the following image to :file:`faceDetection.png`:
 
-     .. image:: images/faceDetection.png
-        :alt: Detected face
-        :align: center
+.. image:: images/faceDetection.png
+   :alt: Detected face
+   :align: center
 
 You're done!
 Now you have a sample Java application working with OpenCV, so you can start the work on your own.
index 21cbc8b..b010900 100644 (file)
@@ -85,10 +85,10 @@ namespace
     };
     size_t colors_mum = sizeof(colors)/sizeof(colors[0]);
 
-#if (defined __cplusplus  && __cplusplus > 199711L) || defined _STLPORT_MAJOR
-#else
-template<class FwIt, class T> void iota(FwIt first, FwIt last, T value) { while(first != last) *first++ = value++; }
-#endif
+template<class FwIt, class T> inline void _iota(FwIt first, FwIt last, T value)
+{
+    while(first != last) *first++ = value++;
+}
 
 void computeNormals( const Octree& Octree, const vector<Point3f>& centers, vector<Point3f>& normals,
                     vector<uchar>& mask, float normalRadius, int minNeighbors = 20)
@@ -799,14 +799,14 @@ void cv::SpinImageModel::selectRandomSubset(float ratio)
     else if (setSize == vtxSize)
     {
         subset.resize(vtxSize);
-        iota(subset.begin(), subset.end(), 0);
+        _iota(subset.begin(), subset.end(), 0);
     }
     else
     {
         RNG& rnd = theRNG();
 
         vector<size_t> left(vtxSize);
-        iota(left.begin(), left.end(), (size_t)0);
+        _iota(left.begin(), left.end(), (size_t)0);
 
         subset.resize(setSize);
         for(size_t i = 0; i < setSize; ++i)
@@ -879,7 +879,7 @@ void cv::SpinImageModel::compute()
     {
         mesh.computeNormals(normalRadius, minNeighbors);
         subset.resize(mesh.vtx.size());
-        iota(subset.begin(), subset.end(), 0);
+        _iota(subset.begin(), subset.end(), 0);
     }
     else
         mesh.computeNormals(subset, normalRadius, minNeighbors);
index 9f18c7c..8ac7138 100644 (file)
@@ -4479,6 +4479,26 @@ public:
                   Ptr<Algorithm> (Algorithm::*getter)()=0,
                   void (Algorithm::*setter)(const Ptr<Algorithm>&)=0,
                   const string& help=string());
+    void addParam(Algorithm& algo, const char* name,
+                  float& value, bool readOnly=false,
+                  float (Algorithm::*getter)()=0,
+                  void (Algorithm::*setter)(float)=0,
+                  const string& help=string());
+    void addParam(Algorithm& algo, const char* name,
+                  unsigned int& value, bool readOnly=false,
+                  unsigned int (Algorithm::*getter)()=0,
+                  void (Algorithm::*setter)(unsigned int)=0,
+                  const string& help=string());
+    void addParam(Algorithm& algo, const char* name,
+                  uint64& value, bool readOnly=false,
+                  uint64 (Algorithm::*getter)()=0,
+                  void (Algorithm::*setter)(uint64)=0,
+                  const string& help=string());
+    void addParam(Algorithm& algo, const char* name,
+                  uchar& value, bool readOnly=false,
+                  uchar (Algorithm::*getter)()=0,
+                  void (Algorithm::*setter)(uchar)=0,
+                  const string& help=string());
     template<typename _Tp, typename _Base> void addParam(Algorithm& algo, const char* name,
                   Ptr<_Tp>& value, bool readOnly=false,
                   Ptr<_Tp> (Algorithm::*getter)()=0,
@@ -4498,7 +4518,7 @@ protected:
 
 struct CV_EXPORTS Param
 {
-    enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7, UNSIGNED_INT=8, UINT64=9, SHORT=10 };
+    enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7, UNSIGNED_INT=8, UINT64=9, SHORT=10, UCHAR=11 };
 
     Param();
     Param(int _type, bool _readonly, int _offset,
@@ -4601,6 +4621,13 @@ template<> struct ParamType<uint64>
     enum { type = Param::UINT64 };
 };
 
+template<> struct ParamType<uchar>
+{
+    typedef uchar const_param_type;
+    typedef uchar member_type;
+
+    enum { type = Param::UCHAR };
+};
 
 /*!
 "\nThe CommandLineParser class is designed for command line arguments parsing\n"
index 9c46d1c..f96f243 100644 (file)
@@ -324,6 +324,7 @@ void Algorithm::setAlgorithm(const char* parameter, const Ptr<Algorithm>& value)
 
 
 
+
 int Algorithm::getInt(const string& parameter) const
 {
     return get<int>(parameter);
@@ -431,6 +432,14 @@ void AlgorithmInfo::write(const Algorithm* algo, FileStorage& fs) const
             Ptr<Algorithm> nestedAlgo = algo->get<Algorithm>(pname);
             nestedAlgo->write(fs);
         }
+        else if( p.type == Param::FLOAT)
+            cv::write(fs, pname, algo->getDouble(pname));
+        else if( p.type == Param::UNSIGNED_INT)
+            cv::write(fs, pname, algo->getInt(pname));//TODO: implement cv::write(, , unsigned int)
+        else if( p.type == Param::UINT64)
+            cv::write(fs, pname, algo->getInt(pname));//TODO: implement cv::write(, , uint64)
+        else if( p.type == Param::UCHAR)
+            cv::write(fs, pname, algo->getInt(pname));
         else
         {
             string msg = format("unknown/unsupported type of '%s' parameter == %d", pname.c_str(), p.type);
@@ -490,6 +499,26 @@ void AlgorithmInfo::read(Algorithm* algo, const FileNode& fn) const
             nestedAlgo->read(n);
             info->set(algo, pname.c_str(), p.type, &nestedAlgo, true);
         }
+        else if( p.type == Param::FLOAT )
+        {
+            float val = (float)n;
+            info->set(algo, pname.c_str(), p.type, &val, true);
+        }
+        else if( p.type == Param::UNSIGNED_INT )
+        {
+            unsigned int val = (unsigned int)((int)n);//TODO: implement conversion (unsigned int)FileNode
+            info->set(algo, pname.c_str(), p.type, &val, true);
+        }
+        else if( p.type == Param::UINT64)
+        {
+            uint64 val = (uint64)((int)n);//TODO: implement conversion (uint64)FileNode
+            info->set(algo, pname.c_str(), p.type, &val, true);
+        }
+        else if( p.type == Param::UCHAR)
+        {
+            uchar val = (uchar)((int)n);
+            info->set(algo, pname.c_str(), p.type, &val, true);
+        }
         else
         {
             string msg = format("unknown/unsupported type of '%s' parameter == %d", pname.c_str(), p.type);
@@ -512,6 +541,10 @@ union GetSetParam
     Mat (Algorithm::*get_mat)() const;
     vector<Mat> (Algorithm::*get_mat_vector)() const;
     Ptr<Algorithm> (Algorithm::*get_algo)() const;
+    float (Algorithm::*get_float)() const;
+    unsigned int (Algorithm::*get_uint)() const;
+    uint64 (Algorithm::*get_uint64)() const;
+    uchar (Algorithm::*get_uchar)() const;
 
     void (Algorithm::*set_int)(int);
     void (Algorithm::*set_bool)(bool);
@@ -520,6 +553,10 @@ union GetSetParam
     void (Algorithm::*set_mat)(const Mat&);
     void (Algorithm::*set_mat_vector)(const vector<Mat>&);
     void (Algorithm::*set_algo)(const Ptr<Algorithm>&);
+    void (Algorithm::*set_float)(float);
+    void (Algorithm::*set_uint)(unsigned int);
+    void (Algorithm::*set_uint64)(uint64);
+    void (Algorithm::*set_uchar)(uchar);
 };
 
 static string getNameOfType(int argType);
@@ -536,6 +573,10 @@ static string getNameOfType(int argType)
         case Param::MAT: return "cv::Mat";
         case Param::MAT_VECTOR: return "std::vector<cv::Mat>";
         case Param::ALGORITHM: return "algorithm";
+        case Param::FLOAT: return "float";
+        case Param::UNSIGNED_INT: return "unsigned int";
+        case Param::UINT64: return "unsigned int64";
+        case Param::UCHAR: return "unsigned char";
         default: CV_Error(CV_StsBadArg, "Wrong argument type");
     }
     return "";
@@ -547,9 +588,10 @@ static string getErrorMessageForWrongArgumentInSetter(string algoName, string pa
         + " method was called for the parameter '" + paramName + "' of the algorithm '" + algoName
         +"', the parameter has " + getNameOfType(paramType) + " type, ";
 
-    if (paramType == Param::INT || paramType == Param::BOOLEAN || paramType == Param::REAL)
+    if (paramType == Param::INT || paramType == Param::BOOLEAN || paramType == Param::REAL
+            || paramType == Param::FLOAT || paramType == Param::UNSIGNED_INT || paramType == Param::UINT64 || paramType == Param::UCHAR)
     {
-        message += "so it should be set by integer, boolean, or double value, ";
+        message += "so it should be set by integer, unsigned integer, uint64, unsigned char, boolean, float or double value, ";
     }
     else if (paramType == Param::SHORT)
     {
@@ -569,16 +611,20 @@ static string getErrorMessageForWrongArgumentInGetter(string algoName, string pa
 
     if (paramType == Param::BOOLEAN)
     {
-        message += "so it should be get as integer, boolean, or double value, ";
+        message += "so it should be get as integer, unsigned integer, uint64, boolean, unsigned char, float or double value, ";
     }
-    else if (paramType == Param::INT)
+    else if (paramType == Param::INT || paramType == Param::UNSIGNED_INT || paramType == Param::UINT64 || paramType == Param::UCHAR)
     {
-        message += "so it should be get as integer or double value, ";
+        message += "so it should be get as integer, unsigned integer, uint64, unsigned char, float or double value, ";
     }
     else if (paramType == Param::SHORT)
     {
         message += "so it should be get as integer value, ";
     }
+    else if (paramType == Param::FLOAT || paramType == Param::REAL)
+    {
+        message += "so it should be get as float or double value, ";
+    }
     message += "but the getter was called to get a " + getNameOfType(argType) + " value";
 
     return message;
@@ -597,9 +643,12 @@ void AlgorithmInfo::set(Algorithm* algo, const char* parameter, int argType, con
     GetSetParam f;
     f.set_int = p->setter;
 
-    if( argType == Param::INT || argType == Param::BOOLEAN || argType == Param::REAL || argType == Param::SHORT )
+    if( argType == Param::INT || argType == Param::BOOLEAN || argType == Param::REAL || argType == Param::SHORT
+            || argType == Param::FLOAT || argType == Param::UNSIGNED_INT || argType == Param::UINT64 || argType == Param::UCHAR)
     {
-        if ( !( p->type == Param::INT || p->type == Param::REAL || p->type == Param::BOOLEAN || (p->type == Param::SHORT && argType == Param::INT)) )
+        if ( !( p->type == Param::INT || p->type == Param::REAL || p->type == Param::BOOLEAN
+                || p->type == Param::UNSIGNED_INT || p->type == Param::UINT64 || p->type == Param::FLOAT || argType == Param::UCHAR
+                || (p->type == Param::SHORT && argType == Param::INT)) )
         {
             string message = getErrorMessageForWrongArgumentInSetter(algo->name(), parameter, p->type, argType);
             CV_Error(CV_StsBadArg, message);
@@ -607,9 +656,21 @@ void AlgorithmInfo::set(Algorithm* algo, const char* parameter, int argType, con
 
         if( p->type == Param::INT )
         {
+            bool is_ok = true;
             int val = argType == Param::INT ? *(const int*)value :
-            argType == Param::BOOLEAN ? (int)*(const bool*)value :
-            saturate_cast<int>(*(const double*)value);
+                argType == Param::BOOLEAN ? (int)*(const bool*)value :
+                argType == Param::REAL ? saturate_cast<int>(*(const double*)value) :
+                argType == Param::FLOAT ?  saturate_cast<int>(*(const float*)value) :
+                argType == Param::UNSIGNED_INT ? (int)*(const unsigned int*)value :
+                argType == Param::UINT64 ? (int)*(const uint64*)value :
+                argType == Param::UCHAR ? (int)*(const uchar*)value :
+                (int)(is_ok = false);
+
+            if (!is_ok)
+            {
+                CV_Error(CV_StsBadArg, "Wrong argument type in the setter");
+            }
+
             if( p->setter )
                 (algo->*f.set_int)(val);
             else
@@ -617,6 +678,7 @@ void AlgorithmInfo::set(Algorithm* algo, const char* parameter, int argType, con
         }
         else if( p->type == Param::SHORT )
         {
+            CV_DbgAssert(argType == Param::INT);
             int val = *(const int*)value;
             if( p->setter )
                 (algo->*f.set_int)(val);
@@ -625,24 +687,133 @@ void AlgorithmInfo::set(Algorithm* algo, const char* parameter, int argType, con
         }
         else if( p->type == Param::BOOLEAN )
         {
+            bool is_ok = true;
             bool val = argType == Param::INT ? *(const int*)value != 0 :
                     argType == Param::BOOLEAN ? *(const bool*)value :
-                    *(const double*)value != 0;
+                    argType == Param::REAL ? (*(const double*)value != 0) :
+                    argType == Param::FLOAT ?  (*(const float*)value != 0) :
+                    argType == Param::UNSIGNED_INT ? (*(const unsigned int*)value != 0):
+                    argType == Param::UINT64 ? (*(const uint64*)value != 0):
+                    argType == Param::UCHAR ? (*(const uchar*)value != 0):
+                    (int)(is_ok = false);
+
+            if (!is_ok)
+            {
+                CV_Error(CV_StsBadArg, "Wrong argument type in the setter");
+            }
+
             if( p->setter )
                 (algo->*f.set_bool)(val);
             else
                 *(bool*)((uchar*)algo + p->offset) = val;
         }
-        else
+        else if( p->type == Param::REAL )
         {
+            bool is_ok = true;
             double val = argType == Param::INT ? (double)*(const int*)value :
                          argType == Param::BOOLEAN ? (double)*(const bool*)value :
-                        *(const double*)value;
+                         argType == Param::REAL ? (double)(*(const double*)value ) :
+                         argType == Param::FLOAT ?  (double)(*(const float*)value ) :
+                         argType == Param::UNSIGNED_INT ? (double)(*(const unsigned int*)value ) :
+                         argType == Param::UINT64 ? (double)(*(const uint64*)value ) :
+                         argType == Param::UCHAR ? (double)(*(const uchar*)value ) :
+                         (double)(is_ok = false);
+
+            if (!is_ok)
+            {
+                CV_Error(CV_StsBadArg, "Wrong argument type in the setter");
+            }
             if( p->setter )
                 (algo->*f.set_double)(val);
             else
                 *(double*)((uchar*)algo + p->offset) = val;
         }
+        else if( p->type == Param::FLOAT )
+        {
+            bool is_ok = true;
+            double val = argType == Param::INT ? (double)*(const int*)value :
+                         argType == Param::BOOLEAN ? (double)*(const bool*)value :
+                         argType == Param::REAL ? (double)(*(const double*)value ) :
+                         argType == Param::FLOAT ?  (double)(*(const float*)value ) :
+                         argType == Param::UNSIGNED_INT ? (double)(*(const unsigned int*)value ) :
+                         argType == Param::UINT64 ? (double)(*(const uint64*)value ) :
+                         argType == Param::UCHAR ? (double)(*(const uchar*)value ) :
+                         (double)(is_ok = false);
+
+            if (!is_ok)
+            {
+                CV_Error(CV_StsBadArg, "Wrong argument type in the setter");
+            }
+            if( p->setter )
+                (algo->*f.set_float)((float)val);
+            else
+                *(float*)((uchar*)algo + p->offset) = (float)val;
+        }
+        else if( p->type == Param::UNSIGNED_INT )
+        {
+            bool is_ok = true;
+            unsigned int val = argType == Param::INT ? (unsigned int)*(const int*)value :
+                         argType == Param::BOOLEAN ? (unsigned int)*(const bool*)value :
+                         argType == Param::REAL ? saturate_cast<unsigned int>(*(const double*)value ) :
+                         argType == Param::FLOAT ?  saturate_cast<unsigned int>(*(const float*)value ) :
+                         argType == Param::UNSIGNED_INT ? (unsigned int)(*(const unsigned int*)value ) :
+                         argType == Param::UINT64 ? (unsigned int)(*(const uint64*)value ) :
+                         argType == Param::UCHAR ? (unsigned int)(*(const uchar*)value ) :
+                         (int)(is_ok = false);
+
+            if (!is_ok)
+            {
+                CV_Error(CV_StsBadArg, "Wrong argument type in the setter");
+            }
+            if( p->setter )
+                (algo->*f.set_uint)(val);
+            else
+                *(unsigned int*)((uchar*)algo + p->offset) = val;
+        }
+        else if( p->type == Param::UINT64 )
+        {
+            bool is_ok = true;
+            uint64 val = argType == Param::INT ? (uint64)*(const int*)value :
+                         argType == Param::BOOLEAN ? (uint64)*(const bool*)value :
+                         argType == Param::REAL ? saturate_cast<uint64>(*(const double*)value ) :
+                         argType == Param::FLOAT ?  saturate_cast<uint64>(*(const float*)value ) :
+                         argType == Param::UNSIGNED_INT ? (uint64)(*(const unsigned int*)value ) :
+                         argType == Param::UINT64 ? (uint64)(*(const uint64*)value ) :
+                         argType == Param::UCHAR ? (uint64)(*(const uchar*)value ) :
+                         (int)(is_ok = false);
+
+            if (!is_ok)
+            {
+                CV_Error(CV_StsBadArg, "Wrong argument type in the setter");
+            }
+            if( p->setter )
+                (algo->*f.set_uint64)(val);
+            else
+                *(uint64*)((uchar*)algo + p->offset) = val;
+        }
+        else if( p->type == Param::UCHAR )
+        {
+            bool is_ok = true;
+            uchar val = argType == Param::INT ? (uchar)*(const int*)value :
+                         argType == Param::BOOLEAN ? (uchar)*(const bool*)value :
+                         argType == Param::REAL ? saturate_cast<uchar>(*(const double*)value ) :
+                         argType == Param::FLOAT ?  saturate_cast<uchar>(*(const float*)value ) :
+                         argType == Param::UNSIGNED_INT ? (uchar)(*(const unsigned int*)value ) :
+                         argType == Param::UINT64 ? (uchar)(*(const uint64*)value ) :
+                         argType == Param::UCHAR ? (uchar)(*(const uchar*)value ) :
+                         (int)(is_ok = false);
+
+            if (!is_ok)
+            {
+                CV_Error(CV_StsBadArg, "Wrong argument type in the setter");
+            }
+            if( p->setter )
+                (algo->*f.set_uchar)(val);
+            else
+                *(uchar*)((uchar*)algo + p->offset) = val;
+        }
+        else
+            CV_Error(CV_StsBadArg, "Wrong parameter type in the setter");
     }
     else if( argType == Param::STRING )
     {
@@ -713,11 +884,12 @@ void AlgorithmInfo::get(const Algorithm* algo, const char* parameter, int argTyp
     GetSetParam f;
     f.get_int = p->getter;
 
-    if( argType == Param::INT || argType == Param::BOOLEAN || argType == Param::REAL )
+    if( argType == Param::INT || argType == Param::BOOLEAN || argType == Param::REAL || argType == Param::SHORT
+            || argType == Param::FLOAT || argType == Param::UNSIGNED_INT || argType == Param::UINT64 || argType == Param::UCHAR)
     {
         if( p->type == Param::INT )
         {
-            if (!( argType == Param::INT || argType == Param::REAL ))
+            if (!( argType == Param::INT || argType == Param::REAL || argType == Param::FLOAT || argType == Param::UNSIGNED_INT || argType == Param::UINT64 || argType == Param::UCHAR))
             {
                 string message = getErrorMessageForWrongArgumentInGetter(algo->name(), parameter, p->type, argType);
                 CV_Error(CV_StsBadArg, message);
@@ -725,9 +897,20 @@ void AlgorithmInfo::get(const Algorithm* algo, const char* parameter, int argTyp
             int val = p->getter ? (algo->*f.get_int)() : *(int*)((uchar*)algo + p->offset);
 
             if( argType == Param::INT )
-                *(int*)value = val;
+                *(int*)value = (int)val;
+            else if ( argType == Param::REAL )
+                *(double*)value = (double)val;
+            else if ( argType == Param::FLOAT)
+                *(float*)value = (float)val;
+            else if ( argType == Param::UNSIGNED_INT )
+                *(unsigned int*)value = (unsigned int)val;
+            else if ( argType == Param::UINT64 )
+                *(uint64*)value = (uint64)val;
+            else if ( argType == Param::UCHAR)
+                *(uchar*)value = (uchar)val;
             else
-                *(double*)value = val;
+                CV_Error(CV_StsBadArg, "Wrong argument type");
+
         }
         else if( p->type == Param::SHORT )
         {
@@ -742,7 +925,7 @@ void AlgorithmInfo::get(const Algorithm* algo, const char* parameter, int argTyp
         }
         else if( p->type == Param::BOOLEAN )
         {
-            if (!( argType == Param::INT || argType == Param::BOOLEAN || argType == Param::REAL ))
+            if (!( argType == Param::INT || argType == Param::BOOLEAN || argType == Param::REAL || argType == Param::FLOAT || argType == Param::UNSIGNED_INT || argType == Param::UINT64 || argType == Param::UCHAR))
             {
                 string message = getErrorMessageForWrongArgumentInGetter(algo->name(), parameter, p->type, argType);
                 CV_Error(CV_StsBadArg, message);
@@ -753,20 +936,126 @@ void AlgorithmInfo::get(const Algorithm* algo, const char* parameter, int argTyp
                 *(int*)value = (int)val;
             else if( argType == Param::BOOLEAN )
                 *(bool*)value = val;
-            else
+            else if ( argType == Param::REAL )
                 *(double*)value = (int)val;
+            else if ( argType == Param::FLOAT)
+                *(float*)value = (float)((int)val);
+            else if ( argType == Param::UNSIGNED_INT )
+                *(unsigned int*)value = (unsigned int)val;
+            else if ( argType == Param::UINT64 )
+                *(uint64*)value = (int)val;
+            else if ( argType == Param::UCHAR)
+                *(uchar*)value = (uchar)val;
+            else
+                CV_Error(CV_StsBadArg, "Wrong argument type");
         }
-        else
+        else if( p->type == Param::REAL )
         {
-            if( argType != Param::REAL )
+            if(!( argType == Param::REAL || argType == Param::FLOAT))
             {
                 string message = getErrorMessageForWrongArgumentInGetter(algo->name(), parameter, p->type, argType);
                 CV_Error(CV_StsBadArg, message);
             }
             double val = p->getter ? (algo->*f.get_double)() : *(double*)((uchar*)algo + p->offset);
 
-            *(double*)value = val;
+            if ( argType == Param::REAL )
+                *(double*)value = val;
+            else if ( argType == Param::FLOAT)
+                *(float*)value = (float)val;
+            else
+                CV_Error(CV_StsBadArg, "Wrong argument type");
         }
+        else if( p->type == Param::FLOAT )
+        {
+            if(!( argType == Param::REAL || argType == Param::FLOAT))
+            {
+                string message = getErrorMessageForWrongArgumentInGetter(algo->name(), parameter, p->type, argType);
+                CV_Error(CV_StsBadArg, message);
+            }
+            float val = p->getter ? (algo->*f.get_float)() : *(float*)((uchar*)algo + p->offset);
+
+            if ( argType == Param::REAL )
+                *(double*)value = (double)val;
+            else if ( argType == Param::FLOAT)
+                *(float*)value = (float)val;
+            else
+                CV_Error(CV_StsBadArg, "Wrong argument type");
+        }
+        else if( p->type == Param::UNSIGNED_INT )
+        {
+            if (!( argType == Param::INT || argType == Param::REAL || argType == Param::FLOAT || argType == Param::UNSIGNED_INT || argType == Param::UINT64 || argType == Param::UCHAR))
+            {
+                string message = getErrorMessageForWrongArgumentInGetter(algo->name(), parameter, p->type, argType);
+                CV_Error(CV_StsBadArg, message);
+            }
+            unsigned int val = p->getter ? (algo->*f.get_uint)() : *(unsigned int*)((uchar*)algo + p->offset);
+
+            if( argType == Param::INT )
+                *(int*)value = (int)val;
+            else if ( argType == Param::REAL )
+                *(double*)value = (double)val;
+            else if ( argType == Param::FLOAT)
+                *(float*)value = (float)val;
+            else if ( argType == Param::UNSIGNED_INT )
+                *(unsigned int*)value = (unsigned int)val;
+            else if ( argType == Param::UINT64 )
+                *(uint64*)value = (uint64)val;
+            else if ( argType == Param::UCHAR)
+                *(uchar*)value = (uchar)val;
+            else
+                CV_Error(CV_StsBadArg, "Wrong argument type");
+        }
+        else if( p->type == Param::UINT64 )
+        {
+            if (!( argType == Param::INT || argType == Param::REAL || argType == Param::FLOAT || argType == Param::UNSIGNED_INT || argType == Param::UINT64 || argType == Param::UCHAR))
+            {
+                string message = getErrorMessageForWrongArgumentInGetter(algo->name(), parameter, p->type, argType);
+                CV_Error(CV_StsBadArg, message);
+            }
+            uint64 val = p->getter ? (algo->*f.get_uint64)() : *(uint64*)((uchar*)algo + p->offset);
+
+            if( argType == Param::INT )
+                *(int*)value = (int)val;
+            else if ( argType == Param::REAL )
+                *(double*)value = (double)val;
+            else if ( argType == Param::FLOAT)
+                *(float*)value = (float)val;
+            else if ( argType == Param::UNSIGNED_INT )
+                *(unsigned int*)value = (unsigned int)val;
+            else if ( argType == Param::UINT64 )
+                *(uint64*)value = (uint64)val;
+            else if ( argType == Param::UCHAR)
+                *(uchar*)value = (uchar)val;
+            else
+                CV_Error(CV_StsBadArg, "Wrong argument type");
+        }
+        else if( p->type == Param::UCHAR )
+        {
+            if (!( argType == Param::INT || argType == Param::REAL || argType == Param::FLOAT || argType == Param::UNSIGNED_INT || argType == Param::UINT64 || argType == Param::UCHAR))
+            {
+                string message = getErrorMessageForWrongArgumentInGetter(algo->name(), parameter, p->type, argType);
+                CV_Error(CV_StsBadArg, message);
+            }
+            uchar val = p->getter ? (algo->*f.get_uchar)() : *(uchar*)((uchar*)algo + p->offset);
+
+            if( argType == Param::INT )
+                *(int*)value = val;
+            else if ( argType == Param::REAL )
+                *(double*)value = val;
+            else if ( argType == Param::FLOAT)
+                *(float*)value = val;
+            else if ( argType == Param::UNSIGNED_INT )
+                *(unsigned int*)value = val;
+            else if ( argType == Param::UINT64 )
+                *(uint64*)value = val;
+            else if ( argType == Param::UCHAR)
+                *(uchar*)value = val;
+            else
+                CV_Error(CV_StsBadArg, "Wrong argument type");
+
+        }
+        else
+            CV_Error(CV_StsBadArg, "Unknown/unsupported parameter type");
     }
     else if( argType == Param::STRING )
     {
@@ -852,7 +1141,9 @@ void AlgorithmInfo::addParam_(Algorithm& algo, const char* parameter, int argTyp
     CV_Assert( argType == Param::INT || argType == Param::BOOLEAN ||
                argType == Param::REAL || argType == Param::STRING ||
                argType == Param::MAT || argType == Param::MAT_VECTOR ||
-               argType == Param::ALGORITHM || argType == Param::SHORT );
+               argType == Param::ALGORITHM || argType == Param::SHORT
+               || argType == Param::FLOAT || argType == Param::UNSIGNED_INT || argType == Param::UINT64
+               || argType == Param::UCHAR);
     data->params.add(string(parameter), Param(argType, readOnly,
                      (int)((size_t)value - (size_t)(void*)&algo),
                      getter, setter, help));
@@ -939,6 +1230,46 @@ void AlgorithmInfo::addParam(Algorithm& algo, const char* parameter,
               (Algorithm::Getter)getter, (Algorithm::Setter)setter, help);
 }
 
+void AlgorithmInfo::addParam(Algorithm& algo, const char* parameter, // float overload: registers the named parameter for `algo`
+                             float& value, bool readOnly,            // `value`: the float field to expose; `readOnly` is forwarded unchanged
+                             float (Algorithm::*getter)(),           // optional accessor member functions, forwarded to addParam_
+                             void (Algorithm::*setter)(float),
+                             const string& help)
+{
+    addParam_(algo, parameter, ParamType<float>::type, &value, readOnly, // type tag comes from ParamType<float>; addParam_ receives the field's address
+              (Algorithm::Getter)getter, (Algorithm::Setter)setter, help); // typed member pointers are cast to the generic Getter/Setter types
+}
+
+void AlgorithmInfo::addParam(Algorithm& algo, const char* parameter, // unsigned int overload: registers the named parameter for `algo`
+                             unsigned int& value, bool readOnly,     // `value`: the unsigned int field to expose; `readOnly` is forwarded unchanged
+                             unsigned int (Algorithm::*getter)(),    // optional accessor member functions, forwarded to addParam_
+                             void (Algorithm::*setter)(unsigned int),
+                             const string& help)
+{
+    addParam_(algo, parameter, ParamType<unsigned int>::type, &value, readOnly, // type tag comes from ParamType<unsigned int>; addParam_ receives the field's address
+              (Algorithm::Getter)getter, (Algorithm::Setter)setter, help); // typed member pointers are cast to the generic Getter/Setter types
+}
+
+void AlgorithmInfo::addParam(Algorithm& algo, const char* parameter, // uint64 overload: registers the named parameter for `algo`
+                             uint64& value, bool readOnly,           // `value`: the uint64 field to expose; `readOnly` is forwarded unchanged
+                             uint64 (Algorithm::*getter)(),          // optional accessor member functions, forwarded to addParam_
+                             void (Algorithm::*setter)(uint64),
+                             const string& help)
+{
+    addParam_(algo, parameter, ParamType<uint64>::type, &value, readOnly, // type tag comes from ParamType<uint64>; addParam_ receives the field's address
+              (Algorithm::Getter)getter, (Algorithm::Setter)setter, help); // typed member pointers are cast to the generic Getter/Setter types
+}
+
+void AlgorithmInfo::addParam(Algorithm& algo, const char* parameter, // uchar overload: registers the named parameter for `algo`
+                             uchar& value, bool readOnly,            // `value`: the uchar field to expose; `readOnly` is forwarded unchanged
+                             uchar (Algorithm::*getter)(),           // optional accessor member functions, forwarded to addParam_
+                             void (Algorithm::*setter)(uchar),
+                             const string& help)
+{
+    addParam_(algo, parameter, ParamType<uchar>::type, &value, readOnly, // type tag comes from ParamType<uchar>; addParam_ receives the field's address
+              (Algorithm::Getter)getter, (Algorithm::Setter)setter, help); // typed member pointers are cast to the generic Getter/Setter types
+}
+
 }
 
 /* End of file. */
index 019d361..62206fa 100644 (file)
 
 #include "precomp.hpp"
 
+#if defined _M_IX86 && defined _MSC_VER && _MSC_VER < 1700 // only 32-bit x86 builds with MSVC older than _MSC_VER 1700
+#pragma float_control(precise, on)
+#endif // _M_IX86 && _MSC_VER < 1700
+
 namespace cv
 {
 
@@ -1095,6 +1099,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
             if( type == CV_32FC1 )
             {
                 double d = det3(Sf);
+
                 if( d != 0. )
                 {
                     double t[12];
index 0fc0edb..16025fa 100644 (file)
@@ -419,7 +419,9 @@ static void fixCCS( Mat& mat, int cols, int flags )
     }
 }
 
-
+#if defined _MSC_VER &&  _MSC_VER >= 1700 // MSVC with _MSC_VER 1700 or newer only
+#pragma optimize("", off)
+#endif // optimizations stay off for mulComplex below; a matching optimize("", on) follows the function
 static void mulComplex( const Mat& src1, const Mat& src2, Mat& dst, int flags )
 {
     dst.create(src1.rows, src1.cols, src1.type());
@@ -439,8 +441,8 @@ static void mulComplex( const Mat& src1, const Mat& src2, Mat& dst, int flags )
             if( !(flags & CV_DXT_MUL_CONJ) )
                 for( j = 0; j < cols; j += 2 )
                 {
-                    double re = (double)a[j]*b[j] - (double)a[j+1]*b[j+1];
-                    double im = (double)a[j+1]*b[j] + (double)a[j]*b[j+1];
+                    double re = (double)a[j]*(double)b[j] - (double)a[j+1]*(double)b[j+1];
+                    double im = (double)a[j+1]*(double)b[j] + (double)a[j]*(double)b[j+1];
 
                     c[j] = (float)re;
                     c[j+1] = (float)im;
@@ -448,8 +450,8 @@ static void mulComplex( const Mat& src1, const Mat& src2, Mat& dst, int flags )
             else
                 for( j = 0; j < cols; j += 2 )
                 {
-                    double re = (double)a[j]*b[j] + (double)a[j+1]*b[j+1];
-                    double im = (double)a[j+1]*b[j] - (double)a[j]*b[j+1];
+                    double re = (double)a[j]*(double)b[j] + (double)a[j+1]*(double)b[j+1];
+                    double im = (double)a[j+1]*(double)b[j] - (double)a[j]*(double)b[j+1];
 
                     c[j] = (float)re;
                     c[j+1] = (float)im;
@@ -482,6 +484,9 @@ static void mulComplex( const Mat& src1, const Mat& src2, Mat& dst, int flags )
         }
     }
 }
+#if defined _MSC_VER &&  _MSC_VER >= 1700 // same compiler guard as the optimize("", off) placed before mulComplex
+#pragma optimize("", on)
+#endif // restores the optimization settings for the rest of the file
 
 }
 
index e135087..8d644a3 100644 (file)
@@ -658,6 +658,7 @@ protected:
   virtual void findBlobs(const Mat &image, const Mat &binaryImage, vector<Center> &centers) const;
 
   Params params;
+  AlgorithmInfo* info() const;
 };
 
 
index 1e1b0ca..ebac9cb 100644 (file)
@@ -130,6 +130,26 @@ CV_INIT_ALGORITHM(GFTTDetector, "Feature2D.GFTT",
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+CV_INIT_ALGORITHM(SimpleBlobDetector, "Feature2D.SimpleBlob", // registers SimpleBlobDetector under the name "Feature2D.SimpleBlob" and exposes fields of obj.params
+                  obj.info()->addParam(obj, "thresholdStep",    obj.params.thresholdStep);
+                  obj.info()->addParam(obj, "minThreshold",     obj.params.minThreshold);
+                  obj.info()->addParam(obj, "maxThreshold",     obj.params.maxThreshold);
+                  obj.info()->addParam_(obj, "minRepeatability", (sizeof(size_t) == sizeof(uint64))?Param::UINT64 : Param::UNSIGNED_INT, &obj.params.minRepeatability, false, 0, 0); // low-level addParam_: type tag chosen by sizeof(size_t) (presumably the field is a size_t - TODO confirm); not read-only, no getter/setter
+                  obj.info()->addParam(obj, "minDistBetweenBlobs", obj.params.minDistBetweenBlobs);
+                  obj.info()->addParam(obj, "filterByColor",    obj.params.filterByColor);
+                  obj.info()->addParam(obj, "blobColor",        obj.params.blobColor);
+                  obj.info()->addParam(obj, "filterByArea",     obj.params.filterByArea);
+                  obj.info()->addParam(obj, "maxArea",          obj.params.maxArea); // NOTE(review): only max* bounds are registered for area/circularity/inertia/convexity; if Params also has min* counterparts they are not exposed here - confirm
+                  obj.info()->addParam(obj, "filterByCircularity", obj.params.filterByCircularity);
+                  obj.info()->addParam(obj, "maxCircularity",   obj.params.maxCircularity);
+                  obj.info()->addParam(obj, "filterByInertia",  obj.params.filterByInertia);
+                  obj.info()->addParam(obj, "maxInertiaRatio",  obj.params.maxInertiaRatio);
+                  obj.info()->addParam(obj, "filterByConvexity", obj.params.filterByConvexity);
+                  obj.info()->addParam(obj, "maxConvexity",     obj.params.maxConvexity);
+                  );
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
 class CV_EXPORTS HarrisDetector : public GFTTDetector
 {
 public:
index a9481b2..802954c 100644 (file)
@@ -627,6 +627,26 @@ CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat&
 //! converts image from one color space to another
 CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
 
+enum // conversion codes for the MHT Bayer demosaicing variants; presumably the `code` values accepted by demosaicing() below - TODO confirm
+{
+    // Bayer Demosaicing (Malvar, He, and Cutler)
+    COLOR_BayerBG2BGR_MHT = 256, // Bayer -> BGR codes: 256..259
+    COLOR_BayerGB2BGR_MHT = 257,
+    COLOR_BayerRG2BGR_MHT = 258,
+    COLOR_BayerGR2BGR_MHT = 259,
+
+    COLOR_BayerBG2RGB_MHT = COLOR_BayerRG2BGR_MHT, // each RGB code aliases the BGR code of the pattern with R and B swapped (BG<->RG, GB<->GR)
+    COLOR_BayerGB2RGB_MHT = COLOR_BayerGR2BGR_MHT,
+    COLOR_BayerRG2RGB_MHT = COLOR_BayerBG2BGR_MHT,
+    COLOR_BayerGR2RGB_MHT = COLOR_BayerGB2BGR_MHT,
+
+    COLOR_BayerBG2GRAY_MHT = 260, // Bayer -> GRAY codes: 260..263
+    COLOR_BayerGB2GRAY_MHT = 261,
+    COLOR_BayerRG2GRAY_MHT = 262,
+    COLOR_BayerGR2GRAY_MHT = 263
+};
+CV_EXPORTS void demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn = -1, Stream& stream = Stream::Null()); // src -> dst conversion selected by `code`; dcn defaults to -1 and stream to Stream::Null()
+
 //! swap channels
 //! dstOrder - Integer array describing how channel values are permutated. The n-th entry
 //!            of the array contains the number of the channel that is stored in the n-th channel of
index 906024f..b174d9a 100644 (file)
@@ -3,15 +3,14 @@
 using namespace std;
 using namespace testing;
 
-namespace {
-
 //////////////////////////////////////////////////////////////////////
 // StereoBM
 
 typedef std::tr1::tuple<string, string> pair_string;
 DEF_PARAM_TEST_1(ImagePair, pair_string);
 
-PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
+PERF_TEST_P(ImagePair, Calib3D_StereoBM,
+            Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
 {
     declare.time(5.0);
 
@@ -28,18 +27,13 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
     {
         cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
 
-        cv::gpu::GpuMat d_imgLeft(imgLeft);
-        cv::gpu::GpuMat d_imgRight(imgRight);
-        cv::gpu::GpuMat d_dst;
-
-        d_bm(d_imgLeft, d_imgRight, d_dst);
+        const cv::gpu::GpuMat d_imgLeft(imgLeft);
+        const cv::gpu::GpuMat d_imgRight(imgRight);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            d_bm(d_imgLeft, d_imgRight, d_dst);
-        }
+        TEST_CYCLE() d_bm(d_imgLeft, d_imgRight, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -47,12 +41,7 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
 
         cv::Mat dst;
 
-        bm(imgLeft, imgRight, dst);
-
-        TEST_CYCLE()
-        {
-            bm(imgLeft, imgRight, dst);
-        }
+        TEST_CYCLE() bm(imgLeft, imgRight, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -61,7 +50,8 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png",
 //////////////////////////////////////////////////////////////////////
 // StereoBeliefPropagation
 
-PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
+PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
+            Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
 {
     declare.time(10.0);
 
@@ -77,29 +67,25 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/
     {
         cv::gpu::StereoBeliefPropagation d_bp(ndisp);
 
-        cv::gpu::GpuMat d_imgLeft(imgLeft);
-        cv::gpu::GpuMat d_imgRight(imgRight);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_imgLeft(imgLeft);
+        const cv::gpu::GpuMat d_imgRight(imgRight);
+        cv::gpu::GpuMat dst;
 
-        d_bp(d_imgLeft, d_imgRight, d_dst);
+        TEST_CYCLE() d_bp(d_imgLeft, d_imgRight, dst);
 
-        TEST_CYCLE()
-        {
-            d_bp(d_imgLeft, d_imgRight, d_dst);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy.";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // StereoConstantSpaceBP
 
-PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
+PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
+            Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
 {
     declare.time(10.0);
 
@@ -115,29 +101,25 @@ PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/st
     {
         cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
 
-        cv::gpu::GpuMat d_imgLeft(imgLeft);
-        cv::gpu::GpuMat d_imgRight(imgRight);
-        cv::gpu::GpuMat d_dst;
-
-        d_csbp(d_imgLeft, d_imgRight, d_dst);
+        const cv::gpu::GpuMat d_imgLeft(imgLeft);
+        const cv::gpu::GpuMat d_imgRight(imgRight);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            d_csbp(d_imgLeft, d_imgRight, d_dst);
-        }
+        TEST_CYCLE() d_csbp(d_imgLeft, d_imgRight, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy.";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // DisparityBilateralFilter
 
-PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
+PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter,
+            Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
 {
     const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
@@ -151,22 +133,17 @@ PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu
     {
         cv::gpu::DisparityBilateralFilter d_filter(ndisp);
 
-        cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_disp(disp);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_img(img);
+        const cv::gpu::GpuMat d_disp(disp);
+        cv::gpu::GpuMat dst;
 
-        d_filter(d_disp, d_img, d_dst);
+        TEST_CYCLE() d_filter(d_disp, d_img, dst);
 
-        TEST_CYCLE()
-        {
-            d_filter(d_disp, d_img, d_dst);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy.";
+        FAIL_NO_CPU();
     }
 }
 
@@ -175,45 +152,42 @@ PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu
 
 DEF_PARAM_TEST_1(Count, int);
 
-PERF_TEST_P(Count, Calib3D_TransformPoints, Values(5000, 10000, 20000))
+PERF_TEST_P(Count, Calib3D_TransformPoints, // perf test for cv::gpu::transformPoints
+            Values(5000, 10000, 20000)) // values read back via GetParam() as `count`, the width of the 1 x count CV_32FC3 input
 {
     const int count = GetParam();
 
     cv::Mat src(1, count, CV_32FC3);
-    fillRandom(src, -100, 100);
+    declare.in(src, WARMUP_RNG); // replaces fillRandom(src, -100, 100); WARMUP_RNG presumably fills src with random data - TODO confirm the value range
 
     const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
     const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
+        const cv::gpu::GpuMat d_src(src); // device-side copy of the input, constructed before the measured loop
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
-        }
+        TEST_CYCLE() cv::gpu::transformPoints(d_src, rvec, tvec, dst); // measured call; the extra call that used to precede TEST_CYCLE is dropped
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy.";
+        FAIL_NO_CPU(); // replaces the explicit FAIL() message: this test has no CPU branch to run
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // ProjectPoints
 
-PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
+PERF_TEST_P(Count, Calib3D_ProjectPoints,
+            Values(5000, 10000, 20000))
 {
     const int count = GetParam();
 
     cv::Mat src(1, count, CV_32FC3);
-    fillRandom(src, -100, 100);
+    declare.in(src, WARMUP_RNG);
 
     const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
     const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
@@ -221,28 +195,18 @@ PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
+        TEST_CYCLE() cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
-
-        TEST_CYCLE()
-        {
-            cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
-        }
+        TEST_CYCLE() cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -251,17 +215,18 @@ PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
 //////////////////////////////////////////////////////////////////////
 // SolvePnPRansac
 
-PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
+PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
+            Values(5000, 10000, 20000))
 {
     declare.time(10.0);
 
     const int count = GetParam();
 
     cv::Mat object(1, count, CV_32FC3);
-    fillRandom(object, -100, 100);
+    declare.in(object, WARMUP_RNG);
 
     cv::Mat camera_mat(3, 3, CV_32FC1);
-    fillRandom(camera_mat, 0.5, 1);
+    cv::randu(camera_mat, 0.5, 1);
     camera_mat.at<float>(0, 1) = 0.f;
     camera_mat.at<float>(1, 0) = 0.f;
     camera_mat.at<float>(2, 0) = 0.f;
@@ -269,79 +234,66 @@ PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
 
     const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
 
-    std::vector<cv::Point2f> image_vec;
     cv::Mat rvec_gold(1, 3, CV_32FC1);
-    fillRandom(rvec_gold, 0, 1);
+    cv::randu(rvec_gold, 0, 1);
+
     cv::Mat tvec_gold(1, 3, CV_32FC1);
-    fillRandom(tvec_gold, 0, 1);
+    cv::randu(tvec_gold, 0, 1);
+
+    std::vector<cv::Point2f> image_vec;
     cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
 
-    cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
+    const cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
 
     cv::Mat rvec;
     cv::Mat tvec;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
+        TEST_CYCLE() cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
-        }
+        GPU_SANITY_CHECK(rvec, 1e-3);
+        GPU_SANITY_CHECK(tvec, 1e-3);
     }
     else
     {
-        cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
+        TEST_CYCLE() cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
 
-        TEST_CYCLE()
-        {
-            cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
-        }
+        CPU_SANITY_CHECK(rvec, 1e-6);
+        CPU_SANITY_CHECK(tvec, 1e-6);
     }
-
-    CPU_SANITY_CHECK(rvec);
-    CPU_SANITY_CHECK(tvec);
 }
 
 //////////////////////////////////////////////////////////////////////
 // ReprojectImageTo3D
 
-PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
+PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src, 5.0, 30.0);
+    declare.in(src, WARMUP_RNG);
 
     cv::Mat Q(4, 4, CV_32FC1);
-    fillRandom(Q, 0.1, 1.0);
+    cv::randu(Q, 0.1, 1.0);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
+        TEST_CYCLE() cv::gpu::reprojectImageTo3D(d_src, dst, Q);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::reprojectImageTo3D(src, dst, Q);
-
-        TEST_CYCLE()
-        {
-            cv::reprojectImageTo3D(src, dst, Q);
-        }
+        TEST_CYCLE() cv::reprojectImageTo3D(src, dst, Q);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -350,32 +302,27 @@ PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES,
 //////////////////////////////////////////////////////////////////////
 // DrawColorDisp
 
-PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
+PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, // perf test for cv::gpu::drawColorDisp
+            Combine(GPU_TYPICAL_MAT_SIZES, // parameter 0: matrix size, read via GET_PARAM(0)
+                    Values(CV_8U, CV_16S))) // parameter 1: matrix type of the source, read via GET_PARAM(1)
 {
     const cv::Size size = GET_PARAM(0);
     const int type = GET_PARAM(1);
 
     cv::Mat src(size, type);
-    fillRandom(src, 0, 255);
+    declare.in(src, WARMUP_RNG); // replaces fillRandom(src, 0, 255); WARMUP_RNG presumably fills src with random data - TODO confirm the value range
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src); // device-side copy of the input, constructed before the measured loop
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::drawColorDisp(d_src, d_dst, 255);
+        TEST_CYCLE() cv::gpu::drawColorDisp(d_src, dst, 255); // measured call; the extra call that used to precede TEST_CYCLE is dropped
 
-        TEST_CYCLE()
-        {
-            cv::gpu::drawColorDisp(d_src, d_dst, 255);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy.";
+        FAIL_NO_CPU(); // replaces the explicit FAIL() message: this test has no CPU branch to run
     }
 }
-
-} // namespace
index b97c499..22840f9 100644 (file)
@@ -2,15 +2,17 @@
 
 using namespace std;
 using namespace testing;
-
-namespace {
+using namespace perf;
 
 #define ARITHM_MAT_DEPTH Values(CV_8U, CV_16U, CV_32F, CV_64F)
 
 //////////////////////////////////////////////////////////////////////
 // Merge
 
-PERF_TEST_P(Sz_Depth_Cn, Core_Merge, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH, Values(2, 3, 4)))
+PERF_TEST_P(Sz_Depth_Cn, Core_Merge,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH,
+                    Values(2, 3, 4)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
@@ -18,7 +20,10 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Merge, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_D
 
     std::vector<cv::Mat> src(channels);
     for (int i = 0; i < channels; ++i)
-        src[i] = cv::Mat(size, depth, cv::Scalar::all(i));
+    {
+        src[i].create(size, depth);
+        declare.in(src[i], WARMUP_RNG);
+    }
 
     if (PERF_RUN_GPU())
     {
@@ -26,11 +31,11 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Merge, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_D
         for (int i = 0; i < channels; ++i)
             d_src[i].upload(src[i]);
 
-        cv::gpu::GpuMat d_dst;
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::merge(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::merge(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-12);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
@@ -38,31 +43,37 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Merge, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_D
 
         TEST_CYCLE() cv::merge(src, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-12);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // Split
 
-PERF_TEST_P(Sz_Depth_Cn, Core_Split, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH, Values(2, 3, 4)))
+PERF_TEST_P(Sz_Depth_Cn, Core_Split,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH,
+                    Values(2, 3, 4)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
     const int channels = GET_PARAM(2);
 
-    cv::Mat src(size, CV_MAKE_TYPE(depth, channels), cv::Scalar(1, 2, 3, 4));
+    cv::Mat src(size, CV_MAKE_TYPE(depth, channels));
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
+        std::vector<cv::gpu::GpuMat> dst;
 
-        std::vector<cv::gpu::GpuMat> d_dst;
+        TEST_CYCLE() cv::gpu::split(d_src, dst);
 
-        TEST_CYCLE() cv::gpu::split(d_src, d_dst);
+        const cv::gpu::GpuMat& dst0 = dst[0];
+        const cv::gpu::GpuMat& dst1 = dst[1];
 
-        cv::gpu::GpuMat first = d_dst[0];
-        GPU_SANITY_CHECK(first, 1e-12);
+        GPU_SANITY_CHECK(dst0, 1e-10);
+        GPU_SANITY_CHECK(dst1, 1e-10);
     }
     else
     {
@@ -70,33 +81,39 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Split, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_D
 
         TEST_CYCLE() cv::split(src, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-12);
+        const cv::Mat& dst0 = dst[0];
+        const cv::Mat& dst1 = dst[1];
+
+        CPU_SANITY_CHECK(dst0);
+        CPU_SANITY_CHECK(dst1);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // AddMat
 
-PERF_TEST_P(Sz_Depth, Core_AddMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_AddMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::add(d_src1, d_src2, d_dst);
+        TEST_CYCLE() cv::gpu::add(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
@@ -104,31 +121,34 @@ PERF_TEST_P(Sz_Depth, Core_AddMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEP
 
         TEST_CYCLE() cv::add(src1, src2, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // AddScalar
 
-PERF_TEST_P(Sz_Depth, Core_AddScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_AddScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s(1, 2, 3, 4);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::add(d_src, s, d_dst);
+        TEST_CYCLE() cv::gpu::add(d_src, s, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
@@ -136,33 +156,35 @@ PERF_TEST_P(Sz_Depth, Core_AddScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_
 
         TEST_CYCLE() cv::add(src, s, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // SubtractMat
 
-PERF_TEST_P(Sz_Depth, Core_SubtractMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_SubtractMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::subtract(d_src1, d_src2, d_dst);
+        TEST_CYCLE() cv::gpu::subtract(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
@@ -170,31 +192,34 @@ PERF_TEST_P(Sz_Depth, Core_SubtractMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MA
 
         TEST_CYCLE() cv::subtract(src1, src2, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // SubtractScalar
 
-PERF_TEST_P(Sz_Depth, Core_SubtractScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_SubtractScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s(1, 2, 3, 4);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::subtract(d_src, s, d_dst);
+        TEST_CYCLE() cv::gpu::subtract(d_src, s, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
@@ -202,33 +227,35 @@ PERF_TEST_P(Sz_Depth, Core_SubtractScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM
 
         TEST_CYCLE() cv::subtract(src, s, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // MultiplyMat
 
-PERF_TEST_P(Sz_Depth, Core_MultiplyMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_MultiplyMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::multiply(d_src1, d_src2, d_dst);
+        TEST_CYCLE() cv::gpu::multiply(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-6);
     }
     else
     {
@@ -236,33 +263,34 @@ PERF_TEST_P(Sz_Depth, Core_MultiplyMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MA
 
         TEST_CYCLE() cv::multiply(src1, src2, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // MultiplyScalar
 
-PERF_TEST_P(Sz_Depth, Core_MultiplyScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_MultiplyScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s(1, 2, 3, 4);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::multiply(d_src, s, d_dst);
+        TEST_CYCLE() cv::gpu::multiply(d_src, s, dst);
 
-        TEST_CYCLE() cv::gpu::multiply(d_src, s, d_dst);
-
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-6);
     }
     else
     {
@@ -270,33 +298,35 @@ PERF_TEST_P(Sz_Depth, Core_MultiplyScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM
 
         TEST_CYCLE() cv::multiply(src, s, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // DivideMat
 
-PERF_TEST_P(Sz_Depth, Core_DivideMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_DivideMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::divide(d_src1, d_src2, d_dst);
+        TEST_CYCLE() cv::gpu::divide(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-6);
     }
     else
     {
@@ -304,31 +334,34 @@ PERF_TEST_P(Sz_Depth, Core_DivideMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_
 
         TEST_CYCLE() cv::divide(src1, src2, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // DivideScalar
 
-PERF_TEST_P(Sz_Depth, Core_DivideScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_DivideScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s(1, 2, 3, 4);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::divide(d_src, s, d_dst);
+        TEST_CYCLE() cv::gpu::divide(d_src, s, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-6);
     }
     else
     {
@@ -336,31 +369,34 @@ PERF_TEST_P(Sz_Depth, Core_DivideScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_M
 
         TEST_CYCLE() cv::divide(src, s, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // DivideScalarInv
 
-PERF_TEST_P(Sz_Depth, Core_DivideScalarInv, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_DivideScalarInv,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    double s = 100.0;
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::divide(s, d_src, d_dst);
+        TEST_CYCLE() cv::gpu::divide(s[0], d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-6);
     }
     else
     {
@@ -368,33 +404,35 @@ PERF_TEST_P(Sz_Depth, Core_DivideScalarInv, Combine(GPU_TYPICAL_MAT_SIZES, ARITH
 
         TEST_CYCLE() cv::divide(s, src, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // AbsDiffMat
 
-PERF_TEST_P(Sz_Depth, Core_AbsDiffMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_AbsDiffMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::absdiff(d_src1, d_src2, d_dst);
+        TEST_CYCLE() cv::gpu::absdiff(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
@@ -402,31 +440,34 @@ PERF_TEST_P(Sz_Depth, Core_AbsDiffMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT
 
         TEST_CYCLE() cv::absdiff(src1, src2, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // AbsDiffScalar
 
-PERF_TEST_P(Sz_Depth, Core_AbsDiffScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH))
+PERF_TEST_P(Sz_Depth, Core_AbsDiffScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s(1, 2, 3, 4);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::absdiff(d_src, s, d_dst);
+        TEST_CYCLE() cv::gpu::absdiff(d_src, s, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
@@ -434,75 +475,87 @@ PERF_TEST_P(Sz_Depth, Core_AbsDiffScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_
 
         TEST_CYCLE() cv::absdiff(src, s, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // Abs
 
-PERF_TEST_P(Sz_Depth, Core_Abs, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_16S, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_Abs,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_16S, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::abs(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::abs(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU();
     }
-    else FAIL_NO_CPU();
 }
 
 //////////////////////////////////////////////////////////////////////
 // Sqr
 
-PERF_TEST_P(Sz_Depth, Core_Sqr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_Sqr,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::sqr(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::sqr(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        FAIL_NO_CPU();
     }
-    else FAIL_NO_CPU();
 }
 
 //////////////////////////////////////////////////////////////////////
 // Sqrt
 
-PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_Sqrt,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    cv::randu(src, 0, 100000);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::sqrt(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::sqrt(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -510,29 +563,31 @@ PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV
 
         TEST_CYCLE() cv::sqrt(src, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // Log
 
-PERF_TEST_P(Sz_Depth, Core_Log, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_Log,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src, 1.0, 255.0);
+    cv::randu(src, 0, 100000);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::log(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::log(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -540,37 +595,39 @@ PERF_TEST_P(Sz_Depth, Core_Log, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_
 
         TEST_CYCLE() cv::log(src, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // Exp
 
-PERF_TEST_P(Sz_Depth, Core_Exp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_Exp,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src, 1.0, 10.0);
+    cv::randu(src, 0, 10);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::exp(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::exp(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() TEST_CYCLE() cv::exp(src, dst);
+        TEST_CYCLE() cv::exp(src, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
@@ -579,31 +636,34 @@ PERF_TEST_P(Sz_Depth, Core_Exp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_
 
 DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double);
 
-PERF_TEST_P(Sz_Depth_Power, Core_Pow, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S, CV_32F), Values(0.3, 2.0, 2.4)))
+PERF_TEST_P(Sz_Depth_Power, Core_Pow,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16S, CV_32F),
+                    Values(0.3, 2.0, 2.4)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
     const double power = GET_PARAM(2);
 
     cv::Mat src(size, depth);
-    fillRandom(src, 1.0, 10.0);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::pow(d_src, power, d_dst);
+        TEST_CYCLE() cv::gpu::pow(d_src, power, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::pow(src, power,dst);
+        TEST_CYCLE() cv::pow(src, power, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
@@ -615,27 +675,30 @@ CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv:
 
 DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CmpCode);
 
-PERF_TEST_P(Sz_Depth_Code, Core_CompareMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH, ALL_CMP_CODES))
+PERF_TEST_P(Sz_Depth_Code, Core_CompareMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH,
+                    ALL_CMP_CODES))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
     const int cmp_code = GET_PARAM(2);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::compare(d_src1, d_src2, d_dst, cmp_code);
+        TEST_CYCLE() cv::gpu::compare(d_src1, d_src2, dst, cmp_code);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -650,25 +713,29 @@ PERF_TEST_P(Sz_Depth_Code, Core_CompareMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITH
 //////////////////////////////////////////////////////////////////////
 // CompareScalar
 
-PERF_TEST_P(Sz_Depth_Code, Core_CompareScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH, ALL_CMP_CODES))
+PERF_TEST_P(Sz_Depth_Code, Core_CompareScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ARITHM_MAT_DEPTH,
+                    ALL_CMP_CODES))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
     const int cmp_code = GET_PARAM(2);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s = cv::Scalar::all(100);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::compare(d_src, s, d_dst, cmp_code);
+        TEST_CYCLE() cv::gpu::compare(d_src, s, dst, cmp_code);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -683,28 +750,30 @@ PERF_TEST_P(Sz_Depth_Code, Core_CompareScalar, Combine(GPU_TYPICAL_MAT_SIZES, AR
 //////////////////////////////////////////////////////////////////////
 // BitwiseNot
 
-PERF_TEST_P(Sz_Depth, Core_BitwiseNot, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S)))
+PERF_TEST_P(Sz_Depth, Core_BitwiseNot,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_not(d_src,d_dst);
+        TEST_CYCLE() cv::gpu::bitwise_not(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::bitwise_not(src,dst);
+        TEST_CYCLE() cv::bitwise_not(src, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -713,39 +782,46 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseNot, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_
 //////////////////////////////////////////////////////////////////////
 // BitwiseAndMat
 
-PERF_TEST_P(Sz_Depth, Core_BitwiseAndMat, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S)))
+PERF_TEST_P(Sz_Depth, Core_BitwiseAndMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_and(d_src1, d_src2,d_dst);
+        TEST_CYCLE() cv::gpu::bitwise_and(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::bitwise_and(src1, src2,dst);
+        TEST_CYCLE() cv::bitwise_and(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // BitwiseAndScalar
 
-PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
@@ -754,24 +830,26 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine(GPU_TYPICAL_MAT_SIZES, V
     const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s = cv::Scalar::all(100);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+    cv::Scalar_<int> is = s;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_and(d_src, s,d_dst);
+        TEST_CYCLE() cv::gpu::bitwise_and(d_src, is, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::bitwise_and(src, s,dst);
+        TEST_CYCLE() cv::bitwise_and(src, is, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -780,32 +858,34 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine(GPU_TYPICAL_MAT_SIZES, V
 //////////////////////////////////////////////////////////////////////
 // BitwiseOrMat
 
-PERF_TEST_P(Sz_Depth, Core_BitwiseOrMat, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S)))
+PERF_TEST_P(Sz_Depth, Core_BitwiseOrMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_or(d_src1, d_src2,d_dst);
+        TEST_CYCLE() cv::gpu::bitwise_or(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::bitwise_or(src1, src2,dst);
+        TEST_CYCLE() cv::bitwise_or(src1, src2, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -814,7 +894,10 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseOrMat, Combine(GPU_TYPICAL_MAT_SIZES, Values(C
 //////////////////////////////////////////////////////////////////////
 // BitwiseOrScalar
 
-PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
@@ -823,24 +906,26 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine(GPU_TYPICAL_MAT_SIZES, Va
     const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s = cv::Scalar::all(100);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+    cv::Scalar_<int> is = s;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_or(d_src, s,d_dst);
+        TEST_CYCLE() cv::gpu::bitwise_or(d_src, is, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::bitwise_or(src, s,dst);
+        TEST_CYCLE() cv::bitwise_or(src, is, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -849,39 +934,46 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine(GPU_TYPICAL_MAT_SIZES, Va
 //////////////////////////////////////////////////////////////////////
 // BitwiseXorMat
 
-PERF_TEST_P(Sz_Depth, Core_BitwiseXorMat, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S)))
+PERF_TEST_P(Sz_Depth, Core_BitwiseXorMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_xor(d_src1, d_src2,d_dst);
+        TEST_CYCLE() cv::gpu::bitwise_xor(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::bitwise_xor(src1, src2,dst);
+        TEST_CYCLE() cv::bitwise_xor(src1, src2, dst);
+
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // BitwiseXorScalar
 
-PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
@@ -890,24 +982,26 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, Combine(GPU_TYPICAL_MAT_SIZES, V
     const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar s = cv::Scalar::all(100);
+    cv::Scalar s;
+    declare.in(s, WARMUP_RNG);
+    cv::Scalar_<int> is = s;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_xor(d_src, s,d_dst);
+        TEST_CYCLE() cv::gpu::bitwise_xor(d_src, is, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::bitwise_xor(src, s,dst);
+        TEST_CYCLE() cv::bitwise_xor(src, is, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -916,7 +1010,10 @@ PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, Combine(GPU_TYPICAL_MAT_SIZES, V
 //////////////////////////////////////////////////////////////////////
 // RShift
 
-PERF_TEST_P(Sz_Depth_Cn, Core_RShift, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, Core_RShift,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
@@ -925,18 +1022,18 @@ PERF_TEST_P(Sz_Depth_Cn, Core_RShift, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
     const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     const cv::Scalar_<int> val = cv::Scalar_<int>::all(4);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::rshift(d_src, val,d_dst);
+        TEST_CYCLE() cv::gpu::rshift(d_src, val, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -947,7 +1044,10 @@ PERF_TEST_P(Sz_Depth_Cn, Core_RShift, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
 //////////////////////////////////////////////////////////////////////
 // LShift
 
-PERF_TEST_P(Sz_Depth_Cn, Core_LShift, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32S), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, Core_LShift,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S),
+                    GPU_CHANNELS_1_3_4))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
@@ -956,18 +1056,18 @@ PERF_TEST_P(Sz_Depth_Cn, Core_LShift, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
     const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     const cv::Scalar_<int> val = cv::Scalar_<int>::all(4);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::lshift(d_src, val,d_dst);
+        TEST_CYCLE() cv::gpu::lshift(d_src, val, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -978,32 +1078,34 @@ PERF_TEST_P(Sz_Depth_Cn, Core_LShift, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
 //////////////////////////////////////////////////////////////////////
 // MinMat
 
-PERF_TEST_P(Sz_Depth, Core_MinMat, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_MinMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::min(d_src1, d_src2,d_dst);
+        TEST_CYCLE() cv::gpu::min(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::min(src1, src2,dst);
+        TEST_CYCLE() cv::min(src1, src2, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1012,30 +1114,33 @@ PERF_TEST_P(Sz_Depth, Core_MinMat, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U,
 //////////////////////////////////////////////////////////////////////
 // MinScalar
 
-PERF_TEST_P(Sz_Depth, Core_MinScalar, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_MinScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    const double val = 50.0;
+    cv::Scalar val;
+    declare.in(val, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::min(d_src, val,d_dst);
+        TEST_CYCLE() cv::gpu::min(d_src, val[0], dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::min(src, val,dst);
+        TEST_CYCLE() cv::min(src, val[0], dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1044,32 +1149,34 @@ PERF_TEST_P(Sz_Depth, Core_MinScalar, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
 //////////////////////////////////////////////////////////////////////
 // MaxMat
 
-PERF_TEST_P(Sz_Depth, Core_MaxMat, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_MaxMat,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src1(size, depth);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::max(d_src1, d_src2,d_dst);
+        TEST_CYCLE() cv::gpu::max(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::max(src1, src2,dst);
+        TEST_CYCLE() cv::max(src1, src2, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1078,30 +1185,33 @@ PERF_TEST_P(Sz_Depth, Core_MaxMat, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U,
 //////////////////////////////////////////////////////////////////////
 // MaxScalar
 
-PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F)))
+PERF_TEST_P(Sz_Depth, Core_MaxScalar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    const double val = 50.0;
+    cv::Scalar val;
+    declare.in(val, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::max(d_src, val,d_dst);
+        TEST_CYCLE() cv::gpu::max(d_src, val[0], dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::max(src, val,dst);
+        TEST_CYCLE() cv::max(src, val[0], dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1112,11 +1222,11 @@ PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
 
 DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth);
 
-PERF_TEST_P(Sz_3Depth, Core_AddWeighted, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F, CV_64F),
-    Values(CV_8U, CV_16U, CV_32F, CV_64F),
-    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+PERF_TEST_P(Sz_3Depth, Core_AddWeighted,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
     const cv::Size size = GET_PARAM(0);
     const int depth1 = GET_PARAM(1);
@@ -1124,20 +1234,20 @@ PERF_TEST_P(Sz_3Depth, Core_AddWeighted, Combine(
     const int dst_depth = GET_PARAM(3);
 
     cv::Mat src1(size, depth1);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, depth2);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::addWeighted(d_src1, 0.5, d_src2, 0.5, 10.0, d_dst, dst_depth);
+        TEST_CYCLE() cv::gpu::addWeighted(d_src1, 0.5, d_src2, 0.5, 10.0, dst, dst_depth);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
@@ -1157,76 +1267,76 @@ CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T)
 
 DEF_PARAM_TEST(Sz_Type_Flags, cv::Size, MatType, GemmFlags);
 
-PERF_TEST_P(Sz_Type_Flags, Core_GEMM, Combine(
-    Values(cv::Size(512, 512), cv::Size(1024, 1024)),
-    Values(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2),
-    ALL_GEMM_FLAGS))
+PERF_TEST_P(Sz_Type_Flags, Core_GEMM,
+            Combine(Values(cv::Size(512, 512), cv::Size(1024, 1024)),
+                    Values(CV_32FC1, CV_32FC2, CV_64FC1),
+                    ALL_GEMM_FLAGS))
 {
-    declare.time(5.0);
-
     const cv::Size size = GET_PARAM(0);
     const int type = GET_PARAM(1);
     const int flags = GET_PARAM(2);
 
     cv::Mat src1(size, type);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, type);
-    fillRandom(src2);
+    declare.in(src2, WARMUP_RNG);
 
     cv::Mat src3(size, type);
-    fillRandom(src3);
+    declare.in(src3, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_src3(src3);
-        cv::gpu::GpuMat d_dst;
+        declare.time(5.0);
+
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        const cv::gpu::GpuMat d_src3(src3);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst, flags);
+        TEST_CYCLE() cv::gpu::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-6);
     }
     else
     {
-        cv::Mat dst;
-
         declare.time(50.0);
 
+        cv::Mat dst;
+
         TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst, flags);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // Transpose
 
-PERF_TEST_P(Sz_Type, Core_Transpose, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_64FC1)))
+PERF_TEST_P(Sz_Type, Core_Transpose,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_64FC1)))
 {
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::transpose(d_src,d_dst);
+        TEST_CYCLE() cv::gpu::transpose(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
         cv::Mat dst;
 
-        TEST_CYCLE() cv::transpose(src,dst);
+        TEST_CYCLE() cv::transpose(src, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1241,30 +1351,30 @@ CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Code, cv::Size, MatDepth, MatCn, FlipCode);
 
-PERF_TEST_P(Sz_Depth_Cn_Code, Core_Flip, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4,
-    ALL_FLIP_CODES))
+PERF_TEST_P(Sz_Depth_Cn_Code, Core_Flip,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    ALL_FLIP_CODES))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int flipCode = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int flipCode = GET_PARAM(3);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::flip(d_src, d_dst, flipCode);
+        TEST_CYCLE() cv::gpu::flip(d_src, dst, flipCode);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -1279,27 +1389,27 @@ PERF_TEST_P(Sz_Depth_Cn_Code, Core_Flip, Combine(
 //////////////////////////////////////////////////////////////////////
 // LutOneChannel
 
-PERF_TEST_P(Sz_Type, Core_LutOneChannel, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8UC1, CV_8UC3)))
+PERF_TEST_P(Sz_Type, Core_LutOneChannel,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8UC1, CV_8UC3)))
 {
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     cv::Mat lut(1, 256, CV_8UC1);
-    fillRandom(lut);
+    declare.in(lut, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::LUT(d_src, lut,d_dst);
+        TEST_CYCLE() cv::gpu::LUT(d_src, lut, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -1314,27 +1424,27 @@ PERF_TEST_P(Sz_Type, Core_LutOneChannel, Combine(
 //////////////////////////////////////////////////////////////////////
 // LutMultiChannel
 
-PERF_TEST_P(Sz_Type, Core_LutMultiChannel, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values<MatType>(CV_8UC3)))
+PERF_TEST_P(Sz_Type, Core_LutMultiChannel,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values<MatType>(CV_8UC3)))
 {
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     cv::Mat lut(1, 256, CV_MAKE_TYPE(CV_8U, src.channels()));
-    fillRandom(lut);
+    declare.in(lut, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::LUT(d_src, lut,d_dst);
+        TEST_CYCLE() cv::gpu::LUT(d_src, lut, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -1349,21 +1459,22 @@ PERF_TEST_P(Sz_Type, Core_LutMultiChannel, Combine(
 //////////////////////////////////////////////////////////////////////
 // MagnitudeComplex
 
-PERF_TEST_P(Sz, Core_MagnitudeComplex, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, Core_MagnitudeComplex,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_32FC2);
-    fillRandom(src, -100.0, 100.0);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::magnitude(d_src,d_dst);
+        TEST_CYCLE() cv::gpu::magnitude(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -1374,28 +1485,29 @@ PERF_TEST_P(Sz, Core_MagnitudeComplex, GPU_TYPICAL_MAT_SIZES)
 
         TEST_CYCLE() cv::magnitude(xy[0], xy[1], dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // MagnitudeSqrComplex
 
-PERF_TEST_P(Sz, Core_MagnitudeSqrComplex, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, Core_MagnitudeSqrComplex,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_32FC2);
-    fillRandom(src, -100.0, 100.0);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -1406,25 +1518,26 @@ PERF_TEST_P(Sz, Core_MagnitudeSqrComplex, GPU_TYPICAL_MAT_SIZES)
 //////////////////////////////////////////////////////////////////////
 // Magnitude
 
-PERF_TEST_P(Sz, Core_Magnitude, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, Core_Magnitude,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src1(size, CV_32FC1);
-    fillRandom(src1, -100.0, 100.0);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, CV_32FC1);
-    fillRandom(src2, -100.0, 100.0);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::magnitude(d_src1, d_src2, d_dst);
+        TEST_CYCLE() cv::gpu::magnitude(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -1432,33 +1545,33 @@ PERF_TEST_P(Sz, Core_Magnitude, GPU_TYPICAL_MAT_SIZES)
 
         TEST_CYCLE() cv::magnitude(src1, src2, dst);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
-
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // MagnitudeSqr
 
-PERF_TEST_P(Sz, Core_MagnitudeSqr, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, Core_MagnitudeSqr,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src1(size, CV_32FC1);
-    fillRandom(src1, -100.0, 100.0);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, CV_32FC1);
-    fillRandom(src2, -100.0, 100.0);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src1, d_src2, d_dst);
+        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src1, d_src2, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -1471,26 +1584,28 @@ PERF_TEST_P(Sz, Core_MagnitudeSqr, GPU_TYPICAL_MAT_SIZES)
 
 DEF_PARAM_TEST(Sz_AngleInDegrees, cv::Size, bool);
 
-PERF_TEST_P(Sz_AngleInDegrees, Core_Phase, Combine(GPU_TYPICAL_MAT_SIZES, Bool()))
+PERF_TEST_P(Sz_AngleInDegrees, Core_Phase,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Bool()))
 {
-    cv::Size size = GET_PARAM(0);
-    bool angleInDegrees = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const bool angleInDegrees = GET_PARAM(1);
 
     cv::Mat src1(size, CV_32FC1);
-    fillRandom(src1, -100.0, 100.0);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, CV_32FC1);
-    fillRandom(src2, -100.0, 100.0);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::phase(d_src1, d_src2, d_dst, angleInDegrees);
+        TEST_CYCLE() cv::gpu::phase(d_src1, d_src2, dst, angleInDegrees);
 
-        GPU_SANITY_CHECK(d_dst, 1e-8);
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
     else
     {
@@ -1498,36 +1613,37 @@ PERF_TEST_P(Sz_AngleInDegrees, Core_Phase, Combine(GPU_TYPICAL_MAT_SIZES, Bool()
 
         TEST_CYCLE() cv::phase(src1, src2, dst, angleInDegrees);
 
-        CPU_SANITY_CHECK(dst, 1e-8);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // CartToPolar
 
-PERF_TEST_P(Sz_AngleInDegrees, Core_CartToPolar, Combine(GPU_TYPICAL_MAT_SIZES, Bool()))
+PERF_TEST_P(Sz_AngleInDegrees, Core_CartToPolar,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Bool()))
 {
-    cv::Size size = GET_PARAM(0);
-    bool angleInDegrees = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const bool angleInDegrees = GET_PARAM(1);
 
     cv::Mat src1(size, CV_32FC1);
-    fillRandom(src1, -100.0, 100.0);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, CV_32FC1);
-    fillRandom(src2, -100.0, 100.0);
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_magnitude;
-        cv::gpu::GpuMat d_angle;
-
-        TEST_CYCLE() cv::gpu::cartToPolar(d_src1, d_src2, d_magnitude, d_angle, angleInDegrees);
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat magnitude;
+        cv::gpu::GpuMat angle;
 
-        GPU_SANITY_CHECK(d_magnitude, 1e-8);
-        GPU_SANITY_CHECK(d_angle, 1e-8);
+        TEST_CYCLE() cv::gpu::cartToPolar(d_src1, d_src2, magnitude, angle, angleInDegrees);
 
+        GPU_SANITY_CHECK(magnitude);
+        GPU_SANITY_CHECK(angle, 1e-6, ERROR_RELATIVE);
     }
     else
     {
@@ -1536,36 +1652,38 @@ PERF_TEST_P(Sz_AngleInDegrees, Core_CartToPolar, Combine(GPU_TYPICAL_MAT_SIZES,
 
         TEST_CYCLE() cv::cartToPolar(src1, src2, magnitude, angle, angleInDegrees);
 
-        CPU_SANITY_CHECK(magnitude, 1e-8);
-        CPU_SANITY_CHECK(angle, 1e-8);
+        CPU_SANITY_CHECK(magnitude);
+        CPU_SANITY_CHECK(angle);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // PolarToCart
 
-PERF_TEST_P(Sz_AngleInDegrees, Core_PolarToCart, Combine(GPU_TYPICAL_MAT_SIZES, Bool()))
+PERF_TEST_P(Sz_AngleInDegrees, Core_PolarToCart,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Bool()))
 {
-    cv::Size size = GET_PARAM(0);
-    bool angleInDegrees = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const bool angleInDegrees = GET_PARAM(1);
 
     cv::Mat magnitude(size, CV_32FC1);
-    fillRandom(magnitude, 0.0, 100.0);
+    declare.in(magnitude, WARMUP_RNG);
 
     cv::Mat angle(size, CV_32FC1);
-    fillRandom(angle, 0.0, angleInDegrees ? 360.0 : 2 * CV_PI);
+    declare.in(angle, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_magnitude(magnitude);
-        cv::gpu::GpuMat d_angle(angle);
-        cv::gpu::GpuMat d_x;
-        cv::gpu::GpuMat d_y;
+        const cv::gpu::GpuMat d_magnitude(magnitude);
+        const cv::gpu::GpuMat d_angle(angle);
+        cv::gpu::GpuMat x;
+        cv::gpu::GpuMat y;
 
-        TEST_CYCLE() cv::gpu::polarToCart(d_magnitude, d_angle, d_x, d_y, angleInDegrees);
+        TEST_CYCLE() cv::gpu::polarToCart(d_magnitude, d_angle, x, y, angleInDegrees);
 
-        GPU_SANITY_CHECK(d_x, 1e-8);
-        GPU_SANITY_CHECK(d_y, 1e-8);
+        GPU_SANITY_CHECK(x);
+        GPU_SANITY_CHECK(y);
     }
     else
     {
@@ -1574,37 +1692,45 @@ PERF_TEST_P(Sz_AngleInDegrees, Core_PolarToCart, Combine(GPU_TYPICAL_MAT_SIZES,
 
         TEST_CYCLE() cv::polarToCart(magnitude, angle, x, y, angleInDegrees);
 
-        CPU_SANITY_CHECK(x, 1e-8);
-        CPU_SANITY_CHECK(y, 1e-8);
+        CPU_SANITY_CHECK(x);
+        CPU_SANITY_CHECK(y);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // MeanStdDev
 
-PERF_TEST_P(Sz, Core_MeanStdDev, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, Core_MeanStdDev,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_8UC1);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Scalar mean;
-    cv::Scalar stddev;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_buf;
+        cv::Scalar gpu_mean;
+        cv::Scalar gpu_stddev;
+
+        TEST_CYCLE() cv::gpu::meanStdDev(d_src, gpu_mean, gpu_stddev, d_buf);
 
-        TEST_CYCLE() cv::gpu::meanStdDev(d_src, mean, stddev, d_buf);
+        SANITY_CHECK(gpu_mean);
+        SANITY_CHECK(gpu_stddev);
     }
     else
     {
-        TEST_CYCLE() cv::meanStdDev(src, mean, stddev);
-    }
+        cv::Scalar cpu_mean;
+        cv::Scalar cpu_stddev;
+
+        TEST_CYCLE() cv::meanStdDev(src, cpu_mean, cpu_stddev);
 
-    GPU_SANITY_CHECK(stddev, 1e-6);
+        SANITY_CHECK(cpu_mean);
+        SANITY_CHECK(cpu_stddev);
+    }
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -1612,33 +1738,36 @@ PERF_TEST_P(Sz, Core_MeanStdDev, GPU_TYPICAL_MAT_SIZES)
 
 DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType);
 
-PERF_TEST_P(Sz_Depth_Norm, Core_Norm, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32S, CV_32F),
-    Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
+PERF_TEST_P(Sz_Depth_Norm, Core_Norm,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32S, CV_32F),
+                    Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int normType = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int normType = GET_PARAM(2);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
-
-    double dst;
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_buf;
+        double gpu_dst;
+
+        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src, normType, d_buf);
 
-        TEST_CYCLE() dst = cv::gpu::norm(d_src, normType, cv::gpu::GpuMat(), d_buf);
+        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }
     else
     {
-        TEST_CYCLE() dst = cv::norm(src, normType);
-    }
+        double cpu_dst;
+
+        TEST_CYCLE() cpu_dst = cv::norm(src, normType);
 
-    SANITY_CHECK(dst, 1e-6);
+        SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE);
+    }
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -1646,99 +1775,103 @@ PERF_TEST_P(Sz_Depth_Norm, Core_Norm, Combine(
 
 DEF_PARAM_TEST(Sz_Norm, cv::Size, NormType);
 
-PERF_TEST_P(Sz_Norm, Core_NormDiff, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
+PERF_TEST_P(Sz_Norm, Core_NormDiff,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
 {
-    cv::Size size = GET_PARAM(0);
-    int normType = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int normType = GET_PARAM(1);
 
     cv::Mat src1(size, CV_8UC1);
-    fillRandom(src1);
+    declare.in(src1, WARMUP_RNG);
 
     cv::Mat src2(size, CV_8UC1);
-    fillRandom(src2);
-
-    double dst;
+    declare.in(src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        double gpu_dst;
+
+        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, normType);
 
-        TEST_CYCLE() dst = cv::gpu::norm(d_src1, d_src2, normType);
+        SANITY_CHECK(gpu_dst);
 
     }
     else
     {
-        TEST_CYCLE() dst = cv::norm(src1, src2, normType);
-    }
+        double cpu_dst;
+
+        TEST_CYCLE() cpu_dst = cv::norm(src1, src2, normType);
 
-    SANITY_CHECK(dst, 1e-6);
+        SANITY_CHECK(cpu_dst);
+    }
 }
 
 //////////////////////////////////////////////////////////////////////
 // Sum
 
-PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, Core_Sum,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
-
-    cv::Scalar dst;
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_buf;
+        cv::Scalar gpu_dst;
 
-        TEST_CYCLE() dst = cv::gpu::sum(d_src, cv::gpu::GpuMat(), d_buf);
+        TEST_CYCLE() gpu_dst = cv::gpu::sum(d_src, d_buf);
+
+        SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE);
     }
     else
     {
-        TEST_CYCLE() dst = cv::sum(src);
-    }
+        cv::Scalar cpu_dst;
 
-    double error = (depth == CV_32F) ? 3e+1 : 1e-6;
-    SANITY_CHECK(dst,  error);
+        TEST_CYCLE() cpu_dst = cv::sum(src);
+
+        SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE);
+    }
 }
 
 //////////////////////////////////////////////////////////////////////
 // SumAbs
 
-PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
-
-    cv::Scalar dst;
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_buf;
+        cv::Scalar gpu_dst;
 
-        TEST_CYCLE() dst = cv::gpu::absSum(d_src, cv::gpu::GpuMat(), d_buf);
+        TEST_CYCLE() gpu_dst = cv::gpu::absSum(d_src, d_buf);
 
-        SANITY_CHECK(dst, 1e-6);
+        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }
     else
     {
@@ -1749,30 +1882,29 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine(
 //////////////////////////////////////////////////////////////////////
 // SumSqr
 
-PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values<MatDepth>(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values<MatDepth>(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
-
-    cv::Scalar dst;
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_buf;
+        cv::Scalar gpu_dst;
 
-        TEST_CYCLE() dst = cv::gpu::sqrSum(d_src, cv::gpu::GpuMat(), d_buf);
+        TEST_CYCLE() gpu_dst = cv::gpu::sqrSum(d_src, d_buf);
 
-        SANITY_CHECK(dst, 1e-6);
+        SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }
     else
     {
@@ -1783,98 +1915,106 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine(
 //////////////////////////////////////////////////////////////////////
 // MinMax
 
-PERF_TEST_P(Sz_Depth, Core_MinMax, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+PERF_TEST_P(Sz_Depth, Core_MinMax,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
-
-    double minVal, maxVal;
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_buf;
+        double gpu_minVal, gpu_maxVal;
 
-        TEST_CYCLE() cv::gpu::minMax(d_src, &minVal, &maxVal, cv::gpu::GpuMat(), d_buf);
+        TEST_CYCLE() cv::gpu::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::gpu::GpuMat(), d_buf);
 
-        SANITY_CHECK(minVal);
-        SANITY_CHECK(maxVal);
+        SANITY_CHECK(gpu_minVal, 1e-10);
+        SANITY_CHECK(gpu_maxVal, 1e-10);
     }
     else
     {
-        FAIL_NO_CPU();
+        double cpu_minVal, cpu_maxVal;
+
+        TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal);
+
+        SANITY_CHECK(cpu_minVal);
+        SANITY_CHECK(cpu_maxVal);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // MinMaxLoc
 
-PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+PERF_TEST_P(Sz_Depth, Core_MinMaxLoc,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
-
-    double minVal, maxVal;
-    cv::Point minLoc, maxLoc;
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_valbuf, d_locbuf;
+        double gpu_minVal, gpu_maxVal;
+        cv::Point gpu_minLoc, gpu_maxLoc;
 
-        TEST_CYCLE() cv::gpu::minMaxLoc(d_src, &minVal, &maxVal, &minLoc, &maxLoc, cv::gpu::GpuMat(), d_valbuf, d_locbuf);
+        TEST_CYCLE() cv::gpu::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc, cv::gpu::GpuMat(), d_valbuf, d_locbuf);
+
+        SANITY_CHECK(gpu_minVal, 1e-10);
+        SANITY_CHECK(gpu_maxVal, 1e-10);
     }
     else
     {
-        TEST_CYCLE() cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc);
-    }
+        double cpu_minVal, cpu_maxVal;
+        cv::Point cpu_minLoc, cpu_maxLoc;
 
-    SANITY_CHECK(minVal, 1e-12);
-    SANITY_CHECK(maxVal, 1e-12);
+        TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal, &cpu_minLoc, &cpu_maxLoc);
 
-    // unsupported by peft system
-    //SANITY_CHECK(minLoc);
-    //SANITY_CHECK(maxLoc);
+        SANITY_CHECK(cpu_minVal);
+        SANITY_CHECK(cpu_maxVal);
+    }
 }
 
 //////////////////////////////////////////////////////////////////////
 // CountNonZero
 
-PERF_TEST_P(Sz_Depth, Core_CountNonZero, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+PERF_TEST_P(Sz_Depth, Core_CountNonZero,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
-
-    int dst = 0;
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_buf;
+        int gpu_dst = 0;
+
+        TEST_CYCLE() gpu_dst = cv::gpu::countNonZero(d_src, d_buf);
 
-        TEST_CYCLE() dst = cv::gpu::countNonZero(d_src, d_buf);
+        SANITY_CHECK(gpu_dst);
     }
     else
     {
-        TEST_CYCLE() dst = cv::countNonZero(src);
-    }
+        int cpu_dst = 0;
+
+        TEST_CYCLE() cpu_dst = cv::countNonZero(src);
 
-    SANITY_CHECK(dst);
+        SANITY_CHECK(cpu_dst);
+    }
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -1889,32 +2029,32 @@ CV_ENUM(ReduceDim, Rows, Cols)
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim);
 
-PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_16S, CV_32F),
-    Values(1, 2, 3, 4),
-    ALL_REDUCE_CODES,
-    ALL_REDUCE_DIMS))
+PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_16S, CV_32F),
+                    Values(1, 2, 3, 4),
+                    ALL_REDUCE_CODES,
+                    ALL_REDUCE_DIMS))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int reduceOp = GET_PARAM(3);
-    int dim = GET_PARAM(4);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int reduceOp = GET_PARAM(3);
+    const int dim = GET_PARAM(4);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::reduce(d_src, d_dst, dim, reduceOp);
+        TEST_CYCLE() cv::gpu::reduce(d_src, dst, dim, reduceOp);
 
-        GPU_SANITY_CHECK(d_dst, 1);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
@@ -1922,43 +2062,41 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine(
 
         TEST_CYCLE() cv::reduce(src, dst, dim, reduceOp);
 
-        CPU_SANITY_CHECK(dst, 1);
+        CPU_SANITY_CHECK(dst);
     }
 }
-
 //////////////////////////////////////////////////////////////////////
 // Normalize
 
 DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType);
 
-PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F, CV_64F),
-    Values(NormType(cv::NORM_INF),
-           NormType(cv::NORM_L1),
-           NormType(cv::NORM_L2),
-           NormType(cv::NORM_MINMAX))
-    ))
+PERF_TEST_P(Sz_Depth_NormType, Core_Normalize,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    Values(NormType(cv::NORM_INF),
+                           NormType(cv::NORM_L1),
+                           NormType(cv::NORM_L2),
+                           NormType(cv::NORM_MINMAX))))
 {
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int norm_type = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int norm_type = GET_PARAM(2);
 
-    double alpha = 1;
-    double beta = 0;
+    const double alpha = 1;
+    const double beta = 0;
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_norm_buf, d_cvt_buf;
 
-        TEST_CYCLE() cv::gpu::normalize(d_src, d_dst, alpha, beta, norm_type, type, cv::gpu::GpuMat(), d_norm_buf, d_cvt_buf);
+        TEST_CYCLE() cv::gpu::normalize(d_src, dst, alpha, beta, norm_type, type, cv::gpu::GpuMat(), d_norm_buf, d_cvt_buf);
 
-        GPU_SANITY_CHECK(d_dst, 1);
+        GPU_SANITY_CHECK(dst, 1e-6);
     }
     else
     {
@@ -1966,8 +2104,6 @@ PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine(
 
         TEST_CYCLE() cv::normalize(src, dst, alpha, beta, norm_type, type);
 
-        CPU_SANITY_CHECK(dst, 1);
+        CPU_SANITY_CHECK(dst);
     }
 }
-
-} // namespace
index ed63177..6f03994 100644 (file)
@@ -3,8 +3,7 @@
 using namespace std;
 using namespace testing;
 
-#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::szXGA, perf::sz720p, perf::sz1080p)
-
+#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::sz720p)
 
 //////////////////////////////////////////////////////////////////////
 // BilateralFilter
@@ -12,96 +11,86 @@ using namespace testing;
 DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int);
 
 PERF_TEST_P(Sz_Depth_Cn_KernelSz, Denoising_BilateralFilter,
-            Combine(GPU_DENOISING_IMAGE_SIZES, Values(CV_8U, CV_32F), GPU_CHANNELS_1_3, Values(3, 5, 9)))
+            Combine(GPU_DENOISING_IMAGE_SIZES,
+                    Values(CV_8U, CV_32F),
+                    GPU_CHANNELS_1_3,
+                    Values(3, 5, 9)))
 {
     declare.time(60.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int kernel_size = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int kernel_size = GET_PARAM(3);
 
-    float sigma_color = 7;
-    float sigma_spatial = 5;
-    int borderMode = cv::BORDER_REFLECT101;
+    const float sigma_color = 7;
+    const float sigma_spatial = 5;
+    const int borderMode = cv::BORDER_REFLECT101;
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-     if (PERF_RUN_GPU())
+    if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::bilateralFilter(d_src, d_dst, kernel_size, sigma_color, sigma_spatial, borderMode);
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::bilateralFilter(d_src, d_dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-        }
+        TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-
-        TEST_CYCLE()
-        {
-            cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
-        }
+        TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
 
         CPU_SANITY_CHECK(dst);
     }
 }
 
-
 //////////////////////////////////////////////////////////////////////
 // nonLocalMeans
 
 DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
 
 PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
-            Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), GPU_CHANNELS_1_3, Values(21), Values(5, 7)))
+            Combine(GPU_DENOISING_IMAGE_SIZES,
+                    Values<MatDepth>(CV_8U),
+                    GPU_CHANNELS_1_3,
+                    Values(21),
+                    Values(5)))
 {
     declare.time(60.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-
-    int search_widow_size = GET_PARAM(3);
-    int block_size = GET_PARAM(4);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int search_widow_size = GET_PARAM(3);
+    const int block_size = GET_PARAM(4);
 
-    float h = 10;
-    int borderMode = cv::BORDER_REFLECT101;
+    const float h = 10;
+    const int borderMode = cv::BORDER_REFLECT101;
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::nonLocalMeans(d_src, d_dst, h, search_widow_size, block_size, borderMode);
+        TEST_CYCLE() cv::gpu::nonLocalMeans(d_src, dst, h, search_widow_size, block_size, borderMode);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::nonLocalMeans(d_src, d_dst, h, search_widow_size, block_size, borderMode);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -112,46 +101,41 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
 DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
 
 PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
-            Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), GPU_CHANNELS_1_3, Values(21), Values(7)))
+            Combine(GPU_DENOISING_IMAGE_SIZES,
+                    Values<MatDepth>(CV_8U),
+                    GPU_CHANNELS_1_3,
+                    Values(21),
+                    Values(7)))
 {
-    declare.time(150.0);
-
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
+    declare.time(60.0);
 
-    int search_widow_size = GET_PARAM(2);
-    int block_size = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int search_widow_size = GET_PARAM(2);
+    const int block_size = GET_PARAM(3);
 
-    float h = 10;
-    int type = CV_MAKE_TYPE(depth, 1);
+    const float h = 10;
+    const int type = CV_MAKE_TYPE(depth, 1);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
         cv::gpu::FastNonLocalMeansDenoising fnlmd;
 
-        fnlmd.simpleMethod(d_src, d_dst, h, search_widow_size, block_size);
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            fnlmd.simpleMethod(d_src, d_dst, h, search_widow_size, block_size);
-        }
+        TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
-        cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
 
-        TEST_CYCLE()
-        {
-            cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
-        }
+        TEST_CYCLE() cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -163,47 +147,41 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
 DEF_PARAM_TEST(Sz_Depth_WinSz_BlockSz, cv::Size, MatDepth, int, int);
 
 PERF_TEST_P(Sz_Depth_WinSz_BlockSz, Denoising_FastNonLocalMeansColored,
-            Combine(GPU_DENOISING_IMAGE_SIZES, Values<MatDepth>(CV_8U), Values(21), Values(7)))
+            Combine(GPU_DENOISING_IMAGE_SIZES,
+                    Values<MatDepth>(CV_8U),
+                    Values(21),
+                    Values(7)))
 {
-    declare.time(350.0);
-
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
+    declare.time(60.0);
 
-    int search_widow_size = GET_PARAM(2);
-    int block_size = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int search_widow_size = GET_PARAM(2);
+    const int block_size = GET_PARAM(3);
 
-    float h = 10;
-    int type = CV_MAKE_TYPE(depth, 3);
+    const float h = 10;
+    const int type = CV_MAKE_TYPE(depth, 3);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
         cv::gpu::FastNonLocalMeansDenoising fnlmd;
 
-        fnlmd.labMethod(d_src, d_dst, h, h, search_widow_size, block_size);
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            fnlmd.labMethod(d_src, d_dst, h, h, search_widow_size, block_size);
-        }
+        TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
-        cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
 
-        TEST_CYCLE()
-        {
-            cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
-        }
+        TEST_CYCLE() cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size);
 
         CPU_SANITY_CHECK(dst);
     }
-}
\ No newline at end of file
+}
index a93cef9..480f582 100644 (file)
 using namespace std;
 using namespace testing;
 
-namespace {
+struct KeypointIdxCompare
+{
+    std::vector<cv::KeyPoint>* keypoints;
+
+    explicit KeypointIdxCompare(std::vector<cv::KeyPoint>* _keypoints) : keypoints(_keypoints) {}
+
+    bool operator ()(size_t i1, size_t i2) const
+    {
+        cv::KeyPoint kp1 = (*keypoints)[i1];
+        cv::KeyPoint kp2 = (*keypoints)[i2];
+        if (kp1.pt.x != kp2.pt.x)
+            return kp1.pt.x < kp2.pt.x;
+        if (kp1.pt.y != kp2.pt.y)
+            return kp1.pt.y < kp2.pt.y;
+        if (kp1.response != kp2.response)
+            return kp1.response < kp2.response;
+        return kp1.octave < kp2.octave;
+    }
+};
+
+static void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors = cv::noArray())
+{
+    std::vector<size_t> indexies(keypoints.size());
+    for (size_t i = 0; i < indexies.size(); ++i)
+        indexies[i] = i;
+
+    std::sort(indexies.begin(), indexies.end(), KeypointIdxCompare(&keypoints));
+
+    std::vector<cv::KeyPoint> new_keypoints;
+    cv::Mat new_descriptors;
+
+    new_keypoints.resize(keypoints.size());
+
+    cv::Mat descriptors;
+    if (_descriptors.needed())
+    {
+        descriptors = _descriptors.getMat();
+        new_descriptors.create(descriptors.size(), descriptors.type());
+    }
+
+    for (size_t i = 0; i < indexies.size(); ++i)
+    {
+        size_t new_idx = indexies[i];
+        new_keypoints[i] = keypoints[new_idx];
+        if (!new_descriptors.empty())
+            descriptors.row((int) new_idx).copyTo(new_descriptors.row((int) i));
+    }
+
+    keypoints.swap(new_keypoints);
+    if (_descriptors.needed())
+        new_descriptors.copyTo(_descriptors);
+}
 
 //////////////////////////////////////////////////////////////////////
 // SURF
 
 DEF_PARAM_TEST_1(Image, string);
 
-PERF_TEST_P(Image, Features2D_SURF, Values<string>("gpu/perf/aloe.png"))
+PERF_TEST_P(Image, Features2D_SURF,
+            Values<string>("gpu/perf/aloe.png"))
 {
     declare.time(50.0);
 
-    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
     if (PERF_RUN_GPU())
     {
         cv::gpu::SURF_GPU d_surf;
 
-        cv::gpu::GpuMat d_img(img);
+        const cv::gpu::GpuMat d_img(img);
         cv::gpu::GpuMat d_keypoints, d_descriptors;
 
-        d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
+        TEST_CYCLE() d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
+
+        std::vector<cv::KeyPoint> gpu_keypoints;
+        d_surf.downloadKeypoints(d_keypoints, gpu_keypoints);
 
-        TEST_CYCLE()
-        {
-            d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
-        }
+        cv::Mat gpu_descriptors(d_descriptors);
 
-        GPU_SANITY_CHECK(d_descriptors, 1e-4);
-        GPU_SANITY_CHECK_KEYPOINTS(SURF, d_keypoints);
+        sortKeyPoints(gpu_keypoints, gpu_descriptors);
+
+        SANITY_CHECK_KEYPOINTS(gpu_keypoints);
+        SANITY_CHECK(gpu_descriptors, 1e-3);
     }
     else
     {
         cv::SURF surf;
 
-        std::vector<cv::KeyPoint> keypoints;
-        cv::Mat descriptors;
-
-        surf(img, cv::noArray(), keypoints, descriptors);
+        std::vector<cv::KeyPoint> cpu_keypoints;
+        cv::Mat cpu_descriptors;
 
-        TEST_CYCLE()
-        {
-            keypoints.clear();
-            surf(img, cv::noArray(), keypoints, descriptors);
-        }
+        TEST_CYCLE() surf(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
 
-        SANITY_CHECK_KEYPOINTS(keypoints);
-        SANITY_CHECK(descriptors, 1e-4);
+        SANITY_CHECK_KEYPOINTS(cpu_keypoints);
+        SANITY_CHECK(cpu_descriptors);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // FAST
 
-PERF_TEST_P(Image, Features2D_FAST, Values<string>("gpu/perf/aloe.png"))
+DEF_PARAM_TEST(Image_Threshold_NonMaxSupression, string, int, bool);
+
+PERF_TEST_P(Image_Threshold_NonMaxSupression, Features2D_FAST,
+            Combine(Values<string>("gpu/perf/aloe.png"),
+                    Values(20),
+                    Bool()))
 {
-    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
+    const int threshold = GET_PARAM(1);
+    const bool nonMaxSuppersion = GET_PARAM(2);
+
     if (PERF_RUN_GPU())
     {
-        cv::gpu::FAST_GPU d_fast(20);
+        cv::gpu::FAST_GPU d_fast(threshold, nonMaxSuppersion, 0.5);
 
-        cv::gpu::GpuMat d_img(img);
+        const cv::gpu::GpuMat d_img(img);
         cv::gpu::GpuMat d_keypoints;
 
-        d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
+        TEST_CYCLE() d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
+
+        std::vector<cv::KeyPoint> gpu_keypoints;
+        d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
 
-        TEST_CYCLE()
-        {
-            d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
-        }
+        sortKeyPoints(gpu_keypoints);
 
-        GPU_SANITY_CHECK_RESPONSE(FAST, d_keypoints);
+        SANITY_CHECK_KEYPOINTS(gpu_keypoints);
     }
     else
     {
-        std::vector<cv::KeyPoint> keypoints;
+        std::vector<cv::KeyPoint> cpu_keypoints;
 
-        cv::FAST(img, keypoints, 20);
+        TEST_CYCLE() cv::FAST(img, cpu_keypoints, threshold, nonMaxSuppersion);
 
-        TEST_CYCLE()
-        {
-            keypoints.clear();
-            cv::FAST(img, keypoints, 20);
-        }
-
-        SANITY_CHECK_KEYPOINTS(keypoints);
+        SANITY_CHECK_KEYPOINTS(cpu_keypoints);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // ORB
 
-PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
+DEF_PARAM_TEST(Image_NFeatures, string, int);
+
+PERF_TEST_P(Image_NFeatures, Features2D_ORB,
+            Combine(Values<string>("gpu/perf/aloe.png"),
+                    Values(4000)))
 {
-    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
+    const int nFeatures = GET_PARAM(1);
+
     if (PERF_RUN_GPU())
     {
-        cv::gpu::ORB_GPU d_orb(4000);
+        cv::gpu::ORB_GPU d_orb(nFeatures);
 
-        cv::gpu::GpuMat d_img(img);
+        const cv::gpu::GpuMat d_img(img);
         cv::gpu::GpuMat d_keypoints, d_descriptors;
 
-        d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
+        TEST_CYCLE() d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
+
+        std::vector<cv::KeyPoint> gpu_keypoints;
+        d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
+
+        cv::Mat gpu_descriptors(d_descriptors);
 
-        TEST_CYCLE()
-        {
-            d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
-        }
+        gpu_keypoints.resize(10);
+        gpu_descriptors = gpu_descriptors.rowRange(0, 10);
 
-        GPU_SANITY_CHECK_KEYPOINTS(ORB, d_keypoints);
-        GPU_SANITY_CHECK(d_descriptors);
+        sortKeyPoints(gpu_keypoints, gpu_descriptors);
+
+        SANITY_CHECK_KEYPOINTS(gpu_keypoints);
+        SANITY_CHECK(gpu_descriptors);
     }
     else
     {
-        cv::ORB orb(4000);
-
-        std::vector<cv::KeyPoint> keypoints;
-        cv::Mat descriptors;
+        cv::ORB orb(nFeatures);
 
-        orb(img, cv::noArray(), keypoints, descriptors);
+        std::vector<cv::KeyPoint> cpu_keypoints;
+        cv::Mat cpu_descriptors;
 
-        TEST_CYCLE()
-        {
-            keypoints.clear();
-            orb(img, cv::noArray(), keypoints, descriptors);
-        }
+        TEST_CYCLE() orb(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
 
-        SANITY_CHECK_KEYPOINTS(keypoints);
-        SANITY_CHECK(descriptors);
+        SANITY_CHECK_KEYPOINTS(cpu_keypoints);
+        SANITY_CHECK(cpu_descriptors);
     }
 }
 
@@ -144,166 +199,165 @@ PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
 
 DEF_PARAM_TEST(DescSize_Norm, int, NormType);
 
-PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
+PERF_TEST_P(DescSize_Norm, Features2D_BFMatch,
+            Combine(Values(64, 128, 256),
+                    Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
 {
     declare.time(20.0);
 
-    int desc_size = GET_PARAM(0);
-    int normType = GET_PARAM(1);
+    const int desc_size = GET_PARAM(0);
+    const int normType = GET_PARAM(1);
 
-    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
+    const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
 
     cv::Mat query(3000, desc_size, type);
-    fillRandom(query);
+    declare.in(query, WARMUP_RNG);
 
     cv::Mat train(3000, desc_size, type);
-    fillRandom(train);
+    declare.in(train, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
         cv::gpu::BFMatcher_GPU d_matcher(normType);
 
-        cv::gpu::GpuMat d_query(query);
-        cv::gpu::GpuMat d_train(train);
+        const cv::gpu::GpuMat d_query(query);
+        const cv::gpu::GpuMat d_train(train);
         cv::gpu::GpuMat d_trainIdx, d_distance;
 
-        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+        TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
 
-        TEST_CYCLE()
-        {
-            d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
-        }
+        std::vector<cv::DMatch> gpu_matches;
+        d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches);
 
-        GPU_SANITY_CHECK(d_trainIdx);
-        GPU_SANITY_CHECK(d_distance);
+        SANITY_CHECK_MATCHES(gpu_matches);
     }
     else
     {
         cv::BFMatcher matcher(normType);
 
-        std::vector<cv::DMatch> matches;
-
-        matcher.match(query, train, matches);
+        std::vector<cv::DMatch> cpu_matches;
 
-        TEST_CYCLE()
-        {
-            matcher.match(query, train, matches);
-        }
+        TEST_CYCLE() matcher.match(query, train, cpu_matches);
 
-        SANITY_CHECK(matches);
+        SANITY_CHECK_MATCHES(cpu_matches);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // BFKnnMatch
 
+static void toOneRowMatches(const std::vector< std::vector<cv::DMatch> >& src, std::vector<cv::DMatch>& dst)
+{
+    dst.clear();
+    for (size_t i = 0; i < src.size(); ++i)
+        for (size_t j = 0; j < src[i].size(); ++j)
+            dst.push_back(src[i][j]);
+}
+
 DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
 
-PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine(
-    Values(64, 128, 256),
-    Values(2, 3),
-    Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
+PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch,
+            Combine(Values(64, 128, 256),
+                    Values(2, 3),
+                    Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
 {
     declare.time(30.0);
 
-    int desc_size = GET_PARAM(0);
-    int k = GET_PARAM(1);
-    int normType = GET_PARAM(2);
+    const int desc_size = GET_PARAM(0);
+    const int k = GET_PARAM(1);
+    const int normType = GET_PARAM(2);
 
-    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
+    const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
 
     cv::Mat query(3000, desc_size, type);
-    fillRandom(query);
+    declare.in(query, WARMUP_RNG);
 
     cv::Mat train(3000, desc_size, type);
-    fillRandom(train);
+    declare.in(train, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
         cv::gpu::BFMatcher_GPU d_matcher(normType);
 
-        cv::gpu::GpuMat d_query(query);
-        cv::gpu::GpuMat d_train(train);
+        const cv::gpu::GpuMat d_query(query);
+        const cv::gpu::GpuMat d_train(train);
         cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;
 
-        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
+        TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
+
+        std::vector< std::vector<cv::DMatch> > matchesTbl;
+        d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl);
 
-        TEST_CYCLE()
-        {
-            d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
-        }
+        std::vector<cv::DMatch> gpu_matches;
+        toOneRowMatches(matchesTbl, gpu_matches);
 
-        GPU_SANITY_CHECK(d_trainIdx);
-        GPU_SANITY_CHECK(d_distance);
+        SANITY_CHECK_MATCHES(gpu_matches);
     }
     else
     {
         cv::BFMatcher matcher(normType);
 
-        std::vector< std::vector<cv::DMatch> > matches;
+        std::vector< std::vector<cv::DMatch> > matchesTbl;
 
-        matcher.knnMatch(query, train, matches, k);
+        TEST_CYCLE() matcher.knnMatch(query, train, matchesTbl, k);
 
-        TEST_CYCLE()
-        {
-            matcher.knnMatch(query, train, matches, k);
-        }
+        std::vector<cv::DMatch> cpu_matches;
+        toOneRowMatches(matchesTbl, cpu_matches);
 
-        SANITY_CHECK(matches);
+        SANITY_CHECK_MATCHES(cpu_matches);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // BFRadiusMatch
 
-PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
+PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch,
+            Combine(Values(64, 128, 256),
+                    Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
 {
     declare.time(30.0);
 
-    int desc_size = GET_PARAM(0);
-    int normType = GET_PARAM(1);
+    const int desc_size = GET_PARAM(0);
+    const int normType = GET_PARAM(1);
 
-    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
+    const int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
+    const float maxDistance = 10000;
 
     cv::Mat query(3000, desc_size, type);
-    fillRandom(query, 0.0, 1.0);
+    declare.in(query, WARMUP_RNG);
 
     cv::Mat train(3000, desc_size, type);
-    fillRandom(train, 0.0, 1.0);
+    declare.in(train, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
         cv::gpu::BFMatcher_GPU d_matcher(normType);
 
-        cv::gpu::GpuMat d_query(query);
-        cv::gpu::GpuMat d_train(train);
+        const cv::gpu::GpuMat d_query(query);
+        const cv::gpu::GpuMat d_train(train);
         cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;
 
-        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
+        TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
 
-        TEST_CYCLE()
-        {
-            d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
-        }
+        std::vector< std::vector<cv::DMatch> > matchesTbl;
+        d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl);
 
-        GPU_SANITY_CHECK(d_trainIdx);
-        GPU_SANITY_CHECK(d_distance);
+        std::vector<cv::DMatch> gpu_matches;
+        toOneRowMatches(matchesTbl, gpu_matches);
+
+        SANITY_CHECK_MATCHES(gpu_matches);
     }
     else
     {
         cv::BFMatcher matcher(normType);
 
-        std::vector< std::vector<cv::DMatch> > matches;
+        std::vector< std::vector<cv::DMatch> > matchesTbl;
 
-        matcher.radiusMatch(query, train, matches, 2.0);
+        TEST_CYCLE() matcher.radiusMatch(query, train, matchesTbl, maxDistance);
 
-        TEST_CYCLE()
-        {
-            matcher.radiusMatch(query, train, matches, 2.0);
-        }
+        std::vector<cv::DMatch> cpu_matches;
+        toOneRowMatches(matchesTbl, cpu_matches);
 
-        SANITY_CHECK(matches);
+        SANITY_CHECK_MATCHES(cpu_matches);
     }
 }
-
-} // namespace
index 7faf93e..3516954 100644 (file)
@@ -3,48 +3,39 @@
 using namespace std;
 using namespace testing;
 
-namespace {
-
 //////////////////////////////////////////////////////////////////////
 // Blur
 
 DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
 
-PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), Values(3, 5, 7)))
+PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8UC1, CV_8UC4),
+                    Values(3, 5, 7)))
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int ksize = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int ksize = GET_PARAM(2);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
-        }
+        TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::blur(src, dst, cv::Size(ksize, ksize));
-
-        TEST_CYCLE()
-        {
-            cv::blur(src, dst, cv::Size(ksize, ksize));
-        }
+        TEST_CYCLE() cv::blur(src, dst, cv::Size(ksize, ksize));
 
         CPU_SANITY_CHECK(dst);
     }
@@ -57,38 +48,28 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Valu
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int ksize = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int ksize = GET_PARAM(2);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
+        TEST_CYCLE() cv::gpu::Sobel(d_src, dst, -1, 1, 1, d_buf, ksize);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::Sobel(src, dst, -1, 1, 1, ksize);
-
-        TEST_CYCLE()
-        {
-            cv::Sobel(src, dst, -1, 1, 1, ksize);
-        }
+        TEST_CYCLE() cv::Sobel(src, dst, -1, 1, 1, ksize);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -101,37 +82,27 @@ PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
+        TEST_CYCLE() cv::gpu::Scharr(d_src, dst, -1, 1, 0, d_buf);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::Scharr(src, dst, -1, 1, 0);
-
-        TEST_CYCLE()
-        {
-            cv::Scharr(src, dst, -1, 1, 0);
-        }
+        TEST_CYCLE() cv::Scharr(src, dst, -1, 1, 0);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -144,38 +115,28 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZE
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int ksize = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int ksize = GET_PARAM(2);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
-        }
+        TEST_CYCLE() cv::gpu::GaussianBlur(d_src, dst, cv::Size(ksize, ksize), d_buf, 0.5);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
-
-        TEST_CYCLE()
-        {
-            cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
-        }
+        TEST_CYCLE() cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -188,37 +149,27 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES,
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int ksize = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int ksize = GET_PARAM(2);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
-        }
+        TEST_CYCLE() cv::gpu::Laplacian(d_src, dst, -1, ksize);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::Laplacian(src, dst, -1, ksize);
-
-        TEST_CYCLE()
-        {
-            cv::Laplacian(src, dst, -1, ksize);
-        }
+        TEST_CYCLE() cv::Laplacian(src, dst, -1, ksize);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -231,39 +182,29 @@ PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
+    const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::erode(d_src, d_dst, ker, d_buf);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::erode(d_src, d_dst, ker, d_buf);
-        }
+        TEST_CYCLE() cv::gpu::erode(d_src, dst, ker, d_buf);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::erode(src, dst, ker);
-
-        TEST_CYCLE()
-        {
-            cv::erode(src, dst, ker);
-        }
+        TEST_CYCLE() cv::erode(src, dst, ker);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -276,39 +217,29 @@ PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
+    const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::dilate(d_src, d_dst, ker, d_buf);
+        TEST_CYCLE() cv::gpu::dilate(d_src, dst, ker, d_buf);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::dilate(d_src, d_dst, ker, d_buf);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::dilate(src, dst, ker);
-
-        TEST_CYCLE()
-        {
-            cv::dilate(src, dst, ker);
-        }
+        TEST_CYCLE() cv::dilate(src, dst, ker);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -326,41 +257,31 @@ PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Val
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int morphOp = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int morphOp = GET_PARAM(2);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
+    const cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_buf1;
         cv::gpu::GpuMat d_buf2;
 
-        cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
+        TEST_CYCLE() cv::gpu::morphologyEx(d_src, dst, morphOp, ker, d_buf1, d_buf2);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::morphologyEx(src, dst, morphOp, ker);
-
-        TEST_CYCLE()
-        {
-            cv::morphologyEx(src, dst, morphOp, ker);
-        }
+        TEST_CYCLE() cv::morphologyEx(src, dst, morphOp, ker);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -373,43 +294,31 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, V
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int ksize = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int ksize = GET_PARAM(2);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     cv::Mat kernel(ksize, ksize, CV_32FC1);
-    fillRandom(kernel, 0.0, 1.0);
+    declare.in(kernel, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::filter2D(d_src, d_dst, -1, kernel);
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::filter2D(d_src, d_dst, -1, kernel);
-        }
+        TEST_CYCLE() cv::gpu::filter2D(d_src, dst, -1, kernel);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::filter2D(src, dst, -1, kernel);
-
-        TEST_CYCLE()
-        {
-            cv::filter2D(src, dst, -1, kernel);
-        }
+        TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
 
         CPU_SANITY_CHECK(dst);
     }
 }
-
-} // namespace
index e3d488e..d1a8796 100644 (file)
@@ -2,13 +2,12 @@
 
 using namespace std;
 using namespace testing;
-
-namespace {
+using namespace perf;
 
 //////////////////////////////////////////////////////////////////////
 // Remap
 
-enum{HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH};
+enum { HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH };
 CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH);
 #define ALL_REMAP_MODES ValuesIn(RemapMode::all())
 
@@ -51,59 +50,50 @@ void generateMap(cv::Mat& map_x, cv::Mat& map_y, int remapMode)
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border_Mode, cv::Size, MatDepth, MatCn, Interpolation, BorderMode, RemapMode);
 
-PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4,
-    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-    ALL_BORDER_MODES,
-    ALL_REMAP_MODES))
+PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+                    ALL_BORDER_MODES,
+                    ALL_REMAP_MODES))
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-    int borderMode = GET_PARAM(4);
-    int remapMode = GET_PARAM(5);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const int borderMode = GET_PARAM(4);
+    const int remapMode = GET_PARAM(5);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     cv::Mat xmap(size, CV_32FC1);
     cv::Mat ymap(size, CV_32FC1);
-
     generateMap(xmap, ymap, remapMode);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_xmap(xmap);
-        cv::gpu::GpuMat d_ymap(ymap);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_xmap(xmap);
+        const cv::gpu::GpuMat d_ymap(ymap);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
-        }
+        TEST_CYCLE() cv::gpu::remap(d_src, dst, d_xmap, d_ymap, interpolation, borderMode);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
+        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
 
-        TEST_CYCLE()
-        {
-            cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
@@ -112,50 +102,42 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, Combine(
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Scale, cv::Size, MatDepth, MatCn, Interpolation, double);
 
-PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4,
-    ALL_INTERPOLATIONS,
-    Values(0.5, 0.3, 2.0)))
+PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+                    Values(0.5, 0.3, 2.0)))
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-    double f = GET_PARAM(4);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const double f = GET_PARAM(4);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::resize(d_src, d_dst, cv::Size(), f, f, interpolation);
+        TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::resize(d_src, d_dst, cv::Size(), f, f, interpolation);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
     }
     else
     {
         cv::Mat dst;
 
-        cv::resize(src, dst, cv::Size(), f, f, interpolation);
+        TEST_CYCLE() cv::resize(src, dst, cv::Size(), f, f, interpolation);
 
-        TEST_CYCLE()
-        {
-            cv::resize(src, dst, cv::Size(), f, f, interpolation);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
@@ -164,49 +146,41 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, Combine(
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Scale, cv::Size, MatDepth, MatCn, double);
 
-PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4,
-    Values(0.2, 0.1, 0.05)))
+PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(0.2, 0.1, 0.05)))
 {
     declare.time(1.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int interpolation = cv::INTER_AREA;
-    double f = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = cv::INTER_AREA;
+    const double f = GET_PARAM(3);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::resize(d_src, d_dst, cv::Size(), f, f, interpolation);
+        TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::resize(d_src, d_dst, cv::Size(), f, f, interpolation);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::resize(src, dst, cv::Size(), f, f, interpolation);
+        TEST_CYCLE() cv::resize(src, dst, cv::Size(), f, f, interpolation);
 
-        TEST_CYCLE()
-        {
-            cv::resize(src, dst, cv::Size(), f, f, interpolation);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
@@ -215,111 +189,98 @@ PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, Combine(
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border, cv::Size, MatDepth, MatCn, Interpolation, BorderMode);
 
-PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4,
-    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-    ALL_BORDER_MODES))
+PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+                    ALL_BORDER_MODES))
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-    int borderMode = GET_PARAM(4);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const int borderMode = GET_PARAM(4);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     const double aplha = CV_PI / 4;
-    double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
-                         {std::sin(aplha),  std::cos(aplha), 0}};
-    cv::Mat M(2, 3, CV_64F, (void*) mat);
+    const double mat[2 * 3] =
+    {
+        std::cos(aplha), -std::sin(aplha), src.cols / 2,
+        std::sin(aplha),  std::cos(aplha), 0
+    };
+    const cv::Mat M(2, 3, CV_64F, (void*) mat);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::warpAffine(d_src, d_dst, M, size, interpolation, borderMode);
+        TEST_CYCLE() cv::gpu::warpAffine(d_src, dst, M, size, interpolation, borderMode);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::warpAffine(d_src, d_dst, M, size, interpolation, borderMode);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1);
     }
     else
     {
         cv::Mat dst;
 
-        cv::warpAffine(src, dst, M, size, interpolation, borderMode);
+        TEST_CYCLE() cv::warpAffine(src, dst, M, size, interpolation, borderMode);
 
-        TEST_CYCLE()
-        {
-            cv::warpAffine(src, dst, M, size, interpolation, borderMode);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // WarpPerspective
 
-PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4,
-    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-    ALL_BORDER_MODES))
+PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+                    ALL_BORDER_MODES))
 {
     declare.time(20.0);
 
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-    int borderMode = GET_PARAM(4);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const int borderMode = GET_PARAM(4);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     const double aplha = CV_PI / 4;
     double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
                          {std::sin(aplha),  std::cos(aplha), 0},
                          {0.0,              0.0,             1.0}};
-    cv::Mat M(3, 3, CV_64F, (void*) mat);
+    const cv::Mat M(3, 3, CV_64F, (void*) mat);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::warpPerspective(d_src, d_dst, M, size, interpolation, borderMode);
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::warpPerspective(d_src, d_dst, M, size, interpolation, borderMode);
-        }
+        TEST_CYCLE() cv::gpu::warpPerspective(d_src, dst, M, size, interpolation, borderMode);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1);
     }
     else
     {
         cv::Mat dst;
 
-        cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
+        TEST_CYCLE() cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
 
-        TEST_CYCLE()
-        {
-            cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
@@ -328,46 +289,38 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, Combine(
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode);
 
-PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4,
-    ALL_BORDER_MODES))
+PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    ALL_BORDER_MODES))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int borderMode = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int borderMode = GET_PARAM(3);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::copyMakeBorder(d_src, d_dst, 5, 5, 5, 5, borderMode);
+        TEST_CYCLE() cv::gpu::copyMakeBorder(d_src, dst, 5, 5, 5, 5, borderMode);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::copyMakeBorder(d_src, d_dst, 5, 5, 5, 5, borderMode);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderMode);
+        TEST_CYCLE() cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderMode);
 
-        TEST_CYCLE()
-        {
-            cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderMode);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
@@ -379,168 +332,145 @@ CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv
 
 DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp);
 
-PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F, CV_64F),
-    ALL_THRESH_OPS))
+PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+            Values(CV_8U, CV_16U, CV_32F, CV_64F),
+            ALL_THRESH_OPS))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int threshOp = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int threshOp = GET_PARAM(2);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::threshold(d_src, d_dst, 100.0, 255.0, threshOp);
+        TEST_CYCLE() cv::gpu::threshold(d_src, dst, 100.0, 255.0, threshOp);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::threshold(d_src, d_dst, 100.0, 255.0, threshOp);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
         cv::Mat dst;
 
-        cv::threshold(src, dst, 100.0, 255.0, threshOp);
+        TEST_CYCLE() cv::threshold(src, dst, 100.0, 255.0, threshOp);
 
-        TEST_CYCLE()
-        {
-            cv::threshold(src, dst, 100.0, 255.0, threshOp);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // Integral
 
-PERF_TEST_P(Sz, ImgProc_Integral, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_Integral,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_8UC1);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::integralBuffered(d_src, d_dst, d_buf);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::integralBuffered(d_src, d_dst, d_buf);
-        }
+        TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::integral(src, dst);
+        TEST_CYCLE() cv::integral(src, dst);
 
-        TEST_CYCLE()
-        {
-            cv::integral(src, dst);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // IntegralSqr
 
-PERF_TEST_P(Sz, ImgProc_IntegralSqr, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_IntegralSqr,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_8UC1);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::sqrIntegral(d_src, d_dst);
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::sqrIntegral(d_src, d_dst);
-        }
+        TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // HistEvenC1
 
-PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC1, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_16S)))
+PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC1,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_16S)))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
 
     cv::Mat src(size, depth);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_hist;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::histEven(d_src, d_hist, d_buf, 30, 0, 180);
+        TEST_CYCLE() cv::gpu::histEven(d_src, dst, d_buf, 30, 0, 180);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::histEven(d_src, d_hist, d_buf, 30, 0, 180);
-        }
-
-        GPU_SANITY_CHECK(d_hist);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        int hbins = 30;
-        float hranges[] = {0.0f, 180.0f};
-        int histSize[] = {hbins};
+        const int hbins = 30;
+        const float hranges[] = {0.0f, 180.0f};
+        const int histSize[] = {hbins};
         const float* ranges[] = {hranges};
-        int channels[] = {0};
+        const int channels[] = {0};
 
-        cv::Mat hist;
+        cv::Mat dst;
 
-        cv::calcHist(&src, 1, channels, cv::Mat(), hist, 1, histSize, ranges);
+        TEST_CYCLE() cv::calcHist(&src, 1, channels, cv::Mat(), dst, 1, histSize, ranges);
 
-        TEST_CYCLE()
-        {
-            cv::calcHist(&src, 1, channels, cv::Mat(), hist, 1, histSize, ranges);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // HistEvenC4
 
-PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC4, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_16S)))
+PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC4,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_16S)))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
 
     cv::Mat src(size, CV_MAKE_TYPE(depth, 4));
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     int histSize[] = {30, 30, 30, 30};
     int lowerLevel[] = {0, 0, 0, 0};
@@ -548,121 +478,109 @@ PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC4, Combine(GPU_TYPICAL_MAT_SIZES, Values(
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_hist[4];
-        cv::gpu::GpuMat d_buf, d_hist0;
+        cv::gpu::GpuMat d_buf;
 
-        cv::gpu::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
+        TEST_CYCLE() cv::gpu::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
-        }
-
-        GPU_SANITY_CHECK(d_hist0);
+        cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3;
+        d_hist[0].download(cpu_hist0);
+        d_hist[1].download(cpu_hist1);
+        d_hist[2].download(cpu_hist2);
+        d_hist[3].download(cpu_hist3);
+        SANITY_CHECK(cpu_hist0);
+        SANITY_CHECK(cpu_hist1);
+        SANITY_CHECK(cpu_hist2);
+        SANITY_CHECK(cpu_hist3);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // CalcHist
 
-PERF_TEST_P(Sz, ImgProc_CalcHist, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_CalcHist,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_8UC1);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_hist;
-
-        cv::gpu::calcHist(d_src, d_hist);
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::calcHist(d_src, d_hist);
-        }
+        TEST_CYCLE() cv::gpu::calcHist(d_src, dst);
 
-        GPU_SANITY_CHECK(d_hist);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // EqualizeHist
 
-PERF_TEST_P(Sz, ImgProc_EqualizeHist, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_EqualizeHist,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_8UC1);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_hist;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::equalizeHist(d_src, d_dst, d_hist, d_buf);
+        TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_hist, d_buf);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::equalizeHist(d_src, d_dst, d_hist, d_buf);
-        }
-
-        GPU_SANITY_CHECK(d_hist);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::equalizeHist(src, dst);
+        TEST_CYCLE() cv::equalizeHist(src, dst);
 
-        TEST_CYCLE()
-        {
-            cv::equalizeHist(src, dst);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // ColumnSum
 
-PERF_TEST_P(Sz, ImgProc_ColumnSum, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_ColumnSum,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_32FC1);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::columnSum(d_src, d_dst);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::columnSum(d_src, d_dst);
-        }
+        TEST_CYCLE() cv::gpu::columnSum(d_src, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -671,43 +589,38 @@ PERF_TEST_P(Sz, ImgProc_ColumnSum, GPU_TYPICAL_MAT_SIZES)
 
 DEF_PARAM_TEST(Image_AppertureSz_L2gradient, string, int, bool);
 
-PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, Combine(
-    Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"),
-    Values(3, 5),
-    Bool()))
+PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny,
+            Combine(Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"),
+                    Values(3, 5),
+                    Bool()))
 {
-    string fileName = GET_PARAM(0);
-    int apperture_size = GET_PARAM(1);
-    bool useL2gradient = GET_PARAM(2);
+    const string fileName = GET_PARAM(0);
+    const int apperture_size = GET_PARAM(1);
+    const bool useL2gradient = GET_PARAM(2);
 
-    cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
+    const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(image.empty());
 
+    const double low_thresh = 50.0;
+    const double high_thresh = 100.0;
+
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_image(image);
+        cv::gpu::GpuMat dst;
         cv::gpu::CannyBuf d_buf;
 
-        cv::gpu::Canny(d_image, d_buf, d_dst, 50.0, 100.0, apperture_size, useL2gradient);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::Canny(d_image, d_buf, d_dst, 50.0, 100.0, apperture_size, useL2gradient);
-        }
+        TEST_CYCLE() cv::gpu::Canny(d_image, d_buf, dst, low_thresh, high_thresh, apperture_size, useL2gradient);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::Canny(image, dst, 50.0, 100.0, apperture_size, useL2gradient);
+        TEST_CYCLE() cv::Canny(image, dst, low_thresh, high_thresh, apperture_size, useL2gradient);
 
-        TEST_CYCLE()
-        {
-            cv::Canny(image, dst, 50.0, 100.0, apperture_size, useL2gradient);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
@@ -716,148 +629,142 @@ PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, Combine(
 
 DEF_PARAM_TEST_1(Image, string);
 
-PERF_TEST_P(Image, ImgProc_MeanShiftFiltering, Values<string>("gpu/meanshift/cones.png"))
+PERF_TEST_P(Image, ImgProc_MeanShiftFiltering,
+            Values<string>("gpu/meanshift/cones.png"))
 {
     declare.time(15.0);
 
-    cv::Mat img = readImage(GetParam());
+    const cv::Mat img = readImage(GetParam());
     ASSERT_FALSE(img.empty());
 
     cv::Mat rgba;
     cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
 
+    const int sp = 50;
+    const int sr = 50;
+
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(rgba);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(rgba);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::meanShiftFiltering(d_src, d_dst, 50, 50);
+        TEST_CYCLE() cv::gpu::meanShiftFiltering(d_src, dst, sp, sr);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::meanShiftFiltering(d_src, d_dst, 50, 50);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::pyrMeanShiftFiltering(img, dst, 50, 50);
+        TEST_CYCLE() cv::pyrMeanShiftFiltering(img, dst, sp, sr);
 
-        TEST_CYCLE()
-        {
-            cv::pyrMeanShiftFiltering(img, dst, 50, 50);
-        }
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // MeanShiftProc
 
-PERF_TEST_P(Image, ImgProc_MeanShiftProc, Values<string>("gpu/meanshift/cones.png"))
+PERF_TEST_P(Image, ImgProc_MeanShiftProc,
+            Values<string>("gpu/meanshift/cones.png"))
 {
     declare.time(5.0);
 
-    cv::Mat img = readImage(GetParam());
+    const cv::Mat img = readImage(GetParam());
     ASSERT_FALSE(img.empty());
 
     cv::Mat rgba;
     cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
 
+    const int sp = 50;
+    const int sr = 50;
+
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(rgba);
-        cv::gpu::GpuMat d_dstr;
-        cv::gpu::GpuMat d_dstsp;
+        const cv::gpu::GpuMat d_src(rgba);
+        cv::gpu::GpuMat dstr;
+        cv::gpu::GpuMat dstsp;
 
-        cv::gpu::meanShiftProc(d_src, d_dstr, d_dstsp, 50, 50);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::meanShiftProc(d_src, d_dstr, d_dstsp, 50, 50);
-        }
+        TEST_CYCLE() cv::gpu::meanShiftProc(d_src, dstr, dstsp, sp, sr);
 
-        GPU_SANITY_CHECK(d_dstr);
+        GPU_SANITY_CHECK(dstr);
+        GPU_SANITY_CHECK(dstsp);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // MeanShiftSegmentation
 
-PERF_TEST_P(Image, ImgProc_MeanShiftSegmentation, Values<string>("gpu/meanshift/cones.png"))
+PERF_TEST_P(Image, ImgProc_MeanShiftSegmentation,
+            Values<string>("gpu/meanshift/cones.png"))
 {
     declare.time(5.0);
 
-    cv::Mat img = readImage(GetParam());
+    const cv::Mat img = readImage(GetParam());
     ASSERT_FALSE(img.empty());
 
     cv::Mat rgba;
     cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
 
-    cv::Mat dst;
+    const int sp = 10;
+    const int sr = 10;
+    const int minsize = 20;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(rgba);
-
-        cv::gpu::meanShiftSegmentation(d_src, dst, 10, 10, 20);
+        const cv::gpu::GpuMat d_src(rgba);
+        cv::Mat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::meanShiftSegmentation(d_src, dst, 10, 10, 20);
-        }
+        TEST_CYCLE() cv::gpu::meanShiftSegmentation(d_src, dst, sp, sr, minsize);
 
         GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // BlendLinear
 
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_BlendLinear, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_32F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, ImgProc_BlendLinear,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat img1(size, type);
-    fillRandom(img1);
-
     cv::Mat img2(size, type);
-    fillRandom(img2);
+    declare.in(img1, img2, WARMUP_RNG);
+
+    const cv::Mat weights1(size, CV_32FC1, cv::Scalar::all(0.5));
+    const cv::Mat weights2(size, CV_32FC1, cv::Scalar::all(0.5));
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_img1(img1);
-        cv::gpu::GpuMat d_img2(img2);
-        cv::gpu::GpuMat d_weights1(size, CV_32FC1, cv::Scalar::all(0.5));
-        cv::gpu::GpuMat d_weights2(size, CV_32FC1, cv::Scalar::all(0.5));
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_img1(img1);
+        const cv::gpu::GpuMat d_img2(img2);
+        const cv::gpu::GpuMat d_weights1(weights1);
+        const cv::gpu::GpuMat d_weights2(weights2);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::blendLinear(d_img1, d_img2, d_weights1, d_weights2, d_dst);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::blendLinear(d_img1, d_img2, d_weights1, d_weights2, d_dst);
-        }
+        TEST_CYCLE() cv::gpu::blendLinear(d_img1, d_img2, d_weights1, d_weights2, dst);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -866,19 +773,20 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_BlendLinear, Combine(GPU_TYPICAL_MAT_SIZES, Val
 
 DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool);
 
-PERF_TEST_P(Sz_KernelSz_Ccorr, ImgProc_Convolve, Combine(GPU_TYPICAL_MAT_SIZES, Values(17, 27, 32, 64), Bool()))
+PERF_TEST_P(Sz_KernelSz_Ccorr, ImgProc_Convolve,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(17, 27, 32, 64),
+                    Bool()))
 {
     declare.time(10.0);
 
-    cv::Size size = GET_PARAM(0);
-    int templ_size = GET_PARAM(1);
-    bool ccorr = GET_PARAM(2);
-
-    cv::Mat image(size, CV_32FC1);
-    image.setTo(1.0);
+    const cv::Size size = GET_PARAM(0);
+    const int templ_size = GET_PARAM(1);
+    const bool ccorr = GET_PARAM(2);
 
-    cv::Mat templ(templ_size, templ_size, CV_32FC1);
-    templ.setTo(1.0);
+    const cv::Mat image(size, CV_32FC1);
+    const cv::Mat templ(templ_size, templ_size, CV_32FC1);
+    declare.in(image, templ, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
@@ -888,30 +796,21 @@ PERF_TEST_P(Sz_KernelSz_Ccorr, ImgProc_Convolve, Combine(GPU_TYPICAL_MAT_SIZES,
         cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
         d_templ.upload(templ);
 
-        cv::gpu::GpuMat d_dst;
+        cv::gpu::GpuMat dst;
         cv::gpu::ConvolveBuf d_buf;
 
-        cv::gpu::convolve(d_image, d_templ, d_dst, ccorr, d_buf);
+        TEST_CYCLE() cv::gpu::convolve(d_image, d_templ, dst, ccorr, d_buf);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::convolve(d_image, d_templ, d_dst, ccorr, d_buf);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        ASSERT_FALSE(ccorr);
+        if (ccorr)
+            FAIL_NO_CPU();
 
         cv::Mat dst;
 
-        cv::filter2D(image, dst, image.depth(), templ);
-
-        TEST_CYCLE()
-        {
-            cv::filter2D(image, dst, image.depth(), templ);
-        }
+        TEST_CYCLE() cv::filter2D(image, dst, image.depth(), templ);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -925,48 +824,36 @@ CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::T
 
 DEF_PARAM_TEST(Sz_TemplateSz_Cn_Method, cv::Size, cv::Size, MatCn, TemplateMethod);
 
-PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate8U, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
-    GPU_CHANNELS_1_3_4,
-    ALL_TEMPLATE_METHODS))
+PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate8U,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
+                    GPU_CHANNELS_1_3_4,
+                    ALL_TEMPLATE_METHODS))
 {
-    cv::Size size = GET_PARAM(0);
-    cv::Size templ_size = GET_PARAM(1);
-    int cn = GET_PARAM(2);
-    int method = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const cv::Size templ_size = GET_PARAM(1);
+    const int cn = GET_PARAM(2);
+    const int method = GET_PARAM(3);
 
     cv::Mat image(size, CV_MAKE_TYPE(CV_8U, cn));
-    fillRandom(image);
-
     cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_8U, cn));
-    fillRandom(templ);
+    declare.in(image, templ, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat d_templ(templ);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_image(image);
+        const cv::gpu::GpuMat d_templ(templ);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::matchTemplate(d_image, d_templ, d_dst, method);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::matchTemplate(d_image, d_templ, d_dst, method);
-        }
+        TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
     }
     else
     {
         cv::Mat dst;
 
-        cv::matchTemplate(image, templ, dst, method);
-
-        TEST_CYCLE()
-        {
-            cv::matchTemplate(image, templ, dst, method);
-        }
+        TEST_CYCLE() cv::matchTemplate(image, templ, dst, method);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -975,48 +862,36 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate8U, Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplate32F
 
-PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate32F, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
-    GPU_CHANNELS_1_3_4,
-    Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))))
+PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate32F,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
+                    GPU_CHANNELS_1_3_4,
+                    Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))))
 {
-    cv::Size size = GET_PARAM(0);
-    cv::Size templ_size = GET_PARAM(1);
-    int cn = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const cv::Size templ_size = GET_PARAM(1);
+    const int cn = GET_PARAM(2);
     int method = GET_PARAM(3);
 
     cv::Mat image(size, CV_MAKE_TYPE(CV_32F, cn));
-    fillRandom(image);
-
     cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_32F, cn));
-    fillRandom(templ);
+    declare.in(image, templ, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat d_templ(templ);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_image(image);
+        const cv::gpu::GpuMat d_templ(templ);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::matchTemplate(d_image, d_templ, d_dst, method);
+        TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::matchTemplate(d_image, d_templ, d_dst, method);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
     else
     {
         cv::Mat dst;
 
-        cv::matchTemplate(image, templ, dst, method);
-
-        TEST_CYCLE()
-        {
-            cv::matchTemplate(image, templ, dst, method);
-        }
+        TEST_CYCLE() cv::matchTemplate(image, templ, dst, method);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1029,44 +904,32 @@ CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMP
 
 DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags);
 
-PERF_TEST_P(Sz_Flags, ImgProc_MulSpectrums, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(0, DftFlags(cv::DFT_ROWS))))
+PERF_TEST_P(Sz_Flags, ImgProc_MulSpectrums,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(0, DftFlags(cv::DFT_ROWS))))
 {
-    cv::Size size = GET_PARAM(0);
-    int flag = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int flag = GET_PARAM(1);
 
     cv::Mat a(size, CV_32FC2);
-    fillRandom(a, 0, 100);
-
     cv::Mat b(size, CV_32FC2);
-    fillRandom(b, 0, 100);
+    declare.in(a, b, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_a(a);
-        cv::gpu::GpuMat d_b(b);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_a(a);
+        const cv::gpu::GpuMat d_b(b);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::mulSpectrums(d_a, d_b, d_dst, flag);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::mulSpectrums(d_a, d_b, d_dst, flag);
-        }
+        TEST_CYCLE() cv::gpu::mulSpectrums(d_a, d_b, dst, flag);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::mulSpectrums(a, b, dst, flag);
-
-        TEST_CYCLE()
-        {
-            cv::mulSpectrums(a, b, dst, flag);
-        }
+        TEST_CYCLE() cv::mulSpectrums(a, b, dst, flag);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1075,78 +938,62 @@ PERF_TEST_P(Sz_Flags, ImgProc_MulSpectrums, Combine(
 //////////////////////////////////////////////////////////////////////
 // MulAndScaleSpectrums
 
-PERF_TEST_P(Sz, ImgProc_MulAndScaleSpectrums, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_MulAndScaleSpectrums,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
-    float scale = 1.f / size.area();
+    const float scale = 1.f / size.area();
 
     cv::Mat src1(size, CV_32FC2);
-    fillRandom(src1, 0, 100);
-
     cv::Mat src2(size, CV_32FC2);
-    fillRandom(src2, 0, 100);
+    declare.in(src1,src2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::mulAndScaleSpectrums(d_src1, d_src2, d_dst, cv::DFT_ROWS, scale, false);
+        const cv::gpu::GpuMat d_src1(src1);
+        const cv::gpu::GpuMat d_src2(src2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::mulAndScaleSpectrums(d_src1, d_src2, d_dst, cv::DFT_ROWS, scale, false);
-        }
+        TEST_CYCLE() cv::gpu::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // Dft
 
-PERF_TEST_P(Sz_Flags, ImgProc_Dft, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))))
+PERF_TEST_P(Sz_Flags, ImgProc_Dft,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))))
 {
     declare.time(10.0);
 
-    cv::Size size = GET_PARAM(0);
-    int flag = GET_PARAM(1);
+    const cv::Size size = GET_PARAM(0);
+    const int flag = GET_PARAM(1);
 
     cv::Mat src(size, CV_32FC2);
-    fillRandom(src, 0, 100);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::dft(d_src, d_dst, size, flag);
+        TEST_CYCLE() cv::gpu::dft(d_src, dst, size, flag);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::dft(d_src, d_dst, size, flag);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
     else
     {
         cv::Mat dst;
 
-        cv::dft(src, dst, flag);
-
-        TEST_CYCLE()
-        {
-            cv::dft(src, dst, flag);
-        }
+        TEST_CYCLE() cv::dft(src, dst, flag);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1157,52 +1004,43 @@ PERF_TEST_P(Sz_Flags, ImgProc_Dft, Combine(
 
 DEF_PARAM_TEST(Image_Type_Border_BlockSz_ApertureSz, string, MatType, BorderMode, int, int);
 
-PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerHarris, Combine(
-    Values<string>("gpu/stereobm/aloe-L.png"),
-    Values(CV_8UC1, CV_32FC1),
-    Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
-    Values(3, 5, 7),
-    Values(0, 3, 5, 7)))
+PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerHarris,
+            Combine(Values<string>("gpu/stereobm/aloe-L.png"),
+                    Values(CV_8UC1, CV_32FC1),
+                    Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+                    Values(3, 5, 7),
+                    Values(0, 3, 5, 7)))
 {
-    string fileName = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int borderMode = GET_PARAM(2);
-    int blockSize = GET_PARAM(3);
-    int apertureSize = GET_PARAM(4);
+    const string fileName = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int borderMode = GET_PARAM(2);
+    const int blockSize = GET_PARAM(3);
+    const int apertureSize = GET_PARAM(4);
 
     cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
+
     img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
 
-    double k = 0.5;
+    const double k = 0.5;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_img(img);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_Dx;
         cv::gpu::GpuMat d_Dy;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::cornerHarris(d_img, d_dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, k, borderMode);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::cornerHarris(d_img, d_dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, k, borderMode);
-        }
+        TEST_CYCLE() cv::gpu::cornerHarris(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, k, borderMode);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderMode);
-
-        TEST_CYCLE()
-        {
-            cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderMode);
-        }
+        TEST_CYCLE() cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderMode);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1211,18 +1049,18 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerHarris, Combine(
 //////////////////////////////////////////////////////////////////////
 // CornerMinEigenVal
 
-PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerMinEigenVal, Combine(
-    Values<string>("gpu/stereobm/aloe-L.png"),
-    Values(CV_8UC1, CV_32FC1),
-    Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
-    Values(3, 5, 7),
-    Values(0, 3, 5, 7)))
+PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerMinEigenVal,
+            Combine(Values<string>("gpu/stereobm/aloe-L.png"),
+                    Values(CV_8UC1, CV_32FC1),
+                    Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+                    Values(3, 5, 7),
+                    Values(0, 3, 5, 7)))
 {
-    string fileName = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int borderMode = GET_PARAM(2);
-    int blockSize = GET_PARAM(3);
-    int apertureSize = GET_PARAM(4);
+    const string fileName = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int borderMode = GET_PARAM(2);
+    const int blockSize = GET_PARAM(3);
+    const int apertureSize = GET_PARAM(4);
 
     cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
@@ -1231,31 +1069,21 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerMinEigenVal, Com
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_img(img);
+        cv::gpu::GpuMat dst;
         cv::gpu::GpuMat d_Dx;
         cv::gpu::GpuMat d_Dy;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::cornerMinEigenVal(d_img, d_dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, borderMode);
+        TEST_CYCLE() cv::gpu::cornerMinEigenVal(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, borderMode);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::cornerMinEigenVal(d_img, d_dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, borderMode);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderMode);
-
-        TEST_CYCLE()
-        {
-            cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderMode);
-        }
+        TEST_CYCLE() cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderMode);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1264,95 +1092,82 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerMinEigenVal, Com
 //////////////////////////////////////////////////////////////////////
 // BuildWarpPlaneMaps
 
-PERF_TEST_P(Sz, ImgProc_BuildWarpPlaneMaps, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_BuildWarpPlaneMaps,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
-    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
-    cv::Mat T = cv::Mat::zeros(1, 3, CV_32F);
+    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
+    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
+    const cv::Mat T = cv::Mat::zeros(1, 3, CV_32F);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_map_x;
-        cv::gpu::GpuMat d_map_y;
-
-        cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, d_map_x, d_map_y);
+        cv::gpu::GpuMat map_x;
+        cv::gpu::GpuMat map_y;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, d_map_x, d_map_y);
-        }
+        TEST_CYCLE() cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y);
 
-        GPU_SANITY_CHECK(d_map_x);
-        GPU_SANITY_CHECK(d_map_y);
+        GPU_SANITY_CHECK(map_x);
+        GPU_SANITY_CHECK(map_y);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // BuildWarpCylindricalMaps
 
-PERF_TEST_P(Sz, ImgProc_BuildWarpCylindricalMaps, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_BuildWarpCylindricalMaps,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
-    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
+    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
+    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_map_x;
-        cv::gpu::GpuMat d_map_y;
+        cv::gpu::GpuMat map_x;
+        cv::gpu::GpuMat map_y;
 
-        cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, d_map_x, d_map_y);
+        TEST_CYCLE() cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, d_map_x, d_map_y);
-        }
-
-        GPU_SANITY_CHECK(d_map_x);
-        GPU_SANITY_CHECK(d_map_y);
+        GPU_SANITY_CHECK(map_x);
+        GPU_SANITY_CHECK(map_y);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // BuildWarpSphericalMaps
 
-PERF_TEST_P(Sz, ImgProc_BuildWarpSphericalMaps, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_BuildWarpSphericalMaps,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
-    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
+    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
+    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_map_x;
-        cv::gpu::GpuMat d_map_y;
+        cv::gpu::GpuMat map_x;
+        cv::gpu::GpuMat map_y;
 
-        cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, d_map_x, d_map_y);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, d_map_x, d_map_y);
-        }
-
-        GPU_SANITY_CHECK(d_map_x);
-        GPU_SANITY_CHECK(d_map_y);
+        TEST_CYCLE() cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
 
+        GPU_SANITY_CHECK(map_x);
+        GPU_SANITY_CHECK(map_y);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -1361,83 +1176,68 @@ PERF_TEST_P(Sz, ImgProc_BuildWarpSphericalMaps, GPU_TYPICAL_MAT_SIZES)
 
 DEF_PARAM_TEST(Sz_Depth_Cn_Inter, cv::Size, MatDepth, MatCn, Interpolation);
 
-PERF_TEST_P(Sz_Depth_Cn_Inter, ImgProc_Rotate, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4,
-    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC))))
+PERF_TEST_P(Sz_Depth_Cn_Inter, ImgProc_Rotate,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4,
+                    Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC))))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::rotate(d_src, d_dst, size, 30.0, 0, 0, interpolation);
+        TEST_CYCLE() cv::gpu::rotate(d_src, dst, size, 30.0, 0, 0, interpolation);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::rotate(d_src, d_dst, size, 30.0, 0, 0, interpolation);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // PyrDown
 
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::pyrDown(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::pyrDown(d_src, dst);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::pyrDown(d_src, d_dst);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::pyrDown(src, dst);
-
-        TEST_CYCLE()
-        {
-            cv::pyrDown(src, dst);
-        }
+        TEST_CYCLE() cv::pyrDown(src, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1446,44 +1246,34 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, Combine(
 //////////////////////////////////////////////////////////////////////
 // PyrUp
 
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::pyrUp(d_src, d_dst);
+        TEST_CYCLE() cv::gpu::pyrUp(d_src, dst);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::pyrUp(d_src, d_dst);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::pyrUp(src, dst);
-
-        TEST_CYCLE()
-        {
-            cv::pyrUp(src, dst);
-        }
+        TEST_CYCLE() cv::pyrUp(src, dst);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -1494,100 +1284,164 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, Combine(
 
 DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CvtColorInfo);
 
-PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColor, Combine(
-    GPU_TYPICAL_MAT_SIZES,
-    Values(CV_8U, CV_16U, CV_32F),
-    Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
-           CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
-           CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
-           CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
-           CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
-           CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
-           CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
-           CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
-           CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
-           CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
-           CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
-           CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
-           CvtColorInfo(3, 3, cv::COLOR_HLS2BGR),
-           CvtColorInfo(3, 3, cv::COLOR_BGR2Lab),
-           CvtColorInfo(3, 3, cv::COLOR_LBGR2Lab),
-           CvtColorInfo(3, 3, cv::COLOR_BGR2Luv),
-           CvtColorInfo(3, 3, cv::COLOR_LBGR2Luv),
-           CvtColorInfo(3, 3, cv::COLOR_Lab2BGR),
-           CvtColorInfo(3, 3, cv::COLOR_Lab2LBGR),
-           CvtColorInfo(3, 3, cv::COLOR_Luv2RGB),
-           CvtColorInfo(3, 3, cv::COLOR_Luv2LRGB),
-           CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR),
-           CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR),
-           CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR),
-           CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR),
-           CvtColorInfo(4, 4, cv::COLOR_RGBA2mRGBA))))
+PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColor,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_32F),
+                    Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
+                           CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
+                           CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
+                           CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
+                           CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
+                           CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
+                           CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
+                           CvtColorInfo(3, 3, cv::COLOR_HLS2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2Lab),
+                           CvtColorInfo(3, 3, cv::COLOR_LBGR2Lab),
+                           CvtColorInfo(3, 3, cv::COLOR_BGR2Luv),
+                           CvtColorInfo(3, 3, cv::COLOR_LBGR2Luv),
+                           CvtColorInfo(3, 3, cv::COLOR_Lab2BGR),
+                           CvtColorInfo(3, 3, cv::COLOR_Lab2LBGR),
+                           CvtColorInfo(3, 3, cv::COLOR_Luv2RGB),
+                           CvtColorInfo(3, 3, cv::COLOR_Luv2LRGB))))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    CvtColorInfo info = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const CvtColorInfo info = GET_PARAM(2);
 
     cv::Mat src(size, CV_MAKETYPE(depth, info.scn));
-    fillRandom(src);
+    cv::randu(src, 0, depth == CV_8U ? 255.0 : 1.0);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::cvtColor(d_src, d_dst, info.code, info.dcn);
+        TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::cvtColor(d_src, d_dst, info.code, info.dcn);
-        }
+        GPU_SANITY_CHECK(dst, 1e-4);
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::cvtColor(src, dst, info.code, info.dcn);
 
-        GPU_SANITY_CHECK(d_dst);
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColorBayer,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U),
+                    Values(CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR),
+                           CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR),
+                           CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR),
+                           CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR),
+
+                           CvtColorInfo(1, 1, cv::COLOR_BayerBG2GRAY),
+                           CvtColorInfo(1, 1, cv::COLOR_BayerGB2GRAY),
+                           CvtColorInfo(1, 1, cv::COLOR_BayerRG2GRAY),
+                           CvtColorInfo(1, 1, cv::COLOR_BayerGR2GRAY))))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const CvtColorInfo info = GET_PARAM(2);
+
+    cv::Mat src(size, CV_MAKETYPE(depth, info.scn));
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn);
+
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
         cv::Mat dst;
 
-        cv::cvtColor(src, dst, info.code, info.dcn);
+        TEST_CYCLE() cv::cvtColor(src, dst, info.code, info.dcn);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
 
-        TEST_CYCLE()
+CV_ENUM(DemosaicingCode,
+        cv::COLOR_BayerBG2BGR, cv::COLOR_BayerGB2BGR, cv::COLOR_BayerRG2BGR, cv::COLOR_BayerGR2BGR,
+        cv::COLOR_BayerBG2GRAY, cv::COLOR_BayerGB2GRAY, cv::COLOR_BayerRG2GRAY, cv::COLOR_BayerGR2GRAY,
+        cv::gpu::COLOR_BayerBG2BGR_MHT, cv::gpu::COLOR_BayerGB2BGR_MHT, cv::gpu::COLOR_BayerRG2BGR_MHT, cv::gpu::COLOR_BayerGR2BGR_MHT,
+        cv::gpu::COLOR_BayerBG2GRAY_MHT, cv::gpu::COLOR_BayerGB2GRAY_MHT, cv::gpu::COLOR_BayerRG2GRAY_MHT, cv::gpu::COLOR_BayerGR2GRAY_MHT)
+
+DEF_PARAM_TEST(Sz_Code, cv::Size, DemosaicingCode);
+
+PERF_TEST_P(Sz_Code, ImgProc_Demosaicing,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    ValuesIn(DemosaicingCode::all())))
+{
+    const cv::Size size = GET_PARAM(0);
+    const int code = GET_PARAM(1);
+
+    cv::Mat src(size, CV_8UC1);
+    declare.in(src, WARMUP_RNG);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
+
+        TEST_CYCLE() cv::gpu::demosaicing(d_src, dst, code);
+
+        GPU_SANITY_CHECK(dst);
+    }
+    else
+    {
+        if (code >= cv::COLOR_COLORCVT_MAX)
         {
-            cv::cvtColor(src, dst, info.code, info.dcn);
+            FAIL_NO_CPU();
         }
+        else
+        {
+            cv::Mat dst;
 
-        CPU_SANITY_CHECK(dst);
+            TEST_CYCLE() cv::cvtColor(src, dst, code);
+
+            CPU_SANITY_CHECK(dst);
+        }
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // SwapChannels
 
-PERF_TEST_P(Sz, ImgProc_SwapChannels, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_SwapChannels,
+            GPU_TYPICAL_MAT_SIZES)
 {
-    cv::Size size = GetParam();
+    const cv::Size size = GetParam();
 
     cv::Mat src(size, CV_8UC4);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
     const int dstOrder[] = {2, 1, 0, 3};
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-
-        cv::gpu::swapChannels(d_src, dstOrder);
+        cv::gpu::GpuMat dst(src);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::swapChannels(d_src, dstOrder);
-        }
+        TEST_CYCLE() cv::gpu::swapChannels(dst, dstOrder);
 
-        GPU_SANITY_CHECK(d_src);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -1599,113 +1453,116 @@ CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv:
 
 DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, AlphaOp);
 
-PERF_TEST_P(Sz_Type_Op, ImgProc_AlphaComp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4), ALL_ALPHA_OPS))
+PERF_TEST_P(Sz_Type_Op, ImgProc_AlphaComp,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
+                    ALL_ALPHA_OPS))
 {
-    cv::Size size = GET_PARAM(0);
-    int type = GET_PARAM(1);
-    int alpha_op = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int alpha_op = GET_PARAM(2);
 
     cv::Mat img1(size, type);
-    fillRandom(img1);
-
     cv::Mat img2(size, type);
-    fillRandom(img2);
+    declare.in(img1, img2, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_img1(img1);
-        cv::gpu::GpuMat d_img2(img2);
-        cv::gpu::GpuMat d_dst;
-
-        cv::gpu::alphaComp(d_img1, d_img2, d_dst, alpha_op);
+        const cv::gpu::GpuMat d_img1(img1);
+        const cv::gpu::GpuMat d_img2(img2);
+        cv::gpu::GpuMat dst;
 
-        TEST_CYCLE()
-        {
-            cv::gpu::alphaComp(d_img1, d_img2, d_dst, alpha_op);
-        }
+        TEST_CYCLE() cv::gpu::alphaComp(d_img1, d_img2, dst, alpha_op);
 
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // ImagePyramidBuild
 
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidBuild, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidBuild,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
+
+    const int nLayers = 5;
+    const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
 
         cv::gpu::ImagePyramid d_pyr;
 
-        d_pyr.build(d_src, 5);
+        TEST_CYCLE() d_pyr.build(d_src, nLayers);
 
-        TEST_CYCLE()
-        {
-            d_pyr.build(d_src, 5);
-        }
+        cv::gpu::GpuMat dst;
+        d_pyr.getLayer(dst, dstSize);
 
-        GPU_SANITY_CHECK(d_src);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // ImagePyramidGetLayer
 
-PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidGetLayer, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidGetLayer,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
 
-    cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10);
+    const int nLayers = 3;
+    const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        cv::gpu::ImagePyramid d_pyr(d_src, 3);
+        cv::gpu::ImagePyramid d_pyr(d_src, nLayers);
 
-        d_pyr.getLayer(d_dst, dstSize);
+        TEST_CYCLE() d_pyr.getLayer(dst, dstSize);
 
-        TEST_CYCLE()
-        {
-            d_pyr.getLayer(d_dst, dstSize);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
-namespace {
+//////////////////////////////////////////////////////////////////////
+// HoughLines
+
+namespace
+{
     struct Vec4iComparator
     {
         bool operator()(const cv::Vec4i& a, const cv::Vec4i b) const
@@ -1735,10 +1592,8 @@ namespace {
     };
 }
 
-//////////////////////////////////////////////////////////////////////
-// HoughLines
-
-PERF_TEST_P(Sz, ImgProc_HoughLines, GPU_TYPICAL_MAT_SIZES)
+PERF_TEST_P(Sz, ImgProc_HoughLines,
+            GPU_TYPICAL_MAT_SIZES)
 {
     declare.time(30.0);
 
@@ -1748,49 +1603,35 @@ PERF_TEST_P(Sz, ImgProc_HoughLines, GPU_TYPICAL_MAT_SIZES)
     const float theta = static_cast<float>(CV_PI / 180.0);
     const int threshold = 300;
 
-    cv::RNG rng(123456789);
-
     cv::Mat src(size, CV_8UC1, cv::Scalar::all(0));
-
-    const int numLines = rng.uniform(100, 300);
-    for (int i = 0; i < numLines; ++i)
-    {
-        cv::Point p1(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
-        cv::Point p2(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
-        cv::line(src, p1, p2, cv::Scalar::all(255), 2);
-    }
+    cv::line(src, cv::Point(0, 100), cv::Point(src.cols, 100), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(0, 200), cv::Point(src.cols, 200), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(0, 400), cv::Point(src.cols, 400), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(100, 0), cv::Point(100, src.rows), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(200, 0), cv::Point(200, src.rows), cv::Scalar::all(255), 1);
+    cv::line(src, cv::Point(400, 0), cv::Point(400, src.rows), cv::Scalar::all(255), 1);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_lines;
         cv::gpu::HoughLinesBuf d_buf;
 
-        cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold);
+        TEST_CYCLE() cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold);
-        }
-
-        cv::Mat h_lines(d_lines);
-        cv::Vec2f* begin = (cv::Vec2f*)(h_lines.ptr<char>(0));
-        cv::Vec2f* end = (cv::Vec2f*)(h_lines.ptr<char>(0) + (h_lines.cols) * 2 * sizeof(float));
+        cv::Mat gpu_lines(d_lines.row(0));
+        cv::Vec2f* begin = gpu_lines.ptr<cv::Vec2f>(0);
+        cv::Vec2f* end = begin + gpu_lines.cols;
         std::sort(begin, end, Vec2fComparator());
-        SANITY_CHECK(h_lines);
+        SANITY_CHECK(gpu_lines);
     }
     else
     {
-        std::vector<cv::Vec2f> lines;
-        cv::HoughLines(src, lines, rho, theta, threshold);
+        std::vector<cv::Vec2f> cpu_lines;
 
-        TEST_CYCLE()
-        {
-            cv::HoughLines(src, lines, rho, theta, threshold);
-        }
+        TEST_CYCLE() cv::HoughLines(src, cpu_lines, rho, theta, threshold);
 
-        std::sort(lines.begin(), lines.end(), Vec2fComparator());
-        SANITY_CHECK(lines);
+        SANITY_CHECK(cpu_lines);
     }
 }
 
@@ -1799,11 +1640,12 @@ PERF_TEST_P(Sz, ImgProc_HoughLines, GPU_TYPICAL_MAT_SIZES)
 
 DEF_PARAM_TEST_1(Image, std::string);
 
-PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "stitching/a1.png"))
+PERF_TEST_P(Image, ImgProc_HoughLinesP,
+            testing::Values("cv/shared/pic5.png", "stitching/a1.png"))
 {
     declare.time(30.0);
 
-    std::string fileName = getDataPath(GetParam());
+    const std::string fileName = getDataPath(GetParam());
 
     const float rho = 1.0f;
     const float theta = static_cast<float>(CV_PI / 180.0);
@@ -1811,42 +1653,33 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "s
     const int minLineLenght = 50;
     const int maxLineGap = 5;
 
-    cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
+    const cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
 
     cv::Mat mask;
     cv::Canny(image, mask, 50, 100);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_mask(mask);
+        const cv::gpu::GpuMat d_mask(mask);
         cv::gpu::GpuMat d_lines;
         cv::gpu::HoughLinesBuf d_buf;
 
-        cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
-        }
+        TEST_CYCLE() cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
 
-        cv::Mat h_lines(d_lines);
-        cv::Vec4i* begin = h_lines.ptr<cv::Vec4i>();
-        cv::Vec4i* end = h_lines.ptr<cv::Vec4i>() + h_lines.cols;
+        cv::Mat gpu_lines(d_lines);
+        cv::Vec4i* begin = gpu_lines.ptr<cv::Vec4i>();
+        cv::Vec4i* end = begin + gpu_lines.cols;
         std::sort(begin, end, Vec4iComparator());
-        SANITY_CHECK(h_lines);
+        SANITY_CHECK(gpu_lines);
     }
     else
     {
-        std::vector<cv::Vec4i> lines;
-        cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);
+        std::vector<cv::Vec4i> cpu_lines;
 
-        TEST_CYCLE()
-        {
-            cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);
-        }
+        TEST_CYCLE() cv::HoughLinesP(mask, cpu_lines, rho, theta, threshold, minLineLenght, maxLineGap);
 
-        std::sort(lines.begin(), lines.end(), Vec4iComparator());
-        SANITY_CHECK(lines);
+        SANITY_CHECK(cpu_lines);
     }
 }
 
@@ -1855,7 +1688,10 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "s
 
 DEF_PARAM_TEST(Sz_Dp_MinDist, cv::Size, float, float);
 
-PERF_TEST_P(Sz_Dp_MinDist, ImgProc_HoughCircles, Combine(GPU_TYPICAL_MAT_SIZES, Values(1.0f, 2.0f, 4.0f), Values(1.0f, 10.0f)))
+PERF_TEST_P(Sz_Dp_MinDist, ImgProc_HoughCircles,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(1.0f, 2.0f, 4.0f),
+                    Values(1.0f)))
 {
     declare.time(30.0);
 
@@ -1868,51 +1704,32 @@ PERF_TEST_P(Sz_Dp_MinDist, ImgProc_HoughCircles, Combine(GPU_TYPICAL_MAT_SIZES,
     const int cannyThreshold = 100;
     const int votesThreshold = 15;
 
-    cv::RNG rng(123456789);
-
     cv::Mat src(size, CV_8UC1, cv::Scalar::all(0));
-
-    const int numCircles = rng.uniform(50, 100);
-    for (int i = 0; i < numCircles; ++i)
-    {
-        cv::Point center(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
-        const int radius = rng.uniform(minRadius, maxRadius + 1);
-
-        cv::circle(src, center, radius, cv::Scalar::all(255), -1);
-    }
+    cv::circle(src, cv::Point(100, 100), 20, cv::Scalar::all(255), -1);
+    cv::circle(src, cv::Point(200, 200), 25, cv::Scalar::all(255), -1);
+    cv::circle(src, cv::Point(200, 100), 25, cv::Scalar::all(255), -1);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_src(src);
         cv::gpu::GpuMat d_circles;
         cv::gpu::HoughCirclesBuf d_buf;
 
-        cv::gpu::HoughCircles(d_src, d_circles, d_buf, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::HoughCircles(d_src, d_circles, d_buf, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
-        }
+        TEST_CYCLE() cv::gpu::HoughCircles(d_src, d_circles, d_buf, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
 
-        cv::Mat h_circles(d_circles);
-        cv::Vec3f* begin = (cv::Vec3f*)(h_circles.ptr<char>(0));
-        cv::Vec3f* end = (cv::Vec3f*)(h_circles.ptr<char>(0) + (h_circles.cols) * 3 * sizeof(float));
+        cv::Mat gpu_circles(d_circles);
+        cv::Vec3f* begin = gpu_circles.ptr<cv::Vec3f>(0);
+        cv::Vec3f* end = begin + gpu_circles.cols;
         std::sort(begin, end, Vec3fComparator());
-        SANITY_CHECK(h_circles);
+        SANITY_CHECK(gpu_circles);
     }
     else
     {
-        std::vector<cv::Vec3f> circles;
+        std::vector<cv::Vec3f> cpu_circles;
 
-        cv::HoughCircles(src, circles, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+        TEST_CYCLE() cv::HoughCircles(src, cpu_circles, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
 
-        TEST_CYCLE()
-        {
-            cv::HoughCircles(src, circles, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
-        }
-
-        std::sort(circles.begin(), circles.end(), Vec3fComparator());
-        SANITY_CHECK(circles);
+        SANITY_CHECK(cpu_circles);
     }
 }
 
@@ -1923,9 +1740,9 @@ CV_FLAGS(GHMethod, cv::GHT_POSITION, cv::GHT_SCALE, cv::GHT_ROTATION);
 
 DEF_PARAM_TEST(Method_Sz, GHMethod, cv::Size);
 
-PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough, Combine(
-            Values(GHMethod(cv::GHT_POSITION), GHMethod(cv::GHT_POSITION | cv::GHT_SCALE), GHMethod(cv::GHT_POSITION | cv::GHT_ROTATION), GHMethod(cv::GHT_POSITION | cv::GHT_SCALE | cv::GHT_ROTATION)),
-            GPU_TYPICAL_MAT_SIZES))
+PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough,
+            Combine(Values(GHMethod(cv::GHT_POSITION), GHMethod(cv::GHT_POSITION | cv::GHT_SCALE), GHMethod(cv::GHT_POSITION | cv::GHT_ROTATION), GHMethod(cv::GHT_POSITION | cv::GHT_SCALE | cv::GHT_ROTATION)),
+                    GPU_TYPICAL_MAT_SIZES))
 {
     declare.time(10);
 
@@ -1936,6 +1753,7 @@ PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough, Combine(
     ASSERT_FALSE(templ.empty());
 
     cv::Mat image(imageSize, CV_8UC1, cv::Scalar::all(0));
+    templ.copyTo(image(cv::Rect(50, 50, templ.cols, templ.rows)));
 
     cv::RNG rng(123456789);
     const int objCount = rng.uniform(5, 15);
@@ -1967,10 +1785,10 @@ PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough, Combine(
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_edges(edges);
-        cv::gpu::GpuMat d_dx(dx);
-        cv::gpu::GpuMat d_dy(dy);
-        cv::gpu::GpuMat d_position;
+        const cv::gpu::GpuMat d_edges(edges);
+        const cv::gpu::GpuMat d_dx(dx);
+        const cv::gpu::GpuMat d_dy(dy);
+        cv::gpu::GpuMat posAndVotes;
 
         cv::Ptr<cv::gpu::GeneralizedHough_GPU> d_hough = cv::gpu::GeneralizedHough_GPU::create(method);
         if (method & cv::GHT_ROTATION)
@@ -1981,14 +1799,10 @@ PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough, Combine(
 
         d_hough->setTemplate(cv::gpu::GpuMat(templ));
 
-        d_hough->detect(d_edges, d_dx, d_dy, d_position);
-
-        TEST_CYCLE()
-        {
-            d_hough->detect(d_edges, d_dx, d_dy, d_position);
-        }
+        TEST_CYCLE() d_hough->detect(d_edges, d_dx, d_dy, posAndVotes);
 
-        GPU_SANITY_CHECK(d_position);
+        const cv::gpu::GpuMat positions(1, posAndVotes.cols, CV_32FC4, posAndVotes.data);
+        GPU_SANITY_CHECK(positions);
     }
     else
     {
@@ -2003,16 +1817,8 @@ PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough, Combine(
 
         hough->setTemplate(templ);
 
-        hough->detect(edges, dx, dy, positions);
+        TEST_CYCLE() hough->detect(edges, dx, dy, positions);
 
-        TEST_CYCLE()
-        {
-            hough->detect(edges, dx, dy, positions);
-        }
-
-        CPU_SANITY_CHECK(dx);
-        CPU_SANITY_CHECK(dy);
+        CPU_SANITY_CHECK(positions);
     }
 }
-
-} // namespace
index 3b10ba3..f3ad12c 100644 (file)
@@ -3,8 +3,6 @@
 using namespace std;
 using namespace testing;
 
-namespace {
-
 DEF_PARAM_TEST_1(Image, string);
 
 struct GreedyLabeling
@@ -100,28 +98,45 @@ struct GreedyLabeling
     dot* stack;
 };
 
-PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/aloe-disp.png"))
+PERF_TEST_P(Image, DISABLED_Labeling_ConnectivityMask,
+            Values<string>("gpu/labeling/aloe-disp.png"))
 {
     declare.time(1.0);
 
-    cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
 
     if (PERF_RUN_GPU())
     {
+        cv::gpu::GpuMat d_image(image);
         cv::gpu::GpuMat mask;
-        mask.create(image.rows, image.cols, CV_8UC1);
 
-        cv::gpu::GpuMat components;
-        components.create(image.rows, image.cols, CV_32SC1);
+        TEST_CYCLE() cv::gpu::connectivityMask(d_image, mask, cv::Scalar::all(0), cv::Scalar::all(2));
 
-        cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
+        GPU_SANITY_CHECK(mask);
+    }
+    else
+    {
+        FAIL_NO_CPU();
+    }
+}
 
-        ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components));
+PERF_TEST_P(Image, DISABLED_Labeling_ConnectedComponents,
+            Values<string>("gpu/labeling/aloe-disp.png"))
+{
+    declare.time(1.0);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::labelComponents(mask, components);
-        }
+    const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::GpuMat d_mask;
+        cv::gpu::connectivityMask(cv::gpu::GpuMat(image), d_mask, cv::Scalar::all(0), cv::Scalar::all(2));
+
+        cv::gpu::GpuMat components;
+
+        TEST_CYCLE() cv::gpu::labelComponents(d_mask, components);
 
         GPU_SANITY_CHECK(components);
     }
@@ -129,17 +144,9 @@ PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/al
     {
         GreedyLabeling host(image);
 
-        host(host._labels);
+        TEST_CYCLE() host(host._labels);
 
-        declare.time(1.0);
-
-        TEST_CYCLE()
-        {
-            host(host._labels);
-        }
-
-        CPU_SANITY_CHECK(host._labels);
+        cv::Mat components = host._labels;
+        CPU_SANITY_CHECK(components);
     }
 }
-
-} // namespace
index f8eb23d..312b744 100644 (file)
@@ -1,7 +1,5 @@
 #include "perf_precomp.hpp"
 
-namespace{
-
 static void printOsInfo()
 {
 #if defined _WIN32
@@ -69,6 +67,4 @@ static void printCudaInfo()
 #endif
 }
 
-}
-
-CV_PERF_TEST_MAIN(gpu, printCudaInfo())
\ No newline at end of file
+CV_PERF_TEST_MAIN(gpu, printCudaInfo())
index b6d4a11..1696e3a 100644 (file)
 using namespace std;
 using namespace testing;
 
-namespace {
-
 //////////////////////////////////////////////////////////////////////
 // SetTo
 
-PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
-    cv::Scalar val(1, 2, 3, 4);
+    const cv::Scalar val(1, 2, 3, 4);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(size, type);
-
-        d_src.setTo(val);
+        cv::gpu::GpuMat dst(size, type);
 
-        TEST_CYCLE()
-        {
-            d_src.setTo(val);
-        }
+        TEST_CYCLE() dst.setTo(val);
 
-        GPU_SANITY_CHECK(d_src);
+        GPU_SANITY_CHECK(dst);
     }
     else
     {
-        cv::Mat src(size, type);
+        cv::Mat dst(size, type);
 
-        src.setTo(val);
+        TEST_CYCLE() dst.setTo(val);
 
-        TEST_CYCLE()
-        {
-            src.setTo(val);
-        }
-
-        CPU_SANITY_CHECK(src);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // SetToMasked
 
-PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
-
     cv::Mat mask(size, CV_8UC1);
-    fillRandom(mask, 0, 2);
+    declare.in(src, mask, WARMUP_RNG);
 
-    cv::Scalar val(1, 2, 3, 4);
+    const cv::Scalar val(1, 2, 3, 4);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_mask(mask);
-
-        d_src.setTo(val, d_mask);
+        cv::gpu::GpuMat dst(src);
+        const cv::gpu::GpuMat d_mask(mask);
 
-        TEST_CYCLE()
-        {
-            d_src.setTo(val, d_mask);
-        }
+        TEST_CYCLE() dst.setTo(val, d_mask);
 
-        GPU_SANITY_CHECK(d_src);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
-        src.setTo(val, mask);
+        cv::Mat dst = src;
 
-        TEST_CYCLE()
-        {
-            src.setTo(val, mask);
-        }
+        TEST_CYCLE() dst.setTo(val, mask);
 
-        CPU_SANITY_CHECK(src);
+        CPU_SANITY_CHECK(dst);
     }
 }
 
 //////////////////////////////////////////////////////////////////////
 // CopyToMasked
 
-PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    GPU_CHANNELS_1_3_4))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth = GET_PARAM(1);
-    int channels = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth = GET_PARAM(1);
+    const int channels = GET_PARAM(2);
 
-    int type = CV_MAKE_TYPE(depth, channels);
+    const int type = CV_MAKE_TYPE(depth, channels);
 
     cv::Mat src(size, type);
-    fillRandom(src);
-
     cv::Mat mask(size, CV_8UC1);
-    fillRandom(mask, 0, 2);
+    declare.in(src, mask, WARMUP_RNG);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_mask(mask);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        const cv::gpu::GpuMat d_mask(mask);
+        cv::gpu::GpuMat dst(d_src.size(), d_src.type(), cv::Scalar::all(0));
 
-        d_src.copyTo(d_dst, d_mask);
+        TEST_CYCLE() d_src.copyTo(dst, d_mask);
 
-        TEST_CYCLE()
-        {
-            d_src.copyTo(d_dst, d_mask);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
-        cv::Mat dst;
+        cv::Mat dst(src.size(), src.type(), cv::Scalar::all(0));
 
-        src.copyTo(dst, mask);
-
-        TEST_CYCLE()
-        {
-            src.copyTo(dst, mask);
-        }
+        TEST_CYCLE() src.copyTo(dst, mask);
 
         CPU_SANITY_CHECK(dst);
     }
@@ -144,42 +119,36 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Valu
 
 DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
 
-PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
+PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo,
+            Combine(GPU_TYPICAL_MAT_SIZES,
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F),
+                    Values(CV_8U, CV_16U, CV_32F, CV_64F)))
 {
-    cv::Size size = GET_PARAM(0);
-    int depth1 = GET_PARAM(1);
-    int depth2 = GET_PARAM(2);
+    const cv::Size size = GET_PARAM(0);
+    const int depth1 = GET_PARAM(1);
+    const int depth2 = GET_PARAM(2);
 
     cv::Mat src(size, depth1);
-    fillRandom(src);
+    declare.in(src, WARMUP_RNG);
+
+    const double a = 0.5;
+    const double b = 1.0;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_dst;
+        const cv::gpu::GpuMat d_src(src);
+        cv::gpu::GpuMat dst;
 
-        d_src.convertTo(d_dst, depth2, 0.5, 1.0);
+        TEST_CYCLE() d_src.convertTo(dst, depth2, a, b);
 
-        TEST_CYCLE()
-        {
-            d_src.convertTo(d_dst, depth2, 0.5, 1.0);
-        }
-
-        GPU_SANITY_CHECK(d_dst);
+        GPU_SANITY_CHECK(dst, 1e-10);
     }
     else
     {
         cv::Mat dst;
 
-        src.convertTo(dst, depth2, 0.5, 1.0);
-
-        TEST_CYCLE()
-        {
-            src.convertTo(dst, depth2, 0.5, 1.0);
-        }
+        TEST_CYCLE() src.convertTo(dst, depth2, a, b);
 
         CPU_SANITY_CHECK(dst);
     }
 }
-
-} // namespace
index 6d040ac..969ac10 100644 (file)
@@ -3,90 +3,47 @@
 using namespace std;
 using namespace testing;
 
-namespace {
-
 ///////////////////////////////////////////////////////////////
 // HOG
 
 DEF_PARAM_TEST_1(Image, string);
 
-PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
+PERF_TEST_P(Image, ObjDetect_HOG,
+            Values<string>("gpu/hog/road.png",
+                           "gpu/caltech/image_00000009_0.png",
+                           "gpu/caltech/image_00000032_0.png",
+                           "gpu/caltech/image_00000165_0.png",
+                           "gpu/caltech/image_00000261_0.png",
+                           "gpu/caltech/image_00000469_0.png",
+                           "gpu/caltech/image_00000527_0.png",
+                           "gpu/caltech/image_00000574_0.png"))
 {
-    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
-    std::vector<cv::Rect> found_locations;
-
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_img(img);
+        const cv::gpu::GpuMat d_img(img);
+        std::vector<cv::Rect> gpu_found_locations;
 
         cv::gpu::HOGDescriptor d_hog;
         d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
 
-        d_hog.detectMultiScale(d_img, found_locations);
+        TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
 
-        TEST_CYCLE()
-        {
-            d_hog.detectMultiScale(d_img, found_locations);
-        }
+        SANITY_CHECK(gpu_found_locations);
     }
     else
     {
-        cv::HOGDescriptor hog;
-        hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
-
-        hog.detectMultiScale(img, found_locations);
-
-        TEST_CYCLE()
-        {
-            hog.detectMultiScale(img, found_locations);
-        }
-    }
-
-    SANITY_CHECK(found_locations);
-}
-
-//===========test for CalTech data =============//
-DEF_PARAM_TEST_1(HOG, string);
-
-PERF_TEST_P(HOG, CalTech, Values<string>("gpu/caltech/image_00000009_0.png", "gpu/caltech/image_00000032_0.png",
-    "gpu/caltech/image_00000165_0.png", "gpu/caltech/image_00000261_0.png", "gpu/caltech/image_00000469_0.png",
-    "gpu/caltech/image_00000527_0.png", "gpu/caltech/image_00000574_0.png"))
-{
-    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
-
-    std::vector<cv::Rect> found_locations;
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_img(img);
-
-        cv::gpu::HOGDescriptor d_hog;
-        d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
-
-        d_hog.detectMultiScale(d_img, found_locations);
+        std::vector<cv::Rect> cpu_found_locations;
 
-        TEST_CYCLE()
-        {
-            d_hog.detectMultiScale(d_img, found_locations);
-        }
-    }
-    else
-    {
         cv::HOGDescriptor hog;
         hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
 
-        hog.detectMultiScale(img, found_locations);
+        TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
 
-        TEST_CYCLE()
-        {
-            hog.detectMultiScale(img, found_locations);
-        }
+        SANITY_CHECK(cpu_found_locations);
     }
-
-    SANITY_CHECK(found_locations);
 }
 
 ///////////////////////////////////////////////////////////////
@@ -96,9 +53,9 @@ typedef pair<string, string> pair_string;
 DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
 
 PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
-    Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
+            Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
 {
-    cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
     if (PERF_RUN_GPU())
@@ -106,33 +63,28 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
         cv::gpu::CascadeClassifier_GPU d_cascade;
         ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
 
-        cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_objects_buffer;
+        const cv::gpu::GpuMat d_img(img);
+        cv::gpu::GpuMat objects_buffer;
+        int detections_num = 0;
 
-        d_cascade.detectMultiScale(d_img, d_objects_buffer);
+        TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
 
-        TEST_CYCLE()
-        {
-            d_cascade.detectMultiScale(d_img, d_objects_buffer);
-        }
-
-        GPU_SANITY_CHECK(d_objects_buffer);
+        std::vector<cv::Rect> gpu_rects(detections_num);
+        cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
+        objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
+        cv::groupRectangles(gpu_rects, 3, 0.2);
+        SANITY_CHECK(gpu_rects);
     }
     else
     {
         cv::CascadeClassifier cascade;
         ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
 
-        std::vector<cv::Rect> rects;
-
-        cascade.detectMultiScale(img, rects);
+        std::vector<cv::Rect> cpu_rects;
 
-        TEST_CYCLE()
-        {
-            cascade.detectMultiScale(img, rects);
-        }
+        TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
 
-        CPU_SANITY_CHECK(rects);
+        SANITY_CHECK(cpu_rects);
     }
 }
 
@@ -140,9 +92,9 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
 // LBP cascade
 
 PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
-    Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
+            Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
 {
-    cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
     if (PERF_RUN_GPU())
@@ -150,34 +102,27 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
         cv::gpu::CascadeClassifier_GPU d_cascade;
         ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
 
-        cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_gpu_rects;
+        const cv::gpu::GpuMat d_img(img);
+        cv::gpu::GpuMat objects_buffer;
+        int detections_num = 0;
 
-        d_cascade.detectMultiScale(d_img, d_gpu_rects);
+        TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
 
-        TEST_CYCLE()
-        {
-            d_cascade.detectMultiScale(d_img, d_gpu_rects);
-        }
-
-        GPU_SANITY_CHECK(d_gpu_rects);
+        std::vector<cv::Rect> gpu_rects(detections_num);
+        cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
+        objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
+        cv::groupRectangles(gpu_rects, 3, 0.2);
+        SANITY_CHECK(gpu_rects);
     }
     else
     {
         cv::CascadeClassifier cascade;
         ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
 
-        std::vector<cv::Rect> rects;
-
-        cascade.detectMultiScale(img, rects);
+        std::vector<cv::Rect> cpu_rects;
 
-        TEST_CYCLE()
-        {
-            cascade.detectMultiScale(img, rects);
-        }
+        TEST_CYCLE() cascade.detectMultiScale(img, cpu_rects);
 
-        CPU_SANITY_CHECK(rects);
+        SANITY_CHECK(cpu_rects);
     }
 }
-
-} // namespace
\ No newline at end of file
index 83213a1..61c2687 100644 (file)
@@ -2,6 +2,7 @@
 
 using namespace std;
 using namespace testing;
+using namespace perf;
 
 namespace cv
 {
@@ -11,60 +12,15 @@ namespace cv
     }
 }
 
-namespace {
-
 //////////////////////////////////////////////////////
-// BroxOpticalFlow
+// InterpolateFrames
 
 typedef pair<string, string> pair_string;
 
 DEF_PARAM_TEST_1(ImagePair, pair_string);
 
-PERF_TEST_P(ImagePair, Video_BroxOpticalFlow,
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
-{
-    declare.time(10);
-
-    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame0.empty());
-
-    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(frame1.empty());
-
-    frame0.convertTo(frame0, CV_32FC1, 1.0 / 255.0);
-    frame1.convertTo(frame1, CV_32FC1, 1.0 / 255.0);
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_u;
-        cv::gpu::GpuMat d_v;
-
-        cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
-                                        10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
-
-        d_flow(d_frame0, d_frame1, d_u, d_v);
-
-        TEST_CYCLE()
-        {
-            d_flow(d_frame0, d_frame1, d_u, d_v);
-        }
-
-        GPU_SANITY_CHECK(d_u);
-        GPU_SANITY_CHECK(d_v);
-    }
-    else
-    {
-        FAIL() << "No such CPU implementation analogy";
-    }
-}
-
-//////////////////////////////////////////////////////
-// InterpolateFrames
-
 PERF_TEST_P(ImagePair, Video_InterpolateFrames,
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
+            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
@@ -77,8 +33,8 @@ PERF_TEST_P(ImagePair, Video_InterpolateFrames,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
         cv::gpu::GpuMat d_fu, d_fv;
         cv::gpu::GpuMat d_bu, d_bv;
 
@@ -88,24 +44,16 @@ PERF_TEST_P(ImagePair, Video_InterpolateFrames,
         d_flow(d_frame0, d_frame1, d_fu, d_fv);
         d_flow(d_frame1, d_frame0, d_bu, d_bv);
 
-        cv::gpu::GpuMat d_newFrame;
+        cv::gpu::GpuMat newFrame;
         cv::gpu::GpuMat d_buf;
 
-        cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, d_newFrame, d_buf);
+        TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, d_newFrame, d_buf);
-        }
-
-        GPU_SANITY_CHECK(d_fu);
-        GPU_SANITY_CHECK(d_fv);
-        GPU_SANITY_CHECK(d_bu);
-        GPU_SANITY_CHECK(d_bv);
+        GPU_SANITY_CHECK(newFrame);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -113,7 +61,7 @@ PERF_TEST_P(ImagePair, Video_InterpolateFrames,
 // CreateOpticalFlowNeedleMap
 
 PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
+            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
@@ -126,31 +74,26 @@ PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_u;
-        cv::gpu::GpuMat d_v;
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
+        cv::gpu::GpuMat u;
+        cv::gpu::GpuMat v;
 
         cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                         10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
 
-        d_flow(d_frame0, d_frame1, d_u, d_v);
+        d_flow(d_frame0, d_frame1, u, v);
 
-        cv::gpu::GpuMat d_vertex, d_colors;
+        cv::gpu::GpuMat vertex, colors;
 
-        cv::gpu::createOpticalFlowNeedleMap(d_u, d_v, d_vertex, d_colors);
-
-        TEST_CYCLE()
-        {
-            cv::gpu::createOpticalFlowNeedleMap(d_u, d_v, d_vertex, d_colors);
-        }
+        TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
 
-        GPU_SANITY_CHECK(d_vertex);
-        GPU_SANITY_CHECK(d_colors);
+        GPU_SANITY_CHECK(vertex);
+        GPU_SANITY_CHECK(colors);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -160,71 +103,103 @@ PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
 DEF_PARAM_TEST(Image_MinDistance, string, double);
 
 PERF_TEST_P(Image_MinDistance, Video_GoodFeaturesToTrack,
-    Combine(Values<string>("gpu/perf/aloe.png"), Values(0.0, 3.0)))
+            Combine(Values<string>("gpu/perf/aloe.png"),
+                    Values(0.0, 3.0)))
 {
-    string fileName = GET_PARAM(0);
-    double minDistance = GET_PARAM(1);
+    const string fileName = GET_PARAM(0);
+    const double minDistance = GET_PARAM(1);
 
-    cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
+    const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(image.empty());
 
+    const int maxCorners = 8000;
+    const double qualityLevel = 0.01;
+
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(8000, 0.01, minDistance);
+        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance);
 
-        cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat d_pts;
+        const cv::gpu::GpuMat d_image(image);
+        cv::gpu::GpuMat pts;
 
-        d_detector(d_image, d_pts);
+        TEST_CYCLE() d_detector(d_image, pts);
 
-        TEST_CYCLE()
-        {
-            d_detector(d_image, d_pts);
-        }
-
-        GPU_SANITY_CHECK(d_pts);
+        GPU_SANITY_CHECK(pts);
     }
     else
     {
         cv::Mat pts;
 
-        cv::goodFeaturesToTrack(image, pts, 8000, 0.01, minDistance);
-
-        TEST_CYCLE()
-        {
-            cv::goodFeaturesToTrack(image, pts, 8000, 0.01, minDistance);
-        }
+        TEST_CYCLE() cv::goodFeaturesToTrack(image, pts, maxCorners, qualityLevel, minDistance);
 
         CPU_SANITY_CHECK(pts);
     }
 }
 
 //////////////////////////////////////////////////////
+// BroxOpticalFlow
+
+PERF_TEST_P(ImagePair, Video_BroxOpticalFlow,
+            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
+{
+    declare.time(10);
+
+    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty());
+
+    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty());
+
+    frame0.convertTo(frame0, CV_32FC1, 1.0 / 255.0);
+    frame1.convertTo(frame1, CV_32FC1, 1.0 / 255.0);
+
+    if (PERF_RUN_GPU())
+    {
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
+        cv::gpu::GpuMat u;
+        cv::gpu::GpuMat v;
+
+        cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
+                                        10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
+
+        TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);
+
+        GPU_SANITY_CHECK(u);
+        GPU_SANITY_CHECK(v);
+    }
+    else
+    {
+        FAIL_NO_CPU();
+    }
+}
+
+//////////////////////////////////////////////////////
 // PyrLKOpticalFlowSparse
 
 DEF_PARAM_TEST(ImagePair_Gray_NPts_WinSz_Levels_Iters, pair_string, bool, int, int, int, int);
 
-PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse, Combine(
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
-    Bool(),
-    Values(1000, 2000, 4000, 8000),
-    Values(9, 13, 17, 21),
-    Values(1, 2, 3),
-    Values(1, 10, 30)))
+PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse,
+            Combine(Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
+                    Bool(),
+                    Values(8000),
+                    Values(21),
+                    Values(1, 3),
+                    Values(1, 30)))
 {
     declare.time(20.0);
 
-    pair_string imagePair = GET_PARAM(0);
-    bool useGray = GET_PARAM(1);
-    int points = GET_PARAM(2);
-    int winSize = GET_PARAM(3);
-    int levels = GET_PARAM(4);
-    int iters = GET_PARAM(5);
+    const pair_string imagePair = GET_PARAM(0);
+    const bool useGray = GET_PARAM(1);
+    const int points = GET_PARAM(2);
+    const int winSize = GET_PARAM(3);
+    const int levels = GET_PARAM(4);
+    const int iters = GET_PARAM(5);
 
-    cv::Mat frame0 = readImage(imagePair.first, useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    const cv::Mat frame0 = readImage(imagePair.first, useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage(imagePair.second, useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    const cv::Mat frame1 = readImage(imagePair.second, useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame1.empty());
 
     cv::Mat gray_frame;
@@ -238,36 +213,28 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_pts(pts.reshape(2, 1));
+        const cv::gpu::GpuMat d_pts(pts.reshape(2, 1));
 
         cv::gpu::PyrLKOpticalFlow d_pyrLK;
         d_pyrLK.winSize = cv::Size(winSize, winSize);
         d_pyrLK.maxLevel = levels - 1;
         d_pyrLK.iters = iters;
 
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_nextPts;
-        cv::gpu::GpuMat d_status;
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
+        cv::gpu::GpuMat nextPts;
+        cv::gpu::GpuMat status;
 
-        d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status);
-
-        TEST_CYCLE()
-        {
-            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status);
-        }
+        TEST_CYCLE() d_pyrLK.sparse(d_frame0, d_frame1, d_pts, nextPts, status);
 
-        GPU_SANITY_CHECK(d_status);
+        GPU_SANITY_CHECK(nextPts);
+        GPU_SANITY_CHECK(status);
     }
     else
     {
         cv::Mat nextPts;
         cv::Mat status;
 
-        cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
-                                 cv::Size(winSize, winSize), levels - 1,
-                                 cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iters, 0.01));
-
         TEST_CYCLE()
         {
             cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
@@ -275,6 +242,7 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse
                                      cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iters, 0.01));
         }
 
+        CPU_SANITY_CHECK(nextPts);
         CPU_SANITY_CHECK(status);
     }
 }
@@ -284,50 +252,45 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, Video_PyrLKOpticalFlowSparse
 
 DEF_PARAM_TEST(ImagePair_WinSz_Levels_Iters, pair_string, int, int, int);
 
-PERF_TEST_P(ImagePair_WinSz_Levels_Iters, Video_PyrLKOpticalFlowDense, Combine(
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
-    Values(3, 5, 7, 9, 13, 17, 21),
-    Values(1, 2, 3),
-    Values(1, 10)))
+PERF_TEST_P(ImagePair_WinSz_Levels_Iters, Video_PyrLKOpticalFlowDense,
+            Combine(Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")),
+                    Values(3, 5, 7, 9, 13, 17, 21),
+                    Values(1, 3),
+                    Values(1, 10)))
 {
     declare.time(30);
 
-    pair_string imagePair = GET_PARAM(0);
-    int winSize = GET_PARAM(1);
-    int levels = GET_PARAM(2);
-    int iters = GET_PARAM(3);
+    const pair_string imagePair = GET_PARAM(0);
+    const int winSize = GET_PARAM(1);
+    const int levels = GET_PARAM(2);
+    const int iters = GET_PARAM(3);
 
-    cv::Mat frame0 = readImage(imagePair.first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame0 = readImage(imagePair.first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage(imagePair.second, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame1 = readImage(imagePair.second, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_u;
-        cv::gpu::GpuMat d_v;
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
+        cv::gpu::GpuMat u;
+        cv::gpu::GpuMat v;
 
         cv::gpu::PyrLKOpticalFlow d_pyrLK;
         d_pyrLK.winSize = cv::Size(winSize, winSize);
         d_pyrLK.maxLevel = levels - 1;
         d_pyrLK.iters = iters;
 
-        d_pyrLK.dense(d_frame0, d_frame1, d_u, d_v);
+        TEST_CYCLE() d_pyrLK.dense(d_frame0, d_frame1, u, v);
 
-        TEST_CYCLE()
-        {
-            d_pyrLK.dense(d_frame0, d_frame1, d_u, d_v);
-        }
-
-        GPU_SANITY_CHECK(d_u);
-        GPU_SANITY_CHECK(d_v);
+        GPU_SANITY_CHECK(u);
+        GPU_SANITY_CHECK(v);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -335,30 +298,30 @@ PERF_TEST_P(ImagePair_WinSz_Levels_Iters, Video_PyrLKOpticalFlowDense, Combine(
 // FarnebackOpticalFlow
 
 PERF_TEST_P(ImagePair, Video_FarnebackOpticalFlow,
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
+            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(10);
 
-    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
-    int numLevels = 5;
-    double pyrScale = 0.5;
-    int winSize = 13;
-    int numIters = 10;
-    int polyN = 5;
-    double polySigma = 1.1;
-    int flags = 0;
+    const int numLevels = 5;
+    const double pyrScale = 0.5;
+    const int winSize = 13;
+    const int numIters = 10;
+    const int polyN = 5;
+    const double polySigma = 1.1;
+    const int flags = 0;
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_u;
-        cv::gpu::GpuMat d_v;
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
+        cv::gpu::GpuMat u;
+        cv::gpu::GpuMat v;
 
         cv::gpu::FarnebackOpticalFlow d_farneback;
         d_farneback.numLevels = numLevels;
@@ -369,26 +332,16 @@ PERF_TEST_P(ImagePair, Video_FarnebackOpticalFlow,
         d_farneback.polySigma = polySigma;
         d_farneback.flags = flags;
 
-        d_farneback(d_frame0, d_frame1, d_u, d_v);
+        TEST_CYCLE() d_farneback(d_frame0, d_frame1, u, v);
 
-        TEST_CYCLE()
-        {
-            d_farneback(d_frame0, d_frame1, d_u, d_v);
-        }
-
-        GPU_SANITY_CHECK(d_u);
-        GPU_SANITY_CHECK(d_v);
+        GPU_SANITY_CHECK(u, 1e-4);
+        GPU_SANITY_CHECK(v, 1e-4);
     }
     else
     {
         cv::Mat flow;
 
-        cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
-
-        TEST_CYCLE()
-        {
-            cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
-        }
+        TEST_CYCLE() cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
 
         CPU_SANITY_CHECK(flow);
     }
@@ -398,34 +351,29 @@ PERF_TEST_P(ImagePair, Video_FarnebackOpticalFlow,
 // OpticalFlowDual_TVL1
 
 PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
+            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(20);
 
-    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_flowx;
-        cv::gpu::GpuMat d_flowy;
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
+        cv::gpu::GpuMat u;
+        cv::gpu::GpuMat v;
 
         cv::gpu::OpticalFlowDual_TVL1_GPU d_alg;
 
-        d_alg(d_frame0, d_frame1, d_flowx, d_flowy);
-
-        TEST_CYCLE()
-        {
-            d_alg(d_frame0, d_frame1, d_flowx, d_flowy);
-        }
+        TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
 
-        GPU_SANITY_CHECK(d_flowx);
-        GPU_SANITY_CHECK(d_flowy);
+        GPU_SANITY_CHECK(u, 1e-4);
+        GPU_SANITY_CHECK(v, 1e-4);
     }
     else
     {
@@ -433,12 +381,7 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
 
         cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
 
-        alg->calc(frame0, frame1, flow);
-
-        TEST_CYCLE()
-        {
-            alg->calc(frame0, frame1, flow);
-        }
+        TEST_CYCLE() alg->calc(frame0, frame1, flow);
 
         CPU_SANITY_CHECK(flow);
     }
@@ -466,98 +409,73 @@ void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
 }
 
 PERF_TEST_P(ImagePair, Video_OpticalFlowBM,
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
+            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(400);
 
-    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
-    cv::Size block_size(16, 16);
-    cv::Size shift_size(1, 1);
-    cv::Size max_range(16, 16);
+    const cv::Size block_size(16, 16);
+    const cv::Size shift_size(1, 1);
+    const cv::Size max_range(16, 16);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_velx, d_vely, buf;
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
+        cv::gpu::GpuMat u, v, buf;
 
-        cv::gpu::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
+        TEST_CYCLE() cv::gpu::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, u, v, buf);
 
-        TEST_CYCLE()
-        {
-            cv::gpu::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
-        }
-
-        GPU_SANITY_CHECK(d_velx);
-        GPU_SANITY_CHECK(d_vely);
+        GPU_SANITY_CHECK(u);
+        GPU_SANITY_CHECK(v);
     }
     else
     {
-        cv::Mat velx, vely;
+        cv::Mat u, v;
 
-        calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely);
-
-        TEST_CYCLE()
-        {
-            calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely);
-        }
+        TEST_CYCLE() calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, u, v);
 
-        CPU_SANITY_CHECK(velx);
-        CPU_SANITY_CHECK(vely);
+        CPU_SANITY_CHECK(u);
+        CPU_SANITY_CHECK(v);
     }
 }
 
 PERF_TEST_P(ImagePair, Video_FastOpticalFlowBM,
-    Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
+            Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
 {
     declare.time(400);
 
-    cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
 
-    cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
+    const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
-    cv::Size block_size(16, 16);
-    cv::Size shift_size(1, 1);
-    cv::Size max_range(16, 16);
+    const cv::Size block_size(16, 16);
+    const cv::Size shift_size(1, 1);
+    const cv::Size max_range(16, 16);
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame0(frame0);
-        cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_velx, d_vely;
+        const cv::gpu::GpuMat d_frame0(frame0);
+        const cv::gpu::GpuMat d_frame1(frame1);
+        cv::gpu::GpuMat u, v;
 
         cv::gpu::FastOpticalFlowBM fastBM;
 
-        fastBM(d_frame0, d_frame1, d_velx, d_vely, max_range.width, block_size.width);
+        TEST_CYCLE() fastBM(d_frame0, d_frame1, u, v, max_range.width, block_size.width);
 
-        TEST_CYCLE()
-        {
-            fastBM(d_frame0, d_frame1, d_velx, d_vely, max_range.width, block_size.width);
-        }
-
-        GPU_SANITY_CHECK(d_velx);
-        GPU_SANITY_CHECK(d_vely);
+        GPU_SANITY_CHECK(u, 2);
+        GPU_SANITY_CHECK(v, 2);
     }
     else
     {
-        cv::Mat velx, vely;
-
-        calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely);
-
-        TEST_CYCLE()
-        {
-            calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely);
-        }
-
-        CPU_SANITY_CHECK(velx);
-        CPU_SANITY_CHECK(vely);
+        FAIL_NO_CPU();
     }
 }
 
@@ -566,11 +484,12 @@ PERF_TEST_P(ImagePair, Video_FastOpticalFlowBM,
 
 DEF_PARAM_TEST_1(Video, string);
 
-PERF_TEST_P(Video, DISABLED_Video_FGDStatModel, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
+PERF_TEST_P(Video, Video_FGDStatModel,
+            Values(string("gpu/video/768x576.avi")))
 {
     declare.time(60);
 
-    string inputFile = perf::TestBase::getDataPath(GetParam());
+    const string inputFile = perf::TestBase::getDataPath(GetParam());
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
@@ -597,6 +516,12 @@ PERF_TEST_P(Video, DISABLED_Video_FGDStatModel, Values("gpu/video/768x576.avi",
             d_model.update(d_frame);
             stopTimer();
         }
+
+        const cv::gpu::GpuMat background = d_model.background;
+        const cv::gpu::GpuMat foreground = d_model.foreground;
+
+        GPU_SANITY_CHECK(background, 1e-2, ERROR_RELATIVE);
+        GPU_SANITY_CHECK(foreground, 1e-2, ERROR_RELATIVE);
     }
     else
     {
@@ -614,6 +539,12 @@ PERF_TEST_P(Video, DISABLED_Video_FGDStatModel, Values("gpu/video/768x576.avi",
             cvUpdateBGStatModel(&ipl_frame, model);
             stopTimer();
         }
+
+        const cv::Mat background = model->background;
+        const cv::Mat foreground = model->foreground;
+
+        CPU_SANITY_CHECK(background);
+        CPU_SANITY_CHECK(foreground);
     }
 }
 
@@ -622,12 +553,14 @@ PERF_TEST_P(Video, DISABLED_Video_FGDStatModel, Values("gpu/video/768x576.avi",
 
 DEF_PARAM_TEST(Video_Cn_LearningRate, string, MatCn, double);
 
-PERF_TEST_P(Video_Cn_LearningRate, DISABLED_Video_MOG,
-    Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), GPU_CHANNELS_1_3_4, Values(0.0, 0.01)))
+PERF_TEST_P(Video_Cn_LearningRate, Video_MOG,
+            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+                    GPU_CHANNELS_1_3_4,
+                    Values(0.0, 0.01)))
 {
-    string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    int cn = GET_PARAM(1);
-    float learningRate = static_cast<float>(GET_PARAM(2));
+    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
+    const float learningRate = static_cast<float>(GET_PARAM(2));
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
@@ -651,9 +584,9 @@ PERF_TEST_P(Video_Cn_LearningRate, DISABLED_Video_MOG,
     {
         cv::gpu::GpuMat d_frame(frame);
         cv::gpu::MOG_GPU d_mog;
-        cv::gpu::GpuMat d_foreground;
+        cv::gpu::GpuMat foreground;
 
-        d_mog(d_frame, d_foreground, learningRate);
+        d_mog(d_frame, foreground, learningRate);
 
         for (int i = 0; i < 10; ++i)
         {
@@ -673,9 +606,11 @@ PERF_TEST_P(Video_Cn_LearningRate, DISABLED_Video_MOG,
             d_frame.upload(frame);
 
             startTimer(); next();
-            d_mog(d_frame, d_foreground, learningRate);
+            d_mog(d_frame, foreground, learningRate);
             stopTimer();
         }
+
+        GPU_SANITY_CHECK(foreground);
     }
     else
     {
@@ -703,6 +638,8 @@ PERF_TEST_P(Video_Cn_LearningRate, DISABLED_Video_MOG,
             mog(frame, foreground, learningRate);
             stopTimer();
         }
+
+        CPU_SANITY_CHECK(foreground);
     }
 }
 
@@ -711,11 +648,12 @@ PERF_TEST_P(Video_Cn_LearningRate, DISABLED_Video_MOG,
 
 DEF_PARAM_TEST(Video_Cn, string, int);
 
-PERF_TEST_P(Video_Cn, DISABLED_Video_MOG2,
-    Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Video_Cn, Video_MOG2,
+            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+                    GPU_CHANNELS_1_3_4))
 {
-    string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    int cn = GET_PARAM(1);
+    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
@@ -737,11 +675,13 @@ PERF_TEST_P(Video_Cn, DISABLED_Video_MOG2,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame(frame);
         cv::gpu::MOG2_GPU d_mog2;
-        cv::gpu::GpuMat d_foreground;
+        d_mog2.bShadowDetection = false;
+
+        cv::gpu::GpuMat d_frame(frame);
+        cv::gpu::GpuMat foreground;
 
-        d_mog2(d_frame, d_foreground);
+        d_mog2(d_frame, foreground);
 
         for (int i = 0; i < 10; ++i)
         {
@@ -761,13 +701,17 @@ PERF_TEST_P(Video_Cn, DISABLED_Video_MOG2,
             d_frame.upload(frame);
 
             startTimer(); next();
-            d_mog2(d_frame, d_foreground);
+            d_mog2(d_frame, foreground);
             stopTimer();
         }
+
+        GPU_SANITY_CHECK(foreground);
     }
     else
     {
         cv::BackgroundSubtractorMOG2 mog2;
+        mog2.set("detectShadows", false);
+
         cv::Mat foreground;
 
         mog2(frame, foreground);
@@ -791,6 +735,8 @@ PERF_TEST_P(Video_Cn, DISABLED_Video_MOG2,
             mog2(frame, foreground);
             stopTimer();
         }
+
+        CPU_SANITY_CHECK(foreground);
     }
 }
 
@@ -798,10 +744,11 @@ PERF_TEST_P(Video_Cn, DISABLED_Video_MOG2,
 // MOG2GetBackgroundImage
 
 PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
-    Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), GPU_CHANNELS_1_3_4))
+            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+                    GPU_CHANNELS_1_3_4))
 {
-    string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    int cn = GET_PARAM(1);
+    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
@@ -834,15 +781,11 @@ PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
             d_mog2(d_frame, d_foreground);
         }
 
-        cv::gpu::GpuMat d_background;
-        d_mog2.getBackgroundImage(d_background);
+        cv::gpu::GpuMat background;
 
-        TEST_CYCLE()
-        {
-            d_mog2.getBackgroundImage(d_background);
-        }
+        TEST_CYCLE() d_mog2.getBackgroundImage(background);
 
-        GPU_SANITY_CHECK(d_background);
+        GPU_SANITY_CHECK(background, 1);
     }
     else
     {
@@ -868,12 +811,8 @@ PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
         }
 
         cv::Mat background;
-        mog2.getBackgroundImage(background);
 
-        TEST_CYCLE()
-        {
-            mog2.getBackgroundImage(background);
-        }
+        TEST_CYCLE() mog2.getBackgroundImage(background);
 
         CPU_SANITY_CHECK(background);
     }
@@ -882,11 +821,12 @@ PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
 //////////////////////////////////////////////////////
 // VIBE
 
-PERF_TEST_P(Video_Cn, DISABLED_Video_VIBE,
-    Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), GPU_CHANNELS_1_3_4))
+PERF_TEST_P(Video_Cn, Video_VIBE,
+            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
+                    GPU_CHANNELS_1_3_4))
 {
-    string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    int cn = GET_PARAM(1);
+    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
@@ -908,10 +848,10 @@ PERF_TEST_P(Video_Cn, DISABLED_Video_VIBE,
     if (PERF_RUN_GPU())
     {
         cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::VIBE_GPU d_vibe;
-        cv::gpu::GpuMat d_foreground;
+        cv::gpu::VIBE_GPU vibe;
+        cv::gpu::GpuMat foreground;
 
-        d_vibe(d_frame, d_foreground);
+        vibe(d_frame, foreground);
 
         for (int i = 0; i < 10; ++i)
         {
@@ -931,13 +871,15 @@ PERF_TEST_P(Video_Cn, DISABLED_Video_VIBE,
             d_frame.upload(frame);
 
             startTimer(); next();
-            d_vibe(d_frame, d_foreground);
+            vibe(d_frame, foreground);
             stopTimer();
         }
+
+        GPU_SANITY_CHECK(foreground);
     }
     else
     {
-        FAIL() << "No such CPU implementation analogy";
+        FAIL_NO_CPU();
     }
 }
 
@@ -946,12 +888,14 @@ PERF_TEST_P(Video_Cn, DISABLED_Video_VIBE,
 
 DEF_PARAM_TEST(Video_Cn_MaxFeatures, string, MatCn, int);
 
-PERF_TEST_P(Video_Cn_MaxFeatures, DISABLED_Video_GMG,
-    Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), GPU_CHANNELS_1_3_4, Values(20, 40, 60)))
+PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG,
+            Combine(Values(string("gpu/video/768x576.avi")),
+                    GPU_CHANNELS_1_3_4,
+                    Values(20, 40, 60)))
 {
-    std::string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    int cn = GET_PARAM(1);
-    int maxFeatures = GET_PARAM(2);
+    const std::string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
+    const int cn = GET_PARAM(1);
+    const int maxFeatures = GET_PARAM(2);
 
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
@@ -973,12 +917,12 @@ PERF_TEST_P(Video_Cn_MaxFeatures, DISABLED_Video_GMG,
     if (PERF_RUN_GPU())
     {
         cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::GpuMat d_fgmask;
+        cv::gpu::GpuMat foreground;
 
         cv::gpu::GMG_GPU d_gmg;
         d_gmg.maxFeatures = maxFeatures;
 
-        d_gmg(d_frame, d_fgmask);
+        d_gmg(d_frame, foreground);
 
         for (int i = 0; i < 150; ++i)
         {
@@ -1003,20 +947,22 @@ PERF_TEST_P(Video_Cn_MaxFeatures, DISABLED_Video_GMG,
             d_frame.upload(frame);
 
             startTimer(); next();
-            d_gmg(d_frame, d_fgmask);
+            d_gmg(d_frame, foreground);
             stopTimer();
         }
+
+        GPU_SANITY_CHECK(foreground);
     }
     else
     {
-        cv::Mat fgmask;
+        cv::Mat foreground;
         cv::Mat zeros(frame.size(), CV_8UC1, cv::Scalar::all(0));
 
         cv::BackgroundSubtractorGMG gmg;
         gmg.set("maxFeatures", maxFeatures);
         gmg.initialize(frame.size(), 0.0, 255.0);
 
-        gmg(frame, fgmask);
+        gmg(frame, foreground);
 
         for (int i = 0; i < 150; ++i)
         {
@@ -1039,21 +985,60 @@ PERF_TEST_P(Video_Cn_MaxFeatures, DISABLED_Video_GMG,
             }
 
             startTimer(); next();
-            gmg(frame, fgmask);
+            gmg(frame, foreground);
             stopTimer();
         }
+
+        CPU_SANITY_CHECK(foreground);
+    }
+}
+
+#ifdef HAVE_NVCUVID
+
+//////////////////////////////////////////////////////
+// VideoReader
+
+PERF_TEST_P(Video, Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
+{
+    declare.time(20);
+
+    const string inputFile = perf::TestBase::getDataPath(GetParam());
+
+    if (PERF_RUN_GPU())
+    {
+        cv::gpu::VideoReader_GPU d_reader(inputFile);
+        ASSERT_TRUE( d_reader.isOpened() );
+
+        cv::gpu::GpuMat frame;
+
+        TEST_CYCLE_N(10) d_reader.read(frame);
+
+        GPU_SANITY_CHECK(frame);
+    }
+    else
+    {
+        cv::VideoCapture reader(inputFile);
+        ASSERT_TRUE( reader.isOpened() );
+
+        cv::Mat frame;
+
+        TEST_CYCLE_N(10) reader >> frame;
+
+        CPU_SANITY_CHECK(frame);
     }
 }
 
 //////////////////////////////////////////////////////
 // VideoWriter
 
-PERF_TEST_P(Video, DISABLED_Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
+#ifdef WIN32
+
+PERF_TEST_P(Video, Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
 {
     declare.time(30);
 
-    string inputFile = perf::TestBase::getDataPath(GetParam());
-    string outputFile = cv::tempfile(".avi");
+    const string inputFile = perf::TestBase::getDataPath(GetParam());
+    const string outputFile = cv::tempfile(".avi");
 
     const double FPS = 25.0;
 
@@ -1100,49 +1085,10 @@ PERF_TEST_P(Video, DISABLED_Video_VideoWriter, Values("gpu/video/768x576.avi", "
             stopTimer();
         }
     }
-}
-
-//////////////////////////////////////////////////////
-// VideoReader
-
-PERF_TEST_P(Video, Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
-{
-    declare.time(20);
-
-    string inputFile = perf::TestBase::getDataPath(GetParam());
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::VideoReader_GPU d_reader(inputFile);
-        ASSERT_TRUE( d_reader.isOpened() );
-
-        cv::gpu::GpuMat d_frame;
-
-        d_reader.read(d_frame);
-
-        TEST_CYCLE_N(10)
-        {
-            d_reader.read(d_frame);
-        }
-
-        GPU_SANITY_CHECK(d_frame);
-    }
-    else
-    {
-        cv::VideoCapture reader(inputFile);
-        ASSERT_TRUE( reader.isOpened() );
-
-        cv::Mat frame;
-
-        reader >> frame;
 
-        TEST_CYCLE_N(10)
-        {
-            reader >> frame;
-        }
-
-        CPU_SANITY_CHECK(frame);
-    }
+    SANITY_CHECK(frame);
 }
 
-} // namespace
+#endif // WIN32
+
+#endif // HAVE_NVCUVID
index c309903..16c61e0 100644 (file)
@@ -2,13 +2,6 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
-
-void fillRandom(Mat& m, double a, double b)
-{
-    RNG rng(123456789);
-    rng.fill(m, RNG::UNIFORM, Scalar::all(a), Scalar::all(b));
-}
 
 Mat readImage(const string& fileName, int flags)
 {
@@ -188,4 +181,4 @@ void PrintTo(const CvtColorInfo& info, ostream* os)
     };
 
     *os << str[info.code];
-}
\ No newline at end of file
+}
index 6782b93..cff4cdd 100644 (file)
@@ -2,11 +2,9 @@
 #define __OPENCV_PERF_GPU_UTILITY_HPP__
 
 #include "opencv2/core/core.hpp"
-#include "opencv2/core/gpumat.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/ts/ts_perf.hpp"
 
-void fillRandom(cv::Mat& m, double a = 0.0, double b = 255.0);
 cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
 
 using perf::MatType;
@@ -17,12 +15,13 @@ CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONS
 
 CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
 #define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
+
 CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX)
 
-const int Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4;
+enum { Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4 };
 CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
-#define GPU_CHANNELS_1_3_4 testing::Values(Gray, BGR, BGRA)
-#define GPU_CHANNELS_1_3 testing::Values(Gray, BGR)
+#define GPU_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA))
+#define GPU_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR))
 
 struct CvtColorInfo
 {
@@ -30,7 +29,8 @@ struct CvtColorInfo
     int dcn;
     int code;
 
-    explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
+    CvtColorInfo() : scn(0), dcn(0), code(0) {} // zero-init like the old defaulted ctor; PrintTo indexes a table by `code`
+    explicit CvtColorInfo(int scn_, int dcn_, int code_) : scn(scn_), dcn(dcn_), code(code_) {}
 };
 void PrintTo(const CvtColorInfo& info, std::ostream* os);
 
@@ -46,39 +46,18 @@ DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn);
 
 #define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p)
 
-#define GPU_SANITY_CHECK(dmat, ...) \
-    do{ \
-        cv::Mat d##dmat(dmat); \
-        SANITY_CHECK(d##dmat, ## __VA_ARGS__); \
-    } while(0)
+#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
 
-#define CPU_SANITY_CHECK(cmat, ...) \
+#define GPU_SANITY_CHECK(mat, ...) \
     do{ \
-        SANITY_CHECK(cmat, ## __VA_ARGS__); \
+        cv::Mat gpu_##mat(mat); \
+        SANITY_CHECK(gpu_##mat, ## __VA_ARGS__); \
     } while(0)
 
-#define GPU_SANITY_CHECK_KEYPOINTS(alg, dmat, ...)                                          \
-    do{                                                                                     \
-        cv::Mat d##dmat(dmat);                                                              \
-        cv::Mat __pt_x      = d##dmat.row(cv::gpu::alg##_GPU::X_ROW);                       \
-        cv::Mat __pt_y      = d##dmat.row(cv::gpu::alg##_GPU::Y_ROW);                       \
-        cv::Mat __angle     = d##dmat.row(cv::gpu::alg##_GPU::ANGLE_ROW);                   \
-        cv::Mat __octave    = d##dmat.row(cv::gpu::alg##_GPU::OCTAVE_ROW);                               \
-        cv::Mat __size      = d##dmat.row(cv::gpu::alg##_GPU::SIZE_ROW);                                 \
-        ::perf::Regression::add(this, std::string(#dmat) + "-pt-x-row",     __pt_x,     ## __VA_ARGS__); \
-        ::perf::Regression::add(this, std::string(#dmat) + "-pt-y-row",     __pt_y,     ## __VA_ARGS__); \
-        ::perf::Regression::add(this, std::string(#dmat) + "-angle-row",    __angle,    ## __VA_ARGS__); \
-        ::perf::Regression::add(this, std::string(#dmat) + "octave-row",    __octave,   ## __VA_ARGS__); \
-        ::perf::Regression::add(this, std::string(#dmat) + "-pt-size-row",  __size,     ## __VA_ARGS__); \
-    } while(0)
-
-#define GPU_SANITY_CHECK_RESPONSE(alg, dmat, ...) \
-    do{                                                                                     \
-        cv::Mat d##dmat(dmat);                                                              \
-        cv::Mat __response  = d##dmat.row(cv::gpu::alg##_GPU::RESPONSE_ROW);                \
-        ::perf::Regression::add(this, std::string(#dmat) + "-response-row", __response, ## __VA_ARGS__); \
+#define CPU_SANITY_CHECK(mat, ...) \
+    do{ \
+        cv::Mat cpu_##mat(mat); \
+        SANITY_CHECK(cpu_##mat, ## __VA_ARGS__); \
     } while(0)
 
-#define FAIL_NO_CPU()   FAIL() << "No such CPU implementation analogy"
-
 #endif // __OPENCV_PERF_GPU_UTILITY_HPP__
index 05d4609..76793d5 100644 (file)
@@ -48,6 +48,7 @@ using namespace cv::gpu;
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
 void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); }
+void cv::gpu::demosaicing(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); }
 void cv::gpu::swapChannels(GpuMat&, const int[], Stream&) { throw_nogpu(); }
 void cv::gpu::gammaCorrection(const GpuMat&, GpuMat&, bool, Stream&) { throw_nogpu(); }
 
@@ -62,6 +63,9 @@ namespace cv { namespace gpu {
         void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
         template <int cn>
         void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+
+        template <int cn>
+        void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
     }
 }}
 
@@ -1620,26 +1624,56 @@ namespace
 
         funcs[src.depth()][dcn - 1](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream));
     }
-
     void bayerBG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
     {
         bayer_to_bgr(src, dst, dcn, false, false, stream);
     }
-
     void bayerGB_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
     {
         bayer_to_bgr(src, dst, dcn, false, true, stream);
     }
-
     void bayerRG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
     {
         bayer_to_bgr(src, dst, dcn, true, false, stream);
     }
-
     void bayerGR_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
     {
         bayer_to_bgr(src, dst, dcn, true, true, stream);
     }
+
+    void bayer_to_gray(const GpuMat& src, GpuMat& dst, bool blue_last, bool start_with_green, Stream& stream)
+    {
+        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+        static const func_t funcs[3] =
+        {
+            Bayer2BGR_8u_gpu<1>,
+            0,
+            Bayer2BGR_16u_gpu<1>,
+        };
+
+        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1);
+        CV_Assert(src.rows > 2 && src.cols > 2);
+
+        dst.create(src.size(), CV_MAKETYPE(src.depth(), 1));
+
+        funcs[src.depth()](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream));
+    }
+    void bayerBG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
+    {
+        bayer_to_gray(src, dst, false, false, stream);
+    }
+    void bayerGB_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
+    {
+        bayer_to_gray(src, dst, false, true, stream);
+    }
+    void bayerRG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
+    {
+        bayer_to_gray(src, dst, true, false, stream);
+    }
+    void bayerGR_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
+    {
+        bayer_to_gray(src, dst, true, true, stream);
+    }
 }
 
 void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
@@ -1756,10 +1790,10 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
         yuv_to_bgr,             // CV_YUV2BGR      = 84
         yuv_to_rgb,             // CV_YUV2RGB      = 85
 
-        0,                      // CV_BayerBG2GRAY = 86
-        0,                      // CV_BayerGB2GRAY = 87
-        0,                      // CV_BayerRG2GRAY = 88
-        0,                      // CV_BayerGR2GRAY = 89
+        bayerBG_to_gray,        // CV_BayerBG2GRAY = 86
+        bayerGB_to_gray,        // CV_BayerGB2GRAY = 87
+        bayerRG_to_gray,        // CV_BayerRG2GRAY = 88
+        bayerGR_to_gray,        // CV_BayerGR2GRAY = 89
 
         //YUV 4:2:0 formats family
         0,                      // CV_YUV2RGB_NV12 = 90,
@@ -1825,6 +1859,74 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
     func(src, dst, dcn, stream);
 }
 
+void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
+{
+    const int depth = src.depth();
+
+    CV_Assert( src.channels() == 1 );
+
+    switch (code)
+    {
+    case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
+        bayer_to_gray(src, dst, code == CV_BayerBG2GRAY || code == CV_BayerGB2GRAY, code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY, stream);
+        break;
+
+    case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
+        bayer_to_bgr(src, dst, dcn, code == CV_BayerBG2BGR || code == CV_BayerGB2BGR, code == CV_BayerGB2BGR || code == CV_BayerGR2BGR, stream);
+        break;
+
+    case COLOR_BayerBG2BGR_MHT: case COLOR_BayerGB2BGR_MHT: case COLOR_BayerRG2BGR_MHT: case COLOR_BayerGR2BGR_MHT:
+    {
+        if (dcn <= 0)
+            dcn = 3;
+
+        CV_Assert( depth == CV_8U );
+        CV_Assert( dcn == 3 || dcn == 4 );
+
+        dst.create(src.size(), CV_MAKETYPE(depth, dcn));
+        dst.setTo(Scalar::all(0));
+
+        Size wholeSize;
+        Point ofs;
+        src.locateROI(wholeSize, ofs);
+        PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);
+
+        const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1,
+                                        code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1);
+
+        if (dcn == 3)
+            device::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+        else
+            device::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+
+        break;
+    }
+
+    case COLOR_BayerBG2GRAY_MHT: case COLOR_BayerGB2GRAY_MHT: case COLOR_BayerRG2GRAY_MHT: case COLOR_BayerGR2GRAY_MHT:
+    {
+        CV_Assert( depth == CV_8U );
+
+        dst.create(src.size(), CV_MAKETYPE(depth, 1));
+        dst.setTo(Scalar::all(0));
+
+        Size wholeSize;
+        Point ofs;
+        src.locateROI(wholeSize, ofs);
+        PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);
+        // This case only sees *_GRAY_MHT codes, so the Bayer phase must be derived from them (not the *_BGR_MHT ones).
+        const int2 firstRed = make_int2(code == COLOR_BayerRG2GRAY_MHT || code == COLOR_BayerGB2GRAY_MHT ? 0 : 1,
+                                        code == COLOR_BayerRG2GRAY_MHT || code == COLOR_BayerGR2GRAY_MHT ? 0 : 1);
+
+        device::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+
+        break;
+    }
+
+    default:
+        CV_Error( CV_StsBadFlag, "Unknown / unsupported color conversion code" );
+    }
+}
+
 void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
 {
     CV_Assert(image.type() == CV_8UC4);
index 4f78dbd..d99ffc4 100644 (file)
@@ -648,7 +648,7 @@ namespace cv { namespace gpu { namespace device
                     tWeight += gmm_weight(mode * frame.rows + y, x);
                     if (tWeight > c_TB)
                         break;
-                };
+                }
             }
 
             fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
@@ -761,4 +761,4 @@ namespace cv { namespace gpu { namespace device
 }}}
 
 
-#endif /* CUDA_DISABLER */
\ No newline at end of file
+#endif /* CUDA_DISABLER */
index c094e08..0533522 100644 (file)
@@ -194,10 +194,10 @@ namespace cv { namespace gpu { namespace device
             if ( y > 0 && connected(intensity, image(y - 1, x)))
                 c |= UP;
 
-            if ( x - 1 < image.cols && connected(intensity, image(y, x + 1)))
+            if ( x + 1 < image.cols && connected(intensity, image(y, x + 1)))
                 c |= RIGHT;
 
-            if ( y - 1 < image.rows && connected(intensity, image(y + 1, x)))
+            if ( y + 1 < image.rows && connected(intensity, image(y + 1, x)))
                 c |= DOWN;
 
             components(y, x) = c;
index 57322ed..1d2f18e 100644 (file)
 
 #if !defined CUDA_DISABLER
 
-#include <opencv2/gpu/device/common.hpp>
-#include <opencv2/gpu/device/vec_traits.hpp>
-#include <opencv2/gpu/device/vec_math.hpp>
-#include <opencv2/gpu/device/limits.hpp>
-
-namespace cv { namespace gpu {
-    namespace device
+#include "opencv2/gpu/device/common.hpp"
+#include "opencv2/gpu/device/vec_traits.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/limits.hpp"
+#include "opencv2/gpu/device/color.hpp"
+#include "opencv2/gpu/device/saturate_cast.hpp"
+
+namespace cv { namespace gpu { namespace device
+{
+    template <typename T> struct Bayer2BGR;
+
+    template <> struct Bayer2BGR<uchar>
     {
-        template <typename D>
-        __global__ void Bayer2BGR_8u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
-        {
-            const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
-            int s_y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (s_y >= dst.rows || (s_x << 2) >= dst.cols)
-                return;
-
-            s_y = ::min(::max(s_y, 1), dst.rows - 2);
+        uchar3 res0;
+        uchar3 res1;
+        uchar3 res2;
+        uchar3 res3;
 
+        __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
+        {
             uchar4 patch[3][3];
             patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x];
             patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
-            patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
+            patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
 
             patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x];
             patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)];
-            patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
+            patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
 
             patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x];
             patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
-            patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
-
-            D res0 = VecTraits<D>::all(numeric_limits<uchar>::max());
-            D res1 = VecTraits<D>::all(numeric_limits<uchar>::max());
-            D res2 = VecTraits<D>::all(numeric_limits<uchar>::max());
-            D res3 = VecTraits<D>::all(numeric_limits<uchar>::max());
+            patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
 
             if ((s_y & 1) ^ start_with_green)
             {
@@ -181,45 +177,69 @@ namespace cv { namespace gpu {
                     res3.z = t7;
                 }
             }
+        }
+    };
 
-            const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
-            const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
+    // Converts a demosaiced 3-component pixel into the destination pixel type D.
+    // Only the explicit specializations below are defined.
+    template <typename D> __device__ __forceinline__ D toDst(const uchar3& pix);
+    // 1-channel output: passes the pixel through the functor created by bgr_to_gray_traits<uchar>.
+    template <> __device__ __forceinline__ uchar toDst<uchar>(const uchar3& pix)
+    {
+        // No `typename` here: the type is not dependent inside an explicit specialization,
+        // and pre-C++11 compilers may reject the keyword outside a template.
+        bgr_to_gray_traits<uchar>::functor_type f = bgr_to_gray_traits<uchar>::create_functor();
+        return f(pix);
+    }
+    // 3-channel output: the pixel is returned unchanged.
+    template <> __device__ __forceinline__ uchar3 toDst<uchar3>(const uchar3& pix)
+    {
+        return pix;
+    }
+    // 4-channel output: the three components are copied and the fourth is set to 255.
+    template <> __device__ __forceinline__ uchar4 toDst<uchar4>(const uchar3& pix)
+    {
+        return make_uchar4(pix.x, pix.y, pix.z, 255);
+    }
 
-            dst(d_y, d_x) = res0;
-            if (d_x + 1 < dst.cols)
-                dst(d_y, d_x + 1) = res1;
-            if (d_x + 2 < dst.cols)
-                dst(d_y, d_x + 2) = res2;
-            if (d_x + 3 < dst.cols)
-                dst(d_y, d_x + 3) = res3;
-        }
+    template <typename D>
+    __global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
+    {
+        const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
+        int s_y = blockIdx.y * blockDim.y + threadIdx.y;
 
-        template <typename D>
-        __global__ void Bayer2BGR_16u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
-        {
-            const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
-            int s_y = blockIdx.y * blockDim.y + threadIdx.y;
+        if (s_y >= src.rows || (s_x << 2) >= src.cols)
+            return;
+
+        s_y = ::min(::max(s_y, 1), src.rows - 2);
+
+        Bayer2BGR<uchar> bayer;
+        bayer.apply(src, s_x, s_y, blue_last, start_with_green);
+
+        const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
+        const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
 
-            if (s_y >= dst.rows || (s_x << 1) >= dst.cols)
-                return;
+        dst(d_y, d_x) = toDst<D>(bayer.res0);
+        if (d_x + 1 < src.cols)
+            dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
+        if (d_x + 2 < src.cols)
+            dst(d_y, d_x + 2) = toDst<D>(bayer.res2);
+        if (d_x + 3 < src.cols)
+            dst(d_y, d_x + 3) = toDst<D>(bayer.res3);
+    }
 
-            s_y = ::min(::max(s_y, 1), dst.rows - 2);
+    template <> struct Bayer2BGR<ushort>
+    {
+        ushort3 res0;
+        ushort3 res1;
 
+        __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
+        {
             ushort2 patch[3][3];
             patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x];
             patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
-            patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
+            patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
 
             patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x];
             patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)];
-            patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
+            patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
 
             patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x];
             patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
-            patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
-
-            D res0 = VecTraits<D>::all(numeric_limits<ushort>::max());
-            D res1 = VecTraits<D>::all(numeric_limits<ushort>::max());
+            patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
 
             if ((s_y & 1) ^ start_with_green)
             {
@@ -279,53 +299,246 @@ namespace cv { namespace gpu {
                     res1.z = t3;
                 }
             }
+        }
+    };
+
+    // Converts a demosaiced 16-bit 3-component pixel into the destination pixel type D.
+    // Only the explicit specializations below are defined.
+    template <typename D> __device__ __forceinline__ D toDst(const ushort3& pix);
+    // 1-channel output: passes the pixel through the functor created by bgr_to_gray_traits<ushort>.
+    template <> __device__ __forceinline__ ushort toDst<ushort>(const ushort3& pix)
+    {
+        // No `typename` here: the type is not dependent inside an explicit specialization,
+        // and pre-C++11 compilers may reject the keyword outside a template.
+        bgr_to_gray_traits<ushort>::functor_type f = bgr_to_gray_traits<ushort>::create_functor();
+        return f(pix);
+    }
+    // 3-channel output: the pixel is returned unchanged.
+    template <> __device__ __forceinline__ ushort3 toDst<ushort3>(const ushort3& pix)
+    {
+        return pix;
+    }
+    // 4-channel output: the three components are copied and the fourth is set to the ushort maximum.
+    template <> __device__ __forceinline__ ushort4 toDst<ushort4>(const ushort3& pix)
+    {
+        return make_ushort4(pix.x, pix.y, pix.z, numeric_limits<ushort>::max());
+    }
 
-            const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
-            const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
+    template <typename D>
+    __global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
+    {
+        const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
+        int s_y = blockIdx.y * blockDim.y + threadIdx.y;
 
-            dst(d_y, d_x) = res0;
-            if (d_x + 1 < dst.cols)
-                dst(d_y, d_x + 1) = res1;
-        }
+        if (s_y >= src.rows || (s_x << 1) >= src.cols)
+            return;
 
-        template <int cn>
-        void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
-        {
-            typedef typename TypeVec<uchar, cn>::vec_type dst_t;
+        s_y = ::min(::max(s_y, 1), src.rows - 2);
 
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(dst.cols, 4 * block.x), divUp(dst.rows, block.y));
+        Bayer2BGR<ushort> bayer;
+        bayer.apply(src, s_x, s_y, blue_last, start_with_green);
 
-            cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
+        const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
+        const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
 
-            Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
-            cudaSafeCall( cudaGetLastError() );
+        dst(d_y, d_x) = toDst<D>(bayer.res0);
+        if (d_x + 1 < src.cols)
+            dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
+    }
 
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-        template <int cn>
-        void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
-        {
-            typedef typename TypeVec<ushort, cn>::vec_type dst_t;
+    template <int cn>
+    void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
+    {
+        typedef typename TypeVec<uchar, cn>::vec_type dst_t;
 
-            const dim3 block(32, 8);
-            const dim3 grid(divUp(dst.cols, 2 * block.x), divUp(dst.rows, block.y));
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));
 
-            cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
 
-            Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
-            cudaSafeCall( cudaGetLastError() );
+        Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
+        cudaSafeCall( cudaGetLastError() );
 
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    template <int cn>
+    void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
+    {
+        typedef typename TypeVec<ushort, cn>::vec_type dst_t;
+
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));
+
+        cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
+
+        Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
+        cudaSafeCall( cudaGetLastError() );
 
-        template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
-        template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
-        template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
-        template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
-}}
 
-#endif /* CUDA_DISABLER */
\ No newline at end of file
+    template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+    template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+    template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+
+    template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+    template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+    template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
+
+    //////////////////////////////////////////////////////////////
+    // Bayer Demosaicing (Malvar, He, and Cutler)
+    //
+    // by Morgan McGuire, Williams College
+    // http://graphics.cs.williams.edu/papers/BayerJGT09/#shaders
+    //
+    // ported to CUDA
+
+    texture<uchar, cudaTextureType2D, cudaReadModeElementType> sourceTex(false, cudaFilterModePoint, cudaAddressModeClamp);
+
+    // Malvar-He-Cutler demosaicing kernel: one thread produces one destination pixel.
+    //
+    //   dst          - output image; the pixel type selects the toDst<> conversion applied
+    //                  to the reconstructed BGR triple.
+    //   sourceOffset - added to the thread's (x, y) to obtain the coordinates fetched
+    //                  from sourceTex.
+    //   firstRed     - added to (x, y) before taking the parity that selects which of the
+    //                  four Bayer positions this pixel occupies.
+    //
+    // Threads on the first/last row or column of dst (and any thread outside dst) return
+    // without writing, so those destination pixels are left untouched by this kernel.
+    template <typename DstType>
+    __global__ void MHCdemosaic(PtrStepSz<DstType> dst, const int2 sourceOffset, const int2 firstRed)
+    {
+        // Filter weights; the commented-out entries are coefficients this port does not use.
+        const float   kAx = -1.0f / 8.0f,     kAy = -1.5f / 8.0f,     kAz =  0.5f / 8.0f    /*kAw = -1.0f / 8.0f*/;
+        const float   kBx =  2.0f / 8.0f,   /*kBy =  0.0f / 8.0f,*/ /*kBz =  0.0f / 8.0f,*/   kBw =  4.0f / 8.0f  ;
+        const float   kCx =  4.0f / 8.0f,     kCy =  6.0f / 8.0f,     kCz =  5.0f / 8.0f    /*kCw =  5.0f / 8.0f*/;
+        const float /*kDx =  0.0f / 8.0f,*/   kDy =  2.0f / 8.0f,     kDz = -1.0f / 8.0f    /*kDw = -1.0f / 8.0f*/;
+        const float   kEx = -1.0f / 8.0f,     kEy = -1.5f / 8.0f,   /*kEz = -1.0f / 8.0f,*/   kEw =  0.5f / 8.0f  ;
+        const float   kFx =  2.0f / 8.0f,   /*kFy =  0.0f / 8.0f,*/   kFz =  4.0f / 8.0f    /*kFw =  0.0f / 8.0f*/;
+
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        // Skip the one-pixel border of dst and everything outside it.
+        if (x == 0 || x >= dst.cols - 1 || y == 0 || y >= dst.rows - 1)
+            return;
+
+        // Coordinates of this pixel in the source texture.
+        int2 center;
+        center.x = x + sourceOffset.x;
+        center.y = y + sourceOffset.y;
+
+        // Neighbour columns at -2, -1, +1, +2 from the centre.
+        int4 xCoord;
+        xCoord.x = center.x - 2;
+        xCoord.y = center.x - 1;
+        xCoord.z = center.x + 1;
+        xCoord.w = center.x + 2;
+
+        // Neighbour rows at -2, -1, +1, +2 from the centre.
+        int4 yCoord;
+        yCoord.x = center.y - 2;
+        yCoord.y = center.y - 1;
+        yCoord.z = center.y + 1;
+        yCoord.w = center.y + 2;
+
+        float C = tex2D(sourceTex, center.x, center.y); // ( 0, 0)
+
+        // The four diagonal neighbours.
+        float4 Dvec;
+        Dvec.x = tex2D(sourceTex, xCoord.y, yCoord.y); // (-1,-1)
+        Dvec.y = tex2D(sourceTex, xCoord.y, yCoord.z); // (-1, 1)
+        Dvec.z = tex2D(sourceTex, xCoord.z, yCoord.y); // ( 1,-1)
+        Dvec.w = tex2D(sourceTex, xCoord.z, yCoord.z); // ( 1, 1)
+
+        float4 value;
+        value.x = tex2D(sourceTex, center.x, yCoord.x); // ( 0,-2) A0
+        value.y = tex2D(sourceTex, center.x, yCoord.y); // ( 0,-1) B0
+        value.z = tex2D(sourceTex, xCoord.x, center.y); // (-2, 0) E0
+        value.w = tex2D(sourceTex, xCoord.y, center.y); // (-1, 0) F0
+
+        // (A0 + A1), (B0 + B1), (E0 + E1), (F0 + F1)
+        value.x += tex2D(sourceTex, center.x, yCoord.w); // ( 0, 2) A1
+        value.y += tex2D(sourceTex, center.x, yCoord.z); // ( 0, 1) B1
+        value.z += tex2D(sourceTex, xCoord.w, center.y); // ( 2, 0) E1
+        value.w += tex2D(sourceTex, xCoord.z, center.y); // ( 1, 0) F1
+
+        float4 PATTERN;
+        PATTERN.x = kCx * C;
+        PATTERN.y = kCy * C;
+        PATTERN.z = kCz * C;
+        PATTERN.w = PATTERN.z;
+
+        float D = Dvec.x + Dvec.y + Dvec.z + Dvec.w;
+
+        // There are five filter patterns (identity, cross, checker,
+        // theta, phi). Precompute the terms from all of them and then
+        // use swizzles to assign to color channels.
+        //
+        // Channel Matches
+        // x cross (e.g., EE G)
+        // y checker (e.g., EE B)
+        // z theta (e.g., EO R)
+        // w phi (e.g., EO B)
+
+        // Short aliases for the paired sums; undefined again once the accumulation is done.
+        #define A value.x  // A0 + A1
+        #define B value.y  // B0 + B1
+        #define E value.z  // E0 + E1
+        #define F value.w  // F0 + F1
+
+        float3 temp;
+
+        // PATTERN.yzw += (kD.yz * D).xyy;
+        temp.x = kDy * D;
+        temp.y = kDz * D;
+        PATTERN.y += temp.x;
+        PATTERN.z += temp.y;
+        PATTERN.w += temp.y;
+
+        // PATTERN += (kA.xyz * A).xyzx;
+        temp.x = kAx * A;
+        temp.y = kAy * A;
+        temp.z = kAz * A;
+        PATTERN.x += temp.x;
+        PATTERN.y += temp.y;
+        PATTERN.z += temp.z;
+        PATTERN.w += temp.x;
+
+        // PATTERN += (kE.xyw * E).xyxz;
+        temp.x = kEx * E;
+        temp.y = kEy * E;
+        temp.z = kEw * E;
+        PATTERN.x += temp.x;
+        PATTERN.y += temp.y;
+        PATTERN.z += temp.x;
+        PATTERN.w += temp.z;
+
+        // PATTERN.xw += kB.xw * B;
+        PATTERN.x += kBx * B;
+        PATTERN.w += kBw * B;
+
+        // PATTERN.xz += kF.xz * F;
+        PATTERN.x += kFx * F;
+        PATTERN.z += kFz * F;
+
+        // Macros ignore scope: drop the single-letter aliases so they cannot rewrite
+        // identifiers named A/B/E/F in code that follows this kernel.
+        #undef A
+        #undef B
+        #undef E
+        #undef F
+
+        // Determine which of four types of pixels we are on.
+        int2 alternate;
+        alternate.x = (x + firstRed.x) % 2;
+        alternate.y = (y + firstRed.y) % 2;
+
+        // in BGR sequence;
+        uchar3 pixelColor =
+            (alternate.y == 0) ?
+                ((alternate.x == 0) ?
+                    make_uchar3(saturate_cast<uchar>(PATTERN.y), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(C)) :
+                    make_uchar3(saturate_cast<uchar>(PATTERN.w), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.z))) :
+                ((alternate.x == 0) ?
+                    make_uchar3(saturate_cast<uchar>(PATTERN.z), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.w)) :
+                    make_uchar3(saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(PATTERN.y)));
+
+        dst(y, x) = toDst<DstType>(pixelColor);
+    }
+
+    // Host-side launcher for the MHC demosaicing kernel.
+    //
+    //   cn           - number of destination channels; selects the kernel's pixel type
+    //                  through TypeVec<uchar, cn>.
+    //   src          - image bound to sourceTex for the kernel's texture reads.
+    //   sourceOffset - forwarded to the kernel, which adds it to each sampled coordinate.
+    //   dst          - output image, reinterpreted as PtrStepSz<dst_t> for the kernel.
+    //   firstRed     - forwarded to the kernel to select the Bayer phase.
+    //   stream       - CUDA stream for the launch; the call blocks until the device is
+    //                  idle only when this is the default stream (0).
+    template <int cn>
+    void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream)
+    {
+        typedef typename TypeVec<uchar, cn>::vec_type dst_t;
+
+        const dim3 block(32, 8);
+        // NOTE(review): the grid is sized from src, while the kernel indexes and bounds-checks
+        // against dst. This covers dst only if src is at least as large as dst - confirm
+        // against callers.
+        const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
+
+        // NOTE(review): presumably binds src to the file-scope sourceTex reference; no
+        // matching unbind is visible in this function - confirm that is intended.
+        bindTexture(&sourceTex, src);
+
+        MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed);
+        // Reports launch errors.
+        cudaSafeCall( cudaGetLastError() );
+
+        // Default stream: wait for completion before returning.
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+
+    template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
+    template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
+    template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
+}}}
+
+#endif /* CUDA_DISABLER */
index 27fb61f..5165b35 100644 (file)
@@ -2284,15 +2284,18 @@ namespace arithm
 
     template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
     template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarAnd<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
 
     template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
     template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarOr<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
 
     template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
     template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
-    template void bitScalarXor<uint>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
+    template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
 }
 
 //////////////////////////////////////////////////////////////////////////
index 3d6cde3..eedb313 100644 (file)
@@ -2280,11 +2280,11 @@ namespace
 {
     typedef void (*bit_scalar_func_t)(PtrStepSzb src1, unsigned int src2, PtrStepSzb dst, cudaStream_t stream);
 
-    template <bit_scalar_func_t func> struct BitScalar
+    template <typename T, bit_scalar_func_t func> struct BitScalar
     {
         static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
         {
-            func(src, static_cast<unsigned int>(sc.val[0]), dst, stream);
+            func(src, saturate_cast<T>(sc.val[0]), dst, stream);
         }
     };
 
@@ -2292,14 +2292,12 @@ namespace
     {
         static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream)
         {
-            Scalar_<unsigned int> isc = sc;
-
             unsigned int packedVal = 0;
 
-            packedVal |= (isc.val[0] & 0xffff);
-            packedVal |= (isc.val[1] & 0xffff) << 8;
-            packedVal |= (isc.val[2] & 0xffff) << 16;
-            packedVal |= (isc.val[3] & 0xffff) << 24;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[0]) & 0xffff);
+            packedVal |= (saturate_cast<unsigned char>(sc.val[1]) & 0xffff) << 8;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[2]) & 0xffff) << 16;
+            packedVal |= (saturate_cast<unsigned char>(sc.val[3]) & 0xffff) << 24;
 
             func(src, packedVal, dst, stream);
         }
@@ -2330,7 +2328,7 @@ namespace
             oSizeROI.width = src.cols;
             oSizeROI.height = src.rows;
 
-            const npp_t pConstants[] = {static_cast<npp_t>(sc.val[0]), static_cast<npp_t>(sc.val[1]), static_cast<npp_t>(sc.val[2]), static_cast<npp_t>(sc.val[3])};
+            const npp_t pConstants[] = {saturate_cast<npp_t>(sc.val[0]), saturate_cast<npp_t>(sc.val[1]), saturate_cast<npp_t>(sc.val[2]), saturate_cast<npp_t>(sc.val[3])};
 
             nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
 
@@ -2350,7 +2348,7 @@ namespace
             oSizeROI.width = src.cols;
             oSizeROI.height = src.rows;
 
-            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
+            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
 
             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -2365,11 +2363,11 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
     {
-        {BitScalar< bitScalarAnd<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarAnd<unsigned char> >::call  , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarAnd<unsigned int> >::call},
         {0,0,0,0},
-        {BitScalar< bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarAnd<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
         {0,0,0,0},
-        {BitScalar< bitScalarAnd<unsigned int> >::call  , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
+        {BitScalar<int, bitScalarAnd<int> >::call                      , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
     };
 
     const int depth = src.depth();
@@ -2390,11 +2388,11 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
     {
-        {BitScalar< bitScalarOr<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarOr<unsigned char> >::call  , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOr<unsigned int> >::call},
         {0,0,0,0},
-        {BitScalar< bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarOr<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
         {0,0,0,0},
-        {BitScalar< bitScalarOr<unsigned int> >::call  , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
+        {BitScalar<int, bitScalarOr<int> >::call                      , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
     };
 
     const int depth = src.depth();
@@ -2415,11 +2413,11 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
     {
-        {BitScalar< bitScalarXor<unsigned char> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
+        {BitScalar<unsigned char, bitScalarXor<unsigned char> >::call  , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarXor<unsigned int> >::call},
         {0,0,0,0},
-        {BitScalar< bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
+        {BitScalar<unsigned short, bitScalarXor<unsigned short> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
         {0,0,0,0},
-        {BitScalar< bitScalarXor<unsigned int> >::call  , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
+        {BitScalar<int, bitScalarXor<int> >::call                      , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
     };
 
     const int depth = src.depth();
index ab16070..49230e6 100644 (file)
@@ -104,12 +104,12 @@ void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scal
 
 void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s)
 {
-    if (!TargetArchs::builtWith(SHARED_ATOMICS) || !DeviceInfo().supports(SHARED_ATOMICS))
-        CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
     CV_Assert(!mask.empty() && mask.type() == CV_8U);
 
-    if (mask.size() != components.size() || components.type() != CV_32SC1)
-        components.create(mask.size(), CV_32SC1);
+    if (!deviceSupports(SHARED_ATOMICS))
+        CV_Error(CV_StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
+
+    components.create(mask.size(), CV_32SC1);
 
     cudaStream_t stream = StreamAccessor::getStream(s);
     device::ccl::labelComponents(mask, components, flags, stream);
index 3184717..d9ca468 100644 (file)
@@ -522,6 +522,7 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
 
     dst.create(dsize, src.type());
+    dst.setTo(Scalar::all(0));
 
     funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
 }
index 39c0b5c..65b05b7 100644 (file)
@@ -382,6 +382,7 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
             dstcol[0] = static_cast<uchar>(sumcol[0] / comps.size[parent]);
             dstcol[1] = static_cast<uchar>(sumcol[1] / comps.size[parent]);
             dstcol[2] = static_cast<uchar>(sumcol[2] / comps.size[parent]);
+            dstcol[3] = 255;
         }
     }
 }
index 148bcb5..ffc035c 100644 (file)
@@ -209,6 +209,8 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
+    uPyr_[0].setTo(Scalar::all(0));
+    vPyr_[0].setTo(Scalar::all(0));
     uPyr_[1].setTo(Scalar::all(0));
     vPyr_[1].setTo(Scalar::all(0));
 
index 0fb19ad..827d521 100644 (file)
@@ -232,10 +232,8 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
     };
 
     bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
-    #ifdef linux
-        // NPP bug on float data
-        useNpp = useNpp && src.depth() != CV_32F;
-    #endif
+    // NPP bug on float data
+    useNpp = useNpp && src.depth() != CV_32F;
 
     if (useNpp)
     {
@@ -372,10 +370,8 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
     };
 
     bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
-    #ifdef linux
-        // NPP bug on float data
-        useNpp = useNpp && src.depth() != CV_32F;
-    #endif
+    // NPP bug on float data
+    useNpp = useNpp && src.depth() != CV_32F;
 
     if (useNpp)
     {
index bac835e..ebf0a88 100644 (file)
@@ -207,11 +207,17 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG, testing::Combine(
 //////////////////////////////////////////////////////
 // MOG2
 
-PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, UseRoi)
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
+}
+
+PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, DetectShadow, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     std::string inputFile;
     bool useGray;
+    bool detectShadow;
     bool useRoi;
 
     virtual void SetUp()
@@ -220,10 +226,9 @@ PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, UseRoi)
         cv::gpu::setDevice(devInfo.deviceID());
 
         inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
-
         useGray = GET_PARAM(2);
-
-        useRoi = GET_PARAM(3);
+        detectShadow = GET_PARAM(3);
+        useRoi = GET_PARAM(4);
     }
 };
 
@@ -237,9 +242,11 @@ GPU_TEST_P(MOG2, Update)
     ASSERT_FALSE(frame.empty());
 
     cv::gpu::MOG2_GPU mog2;
+    mog2.bShadowDetection = detectShadow;
     cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
 
     cv::BackgroundSubtractorMOG2 mog2_gold;
+    mog2_gold.set("detectShadows", detectShadow);
     cv::Mat foreground_gold;
 
     for (int i = 0; i < 10; ++i)
@@ -258,11 +265,14 @@ GPU_TEST_P(MOG2, Update)
 
         mog2_gold(frame, foreground_gold);
 
-        double norm = cv::norm(foreground_gold, cv::Mat(foreground), cv::NORM_L1);
-
-        norm /= foreground_gold.size().area();
-
-        ASSERT_LE(norm, 0.09);
+        if (detectShadow)
+        {
+            ASSERT_MAT_SIMILAR(foreground_gold, foreground, 1e-2);
+        }
+        else
+        {
+            ASSERT_MAT_NEAR(foreground_gold, foreground, 0);
+        }
     }
 }
 
@@ -277,9 +287,11 @@ GPU_TEST_P(MOG2, getBackgroundImage)
     cv::Mat frame;
 
     cv::gpu::MOG2_GPU mog2;
+    mog2.bShadowDetection = detectShadow;
     cv::gpu::GpuMat foreground;
 
     cv::BackgroundSubtractorMOG2 mog2_gold;
+    mog2_gold.set("detectShadows", detectShadow);
     cv::Mat foreground_gold;
 
     for (int i = 0; i < 10; ++i)
@@ -305,6 +317,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG2, testing::Combine(
     ALL_DEVICES,
     testing::Values(std::string("768x576.avi")),
     testing::Values(UseGray(true), UseGray(false)),
+    testing::Values(DetectShadow(true), DetectShadow(false)),
     WHOLE_SUBMAT));
 
 //////////////////////////////////////////////////////
index 5aee14d..3657107 100644 (file)
@@ -2218,6 +2218,70 @@ GPU_TEST_P(CvtColor, BayerGR2BGR4)
     EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst3(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0);
 }
 
+GPU_TEST_P(CvtColor, BayerBG2Gray)  // checks cv::gpu::cvtColor against cv::cvtColor for COLOR_BayerBG2GRAY
+{
+    if ((depth != CV_8U && depth != CV_16U) || useRoi)  // only 8U/16U inputs without ROI are exercised; other parameter combinations return early
+        return;
+
+    cv::Mat src = randomMat(size, depth);  // single-channel random input used as the Bayer pattern
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2GRAY);
+
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, cv::COLOR_BayerBG2GRAY);  // CPU result serves as the reference
+
+    EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2);  // compares the interior only (1-pixel border excluded), eps argument = 2
+}
+
+GPU_TEST_P(CvtColor, BayerGB2Gray)  // checks cv::gpu::cvtColor against cv::cvtColor for COLOR_BayerGB2GRAY
+{
+    if ((depth != CV_8U && depth != CV_16U) || useRoi)  // only 8U/16U inputs without ROI are exercised; other parameter combinations return early
+        return;
+
+    cv::Mat src = randomMat(size, depth);  // single-channel random input used as the Bayer pattern
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2GRAY);
+
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, cv::COLOR_BayerGB2GRAY);  // CPU result serves as the reference
+
+    EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2);  // compares the interior only (1-pixel border excluded), eps argument = 2
+}
+
+GPU_TEST_P(CvtColor, BayerRG2Gray)  // checks cv::gpu::cvtColor against cv::cvtColor for COLOR_BayerRG2GRAY
+{
+    if ((depth != CV_8U && depth != CV_16U) || useRoi)  // only 8U/16U inputs without ROI are exercised; other parameter combinations return early
+        return;
+
+    cv::Mat src = randomMat(size, depth);  // single-channel random input used as the Bayer pattern
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2GRAY);
+
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, cv::COLOR_BayerRG2GRAY);  // CPU result serves as the reference
+
+    EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2);  // compares the interior only (1-pixel border excluded), eps argument = 2
+}
+
+GPU_TEST_P(CvtColor, BayerGR2Gray)  // checks cv::gpu::cvtColor against cv::cvtColor for COLOR_BayerGR2GRAY
+{
+    if ((depth != CV_8U && depth != CV_16U) || useRoi)  // only 8U/16U inputs without ROI are exercised; other parameter combinations return early
+        return;
+
+    cv::Mat src = randomMat(size, depth);  // single-channel random input used as the Bayer pattern
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2GRAY);
+
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, cv::COLOR_BayerGR2GRAY);  // CPU result serves as the reference
+
+    EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2);  // compares the interior only (1-pixel border excluded), eps argument = 2
+}
+
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
@@ -2225,6 +2289,175 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine(
     WHOLE_SUBMAT));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
+// Demosaicing
+
+struct Demosaicing : testing::TestWithParam<cv::gpu::DeviceInfo>  // fixture for the demosaicing tests; parameterized by GPU device only
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    virtual void SetUp()  // selects the device under test before each test body runs
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+
+    static void mosaic(const cv::Mat_<cv::Vec3b>& src, cv::Mat_<uchar>& dst, cv::Point firstRed)  // builds a 1-channel mosaic from a 3-channel image: each pixel keeps one channel chosen by the parity of (x + firstRed.x, y + firstRed.y)
+    {
+        dst.create(src.size());
+
+        for (int y = 0; y < src.rows; ++y)
+        {
+            for (int x = 0; x < src.cols; ++x)
+            {
+                cv::Vec3b pix = src(y, x);
+
+                cv::Point alternate;  // position inside the 2x2 tile after shifting by firstRed; each coordinate is 0 or 1
+                alternate.x = (x + firstRed.x) % 2;
+                alternate.y = (y + firstRed.y) % 2;
+
+                if (alternate.y == 0)
+                {
+                    if (alternate.x == 0)
+                    {
+                        // RG
+                        // GB
+                        dst(y, x) = pix[2];  // tile position (0,0): channel 2 (red, presumably BGR pixel order - confirm against readImage)
+                    }
+                    else
+                    {
+                        // GR
+                        // BG
+                        dst(y, x) = pix[1];  // tile position (1,0): channel 1 (green)
+                    }
+                }
+                else
+                {
+                    if (alternate.x == 0)
+                    {
+                        // GB
+                        // RG
+                        dst(y, x) = pix[1];  // tile position (0,1): channel 1 (green)
+                    }
+                    else
+                    {
+                        // BG
+                        // GR
+                        dst(y, x) = pix[0];  // tile position (1,1): channel 0 (blue, under the same BGR assumption)
+                    }
+                }
+            }
+        }
+    }
+};
+
+GPU_TEST_P(Demosaicing, BayerBG2BGR)  // round trip: mosaic a color image, demosaic on the GPU with COLOR_BayerBG2BGR, compare with the original
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");  // NOTE(review): img is not checked for empty() before use
+
+    cv::Mat_<uchar> src;
+    mosaic(img, src, cv::Point(1, 1));  // firstRed = (1, 1) is the offset paired with the BG code in this test
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerBG2BGR);
+
+    EXPECT_MAT_SIMILAR(img, dst, 2e-2);  // similarity check against the un-mosaiced image, eps argument = 2e-2
+}
+
+GPU_TEST_P(Demosaicing, BayerGB2BGR)  // round trip: mosaic a color image, demosaic on the GPU with COLOR_BayerGB2BGR, compare with the original
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");  // NOTE(review): img is not checked for empty() before use
+
+    cv::Mat_<uchar> src;
+    mosaic(img, src, cv::Point(0, 1));  // firstRed = (0, 1) is the offset paired with the GB code in this test
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerGB2BGR);
+
+    EXPECT_MAT_SIMILAR(img, dst, 2e-2);  // similarity check against the un-mosaiced image, eps argument = 2e-2
+}
+
+GPU_TEST_P(Demosaicing, BayerRG2BGR)  // round trip: mosaic a color image, demosaic on the GPU with COLOR_BayerRG2BGR, compare with the original
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");  // NOTE(review): img is not checked for empty() before use
+
+    cv::Mat_<uchar> src;
+    mosaic(img, src, cv::Point(0, 0));  // firstRed = (0, 0) is the offset paired with the RG code in this test
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerRG2BGR);
+
+    EXPECT_MAT_SIMILAR(img, dst, 2e-2);  // similarity check against the un-mosaiced image, eps argument = 2e-2
+}
+
+GPU_TEST_P(Demosaicing, BayerGR2BGR)  // round trip: mosaic a color image, demosaic on the GPU with COLOR_BayerGR2BGR, compare with the original
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");  // NOTE(review): img is not checked for empty() before use
+
+    cv::Mat_<uchar> src;
+    mosaic(img, src, cv::Point(1, 0));  // firstRed = (1, 0) is the offset paired with the GR code in this test
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerGR2BGR);
+
+    EXPECT_MAT_SIMILAR(img, dst, 2e-2);  // similarity check against the un-mosaiced image, eps argument = 2e-2
+}
+
+GPU_TEST_P(Demosaicing, BayerBG2BGR_MHT)  // same round trip as BayerBG2BGR but with the GPU-only code cv::gpu::COLOR_BayerBG2BGR_MHT
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");  // NOTE(review): img is not checked for empty() before use
+
+    cv::Mat_<uchar> src;
+    mosaic(img, src, cv::Point(1, 1));  // firstRed = (1, 1) is the offset paired with the BG code in this test
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerBG2BGR_MHT);
+
+    EXPECT_MAT_SIMILAR(img, dst, 5e-3);  // eps argument 5e-3 is smaller than the 2e-2 used by the non-MHT tests
+}
+
+GPU_TEST_P(Demosaicing, BayerGB2BGR_MHT)  // same round trip as BayerGB2BGR but with the GPU-only code cv::gpu::COLOR_BayerGB2BGR_MHT
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");  // NOTE(review): img is not checked for empty() before use
+
+    cv::Mat_<uchar> src;
+    mosaic(img, src, cv::Point(0, 1));  // firstRed = (0, 1) is the offset paired with the GB code in this test
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerGB2BGR_MHT);
+
+    EXPECT_MAT_SIMILAR(img, dst, 5e-3);  // eps argument 5e-3 is smaller than the 2e-2 used by the non-MHT tests
+}
+
+GPU_TEST_P(Demosaicing, BayerRG2BGR_MHT)  // same round trip as BayerRG2BGR but with the GPU-only code cv::gpu::COLOR_BayerRG2BGR_MHT
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");  // NOTE(review): img is not checked for empty() before use
+
+    cv::Mat_<uchar> src;
+    mosaic(img, src, cv::Point(0, 0));  // firstRed = (0, 0) is the offset paired with the RG code in this test
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerRG2BGR_MHT);
+
+    EXPECT_MAT_SIMILAR(img, dst, 5e-3);  // eps argument 5e-3 is smaller than the 2e-2 used by the non-MHT tests
+}
+
+GPU_TEST_P(Demosaicing, BayerGR2BGR_MHT)  // same round trip as BayerGR2BGR but with the GPU-only code cv::gpu::COLOR_BayerGR2BGR_MHT
+{
+    cv::Mat img = readImage("stereobm/aloe-L.png");  // NOTE(review): img is not checked for empty() before use
+
+    cv::Mat_<uchar> src;
+    mosaic(img, src, cv::Point(1, 0));  // firstRed = (1, 0) is the offset paired with the GR code in this test
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerGR2BGR_MHT);
+
+    EXPECT_MAT_SIMILAR(img, dst, 5e-3);  // eps argument 5e-3 is smaller than the 2e-2 used by the non-MHT tests
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Demosaicing, ALL_DEVICES);  // instantiates every Demosaicing test once per value of ALL_DEVICES
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
 // swapChannels
 
 PARAM_TEST_CASE(SwapChannels, cv::gpu::DeviceInfo, cv::Size, UseRoi)
index 736256c..affc306 100644 (file)
@@ -1873,7 +1873,7 @@ PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channel
         cv::gpu::setDevice(devInfo.deviceID());
 
         src = randomMat(size, CV_MAKE_TYPE(depth, channels));
-        cv::Scalar_<int> ival = randomScalar(0.0, 255.0);
+        cv::Scalar_<int> ival = randomScalar(0.0, std::numeric_limits<int>::max());
         val = ival;
     }
 };
index 9044e5b..b348331 100644 (file)
@@ -130,7 +130,7 @@ GPU_TEST_P(HoughCircles, Accuracy)
     const bool useRoi = GET_PARAM(2);
 
     const float dp = 2.0f;
-    const float minDist = 10.0f;
+    const float minDist = 0.0f;
     const int minRadius = 10;
     const int maxRadius = 20;
     const int cannyThreshold = 100;
@@ -163,7 +163,7 @@ GPU_TEST_P(HoughCircles, Accuracy)
         {
             cv::Vec3f gold = circles_gold[j];
 
-            if (std::fabs(cur[0] - gold[0]) < minDist && std::fabs(cur[1] - gold[1]) < minDist && std::fabs(cur[2] - gold[2]) < minDist)
+            if (std::fabs(cur[0] - gold[0]) < 5 && std::fabs(cur[1] - gold[1]) < 5 && std::fabs(cur[2] - gold[2]) < 5)
             {
                 found = true;
                 break;
index de8bc5d..fe3a135 100644 (file)
@@ -252,6 +252,8 @@ PARAM_TEST_CASE(WarpAffineNPP, cv::gpu::DeviceInfo, MatType, Inverse, Interpolat
 GPU_TEST_P(WarpAffineNPP, Accuracy)
 {
     cv::Mat src = readImageType("stereobp/aloe-L.png", type);
+    ASSERT_FALSE(src.empty());
+
     cv::Mat M = createTransfomMatrix(src.size(), CV_PI / 4);
     int flags = interpolation;
     if (inverse)
index 534edc0..dd2054a 100644 (file)
@@ -255,6 +255,8 @@ PARAM_TEST_CASE(WarpPerspectiveNPP, cv::gpu::DeviceInfo, MatType, Inverse, Inter
 GPU_TEST_P(WarpPerspectiveNPP, Accuracy)
 {
     cv::Mat src = readImageType("stereobp/aloe-L.png", type);
+    ASSERT_FALSE(src.empty());
+
     cv::Mat M = createTransfomMatrix(src.size(), CV_PI / 4);
     int flags = interpolation;
     if (inverse)
index 8474d5e..7e5fae3 100644 (file)
@@ -282,7 +282,7 @@ if(WIN32 AND WITH_FFMPEG)
                        COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/Release/${ffmpeg_bare_name_ver}"
                        COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/Debug/${ffmpeg_bare_name_ver}"
                        COMMENT "Copying ${ffmpeg_path} to the output directory")
-  elseif(MSVC)
+  elseif(MSVC AND (CMAKE_GENERATOR MATCHES "Visual"))
     add_custom_command(TARGET ${the_module} POST_BUILD
                        COMMAND ${CMAKE_COMMAND} -E copy "${ffmpeg_path}" "${EXECUTABLE_OUTPUT_PATH}/${CMAKE_BUILD_TYPE}/${ffmpeg_bare_name_ver}"
                        COMMENT "Copying ${ffmpeg_path} to the output directory")
index 0e4f0a2..ba25af9 100644 (file)
@@ -309,8 +309,22 @@ enum
     // alpha premultiplication
     CV_RGBA2mRGBA = 125,
     CV_mRGBA2RGBA = 126,
-
-    CV_COLORCVT_MAX  = 127
+    
+    CV_RGB2YUV_I420 = 127,
+    CV_BGR2YUV_I420 = 128,
+    CV_RGB2YUV_IYUV = CV_RGB2YUV_I420,
+    CV_BGR2YUV_IYUV = CV_BGR2YUV_I420,
+
+    CV_RGBA2YUV_I420 = 129,
+    CV_BGRA2YUV_I420 = 130,
+    CV_RGBA2YUV_IYUV = CV_RGBA2YUV_I420,
+    CV_BGRA2YUV_IYUV = CV_BGRA2YUV_I420,
+    CV_RGB2YUV_YV12  = 131,
+    CV_BGR2YUV_YV12  = 132,
+    CV_RGBA2YUV_YV12 = 133,
+    CV_BGRA2YUV_YV12 = 134,
+
+    CV_COLORCVT_MAX  = 135
 };
 
 
index fd1d045..446212e 100644 (file)
@@ -115,6 +115,9 @@ CV_ENUM(CvtMode2, CV_YUV2BGR_NV12, CV_YUV2BGRA_NV12, CV_YUV2RGB_NV12, CV_YUV2RGB
                   COLOR_YUV2GRAY_420, CV_YUV2RGB_UYVY, CV_YUV2BGR_UYVY, CV_YUV2RGBA_UYVY, CV_YUV2BGRA_UYVY, CV_YUV2RGB_YUY2, CV_YUV2BGR_YUY2, CV_YUV2RGB_YVYU,
                   CV_YUV2BGR_YVYU, CV_YUV2RGBA_YUY2, CV_YUV2BGRA_YUY2, CV_YUV2RGBA_YVYU, CV_YUV2BGRA_YVYU)
 
+CV_ENUM(CvtMode3, CV_RGB2YUV_IYUV, CV_BGR2YUV_IYUV, CV_RGBA2YUV_IYUV, CV_BGRA2YUV_IYUV,  // perf-test parameter enum: the eight RGB(A)/BGR(A) -> planar YUV codes (IYUV on this line, YV12 on the next)
+                  CV_RGB2YUV_YV12, CV_BGR2YUV_YV12, CV_RGBA2YUV_YV12, CV_BGRA2YUV_YV12)
+
 struct ChPair
 {
     ChPair(int _scn, int _dcn): scn(_scn), dcn(_dcn) {}
@@ -162,6 +165,8 @@ ChPair getConversionInfo(int cvtMode)
     case CV_BGR5652BGRA: case CV_BGR5652RGBA:
         return ChPair(2,4);
     case CV_BGR2GRAY: case CV_RGB2GRAY:
+    case CV_RGB2YUV_IYUV: case CV_RGB2YUV_YV12:
+    case CV_BGR2YUV_IYUV: case CV_BGR2YUV_YV12:
         return ChPair(3,1);
     case CV_BGR2BGR555: case CV_BGR2BGR565:
     case CV_RGB2BGR555: case CV_RGB2BGR565:
@@ -204,6 +209,8 @@ ChPair getConversionInfo(int cvtMode)
     case CX_YUV2BGRA: case CX_YUV2RGBA:
         return ChPair(3,4);
     case CV_BGRA2GRAY: case CV_RGBA2GRAY:
+    case CV_RGBA2YUV_IYUV: case CV_RGBA2YUV_YV12:
+    case CV_BGRA2YUV_IYUV: case CV_BGRA2YUV_YV12:
         return ChPair(4,1);
     case CV_BGRA2BGR555: case CV_BGRA2BGR565:
     case CV_RGBA2BGR555: case CV_RGBA2BGR565:
@@ -306,3 +313,28 @@ PERF_TEST_P(Size_CvtMode2, cvtColorYUV420,
 
     SANITY_CHECK(dst, 1);
 }
+
+typedef std::tr1::tuple<Size, CvtMode3> Size_CvtMode3_t;  // parameter tuple: (image size, conversion code)
+typedef perf::TestBaseWithParam<Size_CvtMode3_t> Size_CvtMode3;  // perf fixture parameterized by that tuple
+
+PERF_TEST_P(Size_CvtMode3, cvtColorRGB2YUV420p,  // measures cvtColor for every CvtMode3 code over four image sizes
+            testing::Combine(
+                testing::Values(szVGA, sz720p, sz1080p, Size(130, 60)),  // all sizes here have even width and height
+                testing::ValuesIn(CvtMode3::all())
+                )
+            )
+{
+    Size sz = get<0>(GetParam());
+    int mode = get<1>(GetParam());
+    ChPair ch = getConversionInfo(mode);  // source/destination channel counts for this code
+
+    Mat src(sz, CV_8UC(ch.scn));
+    Mat dst(sz.height + sz.height / 2, sz.width, CV_8UC(ch.dcn));  // destination has 1.5x the source height
+
+    declare.time(100);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    TEST_CYCLE() cvtColor(src, dst, mode, ch.dcn);
+
+    SANITY_CHECK(dst, 1);
+}
index e85acea..3799d43 100644 (file)
@@ -2744,6 +2744,16 @@ const int ITUR_BT_601_CVG = -852492;
 const int ITUR_BT_601_CVR = 1673527;
 const int ITUR_BT_601_SHIFT = 20;
 
+// Coefficients for RGB to YUV420p conversion
+const int ITUR_BT_601_CRY =  269484;  // naming: C<source channel><target channel>; integer weights whose products are shifted right by ITUR_BT_601_SHIFT
+const int ITUR_BT_601_CGY =  528482;  // weight of G in Y
+const int ITUR_BT_601_CBY =  102760;  // weight of B in Y
+const int ITUR_BT_601_CRU = -155188;  // weight of R in U
+const int ITUR_BT_601_CGU = -305135;  // weight of G in U
+const int ITUR_BT_601_CBU =  460324;  // weight of B in U; RGB888toYUV420pInvoker also uses it as the weight of R in V
+const int ITUR_BT_601_CGV = -385875;  // weight of G in V
+const int ITUR_BT_601_CBV = -74448;  // weight of B in V
+
 template<int bIdx, int uIdx>
 struct YUV420sp2RGB888Invoker
 {
@@ -3076,6 +3086,84 @@ inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const ucha
         converter(BlockedRange(0, _dst.rows/2));
 }
 
+///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
+
+template<int bIdx>  // bIdx: offset of the blue channel inside a source pixel; red is read from offset 2 - bIdx
+struct RGB888toYUV420pInvoker: public ParallelLoopBody  // loop body converting an 8-bit 3/4-channel image into a planar Y + subsampled U/V layout
+{
+    RGB888toYUV420pInvoker( const Mat& src, Mat* dst, const int uIdx )  // keeps a reference to src and a pointer to dst; neither is copied
+        : src_(src),
+          dst_(dst),
+          uIdx_(uIdx) { }
+
+    void operator()(const Range& rowRange) const  // rowRange counts row PAIRS: index i covers source rows 2*i and 2*i + 1
+    {
+        const int w = src_.cols;
+        const int h = src_.rows;
+
+        const int cn = src_.channels();
+        for( int i = rowRange.start; i < rowRange.end; i++ )
+        {
+            const uchar* row0 = src_.ptr<uchar>(2 * i);
+            const uchar* row1 = src_.ptr<uchar>(2 * i + 1);
+
+            uchar* y = dst_->ptr<uchar>(2*i);  // luma goes to destination rows 2*i and 2*i + 1 (second row reached via dst_->step below)
+            uchar* u = dst_->ptr<uchar>(h + i/2) + (i % 2) * (w/2);  // chroma rows are w/2 bytes long, so two of them share one destination row below row h
+            uchar* v = dst_->ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2);  // second chroma plane begins h/2 chroma rows after the first
+            if( uIdx_ == 2 ) std::swap(u, v);  // uIdx_ == 2 exchanges the two chroma destinations
+
+            for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ )  // j: byte offset of the left pixel of each 2x2 block; k: index of the block in the row
+            {
+                int r00 = row0[2-bIdx + j];      int g00 = row0[1 + j];      int b00 = row0[bIdx + j];
+                int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j];
+                int r10 = row1[2-bIdx + j];      int g10 = row1[1 + j];      int b10 = row1[bIdx + j];
+                int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j];
+
+                const int shifted16 = (16 << ITUR_BT_601_SHIFT);  // luma offset of 16 expressed in the fixed-point scale
+                const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1));  // half of one unit in the fixed-point scale, added before the right shift
+                int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16;
+                int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16;
+                int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16;
+                int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16;
+
+                y[2*k + 0]            = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT);
+                y[2*k + 1]            = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT);
+                y[2*k + dst_->step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
+                y[2*k + dst_->step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);
+
+                const int shifted128 = (128 << ITUR_BT_601_SHIFT);  // chroma offset of 128 expressed in the fixed-point scale
+                int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128;  // chroma uses only the top-left pixel of the 2x2 block
+                int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128;  // CBU is reused as the weight of R in V (no separate CRV constant)
+
+                u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT);
+                v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT);
+            }
+        }
+    }
+
+    static bool isFit( const Mat& src )  // true when src has at least 320*240 elements; cvtRGBtoYUV420p uses it to decide whether to call parallel_for_
+    {
+        return (src.total() >= 320*240);
+    }
+
+private:
+    RGB888toYUV420pInvoker& operator=(const RGB888toYUV420pInvoker&);  // declared private with no definition in this block, so assignment is unavailable
+
+    const Mat& src_;
+    Mat* const dst_;
+    const int uIdx_;
+};
+
+template<int bIdx, int uIdx>  // bIdx: blue-channel offset in src; uIdx: forwarded to the invoker (2 swaps the chroma destinations)
+static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)  // runs RGB888toYUV420pInvoker over all src.rows/2 row pairs; dst is written in place and is not allocated here
+{
+    RGB888toYUV420pInvoker<bIdx> colorConverter(src, &dst, uIdx);
+    if( RGB888toYUV420pInvoker<bIdx>::isFit(src) )  // images with at least 320*240 elements go through parallel_for_
+        parallel_for_(Range(0, src.rows/2), colorConverter);
+    else
+        colorConverter(Range(0, src.rows/2));  // smaller images are converted by a direct call
+}
+
 ///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
 
 template<int bIdx, int uIdx, int yIdx>
@@ -3713,6 +3801,31 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                 src(Range(0, dstSz.height), Range::all()).copyTo(dst);
             }
             break;
+        case CV_RGB2YUV_YV12: case CV_BGR2YUV_YV12: case CV_RGBA2YUV_YV12: case CV_BGRA2YUV_YV12:
+        case CV_RGB2YUV_IYUV: case CV_BGR2YUV_IYUV: case CV_RGBA2YUV_IYUV: case CV_BGRA2YUV_IYUV:
+            {
+                if (dcn <= 0) dcn = 1;
+                const int bIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_BGR2YUV_YV12 || code == CV_BGRA2YUV_YV12) ? 0 : 2;
+                const int uIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_RGB2YUV_IYUV || code == CV_RGBA2YUV_IYUV) ? 1 : 2;
+
+                CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
+                CV_Assert( dcn == 1 );
+                CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
+
+                Size dstSz(sz.width, sz.height / 2 * 3);
+                _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
+                dst = _dst.getMat();
+
+                switch(bIdx + uIdx*10)
+                {
+                    case 10: cvtRGBtoYUV420p<0, 1>(src, dst); break;
+                    case 12: cvtRGBtoYUV420p<2, 1>(src, dst); break;
+                    case 20: cvtRGBtoYUV420p<0, 2>(src, dst); break;
+                    case 22: cvtRGBtoYUV420p<2, 2>(src, dst); break;
+                    default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
+                };
+            }
+            break;
         case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
         case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU:
         case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
@@ -3795,7 +3908,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                     CV_Error( CV_StsBadArg, "Unsupported image depth" );
                 }
             }
-            break;
+            break;   
         default:
             CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
     }
index 1a2ab01..61bc9bd 100644 (file)
@@ -30,6 +30,16 @@ public:
     static YUVreader* getReader(int code);
 };
 
+class RGBreader  // test-side interface for reading one pixel of an RGB-family source image as an RGB value
+{
+public:
+    virtual ~RGBreader() {}
+    virtual RGB read(const Mat& rgb, int row, int col) = 0;  // returns the pixel at (row, col) as an RGB value
+    virtual int channels() = 0;  // channel count of the source image this reader expects
+
+    static RGBreader* getReader(int code);  // returns a new reader for a conversion code, or 0 for codes it does not handle
+};
+
 class RGBwriter
 {
 public:
@@ -56,6 +66,21 @@ public:
     static GRAYwriter* getWriter(int code);
 };
 
+class YUVwriter  // test-side interface for storing one converted YUV value into a destination image
+{
+public:
+    virtual ~YUVwriter() {}
+
+    virtual void write(Mat& yuv, int row, int col, const YUV& val) = 0;  // stores val for image position (row, col)
+    virtual int channels() = 0;  // channel count of the destination image
+    virtual Size size(Size imgSize) = 0;  // destination Mat size needed for an image of imgSize
+
+    virtual bool requiresEvenHeight() { return true; }  // default: the format needs an even image height
+    virtual bool requiresEvenWidth() { return true; }  // default: the format needs an even image width
+
+    static YUVwriter* getWriter(int code);  // returns a new writer for a conversion code, or 0 for codes it does not handle
+};
+
 class RGB888Writer : public RGBwriter
 {
     void write(Mat& rgb, int row, int col, const RGB& val)
@@ -99,6 +124,42 @@ class BGRA8888Writer : public RGBwriter
     int channels() { return 4; }
 };
 
+class YUV420pWriter: public YUVwriter  // shared base of the two planar writers; write() is left to the subclasses
+{
+    int channels() { return 1; }  // destination is a single-channel image
+    Size size(Size imgSize) { return Size(imgSize.width, imgSize.height + imgSize.height/2); }  // same width, 1.5x the height
+};
+
+class YV12Writer: public YUV420pWriter  // planar writer that stores val[2] in the first chroma area and val[1] in the second
+{
+    void write(Mat& yuv, int row, int col, const YUV& val)
+    {
+        int h = yuv.rows * 2 / 3;  // height of the luma area: the first two thirds of the destination rows
+
+        yuv.ptr<uchar>(row)[col] = val[0];  // val[0] is written for every pixel
+        if( row % 2 == 0 && col % 2 == 0 )  // the other two components are written only for the top-left pixel of each 2x2 block
+        {
+            yuv.ptr<uchar>(h + row/4)[col/2 + ((row/2) % 2) * (yuv.cols/2)] = val[2];  // first chroma area: two half-width chroma rows per destination row
+            yuv.ptr<uchar>(h + (row/2 + h/2)/2)[col/2 + ((row/2 + h/2) % 2) * (yuv.cols/2)] = val[1];  // second chroma area, h/2 chroma rows further on
+        }
+    }
+};
+
+class I420Writer: public YUV420pWriter  // planar writer that stores val[1] in the first chroma area and val[2] in the second (the reverse of YV12Writer)
+{
+    void write(Mat& yuv, int row, int col, const YUV& val)
+    {
+        int h = yuv.rows * 2 / 3;  // height of the luma area: the first two thirds of the destination rows
+
+        yuv.ptr<uchar>(row)[col] = val[0];  // val[0] is written for every pixel
+        if( row % 2 == 0 && col % 2 == 0 )  // the other two components are written only for the top-left pixel of each 2x2 block
+        {
+            yuv.ptr<uchar>(h + row/4)[col/2 + ((row/2) % 2) * (yuv.cols/2)] = val[1];  // first chroma area: two half-width chroma rows per destination row
+            yuv.ptr<uchar>(h + (row/2 + h/2)/2)[col/2 + ((row/2 + h/2) % 2) * (yuv.cols/2)] = val[2];  // second chroma area, h/2 chroma rows further on
+        }
+    }
+};
+
 class YUV420Reader: public YUVreader
 {
     int channels() { return 1; }
@@ -212,6 +273,49 @@ class YUV888Reader : public YUVreader
     bool requiresEvenWidth() { return false; }
 };
 
+class RGB888Reader : public RGBreader  // reader for 3-channel images whose channel order already matches RGB
+{
+    RGB read(const Mat& rgb, int row, int col)
+    {
+        return rgb.at<RGB>(row, col);  // pixel is returned unchanged
+    }
+
+    int channels() { return 3; }
+};
+
+class BGR888Reader : public RGBreader  // reader for 3-channel images stored in the reverse channel order
+{
+    RGB read(const Mat& rgb, int row, int col)
+    {
+        RGB tmp = rgb.at<RGB>(row, col);
+        return RGB(tmp[2], tmp[1], tmp[0]);  // swaps the first and third channels
+    }
+
+    int channels() { return 3; }
+};
+
+class RGBA8888Reader : public RGBreader  // reader for 4-channel images; the fourth channel is ignored
+{
+    RGB read(const Mat& rgb, int row, int col)
+    {
+        Vec4b rgba = rgb.at<Vec4b>(row, col);
+        return RGB(rgba[0], rgba[1], rgba[2]);  // first three channels kept in order
+    }
+
+    int channels() { return 4; }
+};
+
+class BGRA8888Reader : public RGBreader  // reader for 4-channel images in reverse color order; the fourth channel is ignored
+{
+    RGB read(const Mat& rgb, int row, int col)
+    {
+        Vec4b rgba = rgb.at<Vec4b>(row, col);
+        return RGB(rgba[2], rgba[1], rgba[0]);  // first three channels returned in reverse order
+    }
+
+    int channels() { return 4; }
+};
+
 class YUV2RGB_Converter
 {
 public:
@@ -237,6 +341,23 @@ public:
     }
 };
 
+class RGB2YUV_Converter  // floating-point reference conversion used to build the expected ("gold") image in the tests
+{
+public:
+    YUV convert(RGB rgb)  // maps one RGB value to a YUV value; rgb[0], rgb[1], rgb[2] are taken as r, g, b
+    {
+        int r = rgb[0];
+        int g = rgb[1];
+        int b = rgb[2];
+        
+        uchar y = saturate_cast<uchar>((int)( 0.257f*r + 0.504f*g + 0.098f*b + 0.5f) + 16);  // weighted sum + 0.5, truncated by the int cast, then offset by 16
+        uchar u = saturate_cast<uchar>((int)(-0.148f*r - 0.291f*g + 0.439f*b + 0.5f) + 128);  // same scheme with offset 128; NOTE(review): the int cast truncates toward zero, so negative sums are not rounded to nearest
+        uchar v = saturate_cast<uchar>((int)( 0.439f*r - 0.368f*g - 0.071f*b + 0.5f) + 128);
+
+        return YUV(y, u, v);
+    }
+};
+
 YUVreader* YUVreader::getReader(int code)
 {
     switch(code)
@@ -295,6 +416,27 @@ YUVreader* YUVreader::getReader(int code)
     }
 }
 
+RGBreader* RGBreader::getReader(int code)  // factory: picks the reader by the source layout named in the conversion code; the result is allocated with new
+{
+    switch(code)
+    {
+    case CV_RGB2YUV_YV12:
+    case CV_RGB2YUV_I420:
+        return new RGB888Reader();
+    case CV_BGR2YUV_YV12:
+    case CV_BGR2YUV_I420:
+        return new BGR888Reader();
+    case CV_RGBA2YUV_I420:
+    case CV_RGBA2YUV_YV12:
+        return new RGBA8888Reader();
+    case CV_BGRA2YUV_YV12:
+    case CV_BGRA2YUV_I420:
+        return new BGRA8888Reader();
+    default:
+        return 0;  // any other code: no RGB reader applies
+    };
+}
+
 RGBwriter* RGBwriter::getWriter(int code)
 {
     switch(code)
@@ -355,6 +497,25 @@ GRAYwriter* GRAYwriter::getWriter(int code)
     }
 }
 
+YUVwriter* YUVwriter::getWriter(int code)  // factory: picks the writer by the destination layout named in the conversion code; the result is allocated with new
+{
+    switch(code)
+    {
+    case CV_RGB2YUV_YV12:
+    case CV_BGR2YUV_YV12:
+    case CV_RGBA2YUV_YV12:
+    case CV_BGRA2YUV_YV12:
+        return new YV12Writer();
+    case CV_RGB2YUV_I420:
+    case CV_BGR2YUV_I420:
+    case CV_RGBA2YUV_I420:
+    case CV_BGRA2YUV_I420:
+        return new I420Writer();
+    default:
+        return 0;  // any other code: no YUV writer applies
+    };
+}
+
 template<class convertor>
 void referenceYUV2RGB(const Mat& yuv, Mat& rgb, YUVreader* yuvReader, RGBwriter* rgbWriter)
 {
@@ -375,6 +536,64 @@ void referenceYUV2GRAY(const Mat& yuv, Mat& rgb, YUVreader* yuvReader, GRAYwrite
             grayWriter->write(rgb, row, col, cvt.convert(yuvReader->read(yuv, row, col)));
 }
 
+template<class convertor>  // convertor: default-constructible type providing convert(), e.g. RGB2YUV_Converter
+void referenceRGB2YUV(const Mat& rgb, Mat& yuv, RGBreader* rgbReader, YUVwriter* yuvWriter)  // reference conversion: reads, converts and writes every pixel of rgb one at a time; yuv must already have the writer's size
+{
+    convertor cvt;
+
+    for(int row = 0; row < rgb.rows; ++row)
+        for(int col = 0; col < rgb.cols; ++col)
+            yuvWriter->write(yuv, row, col, cvt.convert(rgbReader->read(rgb, row, col)));  // the writer decides which components it actually stores for this position
+}
+
+struct ConversionYUV  // bundles the reader/writer helpers for one conversion code so the tests can handle YUV->RGB, YUV->GRAY and RGB->YUV uniformly
+{
+    ConversionYUV( const int code )  // asks every factory for a helper; factories return 0 for codes they do not handle
+    {
+        yuvReader_  = YUVreader :: getReader(code);
+        yuvWriter_  = YUVwriter :: getWriter(code);
+        rgbReader_  = RGBreader :: getReader(code);
+        rgbWriter_  = RGBwriter :: getWriter(code);
+        grayWriter_ = GRAYwriter:: getWriter(code);
+    }
+
+    int getDcn()  // destination channel count from the first non-null writer (rgb, then gray, then yuv); dereferences yuvWriter_ if the other two are null
+    {
+        return (rgbWriter_ != 0) ? rgbWriter_->channels() : ((grayWriter_ != 0) ? grayWriter_->channels() : yuvWriter_->channels());
+    }
+
+    int getScn()  // source channel count from yuvReader_ if present, otherwise from rgbReader_ (dereferenced without a null check)
+    {
+        return (yuvReader_ != 0) ? yuvReader_->channels() : rgbReader_->channels();
+    }
+
+    Size getSrcSize( const Size& imgSize )  // source Mat size: the YUV reader's layout size, or imgSize itself for RGB sources
+    {
+        return (yuvReader_ != 0) ? yuvReader_->size(imgSize) : imgSize;
+    }
+
+    Size getDstSize( const Size& imgSize )  // destination Mat size: the YUV writer's layout size, or imgSize itself otherwise
+    {
+        return (yuvWriter_ != 0) ? yuvWriter_->size(imgSize) : imgSize;
+    }
+
+    bool requiresEvenHeight()  // taken from the YUV reader if present, else the YUV writer, else false
+    {
+        return (yuvReader_ != 0) ? yuvReader_->requiresEvenHeight() : ((yuvWriter_ != 0) ? yuvWriter_->requiresEvenHeight() : false);
+    }
+
+    bool requiresEvenWidth()  // taken from the YUV reader if present, else the YUV writer, else false
+    {
+        return (yuvReader_ != 0) ? yuvReader_->requiresEvenWidth() : ((yuvWriter_ != 0) ? yuvWriter_->requiresEvenWidth() : false);
+    }
+
+    YUVreader*  yuvReader_;  // NOTE(review): this struct has no destructor, so none of the five helpers is deleted here - confirm whether the leak is acceptable in tests
+    YUVwriter*  yuvWriter_;
+    RGBreader*  rgbReader_;
+    RGBwriter*  rgbWriter_;
+    GRAYwriter* grayWriter_;
+};
+
 CV_ENUM(YUVCVTS, CV_YUV2RGB_NV12, CV_YUV2BGR_NV12, CV_YUV2RGB_NV21, CV_YUV2BGR_NV21,
                  CV_YUV2RGBA_NV12, CV_YUV2BGRA_NV12, CV_YUV2RGBA_NV21, CV_YUV2BGRA_NV21,
                  CV_YUV2RGB_YV12, CV_YUV2BGR_YV12, CV_YUV2RGB_IYUV, CV_YUV2BGR_IYUV,
@@ -383,7 +602,8 @@ CV_ENUM(YUVCVTS, CV_YUV2RGB_NV12, CV_YUV2BGR_NV12, CV_YUV2RGB_NV21, CV_YUV2BGR_N
                  CV_YUV2RGB_YUY2, CV_YUV2BGR_YUY2, CV_YUV2RGB_YVYU, CV_YUV2BGR_YVYU,
                  CV_YUV2RGBA_YUY2, CV_YUV2BGRA_YUY2, CV_YUV2RGBA_YVYU, CV_YUV2BGRA_YVYU,
                  CV_YUV2GRAY_420, CV_YUV2GRAY_UYVY, CV_YUV2GRAY_YUY2,
-                 CV_YUV2BGR, CV_YUV2RGB);
+                 CV_YUV2BGR, CV_YUV2RGB, CV_RGB2YUV_YV12, CV_BGR2YUV_YV12, CV_RGBA2YUV_YV12,
+                 CV_BGRA2YUV_YV12, CV_RGB2YUV_I420, CV_BGR2YUV_I420, CV_RGBA2YUV_I420, CV_BGRA2YUV_I420);
 
 typedef ::testing::TestWithParam<YUVCVTS> Imgproc_ColorYUV;
 
@@ -392,31 +612,32 @@ TEST_P(Imgproc_ColorYUV, accuracy)
     int code = GetParam();
     RNG& random = theRNG();
 
-    YUVreader* yuvReader = YUVreader::getReader(code);
-    RGBwriter* rgbWriter = RGBwriter::getWriter(code);
-    GRAYwriter* grayWriter = GRAYwriter::getWriter(code);
-
-    int dcn = (rgbWriter == 0) ? grayWriter->channels() : rgbWriter->channels();
+    ConversionYUV cvt(code);
 
+    const int scn = cvt.getScn();
+    const int dcn = cvt.getDcn();
     for(int iter = 0; iter < 30; ++iter)
     {
         Size sz(random.uniform(1, 641), random.uniform(1, 481));
 
-        if(yuvReader->requiresEvenWidth()) sz.width += sz.width % 2;
-        if(yuvReader->requiresEvenHeight()) sz.height += sz.height % 2;
+        if(cvt.requiresEvenWidth())  sz.width  += sz.width % 2;
+        if(cvt.requiresEvenHeight()) sz.height += sz.height % 2;
 
-        Size ysz = yuvReader->size(sz);
-        Mat src = Mat(ysz.height, ysz.width * yuvReader->channels(), CV_8UC1).reshape(yuvReader->channels());
+        Size srcSize = cvt.getSrcSize(sz);
+        Mat src = Mat(srcSize.height, srcSize.width * scn, CV_8UC1).reshape(scn);
 
-        Mat dst = Mat(sz.height, sz.width * dcn, CV_8UC1).reshape(dcn);
-        Mat gold(sz, CV_8UC(dcn));
+        Size dstSize = cvt.getDstSize(sz);
+        Mat dst = Mat(dstSize.height, dstSize.width * dcn, CV_8UC1).reshape(dcn);
+        Mat gold(dstSize, CV_8UC(dcn));
 
         random.fill(src, RNG::UNIFORM, 0, 256);
 
-        if(rgbWriter)
-            referenceYUV2RGB<YUV2RGB_Converter>(src, gold, yuvReader, rgbWriter);
-        else
-            referenceYUV2GRAY<YUV2GRAY_Converter>(src, gold, yuvReader, grayWriter);
+        if(cvt.rgbWriter_)
+            referenceYUV2RGB<YUV2RGB_Converter>  (src, gold, cvt.yuvReader_, cvt.rgbWriter_);
+        else if(cvt.grayWriter_)
+            referenceYUV2GRAY<YUV2GRAY_Converter>(src, gold, cvt.yuvReader_, cvt.grayWriter_);
+        else if(cvt.yuvWriter_)
+            referenceRGB2YUV<RGB2YUV_Converter>  (src, gold, cvt.rgbReader_, cvt.yuvWriter_);
 
         cv::cvtColor(src, dst, code, -1);
 
@@ -429,40 +650,41 @@ TEST_P(Imgproc_ColorYUV, roi_accuracy)
     int code = GetParam();
     RNG& random = theRNG();
 
-    YUVreader* yuvReader = YUVreader::getReader(code);
-    RGBwriter* rgbWriter = RGBwriter::getWriter(code);
-    GRAYwriter* grayWriter = GRAYwriter::getWriter(code);
-
-    int dcn = (rgbWriter == 0) ? grayWriter->channels() : rgbWriter->channels();
+    ConversionYUV cvt(code);
 
+    const int scn = cvt.getScn();
+    const int dcn = cvt.getDcn();
     for(int iter = 0; iter < 30; ++iter)
     {
         Size sz(random.uniform(1, 641), random.uniform(1, 481));
 
-        if(yuvReader->requiresEvenWidth()) sz.width += sz.width % 2;
-        if(yuvReader->requiresEvenHeight()) sz.height += sz.height % 2;
+        if(cvt.requiresEvenWidth())  sz.width  += sz.width % 2;
+        if(cvt.requiresEvenHeight()) sz.height += sz.height % 2;
 
         int roi_offset_top = random.uniform(0, 6);
         int roi_offset_bottom = random.uniform(0, 6);
         int roi_offset_left = random.uniform(0, 6);
         int roi_offset_right = random.uniform(0, 6);
 
-        Size ysz = yuvReader->size(sz);
+        Size srcSize = cvt.getSrcSize(sz);
+        Mat src_full(srcSize.height + roi_offset_top + roi_offset_bottom, srcSize.width + roi_offset_left + roi_offset_right, CV_8UC(scn));
 
-        Mat src_full(ysz.height + roi_offset_top + roi_offset_bottom, ysz.width + roi_offset_left + roi_offset_right, CV_8UC(yuvReader->channels()));
-        Mat dst_full(sz.height  + roi_offset_left + roi_offset_right, sz.width + roi_offset_top + roi_offset_bottom, CV_8UC(dcn), Scalar::all(0));
+        Size dstSize = cvt.getDstSize(sz);
+        Mat dst_full(dstSize.height  + roi_offset_left + roi_offset_right, dstSize.width + roi_offset_top + roi_offset_bottom, CV_8UC(dcn), Scalar::all(0));
         Mat gold_full(dst_full.size(), CV_8UC(dcn), Scalar::all(0));
 
         random.fill(src_full, RNG::UNIFORM, 0, 256);
 
-        Mat src = src_full(Range(roi_offset_top, roi_offset_top + ysz.height), Range(roi_offset_left, roi_offset_left + ysz.width));
-        Mat dst = dst_full(Range(roi_offset_left, roi_offset_left + sz.height), Range(roi_offset_top, roi_offset_top + sz.width));
-        Mat gold = gold_full(Range(roi_offset_left, roi_offset_left + sz.height), Range(roi_offset_top, roi_offset_top + sz.width));
+        Mat src = src_full(Range(roi_offset_top, roi_offset_top + srcSize.height), Range(roi_offset_left, roi_offset_left + srcSize.width));
+        Mat dst = dst_full(Range(roi_offset_left, roi_offset_left + dstSize.height), Range(roi_offset_top, roi_offset_top + dstSize.width));
+        Mat gold = gold_full(Range(roi_offset_left, roi_offset_left + dstSize.height), Range(roi_offset_top, roi_offset_top + dstSize.width));
 
-        if(rgbWriter)
-            referenceYUV2RGB<YUV2RGB_Converter>(src, gold, yuvReader, rgbWriter);
-        else
-            referenceYUV2GRAY<YUV2GRAY_Converter>(src, gold, yuvReader, grayWriter);
+        if(cvt.rgbWriter_)
+            referenceYUV2RGB<YUV2RGB_Converter>  (src, gold, cvt.yuvReader_, cvt.rgbWriter_);
+        else if(cvt.grayWriter_)
+            referenceYUV2GRAY<YUV2GRAY_Converter>(src, gold, cvt.yuvReader_, cvt.grayWriter_);
+        else if(cvt.yuvWriter_)
+            referenceRGB2YUV<RGB2YUV_Converter>  (src, gold, cvt.rgbReader_, cvt.yuvWriter_);
 
         cv::cvtColor(src, dst, code, -1);
 
@@ -475,7 +697,9 @@ INSTANTIATE_TEST_CASE_P(cvt420, Imgproc_ColorYUV,
                       (int)CV_YUV2RGBA_NV12, (int)CV_YUV2BGRA_NV12, (int)CV_YUV2RGBA_NV21, (int)CV_YUV2BGRA_NV21,
                       (int)CV_YUV2RGB_YV12, (int)CV_YUV2BGR_YV12, (int)CV_YUV2RGB_IYUV, (int)CV_YUV2BGR_IYUV,
                       (int)CV_YUV2RGBA_YV12, (int)CV_YUV2BGRA_YV12, (int)CV_YUV2RGBA_IYUV, (int)CV_YUV2BGRA_IYUV,
-                      (int)CV_YUV2GRAY_420));
+                      (int)CV_YUV2GRAY_420, (int)CV_RGB2YUV_YV12, (int)CV_BGR2YUV_YV12, (int)CV_RGBA2YUV_YV12,
+                      (int)CV_BGRA2YUV_YV12, (int)CV_RGB2YUV_I420, (int)CV_BGR2YUV_I420, (int)CV_RGBA2YUV_I420,
+                      (int)CV_BGRA2YUV_I420));
 
 INSTANTIATE_TEST_CASE_P(cvt422, Imgproc_ColorYUV,
     ::testing::Values((int)CV_YUV2RGB_UYVY, (int)CV_YUV2BGR_UYVY, (int)CV_YUV2RGBA_UYVY, (int)CV_YUV2BGRA_UYVY,
index 2fc52a7..cf99226 100644 (file)
@@ -217,6 +217,12 @@ endif(ANDROID AND ANDROID_EXECUTABLE)
 
 set(step3_depends ${step2_depends} ${step3_input_files} ${copied_files})
 
+if(ANDROID)
+  set(LIB_NAME_SUFIX "")
+else()
+  set(LIB_NAME_SUFIX "${OPENCV_VERSION_MAJOR}${OPENCV_VERSION_MINOR}${OPENCV_VERSION_PATCH}")
+endif()
+
 # step 4: build jar
 if(ANDROID)
   set(JAR_FILE "${OpenCV_BINARY_DIR}/bin/classes.jar")
@@ -241,7 +247,7 @@ if(ANDROID)
                       )
   endif()
 else(ANDROID)
-  set(JAR_NAME opencv-${OPENCV_VERSION}.jar)
+  set(JAR_NAME opencv-${LIB_NAME_SUFIX}.jar)
   set(JAR_FILE "${OpenCV_BINARY_DIR}/bin/${JAR_NAME}")
   configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.xml.in" "${OpenCV_BINARY_DIR}/build.xml" IMMEDIATE @ONLY)
   list(APPEND step3_depends "${OpenCV_BINARY_DIR}/build.xml")
@@ -294,17 +300,15 @@ endif()
 
 # Additional target properties
 set_target_properties(${the_module} PROPERTIES
-    OUTPUT_NAME "${the_module}${OPENCV_DLLVERSION}"
-    DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+    OUTPUT_NAME "${the_module}${LIB_NAME_SUFIX}"
     ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
+    LIBRARY_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
     RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
     INSTALL_NAME_DIR ${OPENCV_LIB_INSTALL_PATH}
     LINK_INTERFACE_LIBRARIES ""
     )
 
-if(ANDROID)
-  set_target_properties(${the_module} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH})
-else()
+if(WIN32)
   set_target_properties(${the_module} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
 endif()
 
diff --git a/modules/java/android_lib/lint.xml b/modules/java/android_lib/lint.xml
new file mode 100644 (file)
index 0000000..e54ced1
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<lint>
+    <issue id="NewApi">
+        <ignore path="src\org\opencv\android\JavaCameraView.java" />
+    </issue>
+</lint>
\ No newline at end of file
index 0f3ba1d..c0da34f 100755 (executable)
@@ -559,6 +559,15 @@ func_arg_fix = {
     }, # '', i.e. no class
 } # func_arg_fix
 
+
+def getLibVersion(version_hpp_path):
+    version_file = open(version_hpp_path, "rt").read()
+    epoch = re.search("^W*#\W*define\W+CV_VERSION_EPOCH\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
+    major = re.search("^W*#\W*define\W+CV_VERSION_MAJOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
+    minor = re.search("^W*#\W*define\W+CV_VERSION_MINOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
+    revision = re.search("^W*#\W*define\W+CV_VERSION_REVISION\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
+    return (epoch, major, minor, revision)
+
 class ConstInfo(object):
     def __init__(self, cname, name, val, addedManually=False):
         self.cname = cname
@@ -721,13 +730,16 @@ $imports
 public class %(jc)s {
 """ % { 'm' : self.module, 'jc' : jname } )
 
-#        self.java_code[class_name]["jn_code"].write("""
-#    //
-#    // native stuff
-#    //
-#    static { System.loadLibrary("opencv_java"); }
-#""" )
-
+        if class_name == 'Core':
+            (epoch, major, minor, revision) = getLibVersion(
+                (os.path.dirname(__file__) or '.') + '/../../core/include/opencv2/core/version.hpp')
+            version_str    = '.'.join( (epoch, major, minor, revision) )
+            version_suffix =  ''.join( (epoch, major, minor) )
+            self.classes[class_name].imports.add("java.lang.String")
+            self.java_code[class_name]["j_code"].write("""
+    public static final String VERSION = "%(v)s", NATIVE_LIBRARY_NAME = "opencv_java%(vs)s";
+    public static final int VERSION_EPOCH = %(ep)s, VERSION_MAJOR = %(ma)s, VERSION_MINOR = %(mi)s, VERSION_REVISION = %(re)s;
+""" % { 'v' : version_str, 'vs' : version_suffix, 'ep' : epoch, 'ma' : major, 'mi' : minor, 're' : revision } )
 
 
     def add_class(self, decl):
index 2b25e1d..e76ac26 100644 (file)
@@ -15,6 +15,7 @@ import android.content.DialogInterface;
 import android.content.res.TypedArray;
 import android.graphics.Bitmap;
 import android.graphics.Canvas;
+import android.graphics.Rect;
 import android.util.AttributeSet;
 import android.util.Log;
 import android.view.SurfaceHolder;
@@ -44,6 +45,7 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
     protected int mFrameHeight;
     protected int mMaxHeight;
     protected int mMaxWidth;
+    protected float mScale = 0;
     protected int mPreviewFormat = Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA;
     protected int mCameraIndex = -1;
     protected boolean mEnabled;
@@ -156,9 +158,21 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
         private CvCameraViewListener mOldStyleListener;
     };
 
+    /**
+     * This interface is an abstract representation of a single camera frame passed to the onCameraFrame callback.
+     * Attention: do not use objects that implement this interface outside of the onCameraFrame callback!
+     */
     public interface CvCameraViewFrame {
-        public abstract Mat rgba();
-        public abstract Mat gray();
+
+        /**
+         * This method returns RGBA Mat with frame
+         */
+        public Mat rgba();
+
+        /**
+         * This method returns single channel gray scale Mat with frame
+         */
+        public Mat gray();
     };
 
     public void surfaceChanged(SurfaceHolder arg0, int arg1, int arg2, int arg3) {
@@ -377,7 +391,22 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
             Canvas canvas = getHolder().lockCanvas();
             if (canvas != null) {
                 canvas.drawColor(0, android.graphics.PorterDuff.Mode.CLEAR);
-                canvas.drawBitmap(mCacheBitmap, (canvas.getWidth() - mCacheBitmap.getWidth()) / 2, (canvas.getHeight() - mCacheBitmap.getHeight()) / 2, null);
+                Log.d(TAG, "mStretch value: " + mScale);
+
+                if (mScale != 0) {
+                    canvas.drawBitmap(mCacheBitmap, new Rect(0,0,mCacheBitmap.getWidth(), mCacheBitmap.getHeight()),
+                         new Rect((int)((canvas.getWidth() - mScale*mCacheBitmap.getWidth()) / 2),
+                         (int)((canvas.getHeight() - mScale*mCacheBitmap.getHeight()) / 2),
+                         (int)((canvas.getWidth() - mScale*mCacheBitmap.getWidth()) / 2 + mScale*mCacheBitmap.getWidth()),
+                         (int)((canvas.getHeight() - mScale*mCacheBitmap.getHeight()) / 2 + mScale*mCacheBitmap.getHeight())), null);
+                } else {
+                     canvas.drawBitmap(mCacheBitmap, new Rect(0,0,mCacheBitmap.getWidth(), mCacheBitmap.getHeight()),
+                         new Rect((canvas.getWidth() - mCacheBitmap.getWidth()) / 2,
+                         (canvas.getHeight() - mCacheBitmap.getHeight()) / 2,
+                         (canvas.getWidth() - mCacheBitmap.getWidth()) / 2 + mCacheBitmap.getWidth(),
+                         (canvas.getHeight() - mCacheBitmap.getHeight()) / 2 + mCacheBitmap.getHeight()), null);
+                }
+
                 if (mFpsMeter != null) {
                     mFpsMeter.measure();
                     mFpsMeter.draw(canvas, 20, 30);
index 818be7f..34fe609 100644 (file)
@@ -2,7 +2,6 @@ package org.opencv.android;
 
 import java.util.List;
 
-import android.annotation.TargetApi;
 import android.content.Context;
 import android.graphics.ImageFormat;
 import android.graphics.SurfaceTexture;
@@ -11,7 +10,7 @@ import android.hardware.Camera.PreviewCallback;
 import android.os.Build;
 import android.util.AttributeSet;
 import android.util.Log;
-import android.view.SurfaceHolder;
+import android.view.ViewGroup.LayoutParams;
 
 import org.opencv.core.CvType;
 import org.opencv.core.Mat;
@@ -64,7 +63,6 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
         Log.d(TAG, "Java camera view ctor");
     }
 
-    @TargetApi(11)
     protected boolean initializeCamera(int width, int height) {
         Log.d(TAG, "Initialize java camera");
         boolean result = true;
@@ -133,6 +131,11 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
                     mFrameWidth = params.getPreviewSize().width;
                     mFrameHeight = params.getPreviewSize().height;
 
+                    if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
+                        mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
+                    else
+                        mScale = 0;
+
                     if (mFpsMeter != null) {
                         mFpsMeter.setResolution(mFrameWidth, mFrameHeight);
                     }
@@ -154,7 +157,6 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
 
                     if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.HONEYCOMB) {
                         mSurfaceTexture = new SurfaceTexture(MAGIC_TEXTURE_ID);
-                        getHolder().setType(SurfaceHolder.SURFACE_TYPE_PUSH_BUFFERS);
                         mCamera.setPreviewTexture(mSurfaceTexture);
                     } else
                        mCamera.setPreviewDisplay(null);
@@ -234,7 +236,6 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
         releaseCamera();
     }
 
-    @TargetApi(Build.VERSION_CODES.FROYO)
     public void onPreviewFrame(byte[] frame, Camera arg1) {
         Log.i(TAG, "Preview Frame received. Need to create MAT and deliver it to clients");
         Log.i(TAG, "Frame size  is " + frame.length);
index 3802167..496ed53 100644 (file)
@@ -8,6 +8,7 @@ import org.opencv.highgui.VideoCapture;
 import android.content.Context;
 import android.util.AttributeSet;
 import android.util.Log;
+import android.view.ViewGroup.LayoutParams;
 
 /**
  * This class is an implementation of a bridge between SurfaceView and native OpenCV camera.
@@ -102,6 +103,11 @@ public class NativeCameraView extends CameraBridgeViewBase {
             mFrameWidth = (int)frameSize.width;
             mFrameHeight = (int)frameSize.height;
 
+            if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
+                mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
+            else
+                mScale = 0;
+
             if (mFpsMeter != null) {
                 mFpsMeter.setResolution(mFrameWidth, mFrameHeight);
             }
index d0e254d..a9ec2f4 100644 (file)
@@ -4,7 +4,7 @@ if(NOT HAVE_OPENCL)
 endif()
 
 set(the_description "OpenCL-accelerated Computer Vision")
-ocv_add_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video)
+ocv_add_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_nonfree)
 
 ocv_module_include_directories()
 
index 006b439..5da848f 100644 (file)
@@ -6,13 +6,23 @@ OpenCL Module Introduction
 General Information
 -------------------
 
-The OpenCV OCL module is a set of classes and functions to utilize OpenCL compatible device. In theroy, it supports any OpenCL 1.1 compatible device, but we only test it on AMD's, Intel's and NVIDIA's GPU at this stage. The compatibility, correctness and performance on CPU is not guaranteed. The OpenCV OCL module includes utility functions, low-level vision primitives, and high-level algorithms. The utility functions and low-level primitives provide a powerful infrastructure for developing fast vision algorithms taking advangtage of OCL whereas the high-level functionality includes some state-of-the-art algorithms(such as surf detector, face detector) ready to be used by the application developers.
+The OpenCV OCL module contains a set of classes and functions that implement and accelerate select OpenCV functionality on OpenCL-compatible devices. OpenCL is a Khronos standard, implemented by a variety of devices (CPUs, GPUs, FPGAs, ARM), abstracting the exact hardware details, while enabling vendors to provide native implementation for maximal acceleration on their hardware. The standard enjoys wide industry support, and the end user of the module will enjoy the data parallelism benefits that the specific platform/hardware may be capable of, in a platform/hardware independent manner.
+
+While in the future we hope to validate (and enable) the OCL module on all OpenCL-capable devices, we currently develop and test on GPU devices only. This includes both discrete GPUs (NVidia, AMD), as well as integrated chips (AMD APU and Intel HD devices). Performance of any particular algorithm will depend on the particular platform characteristics and capabilities. However, currently (as of 2.4.4), accuracy and mathematical correctness have been verified to be identical to that of the pure CPU implementation on all tested GPU devices and platforms (both Windows and Linux).
+
+
+The OpenCV OCL module includes utility functions, low-level vision primitives, and high-level algorithms. The utility functions and low-level primitives provide a powerful infrastructure for developing fast vision algorithms taking advantage of OCL, whereas the high-level functionality (samples) includes some state-of-the-art algorithms (including LK optical flow and face detection) ready to be used by the application developers. The module is also accompanied by an extensive performance and accuracy test suite.
+
+The OpenCV OCL module is designed for ease of use and does not require any knowledge of OpenCL. At a minimal level, it can be viewed as a set of accelerators that can take advantage of the high compute throughput that GPU/APU devices can provide. However, it can also be viewed as a starting point to really integrate the built-in functionality with your own custom OpenCL kernels, with or without modifying the source of OpenCV-OCL. Of course, knowledge of OpenCL will certainly help; however, we hope that the OpenCV-OCL module, and the kernels it contains in source code, can be very useful as a means of actually learning OpenCL. Such knowledge would be necessary to further fine-tune any of the existing OpenCL kernels, or for extending the framework with new kernels. As of OpenCV 2.4.4, we introduce interoperability with OpenCL, enabling easy use of custom OpenCL kernels within the OpenCV framework.
+
+To use the OCL module, you need to make sure that you have the OpenCL SDK provided by your device vendor. To correctly run the OCL module, you need to have the OpenCL runtime provided by the device vendor, typically the device driver.
+
+To enable OCL support, configure OpenCV using CMake with WITH\_OPENCL=ON. When the flag is set and if the OpenCL SDK is installed, the full-featured OpenCV OCL module is built. Otherwise, the module may not be built. If you have AMD's FFT and BLAS libraries, you can select them with WITH\_OPENCLAMDFFT=ON, WITH\_OPENCLAMDBLAS=ON.
+
+The ocl module can be found under the "modules" directory. In "modules/ocl/src" you can find the source code for the C++ classes that wrap around the direct kernel invocation. The kernels themselves can be found in "modules/ocl/src/kernels."  Samples can be found under "samples/ocl." Accuracy tests can be found in "modules/ocl/test," and performance tests under "modules/ocl/perf."
 
-The OpenCV OCL module is designed as a host-level API plus device-level kernels. The device-level kernels are collected as strings at OpenCV compile time and are compiled at runtime, so it need OpenCL runtime support. To correctly build the OpenCV OCL module, make sure you have OpenCL SDK provided your device vendor. To correctly run the OpenCV OCL module, make sure you have OpenCL runtime provided by your device vendor, which is device driver normally.
 
-The OpenCV OCL module is designed for ease of use and does not require any knowledge of OpenCL. Though, such a knowledge will certainly be useful to handle non-trivial cases or achieve the highest performance. It is helpful to understand the cost of various operations, what the OCL does, what the preferred data formats are, and so on. Since there is data transfer between OpenCL host and OpenCL device, for better performance it's recommended to copy data once to the OpenCL host memory (i.e. copy ``cv::Mat`` to ``cv::ocl::OclMat``), then call several ``cv::ocl`` functions and then copy the result back to CPU memory, rather than do forward and backward transfer for each OCL function.
 
-To enable OCL support, configure OpenCV using CMake with WIHT\_OPENCL=ON. When the flag is set and if OpenCL SDK is installed, the full-featured OpenCV OCL module is built. Otherwise, the module may be not built. If you have AMD'S FFT and BLAS library, you can select it with WITH\_OPENCLAMDFFT=ON, WIHT\_OPENCLAMDBLAS=ON.
 
 Right now, the user should define the cv::ocl::Info class in the application and call cv::ocl::getDevice before any cv::ocl::func. This operation initialize OpenCL runtime and set the first found device as computing device. If there are more than one device and you want to use undefault device, you can call cv::ocl::setDevice then.
 
@@ -21,24 +31,28 @@ In the current version, all the thread share the same context and device so the
 Developer Notes
 -------------------
 
-This section descripe the design details of ocl module for who are interested in the detail of this module or want to contribute this module. User who isn't interested the details, can safely ignore it.
+In a heterogeneous device environment, there may be cost associated with data transfer. This would be the case, for example, when data needs to be moved from host memory (accessible to the CPU), to device memory (accessible to a discrete GPU). In the case of integrated graphics chips, there may be performance issues, relating to memory coherency between access from the GPU "part" of the integrated device, or the CPU "part." For best performance, in either case, it is recommended that you do not introduce data transfers between CPU and the discrete GPU, except in the beginning and the end of the algorithmic pipeline.
+
+Some tidbits:
 
 1. OpenCL version should be larger than 1.1 with FULL PROFILE.
 
-2. There's only one OpenCL context and commandqueue and generated as a singleton. So now it only support one device with single commandqueue.
+2. Currently (2.4.4) the user should call cv::ocl::getDevice before any other function in the ocl module. This will initialize the OpenCL runtime and set the first device found as the computing device. If there is more than one device and you want to use a non-default device, you can call cv::ocl::setDevice thereafter.
+
+2. Currently there's only one OpenCL context and command queue. We hope to implement multi device and multi queue support in the future.
 
-3. All the functions use 256 as its workgroup size if possible, so the max work group size of the device must larger than 256.
+3. Many kernels use 256 as its workgroup size if possible, so the max work group size of the device must larger than 256. All GPU devices we are aware of indeed support 256 workitems in a workgroup, however non GPU devices may not. This will be improved in the future.
 
-4. If the device support double, we will use double in kernel if OpenCV cpu version use double, otherwise, we use float instead.
+4. If the device does not support double arithmetic, we revert to float.
 
-5. The oclMat use buffer object, not image object.
+5. The oclMat uses buffer object, not image object.
 
-6. All the 3-channel matrix(i.e. RGB image) are represented by 4-channel matrix in oclMat. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.
+6. All the 3-channel matrices(i.e. RGB image) are represented by 4-channel matrices in oclMat, with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.
 
 7. All the matrix in oclMat is aligned in column(now the alignment factor is 32 byte). It means, if a matrix is n columns m rows with the element size x byte, we will assign ALIGNMENT(x*n) bytes for each column with the last ALIGNMENT(x*n) - x*n bytes unused, so there's small holes after each column if its size is not the multiply of ALIGN.
 
-8. Data transfer between Mat and oclMat. If the CPU matrix is aligned in column, we will use faster API to transfer between Mat and oclMat, otherwise, we will use clEnqueueRead/WriteBufferRect to transfer data to guarantee the alignment. 3-channel matrix is an exception, it's directly transferred to a temp buffer and then padded to 4-channel matrix(also aligned) when uploading and do the reverse operation when downloading.
+8. Data transfer between Mat and oclMat: If the CPU matrix is aligned in column, we will use faster API to transfer between Mat and oclMat, otherwise, we will use clEnqueueRead/WriteBufferRect to transfer data to guarantee the alignment. 3-channel matrix is an exception, it's directly transferred to a temp buffer and then padded to 4-channel matrix(also aligned) when uploading and do the reverse operation when downloading.
 
-9. Data transfer between Mat and oclMat. ROI is a feature of OpenCV, which allow users process a sub rectangle of a matrix. When a CPU matrix which has ROI will be transfered to GPU, the whole matrix will be transfered and set ROI as CPU's. In a word, we always transfer the whole matrix despite whether it has ROI or not.
+9. Data transfer between Mat and oclMat: ROI is a feature of OpenCV which allows users to process a sub-rectangle of a matrix. When a CPU matrix that has an ROI is transferred to the GPU, the whole matrix is transferred and the ROI is set to match the CPU's. In short, we always transfer the whole matrix regardless of whether it has an ROI or not.
 
 10. All the kernel file should locate in ocl/src/kernels/ with the extension ".cl". ALL the kernel files are transformed to pure characters at compilation time in kernels.cpp, and the file name without extension is the name of the characters.
index a60eb36..4c2d54f 100644 (file)
@@ -66,6 +66,32 @@ namespace cv
             //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
             CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
         };
+
+        enum DevMemRW
+        {
+            DEVICE_MEM_R_W = 0, 
+            DEVICE_MEM_R_ONLY, 
+            DEVICE_MEM_W_ONLY
+        };
+        enum DevMemType
+        { 
+            DEVICE_MEM_DEFAULT = 0, 
+            DEVICE_MEM_AHP,         //alloc host pointer
+            DEVICE_MEM_UHP,         //use host pointer
+            DEVICE_MEM_CHP,         //copy host pointer
+            DEVICE_MEM_PM           //persistent memory
+        };
+
+        //Get the global device memory and read/write type     
+        //return 1 if unified memory system supported, otherwise return 0
+        CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
+
+        //Set the global device memory and read/write type, 
+        //the newly generated oclMat will all use this type
+        //return -1 if the target type is unsupported, otherwise return 0
+        CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT); 
+
         //this class contains ocl runtime information
         class CV_EXPORTS Info
         {
@@ -228,6 +254,11 @@ namespace cv
             // previous data is unreferenced if needed.
             void create(int rows, int cols, int type);
             void create(Size size, int type);
+
+            //! allocates new oclMatrix with specified device memory type.
+            void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
+            void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
+
             //! decreases reference counter;
             // deallocate the data when reference counter reaches 0.
             void release();
@@ -1773,6 +1804,42 @@ namespace cv
                                           const oclMat &bu, const oclMat &bv,
                                           float pos, oclMat &newFrame, oclMat &buf);
 
+        //! computes moments of the rasterized shape or a vector of points
+        CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
+
+        class CV_EXPORTS StereoBM_OCL
+        {
+        public:
+            enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
+
+            enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
+
+            //! the default constructor
+            StereoBM_OCL();
+            //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
+            StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
+
+            //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
+            //! Output disparity has CV_8U type.
+            void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
+
+            //! A heuristic that tries to estimate
+            // whether the current GPU will be faster than the CPU for this algorithm.
+            // It queries current active device.
+            static bool checkIfGpuCallReasonable();
+
+            int preset;
+            int ndisp;
+            int winSize;
+
+            // If avergeTexThreshold == 0, post-processing is disabled.
+            // If avergeTexThreshold != 0, disparity is set to 0 at each point (x,y) of the left image where
+            // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold,
+            // i.e. where the input left image is low-textured.
+            float avergeTexThreshold;
+        private:
+            oclMat minSSD, leBuf, riBuf;
+        };
     }
 }
 #if defined _MSC_VER && _MSC_VER >= 1200
index 9f1dfa3..b7f82b6 100644 (file)
@@ -4317,11 +4317,11 @@ INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(
                             Values(false))); // Values(false) is the reserved parameter
 
 INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(
-                            Values(CV_32FC1, CV_64FC1),
+                            Values(CV_32FC1, CV_32FC1),
                             Values(false))); // Values(false) is the reserved parameter
 
 INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(
-                            Values(CV_32FC1, CV_64FC1),
+                            Values(CV_32FC1, CV_32FC1),
                             Values(false))); // Values(false) is the reserved parameter
 
 INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
index a7e4fd9..4e2c819 100644 (file)
@@ -2123,12 +2123,16 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
                               };
 
     int dst_step1 = dst.cols * dst.elemSize();
+    int src1_step = (int) src1.step;
+    int src2_step = (int) src2.step;
+    int dst_step  = (int) dst.step;
+    float alpha_f = alpha, beta_f = beta, gama_f = gama;
     vector<pair<size_t , const void *> > args;
     args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset));
 
     if(src1.clCxt -> impl -> double_support != 0)
@@ -2139,14 +2143,13 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
     }
     else
     {
-        float alpha_f = alpha, beta_f = beta, gama_f = gama;
         args.push_back( make_pair( sizeof(cl_float), (void *)&alpha_f ));
         args.push_back( make_pair( sizeof(cl_float), (void *)&beta_f ));
         args.push_back( make_pair( sizeof(cl_float), (void *)&gama_f ));
     }
 
     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
     args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
index 5eead47..ed4a400 100644 (file)
@@ -74,7 +74,7 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &
     size_t localSize[] = {256, 1, 1};
 
     vector< pair<size_t, const void *> > args;
-
+    result.create(img1.size(), CV_MAKE_TYPE(depth,img1.channels()));
     if(globalSize[0] != 0)
     {
         args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
index c81e342..818f3c1 100644 (file)
@@ -51,7 +51,6 @@ using namespace cv;
 using namespace cv::ocl;
 using namespace std;
 
-using namespace std;
 namespace cv
 {
     namespace ocl
@@ -62,7 +61,7 @@ namespace cv
 }
 
 template < int BLOCK_SIZE, int MAX_DESC_LEN,  typename T/*, typename Mask*/ >
-void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask,
+void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
                          const oclMat &trainIdx, const oclMat &distance, int distType)
 {
     cv::ocl::Context *ctx = query.clCxt;
@@ -77,7 +76,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
     {
         args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
         args.push_back( make_pair( smemSize, (void *)NULL));
@@ -103,7 +102,7 @@ void matchUnrolledCached(const oclMat /*query*/, const oclMat * /*trains*/, int
 }
 
 template < int BLOCK_SIZE,  typename T/*, typename Mask*/ >
-void match(const oclMat &query, const oclMat &train, const oclMat &mask,
+void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
            const oclMat &trainIdx, const oclMat &distance, int distType)
 {
     cv::ocl::Context *ctx = query.clCxt;
@@ -117,7 +116,7 @@ void match(const oclMat &query, const oclMat &train, const oclMat &mask,
     {
         args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
         args.push_back( make_pair( smemSize, (void *)NULL));
@@ -143,7 +142,7 @@ void match(const oclMat /*query*/, const oclMat * /*trains*/, int /*n*/, const o
 
 //radius_matchUnrolledCached
 template < int BLOCK_SIZE, int MAX_DESC_LEN,  typename T/*, typename Mask*/ >
-void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
+void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
                          const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
 {
     cv::ocl::Context *ctx = query.clCxt;
@@ -159,7 +158,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
         args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
         args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance ));
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
@@ -183,7 +182,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
 
 //radius_match
 template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
-void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
+void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
                   const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
 {
     cv::ocl::Context *ctx = query.clCxt;
@@ -198,7 +197,7 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
         args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
         args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance ));
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
@@ -472,7 +471,7 @@ void matchDispatcher(const oclMat &query, const oclMat &train, int n, float maxD
 
 //knn match Dispatcher
 template < int BLOCK_SIZE, int MAX_DESC_LEN,  typename T/*, typename Mask*/ >
-void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask,
+void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
                              const oclMat &trainIdx, const oclMat &distance, int distType)
 {
     cv::ocl::Context *ctx = query.clCxt;
@@ -487,7 +486,7 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
     {
         args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
         args.push_back( make_pair( smemSize, (void *)NULL));
@@ -507,7 +506,7 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
 }
 
 template < int BLOCK_SIZE,  typename T/*, typename Mask*/ >
-void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
+void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
                const oclMat &trainIdx, const oclMat &distance, int distType)
 {
     cv::ocl::Context *ctx = query.clCxt;
@@ -521,7 +520,7 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
     {
         args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
         args.push_back( make_pair( smemSize, (void *)NULL));
@@ -540,7 +539,7 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
 }
 
 template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
-void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType)
+void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
 {
     cv::ocl::Context *ctx = query.clCxt;
     size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
@@ -554,7 +553,7 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
     {
         args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
         args.push_back( make_pair( smemSize, (void *)NULL));
         args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
@@ -573,7 +572,7 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
 }
 
 template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
-void calcDistance(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType)
+void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, const oclMat &allDist, int distType)
 {
     cv::ocl::Context *ctx = query.clCxt;
     size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
@@ -586,7 +585,7 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &mask,
     {
         args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+        //args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
         args.push_back( make_pair( smemSize, (void *)NULL));
         args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
@@ -1007,6 +1006,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, cons
 
 void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, vector<DMatch> &matches, const oclMat &mask)
 {
+       assert(mask.empty()); // mask is not supported at the moment
     oclMat trainIdx, distance;
     matchSingle(query, train, trainIdx, distance, mask);
     matchDownload(trainIdx, distance, matches);
@@ -1696,4 +1696,6 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, vecto
     oclMat trainIdx, imgIdx, distance, nMatches;
     radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
     radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
-}
\ No newline at end of file
+}
+
+
index 300ae60..aab2a04 100644 (file)
@@ -54,6 +54,10 @@ void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
 {
     CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
 }
+namespace cv { namespace ocl {
+    void fft_teardown();
+}}
+void cv::ocl::fft_teardown(){}
 #else
 #include "clAmdFft.h"
 namespace cv
index 6697c95..e229fab 100644 (file)
@@ -195,7 +195,7 @@ public:
 
 namespace
 {
-typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, bool rectKernel, bool usrROI);
+typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, bool rectKernel);
 
 class MorphFilter_GPU : public BaseFilter_GPU
 {
@@ -205,7 +205,7 @@ public:
 
     virtual void operator()(const oclMat &src, oclMat &dst)
     {
-        func(src, dst, kernel, ksize, anchor, rectKernel, false) ;
+        func(src, dst, kernel, ksize, anchor, rectKernel) ;
     }
 
     oclMat kernel;
@@ -220,7 +220,7 @@ public:
 **Note that the kernel need to be further refined.
 */
 static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, 
-                         Size &ksize, const Point anchor, bool rectKernel, bool useROI)
+                         Size &ksize, const Point anchor, bool rectKernel)
 {
     //Normalize the result by default
     //float alpha = ksize.height * ksize.width;
@@ -276,11 +276,10 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
     }
 
     char compile_option[128];
-    sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s %s", 
+    sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s", 
         anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], 
-           rectKernel?"-D RECTKERNEL":"",
-           useROI?"-D USEROI":"",
-           s);
+        rectKernel?"-D RECTKERNEL":"",
+        s);
     vector< pair<size_t, const void *> > args;
     args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
     args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
@@ -300,7 +299,7 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
 
 //! data type supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4
 static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, 
-                          Size &ksize, const Point anchor, bool rectKernel, bool useROI)
+                          Size &ksize, const Point anchor, bool rectKernel)
 {
     //Normalize the result by default
     //float alpha = ksize.height * ksize.width;
@@ -357,10 +356,9 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
     }
 
     char compile_option[128];
-    sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s %s", 
+    sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s", 
         anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], 
         rectKernel?"-D RECTKERNEL":"",
-        useROI?"-D USEROI":"",
         s);
     vector< pair<size_t, const void *> > args;
     args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
index 8fbada1..9b6cf74 100644 (file)
@@ -289,13 +289,14 @@ namespace cv
                 args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
                 args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
                 args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
-                if(src.clCxt -> impl -> double_support != 0)
+                float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
+               if(src.clCxt -> impl -> double_support != 0)
                 {
                     args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
                 }
                 else
                 {
-                    float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
                     args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
                 }
             }
index c90b62b..5930562 100644 (file)
@@ -58,13 +58,16 @@ using std::endl;
 
 //#define PRINT_KERNEL_RUN_TIME
 #define RUN_TIMES 100
-
+#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD
+#define CL_MEM_USE_PERSISTENT_MEM_AMD 0
+#endif
 //#define AMD_DOUBLE_DIFFER
 
 namespace cv
 {
     namespace ocl
     {
+        extern void fft_teardown();
         /*
          * The binary caching system to eliminate redundant program source compilation.
          * Strictly, this is not a cache because we do not implement evictions right now.
@@ -72,6 +75,15 @@ namespace cv
          */
         auto_ptr<ProgramCache> ProgramCache::programCache;
         ProgramCache *programCache = NULL;
+        DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
+        DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
+        int gDevMemTypeValueMap[5] = {0, 
+                                      CL_MEM_ALLOC_HOST_PTR,
+                                      CL_MEM_USE_HOST_PTR,
+                                      CL_MEM_COPY_HOST_PTR,
+                                      CL_MEM_USE_PERSISTENT_MEM_AMD};
+        int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
+
         ProgramCache::ProgramCache()
         {
             codeCache.clear();
@@ -113,30 +125,25 @@ namespace cv
         }
 
         ////////////////////////Common OpenCL specific calls///////////////
-        //Info::Info()
-        //{
-        //     oclplatform = 0;
-        //     oclcontext = 0;
-        //     devnum = 0;
-        //}
-        //Info::~Info()
-        //{
-        //     release();
-        //}
-        //void Info::release()
-        //{
-        //     if(oclplatform)
-        //     {
-        //             oclplatform = 0;
-        //     }
-        //     if(oclcontext)
-        //     {
-        //             openCLSafeCall(clReleaseContext(oclcontext));
-        //     }
-        //     devices.empty();
-        //     devName.empty();
-        //}
-        struct Info::Impl
+        int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type)
+        { 
+            rw_type = gDeviceMemRW; 
+            mem_type = gDeviceMemType; 
+            return Context::getContext()->impl->unified_memory;
+        }
+
+        int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
+        { 
+            if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) ||
+                 mem_type == DEVICE_MEM_UHP ||
+                 mem_type == DEVICE_MEM_CHP )
+                return -1;
+            gDeviceMemRW = rw_type;
+            gDeviceMemType = mem_type;
+            return 0; 
+        }
+       struct Info::Impl
         {
             cl_platform_id oclplatform;
             std::vector<cl_device_id> devices;
@@ -290,11 +297,8 @@ namespace cv
          }
 
         void *getoclContext()
-
         {
-
             return &(Context::getContext()->impl->clContext);
-
         }
 
         void *getoclCommandQueue()
@@ -320,9 +324,15 @@ namespace cv
         void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
                                size_t widthInBytes, size_t height)
         {
+            openCLMallocPitchEx(clCxt, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType);
+        }
+
+        void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
+                               size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
+        {
             cl_int status;
 
-            *dev_ptr = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
+            *dev_ptr = clCreateBuffer(clCxt->impl->clContext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
                                       widthInBytes * height, 0, &status);
             openCLVerifyCall(status);
             *pitch = widthInBytes;
@@ -837,6 +847,11 @@ namespace cv
             clcxt->impl->double_support = oclinfo.impl->double_support;
             //extra options to recognize compiler options
             memcpy(clcxt->impl->extra_options, oclinfo.impl->extra_options, 512);
+            cl_bool unfymem = false;
+            openCLSafeCall(clGetDeviceInfo(clcxt->impl->devices, CL_DEVICE_HOST_UNIFIED_MEMORY,
+                                           sizeof(cl_bool), (void *)&unfymem, NULL));
+            if(unfymem)
+                clcxt->impl->unified_memory = 1;
         }
         Context::Context()
         {
@@ -853,6 +868,7 @@ namespace cv
             impl->double_support = 0;
             //extra options to recognize vendor specific fp64 extensions
             memset(impl->extra_options, 0, 512);
+            impl->unified_memory = 0; 
             programCache = ProgramCache::getProgramCache();
         }
 
@@ -877,6 +893,7 @@ namespace cv
         }
         void Info::release()
         {
+            fft_teardown();
             if(impl->oclplatform)
             {
                 impl->oclplatform = 0;
index 6b9f53b..db228f5 100644 (file)
@@ -45,6 +45,7 @@
 
 #include <iomanip>
 #include "precomp.hpp"
+#include "mcwutil.hpp"
 
 using namespace std;
 using namespace cv;
@@ -231,73 +232,10 @@ void interpolate::blendFrames(const oclMat &frame0, const oclMat &/*frame1*/, co
 
 void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
 {
-    cl_image_format format;
-    int err;
-    int depth    = img.depth();
-    int channels = img.channels();
-
-    switch(depth)
-    {
-    case CV_8U:
-        format.image_channel_data_type = CL_UNSIGNED_INT8;
-        break;
-    case CV_32S:
-        format.image_channel_data_type = CL_UNSIGNED_INT32;
-        break;
-    case CV_32F:
-        format.image_channel_data_type = CL_FLOAT;
-        break;
-    default:
-        throw std::exception();
-        break;
-    }
-    switch(channels)
-    {
-    case 1:
-        format.image_channel_order     = CL_R;
-        break;
-    case 3:
-        format.image_channel_order     = CL_RGB;
-        break;
-    case 4:
-        format.image_channel_order     = CL_RGBA;
-        break;
-    default:
-        throw std::exception();
-        break;
-    }
     if(texture)
     {
         openCLFree(texture);
     }
-
-#ifdef CL_VERSION_1_2
-    cl_image_desc desc;
-    desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
-    desc.image_width      = img.step / img.elemSize();
-    desc.image_height     = img.rows;
-    desc.image_depth      = 0;
-    desc.image_array_size = 1;
-    desc.image_row_pitch  = 0;
-    desc.image_slice_pitch = 0;
-    desc.buffer           = NULL;
-    desc.num_mip_levels   = 0;
-    desc.num_samples      = 0;
-    texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
-#else
-    texture = clCreateImage2D(
-                  Context::getContext()->impl->clContext,
-                  CL_MEM_READ_WRITE,
-                  &format,
-                  img.step / img.elemSize(),
-                  img.rows,
-                  0,
-                  NULL,
-                  &err);
-#endif
-    size_t origin[] = { 0, 0, 0 };
-    size_t region[] = { img.step / img.elemSize(), img.rows, 1 };
-    clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
-    openCLSafeCall(err);
+       texture = bindTexture(img);
 }
 
index a5d0d7b..e5dd29e 100644 (file)
@@ -3,14 +3,16 @@
 
 int bit1Count(float x)
 {
-    int c = 0;
-    int ix = (int)x;
-    for (int i = 0 ; i < 32 ; i++)
-    {
-        c += ix & 0x1;
-        ix >>= 1;
-    }
-    return (float)c;
+       int c = 0;
+       int ix = (int)x;
+       
+       for (int i = 0 ; i < 32 ; i++)
+       {
+               c += ix & 0x1;
+               ix >>= 1;
+       }
+       
+       return (float)c;
 }
 /* 2dim launch, global size: dim0 is (query rows + block_size - 1) / block_size * block_size, dim1 is block_size
 local size: dim0 is block_size, dim1 is block_size.
@@ -18,7 +20,7 @@ local size: dim0 is block_size, dim1 is block_size.
 __kernel void BruteForceMatch_UnrollMatch(
     __global float *query,
     __global float *train,
-    __global float *mask,
+    //__global float *mask,
     __global int *bestTrainIdx,
     __global float *bestDistance,
     __local float *sharebuffer,
@@ -30,113 +32,122 @@ __kernel void BruteForceMatch_UnrollMatch(
     int train_cols,
     int step,
     int distType
-    )
+)
 {
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-
-    __local float *s_query = sharebuffer;
-    __local float *s_train = sharebuffer + block_size * max_desc_len;
-
-    int queryIdx = groupidx * block_size + lidy;
-    // load the query into local memory.
-    for (int i = 0 ;  i <  max_desc_len / block_size; i ++)
-    {
-        int loadx = lidx + i * block_size;
-        s_query[lidy * max_desc_len + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-    }
-
-    float myBestDistance = MAX_FLOAT;
-    int myBestTrainIdx = -1;
-
-    // loopUnrolledCached to find the best trainIdx and best distance.
-    volatile int imgIdx = 0;
-    for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
-    {
-        float result = 0;
-        for (int i = 0 ; i < max_desc_len / block_size ; i++)
-        {
-            //load a block_size * block_size block into local train.
-            const int loadx = lidx + i * block_size;
-            s_train[lidx * block_size + lidy] = loadx < train_cols ? train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
-
-            //synchronize to make sure each elem for reduceIteration in share memory is written already.
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
-            sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
-
-            switch(distType)
-            {
-            case 0:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    result += fabs(s_query[lidy * max_desc_len + i * block_size + j] -  s_train[j * block_size + lidx]);
-                }
-                break;
-            case 1:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    float qr = s_query[lidy * max_desc_len + i * block_size + j] -  s_train[j * block_size + lidx];
-                    result += qr * qr;
-                }
-                break;
-            case 2:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    //result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
-                    result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
-                }
-                break;
-            }
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        int trainIdx = t * block_size + lidx;
-
-        if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
-        {
-            //bestImgIdx = imgIdx;
-            myBestDistance = result;
-            myBestTrainIdx = trainIdx;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-    __local float *s_distance = (__local float*)(sharebuffer);
-    __local int* s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
-
-    //find BestMatch
-    s_distance += lidy * block_size;
-    s_trainIdx += lidy * block_size;
-    s_distance[lidx] = myBestDistance;
-    s_trainIdx[lidx] = myBestTrainIdx;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //reduce -- now all reduce implement in each threads.
-    for (int k = 0 ; k < block_size; k++)
-    {
-        if (myBestDistance > s_distance[k])
-        {
-            myBestDistance = s_distance[k];
-            myBestTrainIdx = s_trainIdx[k];
-        }
-    }
-
-    if (queryIdx < query_rows && lidx == 0)
-    {
-        bestTrainIdx[queryIdx] = myBestTrainIdx;
-        bestDistance[queryIdx] = myBestDistance;
-    }
+       const int lidx = get_local_id(0);
+       const int lidy = get_local_id(1);
+       const int groupidx = get_group_id(0);
+       
+       __local float *s_query = sharebuffer;
+       __local float *s_train = sharebuffer + block_size * max_desc_len;
+       
+       int queryIdx = groupidx * block_size + lidy;
+       
+       // load the query into local memory.
+       for (int i = 0 ;  i <  max_desc_len / block_size; i ++)
+       {
+               int loadx = lidx + i * block_size;
+               s_query[lidy * max_desc_len + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
+       }
+       
+       float myBestDistance = MAX_FLOAT;
+       int myBestTrainIdx = -1;
+       
+       // loopUnrolledCached to find the best trainIdx and best distance.
+       volatile int imgIdx = 0;
+       
+       for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
+       {
+               float result = 0;
+               
+               for (int i = 0 ; i < max_desc_len / block_size ; i++)
+               {
+                       //load a block_size * block_size block into local train.
+                       const int loadx = lidx + i * block_size;
+                       s_train[lidx * block_size + lidy] = loadx < train_cols ? train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+                       
+                       //synchronize to make sure each elem for reduceIteration in share memory is written already.
+                       barrier(CLK_LOCAL_MEM_FENCE);
+                       
+                       /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
+                       sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
+                       
+                       switch (distType)
+                       {
+                               case 0:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               result += fabs(s_query[lidy * max_desc_len + i * block_size + j] -  s_train[j * block_size + lidx]);
+                                       }
+                                       
+                                       break;
+                               case 1:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               float qr = s_query[lidy * max_desc_len + i * block_size + j] -  s_train[j * block_size + lidx];
+                                               result += qr * qr;
+                                       }
+                                       
+                                       break;
+                               case 2:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               //result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
+                                               result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^(uint)s_train[j * block_size + lidx]);
+                                       }
+                                       
+                                       break;
+                       }
+                       
+                       barrier(CLK_LOCAL_MEM_FENCE);
+               }
+               
+               int trainIdx = t * block_size + lidx;
+               
+               if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
+               {
+                       //bestImgIdx = imgIdx;
+                       myBestDistance = result;
+                       myBestTrainIdx = trainIdx;
+               }
+       }
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       __local float *s_distance = (__local float *)(sharebuffer);
+       __local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
+       
+       //find BestMatch
+       s_distance += lidy * block_size;
+       s_trainIdx += lidy * block_size;
+       s_distance[lidx] = myBestDistance;
+       s_trainIdx[lidx] = myBestTrainIdx;
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       //reduce -- now all reduce implement in each threads.
+       for (int k = 0 ; k < block_size; k++)
+       {
+               if (myBestDistance > s_distance[k])
+               {
+                       myBestDistance = s_distance[k];
+                       myBestTrainIdx = s_trainIdx[k];
+               }
+       }
+       
+       if (queryIdx < query_rows && lidx == 0)
+       {
+               bestTrainIdx[queryIdx] = myBestTrainIdx;
+               bestDistance[queryIdx] = myBestDistance;
+       }
 }
 
 __kernel void BruteForceMatch_Match(
     __global float *query,
     __global float *train,
-    __global float *mask,
+    //__global float *mask,
     __global int *bestTrainIdx,
     __global float *bestDistance,
     __local float *sharebuffer,
@@ -147,108 +158,115 @@ __kernel void BruteForceMatch_Match(
     int train_cols,
     int step,
     int distType
-    )
+)
 {
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-
-    const int queryIdx = groupidx * block_size + lidy;
-
-    float myBestDistance = MAX_FLOAT;
-    int myBestTrainIdx = -1;
-
-    __local float *s_query = sharebuffer;
-    __local float *s_train = sharebuffer + block_size * block_size;
-
-    // loop
-    for (int t = 0 ;  t < (train_rows + block_size - 1) / block_size ; t++)
-    {
-        //Dist dist;
-        float result = 0;
-        for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
-        {
-            const int loadx = lidx + i * block_size;
-            //load query and train into local memory
-            s_query[lidy * block_size + lidx] = 0;
-            s_train[lidx * block_size + lidy] = 0;
-
-            if (loadx < query_cols)
-            {
-                s_query[lidy * block_size + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
-                s_train[lidx * block_size + lidy] = train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
-            }
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
-            sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
-
-            switch(distType)
-            {
-            case 0:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    result += fabs(s_query[lidy * block_size + j] -  s_train[j * block_size + lidx]);
-                }
-                break;
-            case 1:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    float qr = s_query[lidy * block_size + j] -  s_train[j * block_size + lidx];
-                    result += qr * qr;
-                }
-                break;
-            case 2:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    //result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
-                    result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
-                }
-                break;
-            }
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        const int trainIdx = t * block_size + lidx;
-
-        if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
-        {
-            //myBestImgidx = imgIdx;
-            myBestDistance = result;
-            myBestTrainIdx = trainIdx;
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    __local float *s_distance = (__local float *)sharebuffer;
-    __local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
-
-    //findBestMatch
-    s_distance += lidy * block_size;
-    s_trainIdx += lidy * block_size;
-    s_distance[lidx] = myBestDistance;
-    s_trainIdx[lidx] = myBestTrainIdx;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    //reduce -- now all reduce implement in each threads.
-    for (int k = 0 ; k < block_size; k++)
-    {
-        if (myBestDistance > s_distance[k])
-        {
-            myBestDistance = s_distance[k];
-            myBestTrainIdx = s_trainIdx[k];
-        }
-    }
-
-    if (queryIdx < query_rows && lidx == 0)
-    {
-        bestTrainIdx[queryIdx] = myBestTrainIdx;
-        bestDistance[queryIdx] = myBestDistance;
-    }
+       const int lidx = get_local_id(0);
+       const int lidy = get_local_id(1);
+       const int groupidx = get_group_id(0);
+       
+       const int queryIdx = groupidx * block_size + lidy;
+       
+       float myBestDistance = MAX_FLOAT;
+       int myBestTrainIdx = -1;
+       
+       __local float *s_query = sharebuffer;
+       __local float *s_train = sharebuffer + block_size * block_size;
+       
+       // loop
+       for (int t = 0 ;  t < (train_rows + block_size - 1) / block_size ; t++)
+       {
+               //Dist dist;
+               float result = 0;
+               
+               for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
+               {
+                       const int loadx = lidx + i * block_size;
+                       //load query and train into local memory
+                       s_query[lidy * block_size + lidx] = 0;
+                       s_train[lidx * block_size + lidy] = 0;
+                       
+                       if (loadx < query_cols)
+                       {
+                               s_query[lidy * block_size + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
+                               s_train[lidx * block_size + lidy] = train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
+                       }
+                       
+                       barrier(CLK_LOCAL_MEM_FENCE);
+                       
+                       /* There are three distance types in the reducer: L1Dist (distType 0) sums fabs(v1 - v2), L2Dist (distType 1)
+                       sums (v1 - v2) * (v1 - v2), and Hamming (distType 2) sums popc(v1 ^ v2), where popc counts the bits set to 1. */
+                       
+                       switch (distType)
+                       {
+                               case 0:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               result += fabs(s_query[lidy * block_size + j] -  s_train[j * block_size + lidx]);
+                                       }
+                                       
+                                       break;
+                               case 1:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               float qr = s_query[lidy * block_size + j] -  s_train[j * block_size + lidx];
+                                               result += qr * qr;
+                                       }
+                                       
+                                       break;
+                               case 2:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               //result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
+                                               result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[(uint)j * block_size + lidx]);
+                                       }
+                                       
+                                       break;
+                       }
+                       
+                       barrier(CLK_LOCAL_MEM_FENCE);
+               }
+               
+               const int trainIdx = t * block_size + lidx;
+               
+               if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
+               {
+                       //myBestImgidx = imgIdx;
+                       myBestDistance = result;
+                       myBestTrainIdx = trainIdx;
+               }
+       }
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       __local float *s_distance = (__local float *)sharebuffer;
+       __local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
+       
+       //findBestMatch
+       s_distance += lidy * block_size;
+       s_trainIdx += lidy * block_size;
+       s_distance[lidx] = myBestDistance;
+       s_trainIdx[lidx] = myBestTrainIdx;
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       //reduce -- every thread currently runs the whole reduction over its own row.
+       for (int k = 0 ; k < block_size; k++)
+       {
+               if (myBestDistance > s_distance[k])
+               {
+                       myBestDistance = s_distance[k];
+                       myBestTrainIdx = s_trainIdx[k];
+               }
+       }
+       
+       if (queryIdx < query_rows && lidx == 0)
+       {
+               bestTrainIdx[queryIdx] = myBestTrainIdx;
+               bestDistance[queryIdx] = myBestDistance;
+       }
 }
 
 //radius_unrollmatch
@@ -256,7 +274,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
     __global float *query,
     __global float *train,
     float maxDistance,
-    __global float *mask,
+    //__global float *mask,
     __global int *bestTrainIdx,
     __global float *bestDistance,
     __global int *nMatches,
@@ -271,71 +289,78 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
     int step,
     int ostep,
     int distType
-    )
+)
 {
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-    const int groupidy = get_group_id(1);
-
-    const int queryIdx = groupidy * block_size + lidy;
-    const int trainIdx = groupidx * block_size + lidx;
-
-    __local float *s_query = sharebuffer;
-    __local float *s_train = sharebuffer + block_size * block_size;
-
-    float result = 0;
-    for (int i = 0 ; i < max_desc_len / block_size ; ++i)
-    {
-        //load a block_size * block_size block into local train.
-        const int loadx = lidx + i * block_size;
-
-        s_query[lidy * block_size + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-        s_train[lidx * block_size + lidy] = loadx < query_cols ? train[min(groupidx * block_size + lidy, train_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-
-        //synchronize to make sure each elem for reduceIteration in share memory is written already.
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        /* there are three types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
-        sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
-
-        switch(distType)
-        {
-        case 0:
-            for (int j = 0 ; j < block_size ; ++j)
-            {
-                result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
-            }
-            break;
-        case 1:
-            for (int j = 0 ; j < block_size ; ++j)
-            {
-                float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
-                result += qr * qr;
-            }
-            break;
-        case 2:
-            for (int j = 0 ; j < block_size ; ++j)
-            {
-                result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
-            }
-            break;
-        }
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if (queryIdx < query_rows && trainIdx < train_rows && result < maxDistance/* && mask(queryIdx, trainIdx)*/)
-    {
-        unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
-
-        if(ind < bestTrainIdx_cols)
-        {
-            //bestImgIdx = imgIdx;
-            bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
-            bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
-        }
-    }
+       const int lidx = get_local_id(0);
+       const int lidy = get_local_id(1);
+       const int groupidx = get_group_id(0);
+       const int groupidy = get_group_id(1);
+       
+       const int queryIdx = groupidy * block_size + lidy;
+       const int trainIdx = groupidx * block_size + lidx;
+       
+       __local float *s_query = sharebuffer;
+       __local float *s_train = sharebuffer + block_size * block_size;
+       
+       float result = 0;
+       
+       for (int i = 0 ; i < max_desc_len / block_size ; ++i)
+       {
+               //load a block_size * block_size tile of query and of train into local memory.
+               const int loadx = lidx + i * block_size;
+               
+               s_query[lidy * block_size + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
+               s_train[lidx * block_size + lidy] = loadx < query_cols ? train[min(groupidx * block_size + lidy, train_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
+               
+               //synchronize so that every element of the local-memory tiles is written before the reduction reads it.
+               barrier(CLK_LOCAL_MEM_FENCE);
+               
+               /* There are three distance types in the reducer: L1Dist (distType 0) sums fabs(v1 - v2), L2Dist (distType 1)
+               sums (v1 - v2) * (v1 - v2), and Hamming (distType 2) sums popc(v1 ^ v2), where popc counts the bits set to 1. */
+               
+               switch (distType)
+               {
+                       case 0:
+                       
+                               for (int j = 0 ; j < block_size ; ++j)
+                               {
+                                       result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
+                               }
+                               
+                               break;
+                       case 1:
+                       
+                               for (int j = 0 ; j < block_size ; ++j)
+                               {
+                                       float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
+                                       result += qr * qr;
+                               }
+                               
+                               break;
+                       case 2:
+                       
+                               for (int j = 0 ; j < block_size ; ++j)
+                               {
+                                       result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[j * block_size + lidx]);
+                               }
+                               
+                               break;
+               }
+               
+               barrier(CLK_LOCAL_MEM_FENCE);
+       }
+       
+       if (queryIdx < query_rows && trainIdx < train_rows && result < maxDistance/* && mask(queryIdx, trainIdx)*/)
+       {
+               unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
+               
+               if (ind < bestTrainIdx_cols)
+               {
+                       //bestImgIdx = imgIdx;
+                       bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
+                       bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
+               }
+       }
 }
 
 //radius_match
@@ -343,7 +368,7 @@ __kernel void BruteForceMatch_RadiusMatch(
     __global float *query,
     __global float *train,
     float maxDistance,
-    __global float *mask,
+    //__global float *mask,
     __global int *bestTrainIdx,
     __global float *bestDistance,
     __global int *nMatches,
@@ -357,78 +382,85 @@ __kernel void BruteForceMatch_RadiusMatch(
     int step,
     int ostep,
     int distType
-    )
+)
 {
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-    const int groupidy = get_group_id(1);
-
-    const int queryIdx = groupidy * block_size + lidy;
-    const int trainIdx = groupidx * block_size + lidx;
-
-    __local float *s_query = sharebuffer;
-    __local float *s_train = sharebuffer + block_size * block_size;
-
-    float result = 0;
-    for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; ++i)
-    {
-        //load a block_size * block_size block into local train.
-        const int loadx = lidx + i * block_size;
-
-        s_query[lidy * block_size + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-        s_train[lidx * block_size + lidy] = loadx < query_cols ? train[min(groupidx * block_size + lidy, train_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-
-        //synchronize to make sure each elem for reduceIteration in share memory is written already.
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        /* there are three types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
-        sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
-
-        switch(distType)
-        {
-        case 0:
-            for (int j = 0 ; j < block_size ; ++j)
-            {
-                result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
-            }
-            break;
-        case 1:
-            for (int j = 0 ; j < block_size ; ++j)
-            {
-                float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
-                result += qr * qr;
-            }
-            break;
-        case 2:
-            for (int j = 0 ; j < block_size ; ++j)
-            {
-                result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
-            }
-            break;
-        }
-
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-
-    if (queryIdx < query_rows && trainIdx < train_rows && result < maxDistance/* && mask(queryIdx, trainIdx)*/)
-    {
-        unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
-
-        if(ind < bestTrainIdx_cols)
-        {
-            //bestImgIdx = imgIdx;
-            bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
-            bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
-        }
-    }
+       const int lidx = get_local_id(0);
+       const int lidy = get_local_id(1);
+       const int groupidx = get_group_id(0);
+       const int groupidy = get_group_id(1);
+       
+       const int queryIdx = groupidy * block_size + lidy;
+       const int trainIdx = groupidx * block_size + lidx;
+       
+       __local float *s_query = sharebuffer;
+       __local float *s_train = sharebuffer + block_size * block_size;
+       
+       float result = 0;
+       
+       for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; ++i)
+       {
+               //load a block_size * block_size tile of query and of train into local memory.
+               const int loadx = lidx + i * block_size;
+               
+               s_query[lidy * block_size + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
+               s_train[lidx * block_size + lidy] = loadx < query_cols ? train[min(groupidx * block_size + lidy, train_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
+               
+               //synchronize so that every element of the local-memory tiles is written before the reduction reads it.
+               barrier(CLK_LOCAL_MEM_FENCE);
+               
+               /* There are three distance types in the reducer: L1Dist (distType 0) sums fabs(v1 - v2), L2Dist (distType 1)
+               sums (v1 - v2) * (v1 - v2), and Hamming (distType 2) sums popc(v1 ^ v2), where popc counts the bits set to 1. */
+               
+               switch (distType)
+               {
+                       case 0:
+                       
+                               for (int j = 0 ; j < block_size ; ++j)
+                               {
+                                       result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
+                               }
+                               
+                               break;
+                       case 1:
+                       
+                               for (int j = 0 ; j < block_size ; ++j)
+                               {
+                                       float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
+                                       result += qr * qr;
+                               }
+                               
+                               break;
+                       case 2:
+                       
+                               for (int j = 0 ; j < block_size ; ++j)
+                               {
+                                       result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[j * block_size + lidx]);
+                               }
+                               
+                               break;
+               }
+               
+               barrier(CLK_LOCAL_MEM_FENCE);
+       }
+       
+       if (queryIdx < query_rows && trainIdx < train_rows && result < maxDistance/* && mask(queryIdx, trainIdx)*/)
+       {
+               unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
+               
+               if (ind < bestTrainIdx_cols)
+               {
+                       //bestImgIdx = imgIdx;
+                       bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
+                       bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
+               }
+       }
 }
 
 
 __kernel void BruteForceMatch_knnUnrollMatch(
     __global float *query,
     __global float *train,
-    __global float *mask,
+    //__global float *mask,
     __global int2 *bestTrainIdx,
     __global float2 *bestDistance,
     __local float *sharebuffer,
@@ -440,169 +472,178 @@ __kernel void BruteForceMatch_knnUnrollMatch(
     int train_cols,
     int step,
     int distType
-    )
+)
 {
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-
-    const int queryIdx = groupidx * block_size + lidy;
-    local float *s_query = sharebuffer;
-    local float *s_train = sharebuffer + block_size * max_desc_len;
-
-    // load the query into local memory.
-    for (int i = 0 ;  i <  max_desc_len / block_size; i ++)
-    {
-        int loadx = lidx + i * block_size;
-        s_query[lidy * max_desc_len + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
-    }
-
-    float myBestDistance1 = MAX_FLOAT;
-    float myBestDistance2 = MAX_FLOAT;
-    int myBestTrainIdx1 = -1;
-    int myBestTrainIdx2 = -1;
-
-    //loopUnrolledCached
-    volatile int imgIdx = 0;
-    for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
-    {
-        float result = 0;
-        for (int i = 0 ; i < max_desc_len / block_size ; i++)
-        {
-            const int loadX = lidx + i * block_size;
-            //load a block_size * block_size block into local train.
-            const int loadx = lidx + i * block_size;
-            s_train[lidx * block_size + lidy] = loadx < train_cols ? train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
-
-            //synchronize to make sure each elem for reduceIteration in share memory is written already.
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
-            sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
-
-            switch(distType)
-            {
-            case 0:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    result += fabs(s_query[lidy * max_desc_len + i * block_size + j] -  s_train[j * block_size + lidx]);
-                }
-                break;
-            case 1:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    float qr = s_query[lidy * max_desc_len + i * block_size + j] -  s_train[j * block_size + lidx];
-                    result += qr * qr;
-                }
-                break;
-            case 2:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    //result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
-                    result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
-                }
-                break;
-            }
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        const int trainIdx = t * block_size + lidx;
-
-        if (queryIdx < query_rows && trainIdx < train_rows)
-        {
-            if (result < myBestDistance1)
-            {
-                myBestDistance2 = myBestDistance1;
-                myBestTrainIdx2 = myBestTrainIdx1;
-                myBestDistance1 = result;
-                myBestTrainIdx1 = trainIdx;
-            }
-            else if (result < myBestDistance2)
-            {
-                myBestDistance2 = result;
-                myBestTrainIdx2 = trainIdx;
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    local float *s_distance = (local float *)sharebuffer;
-    local int *s_trainIdx = (local int *)(sharebuffer + block_size * block_size);
-
-    // find BestMatch
-    s_distance += lidy * block_size;
-    s_trainIdx += lidy * block_size;
-
-    s_distance[lidx] = myBestDistance1;
-    s_trainIdx[lidx] = myBestTrainIdx1;
-
-    float bestDistance1 = MAX_FLOAT;
-    float bestDistance2 = MAX_FLOAT;
-    int bestTrainIdx1 = -1;
-    int bestTrainIdx2 = -1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lidx == 0)
-    {
-        for (int i = 0 ; i < block_size ; i++)
-        {
-            float val = s_distance[i];
-            if (val < bestDistance1)
-            {
-                bestDistance2 = bestDistance1;
-                bestTrainIdx2 = bestTrainIdx1;
-
-                bestDistance1 = val;
-                bestTrainIdx1 = s_trainIdx[i];
-            }
-            else if (val < bestDistance2)
-            {
-                bestDistance2 = val;
-                bestTrainIdx2 = s_trainIdx[i];
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    s_distance[lidx] = myBestDistance2;
-    s_trainIdx[lidx] = myBestTrainIdx2;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lidx == 0)
-    {
-        for (int i = 0 ; i < block_size ; i++)
-        {
-            float val = s_distance[i];
-
-            if (val < bestDistance2)
-            {
-                bestDistance2 = val;
-                bestTrainIdx2 = s_trainIdx[i];
-            }
-        }
-    }
-
-    myBestDistance1 = bestDistance1;
-    myBestDistance2 = bestDistance2;
-
-    myBestTrainIdx1 = bestTrainIdx1;
-    myBestTrainIdx2 = bestTrainIdx2;
-
-    if (queryIdx < query_rows && lidx == 0)
-    {
-        bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
-        bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
-    }
+       const int lidx = get_local_id(0);
+       const int lidy = get_local_id(1);
+       const int groupidx = get_group_id(0);
+       
+       const int queryIdx = groupidx * block_size + lidy;
+       local float *s_query = sharebuffer;
+       local float *s_train = sharebuffer + block_size * max_desc_len;
+       
+       // load the query into local memory.
+       for (int i = 0 ;  i <  max_desc_len / block_size; i ++)
+       {
+               int loadx = lidx + i * block_size;
+               s_query[lidy * max_desc_len + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1)  * (step / sizeof(float)) + loadx] : 0;
+       }
+       
+       float myBestDistance1 = MAX_FLOAT;
+       float myBestDistance2 = MAX_FLOAT;
+       int myBestTrainIdx1 = -1;
+       int myBestTrainIdx2 = -1;
+       
+       //loopUnrolledCached
+       volatile int imgIdx = 0;
+       
+       for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
+       {
+               float result = 0;
+               
+               for (int i = 0 ; i < max_desc_len / block_size ; i++)
+               {
+                       const int loadX = lidx + i * block_size;
+                       //load a block_size * block_size block into local train.
+                       const int loadx = lidx + i * block_size;
+                       s_train[lidx * block_size + lidy] = loadx < train_cols ? train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+                       
+                       //synchronize so that every element of the local-memory train tile is written before the reduction reads it.
+                       barrier(CLK_LOCAL_MEM_FENCE);
+                       
+                       /* There are three distance types in the reducer: L1Dist (distType 0) sums fabs(v1 - v2), L2Dist (distType 1)
+                       sums (v1 - v2) * (v1 - v2), and Hamming (distType 2) sums popc(v1 ^ v2), where popc counts the bits set to 1. */
+                       
+                       switch (distType)
+                       {
+                               case 0:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               result += fabs(s_query[lidy * max_desc_len + i * block_size + j] -  s_train[j * block_size + lidx]);
+                                       }
+                                       
+                                       break;
+                               case 1:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               float qr = s_query[lidy * max_desc_len + i * block_size + j] -  s_train[j * block_size + lidx];
+                                               result += qr * qr;
+                                       }
+                                       
+                                       break;
+                               case 2:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               //result += popcount((uint)s_query[lidy * max_desc_len + i * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
+                                               result += bit1Count((uint)s_query[lidy * max_desc_len + i * block_size + j] ^(uint)s_train[j * block_size + lidx]);
+                                       }
+                                       
+                                       break;
+                       }
+                       
+                       barrier(CLK_LOCAL_MEM_FENCE);
+               }
+               
+               const int trainIdx = t * block_size + lidx;
+               
+               if (queryIdx < query_rows && trainIdx < train_rows)
+               {
+                       if (result < myBestDistance1)
+                       {
+                               myBestDistance2 = myBestDistance1;
+                               myBestTrainIdx2 = myBestTrainIdx1;
+                               myBestDistance1 = result;
+                               myBestTrainIdx1 = trainIdx;
+                       }
+                       else if (result < myBestDistance2)
+                       {
+                               myBestDistance2 = result;
+                               myBestTrainIdx2 = trainIdx;
+                       }
+               }
+       }
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       local float *s_distance = (local float *)sharebuffer;
+       local int *s_trainIdx = (local int *)(sharebuffer + block_size * block_size);
+       
+       // find BestMatch
+       s_distance += lidy * block_size;
+       s_trainIdx += lidy * block_size;
+       
+       s_distance[lidx] = myBestDistance1;
+       s_trainIdx[lidx] = myBestTrainIdx1;
+       
+       float bestDistance1 = MAX_FLOAT;
+       float bestDistance2 = MAX_FLOAT;
+       int bestTrainIdx1 = -1;
+       int bestTrainIdx2 = -1;
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       if (lidx == 0)
+       {
+               for (int i = 0 ; i < block_size ; i++)
+               {
+                       float val = s_distance[i];
+                       
+                       if (val < bestDistance1)
+                       {
+                               bestDistance2 = bestDistance1;
+                               bestTrainIdx2 = bestTrainIdx1;
+                               
+                               bestDistance1 = val;
+                               bestTrainIdx1 = s_trainIdx[i];
+                       }
+                       else if (val < bestDistance2)
+                       {
+                               bestDistance2 = val;
+                               bestTrainIdx2 = s_trainIdx[i];
+                       }
+               }
+       }
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       s_distance[lidx] = myBestDistance2;
+       s_trainIdx[lidx] = myBestTrainIdx2;
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       if (lidx == 0)
+       {
+               for (int i = 0 ; i < block_size ; i++)
+               {
+                       float val = s_distance[i];
+                       
+                       if (val < bestDistance2)
+                       {
+                               bestDistance2 = val;
+                               bestTrainIdx2 = s_trainIdx[i];
+                       }
+               }
+       }
+       
+       myBestDistance1 = bestDistance1;
+       myBestDistance2 = bestDistance2;
+       
+       myBestTrainIdx1 = bestTrainIdx1;
+       myBestTrainIdx2 = bestTrainIdx2;
+       
+       if (queryIdx < query_rows && lidx == 0)
+       {
+               bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
+               bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
+       }
 }
 
 __kernel void BruteForceMatch_knnMatch(
     __global float *query,
     __global float *train,
-    __global float *mask,
+    //__global float *mask,
     __global int2 *bestTrainIdx,
     __global float2 *bestDistance,
     __local float *sharebuffer,
@@ -613,166 +654,174 @@ __kernel void BruteForceMatch_knnMatch(
     int train_cols,
     int step,
     int distType
-    )
+)
 {
-    const int lidx = get_local_id(0);
-    const int lidy = get_local_id(1);
-    const int groupidx = get_group_id(0);
-
-    const int queryIdx = groupidx * block_size + lidy;
-    local float *s_query = sharebuffer;
-    local float *s_train = sharebuffer + block_size * block_size;
-
-    float myBestDistance1 = MAX_FLOAT;
-    float myBestDistance2 = MAX_FLOAT;
-    int myBestTrainIdx1 = -1;
-    int myBestTrainIdx2 = -1;
-
-    //loop
-    for (int  t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
-    {
-        float result = 0.0f;
-        for (int i = 0 ; i < (query_cols + block_size -1) / block_size ; i++)
-        {
-            const int loadx = lidx + i * block_size;
-            //load query and train into local memory
-            s_query[lidy * block_size + lidx] = 0;
-            s_train[lidx * block_size + lidy] = 0;
-
-            if (loadx < query_cols)
-            {
-                s_query[lidy * block_size + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
-                s_train[lidx * block_size + lidy] = train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
-            }
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
-            sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
-
-            switch(distType)
-            {
-            case 0:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    result += fabs(s_query[lidy * block_size + j] -  s_train[j * block_size + lidx]);
-                }
-                break;
-            case 1:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    float qr = s_query[lidy * block_size + j] -  s_train[j * block_size + lidx];
-                    result += qr * qr;
-                }
-                break;
-            case 2:
-                for (int j = 0 ; j < block_size ; j++)
-                {
-                    //result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
-                    result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
-                }
-                break;
-            }
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-        }
-
-        const int trainIdx = t * block_size + lidx;
-
-        if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)
-        {
-            if (result < myBestDistance1)
-            {
-                myBestDistance2 = myBestDistance1;
-                myBestTrainIdx2 = myBestTrainIdx1;
-                myBestDistance1 = result;
-                myBestTrainIdx1 = trainIdx;
-            }
-            else if (result < myBestDistance2)
-            {
-                myBestDistance2 = result;
-                myBestTrainIdx2 = trainIdx;
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    __local float *s_distance = (__local float *)sharebuffer;
-    __local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
-
-    //findBestMatch
-    s_distance += lidy * block_size;
-    s_trainIdx += lidy * block_size;
-
-    s_distance[lidx] = myBestDistance1;
-    s_trainIdx[lidx] = myBestTrainIdx1;
-
-    float bestDistance1 = MAX_FLOAT;
-    float bestDistance2 = MAX_FLOAT;
-    int bestTrainIdx1 = -1;
-    int bestTrainIdx2 = -1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lidx == 0)
-    {
-        for (int i = 0 ; i < block_size ; i++)
-        {
-            float val = s_distance[i];
-            if (val < bestDistance1)
-            {
-                bestDistance2 = bestDistance1;
-                bestTrainIdx2 = bestTrainIdx1;
-
-                bestDistance1 = val;
-                bestTrainIdx1 = s_trainIdx[i];
-            }
-            else if (val < bestDistance2)
-            {
-                bestDistance2 = val;
-                bestTrainIdx2 = s_trainIdx[i];
-            }
-        }
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    s_distance[lidx] = myBestDistance2;
-    s_trainIdx[lidx] = myBestTrainIdx2;
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (lidx == 0)
-    {
-        for (int i = 0 ; i < block_size ; i++)
-        {
-            float val = s_distance[i];
-
-            if (val < bestDistance2)
-            {
-                bestDistance2 = val;
-                bestTrainIdx2 = s_trainIdx[i];
-            }
-        }
-    }
-
-    myBestDistance1 = bestDistance1;
-    myBestDistance2 = bestDistance2;
-
-    myBestTrainIdx1 = bestTrainIdx1;
-    myBestTrainIdx2 = bestTrainIdx2;
-
-    if (queryIdx < query_rows && lidx == 0)
-    {
-        bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
-        bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
-    }
+       const int lidx = get_local_id(0);
+       const int lidy = get_local_id(1);
+       const int groupidx = get_group_id(0);
+       
+       const int queryIdx = groupidx * block_size + lidy;
+       local float *s_query = sharebuffer;
+       local float *s_train = sharebuffer + block_size * block_size;
+       
+       float myBestDistance1 = MAX_FLOAT;
+       float myBestDistance2 = MAX_FLOAT;
+       int myBestTrainIdx1 = -1;
+       int myBestTrainIdx2 = -1;
+       
+       //loop
+       for (int  t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
+       {
+               float result = 0.0f;
+               
+               for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
+               {
+                       const int loadx = lidx + i * block_size;
+                       //load query and train into local memory
+                       s_query[lidy * block_size + lidx] = 0;
+                       s_train[lidx * block_size + lidy] = 0;
+                       
+                       if (loadx < query_cols)
+                       {
+                               s_query[lidy * block_size + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
+                               s_train[lidx * block_size + lidy] = train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
+                       }
+                       
+                       barrier(CLK_LOCAL_MEM_FENCE);
+                       
+                       /* there are three types in the reducer. the first is L1Dist, which sums abs(v1 - v2); the second is L2Dist, which
+                       sums (v1 - v2) * (v1 - v2); the third is Hamming, which sums popc(v1 ^ v2), where popc counts the bits set to 1 */
+                       
+                       switch (distType)
+                       {
+                               case 0:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               result += fabs(s_query[lidy * block_size + j] -  s_train[j * block_size + lidx]);
+                                       }
+                                       
+                                       break;
+                               case 1:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               float qr = s_query[lidy * block_size + j] -  s_train[j * block_size + lidx];
+                                               result += qr * qr;
+                                       }
+                                       
+                                       break;
+                               case 2:
+                               
+                                       for (int j = 0 ; j < block_size ; j++)
+                                       {
+                                               //result += popcount((uint)s_query[lidy * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
+                                               result += bit1Count((uint)s_query[lidy * block_size + j] ^(uint)s_train[(uint)j * block_size + lidx]);
+                                       }
+                                       
+                                       break;
+                       }
+                       
+                       barrier(CLK_LOCAL_MEM_FENCE);
+               }
+               
+               const int trainIdx = t * block_size + lidx;
+               
+               if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)
+               {
+                       if (result < myBestDistance1)
+                       {
+                               myBestDistance2 = myBestDistance1;
+                               myBestTrainIdx2 = myBestTrainIdx1;
+                               myBestDistance1 = result;
+                               myBestTrainIdx1 = trainIdx;
+                       }
+                       else if (result < myBestDistance2)
+                       {
+                               myBestDistance2 = result;
+                               myBestTrainIdx2 = trainIdx;
+                       }
+               }
+       }
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       __local float *s_distance = (__local float *)sharebuffer;
+       __local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
+       
+       //findBestMatch
+       s_distance += lidy * block_size;
+       s_trainIdx += lidy * block_size;
+       
+       s_distance[lidx] = myBestDistance1;
+       s_trainIdx[lidx] = myBestTrainIdx1;
+       
+       float bestDistance1 = MAX_FLOAT;
+       float bestDistance2 = MAX_FLOAT;
+       int bestTrainIdx1 = -1;
+       int bestTrainIdx2 = -1;
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       if (lidx == 0)
+       {
+               for (int i = 0 ; i < block_size ; i++)
+               {
+                       float val = s_distance[i];
+                       
+                       if (val < bestDistance1)
+                       {
+                               bestDistance2 = bestDistance1;
+                               bestTrainIdx2 = bestTrainIdx1;
+                               
+                               bestDistance1 = val;
+                               bestTrainIdx1 = s_trainIdx[i];
+                       }
+                       else if (val < bestDistance2)
+                       {
+                               bestDistance2 = val;
+                               bestTrainIdx2 = s_trainIdx[i];
+                       }
+               }
+       }
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       s_distance[lidx] = myBestDistance2;
+       s_trainIdx[lidx] = myBestTrainIdx2;
+       
+       barrier(CLK_LOCAL_MEM_FENCE);
+       
+       if (lidx == 0)
+       {
+               for (int i = 0 ; i < block_size ; i++)
+               {
+                       float val = s_distance[i];
+                       
+                       if (val < bestDistance2)
+                       {
+                               bestDistance2 = val;
+                               bestTrainIdx2 = s_trainIdx[i];
+                       }
+               }
+       }
+       
+       myBestDistance1 = bestDistance1;
+       myBestDistance2 = bestDistance2;
+       
+       myBestTrainIdx1 = bestTrainIdx1;
+       myBestTrainIdx2 = bestTrainIdx2;
+       
+       if (queryIdx < query_rows && lidx == 0)
+       {
+               bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
+               bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
+       }
 }
 
 kernel void BruteForceMatch_calcDistanceUnrolled(
     __global float *query,
     __global float *train,
-    __global float *mask,
+    //__global float *mask,
     __global float *allDist,
     __local float *sharebuffer,
     int block_size,
@@ -784,13 +833,13 @@ kernel void BruteForceMatch_calcDistanceUnrolled(
     int step,
     int distType)
 {
-    /* Todo */
+       /* Todo */
 }
 
 kernel void BruteForceMatch_calcDistance(
     __global float *query,
     __global float *train,
-    __global float *mask,
+    //__global float *mask,
     __global float *allDist,
     __local float *sharebuffer,
     int block_size,
@@ -801,16 +850,16 @@ kernel void BruteForceMatch_calcDistance(
     int step,
     int distType)
 {
-    /* Todo */
+       /* Todo */
 }
 
 kernel void BruteForceMatch_findBestMatch(
     __global float *allDist,
     __global int *bestTrainIdx,
     __global float *bestDistance,
-     int k,
-     int block_size
-    )
+    int k,
+    int block_size
+)
 {
-    /* Todo */
+       /* Todo */
 }
\ No newline at end of file
index 9521939..410f8fc 100644 (file)
@@ -203,8 +203,8 @@ __kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels,
 
 __constant int ITUR_BT_601_CY = 1220542;
 __constant int ITUR_BT_601_CUB = 2116026;
-__constant int ITUR_BT_601_CUG = -409993;
-__constant int ITUR_BT_601_CVG = -852492;
+__constant int ITUR_BT_601_CUG = 409993;
+__constant int ITUR_BT_601_CVG = 852492;
 __constant int ITUR_BT_601_CVR = 1673527;
 __constant int ITUR_BT_601_SHIFT = 20;
 
@@ -229,7 +229,7 @@ __kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step,
         int V  = usrc[1] - 128;
 
         int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V;
-        int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * V + ITUR_BT_601_CUG * U;
+        int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U;
         int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * U;
 
         Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY;
index f60d76a..4964000 100644 (file)
@@ -120,8 +120,7 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
     int gidy = get_global_id(1);
     int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
 
-#ifdef USEROI
-    if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3==0))
+    if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3)==0)
     {
         *(__global uchar4*)&dst[out_addr] = res;
     }
@@ -150,9 +149,6 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
             dst[out_addr] = res.x;
         }
     }
-#else
-    *(__global uchar4*)&dst[out_addr] = res;
-#endif
 }
 #else
 __kernel void morph(__global const GENTYPE * restrict src,
diff --git a/modules/ocl/src/kernels/moments.cl b/modules/ocl/src/kernels/moments.cl
new file mode 100644 (file)
index 0000000..6048837
--- /dev/null
@@ -0,0 +1,953 @@
+#if defined (DOUBLE_SUPPORT) // double-precision build: enable whichever fp64 extension macro is defined
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
+typedef double T; // element type of icvContourMoments' intermediates and of its dst_a output
+
+#else // fallback build: the double names are aliased to float, presumably so the kernels below compile unchanged
+typedef float double;
+typedef float4 double4;
+typedef long T; // NOTE(review): T is an integer type here but double in the other branch, so icvContourMoments casts its float inputs to long -- confirm this is intended
+#define convert_double4 convert_float4
+#endif
+//#pragma OPENCL EXTENSION cl_amd_printf:enable
+//#if defined (DOUBLE_SUPPORT)
+#define DST_ROW_A00     0 // DST_ROW_*: row index of each moment term in dst_a (icvContourMoments writes row * dst_step + idx)
+#define DST_ROW_A10     1
+#define DST_ROW_A01     2
+#define DST_ROW_A20     3
+#define DST_ROW_A11     4
+#define DST_ROW_A02     5
+#define DST_ROW_A30     6
+#define DST_ROW_A21     7
+#define DST_ROW_A12     8
+#define DST_ROW_A03     9
+
+__kernel void icvContourMoments(int contour_total, // one work-item per contour point: writes the 10 moment terms of the edge from point idx to the next point
+                                __global float* reader_oclmat_data, // read as interleaved floats: point k is at [2k] (x) and [2k+1] (y)
+                                __global T* dst_a, // output: term r of work-item idx goes to dst_a[r * dst_step + idx], r = DST_ROW_*
+                                int dst_step) // divided by sizeof(T) below, i.e. passed in as a byte stride
+{
+    T xi_1, yi_1, xi_12, yi_12, xi, yi, xi2, yi2, dxy, xii_1, yii_1;
+    int idx = get_global_id(0);
+
+    if (idx < 0 || idx >= contour_total) // work-items beyond the last point have nothing to do
+        return;
+
+    xi_1 = (T)(*(reader_oclmat_data + (get_global_id(0) << 1))); // x of the current point (get_global_id(0) is the same value as idx)
+    yi_1 = (T)(*(reader_oclmat_data + (get_global_id(0) << 1) + 1)); // y of the current point
+    xi_12 = xi_1 * xi_1;
+    yi_12 = yi_1 * yi_1;
+
+    if(idx == contour_total - 1) // the last point pairs with point 0, so the point sequence wraps around
+    {
+        xi = (T)(*(reader_oclmat_data));
+        yi = (T)(*(reader_oclmat_data + 1));
+    }
+    else // otherwise pair with point idx + 1
+    {
+        xi = (T)(*(reader_oclmat_data + (idx + 1) * 2));
+        yi = (T)(*(reader_oclmat_data + (idx + 1) * 2 + 1));
+    }
+
+    xi2 = xi * xi;
+    yi2 = yi * yi;
+    dxy = xi_1 * yi - xi * yi_1; // cross product of the two points; common factor of every term below
+    xii_1 = xi_1 + xi;
+    yii_1 = yi_1 + yi;
+    
+    dst_step /= sizeof(T); // convert the stride from bytes to elements of T
+    *( dst_a + DST_ROW_A00 * dst_step + idx) = dxy;
+    *( dst_a + DST_ROW_A10 * dst_step + idx) = dxy * xii_1;
+    *( dst_a + DST_ROW_A01 * dst_step + idx) = dxy * yii_1;
+    *( dst_a + DST_ROW_A20 * dst_step + idx) = dxy * (xi_1 * xii_1 + xi2);
+    *( dst_a + DST_ROW_A11 * dst_step + idx) = dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi));
+    *( dst_a + DST_ROW_A02 * dst_step + idx) = dxy * (yi_1 * yii_1 + yi2);
+    *( dst_a + DST_ROW_A30 * dst_step + idx) = dxy * xii_1 * (xi_12 + xi2);
+    *( dst_a + DST_ROW_A03 * dst_step + idx) = dxy * yii_1 * (yi_12 + yi2);
+    *( dst_a + DST_ROW_A21 * dst_step + idx) =
+        dxy * (xi_12 * (3 * yi_1 + yi) + 2 * xi * xi_1 * yii_1 +
+               xi2 * (yi_1 + 3 * yi));
+    *( dst_a + DST_ROW_A12 * dst_step + idx) =
+        dxy * (yi_12 * (3 * xi_1 + xi) + 2 * yi * yi_1 * xii_1 +
+               yi2 * (xi_1 + 3 * xi));
+}
+//#endif
+
+//#if defined (DOUBLE_SUPPORT)
+__kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, // per-tile raw moments of uchar data; the incoming tileSize_* values are overwritten below
+                           __global double* dst_m00, // each dst_m* buffer receives one value per tile at [wgidy*dst_cols+wgidx]
+                           __global double* dst_m10,
+                           __global double* dst_m01,
+                           __global double* dst_m20,
+                           __global double* dst_m11,
+                           __global double* dst_m02,
+                           __global double* dst_m30,
+                           __global double* dst_m21,
+                           __global double* dst_m12,
+                           __global double* dst_m03,
+                           int dst_cols, int dst_step, int type, int depth, int cn, int coi, int binary, int TILE_SIZE) // coi > 0 selects one channel; binary != 0 thresholds pixels to 0/255
+{
+    uchar tmp_coi[16]; // get the coi data
+    uchar16 tmp[16]; // this work-item's tile row, 16 pixels per element
+    int VLEN_C = 16;  // vector length of uchar
+
+    int gidy = get_global_id(0);
+    int gidx = get_global_id(1);
+    int wgidy = get_group_id(0);
+    int wgidx = get_group_id(1);
+    int lidy = get_local_id(0);
+    int lidx = get_local_id(1);
+    int y = wgidy*TILE_SIZE; // first row of this work-group's tile
+    int x = wgidx*TILE_SIZE;  // first column of this work-group's tile
+    int kcn = (cn==2)?2:4; // per-pixel channel stride used by the coi gather below
+    int rstep = min(src_step, TILE_SIZE);
+    tileSize_height = min(TILE_SIZE, src_rows - y); // clip the tile to the image bottom
+    tileSize_width = min(TILE_SIZE, src_cols - x); // clip the tile to the image right edge
+
+    if( tileSize_width < TILE_SIZE ) // clipped tile: zero the bytes after the valid columns (note: this writes into the source buffer)
+        for(int i = tileSize_width; i < rstep; i++ )
+            *((__global uchar*)src_data+(y+lidy)*src_step+x+i) = 0;
+    if( coi > 0 )      //channel of interest
+        for(int i = 0; i < tileSize_width; i += VLEN_C)
+        {
+            for(int j=0; j<VLEN_C; j++) // gather channel coi-1 of 16 consecutive pixels
+                tmp_coi[j] = *((__global uchar*)src_data+(y+lidy)*src_step+(x+i+j)*kcn+coi-1);
+            tmp[i/VLEN_C] = (uchar16)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7],
+                                      tmp_coi[8],tmp_coi[9],tmp_coi[10],tmp_coi[11],tmp_coi[12],tmp_coi[13],tmp_coi[14],tmp_coi[15]);
+        }
+    else // no coi: load the row directly as uchar16 vectors
+        for(int i=0; i < tileSize_width; i+=VLEN_C)
+            tmp[i/VLEN_C] = *(src_data+(y+lidy)*src_step/VLEN_C+(x+i)/VLEN_C);
+    uchar16 zero = (uchar16)(0);
+    uchar16 full = (uchar16)(255);
+    if( binary ) // binary mode: every non-zero pixel becomes 255 (scaled back by 1/255 before output)
+        for(int i=0; i < tileSize_width; i+=VLEN_C)
+            tmp[i/VLEN_C] = (tmp[i/VLEN_C]!=zero)?full:zero;
+    double mom[10];
+    __local int m[10][128]; // exchange buffer for the row reduction: m[k][slot] holds partial sum k
+    if(lidy == 0) // clear the exchange buffer before anyone accumulates from it
+        for(int i=0; i<10; i++)
+            for(int j=0; j<128; j++)
+                m[i][j]=0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    int lm[10] = {0}; // this work-item's private partial sums, one per moment
+    int16 x0 = (int16)(0);
+    int16 x1 = (int16)(0);
+    int16 x2 = (int16)(0);
+    int16 x3 = (int16)(0);
+    for( int xt = 0 ; xt < tileSize_width; xt+=(VLEN_C) ) // accumulate sum(p), sum(x*p), sum(x^2*p), sum(x^3*p) per lane, x being the tile-local column
+    {
+        int16 v_xt = (int16)(xt, xt+1, xt+2, xt+3, xt+4, xt+5, xt+6, xt+7, xt+8, xt+9, xt+10, xt+11, xt+12, xt+13, xt+14, xt+15);
+        int16 p = convert_int16(tmp[xt/VLEN_C]);
+        int16 xp = v_xt * p, xxp = xp *v_xt;
+        x0 += p;
+        x1 += xp;
+        x2 += xxp;
+        x3 += xxp * v_xt;
+    }
+    x0.s0 += x0.s1 + x0.s2 + x0.s3 + x0.s4 + x0.s5 + x0.s6 + x0.s7 + x0.s8 + x0.s9 + x0.sa + x0.sb + x0.sc + x0.sd + x0.se + x0.sf; // fold the 16 lanes into lane 0
+    x1.s0 += x1.s1 + x1.s2 + x1.s3 + x1.s4 + x1.s5 + x1.s6 + x1.s7 + x1.s8 + x1.s9 + x1.sa + x1.sb + x1.sc + x1.sd + x1.se + x1.sf;
+    x2.s0 += x2.s1 + x2.s2 + x2.s3 + x2.s4 + x2.s5 + x2.s6 + x2.s7 + x2.s8 + x2.s9 + x2.sa + x2.sb + x2.sc + x2.sd + x2.se + x2.sf;
+    x3.s0 += x3.s1 + x3.s2 + x3.s3 + x3.s4 + x3.s5 + x3.s6 + x3.s7 + x3.s8 + x3.s9 + x3.sa + x3.sb + x3.sc + x3.sd + x3.se + x3.sf;
+    int py = lidy * ((int)x0.s0); // tile-local row index times the row sum
+    int sy = lidy*lidy;
+    int bheight = min(tileSize_height, TILE_SIZE/2);
+    if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) // rows in the upper half of a tall tile park their sums in m[] for the lower half to pick up
+    {
+        m[9][lidy-bheight] = ((int)py) * sy;  // m03
+        m[8][lidy-bheight] = ((int)x1.s0) * sy;  // m12
+        m[7][lidy-bheight] = ((int)x2.s0) * lidy;  // m21
+        m[6][lidy-bheight] = x3.s0;             // m30
+        m[5][lidy-bheight] = x0.s0 * sy;        // m02
+        m[4][lidy-bheight] = x1.s0 * lidy;         // m11
+        m[3][lidy-bheight] = x2.s0;             // m20
+        m[2][lidy-bheight] = py;             // m01
+        m[1][lidy-bheight] = x1.s0;             // m10
+        m[0][lidy-bheight] = x0.s0;             // m00
+    }
+    else if(lidy < bheight) // rows in the lower half keep their sums in private lm[]
+    {
+        lm[9] = ((int)py) * sy;  // m03
+        lm[8] = ((int)x1.s0) * sy;  // m12
+        lm[7] = ((int)x2.s0) * lidy;  // m21
+        lm[6] = x3.s0;             // m30
+        lm[5] = x0.s0 * sy;        // m02
+        lm[4] = x1.s0 * lidy;         // m11
+        lm[3] = x2.s0;             // m20
+        lm[2] = py;             // m01
+        lm[1] = x1.s0;             // m10
+        lm[0] = x0.s0;             // m00
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) // halving reduction; starts at TILE_SIZE/2 as CvMoments_D2 does, because starting at a clipped non-power-of-two bheight drops rows (assumes TILE_SIZE is a power of two)
+    {
+        if(lidy < j) // pull in the partial parked in this work-item's slot
+            for( int i = 0; i < 10; i++ )
+                lm[i] = lm[i] + m[i][lidy];
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lidy >= j/2&&lidy < j) // upper half of the active range hands its sum down to the lower half
+            for( int i = 0; i < 10; i++ )
+                m[i][lidy-j/2] = lm[i];
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if(lidy == 0&&lidx == 0) // work-item (0,0) now holds the tile totals in lm[]
+    {
+        for( int mt = 0; mt < 10; mt++ )
+            mom[mt] = (double)lm[mt];
+        if(binary)
+        {
+            double s = 1./255; // undo the 255 used for non-zero pixels in binary mode
+            for( int mt = 0; mt < 10; mt++ )
+                mom[mt] *= s;
+        }
+        double xm = x * mom[0], ym = y * mom[0];
+
+        // accumulate moments computed in each tile
+
+        // + m00 ( = m00' )
+        dst_m00[wgidy*dst_cols+wgidx] = mom[0];
+
+        // + m10 ( = m10' + x*m00' )
+        dst_m10[wgidy*dst_cols+wgidx]  = mom[1] + xm;
+
+        // + m01 ( = m01' + y*m00' )
+        dst_m01[wgidy*dst_cols+wgidx]  = mom[2] + ym;
+
+        // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+        dst_m20[wgidy*dst_cols+wgidx]  = mom[3] + x * (mom[1] * 2 + xm);
+
+        // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+        dst_m11[wgidy*dst_cols+wgidx]  = mom[4] + x * (mom[2] + ym) + y * mom[1];
+
+        // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+        dst_m02[wgidy*dst_cols+wgidx]  = mom[5] + y * (mom[2] * 2 + ym);
+
+        // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+        dst_m30[wgidy*dst_cols+wgidx]  = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+
+        // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+        dst_m21[wgidy*dst_cols+wgidx]  = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+
+        // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+        dst_m12[wgidy*dst_cols+wgidx]  = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+
+        // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+        dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+    }
+}
+//#endif
+//#if defined (DOUBLE_SUPPORT)
+__kernel void dst_sum(int src_rows, int src_cols, int tile_height, int tile_width, int TILE_SIZE, __global double* sum, __global double* dst_m00, // sums the first block_num entries of each dst_m* buffer into sum[0..9]
+                      __global double* dst_m10,
+                      __global double* dst_m01,
+                      __global double* dst_m20,
+                      __global double* dst_m11,
+                      __global double* dst_m02,
+                      __global double* dst_m30,
+                      __global double* dst_m21,
+                      __global double* dst_m12,
+                      __global double* dst_m03)
+{
+    int gidy = get_global_id(0); // NOTE(review): used below to index __local storage, so this presumably expects a single work-group of at most 128 work-items -- confirm against the host launch
+    int gidx = get_global_id(1);
+    int block_y = src_rows/tile_height; // number of whole tiles vertically
+    int block_x = src_cols/tile_width; // number of whole tiles horizontally
+    int block_num;
+
+    if(src_rows > TILE_SIZE && src_rows % TILE_SIZE != 0) // count the partial tile at the bottom
+        block_y ++;
+    if(src_cols > TILE_SIZE && src_cols % TILE_SIZE != 0) // count the partial tile at the right
+        block_x ++;
+    block_num = block_y * block_x; // number of per-tile entries to sum
+    __local double dst_sum[10][128]; // reduction scratch: one row per moment, 128 slots
+    if(gidy<128-block_num) // zero the slots past block_num so they do not affect the sum
+        for(int i=0; i<10; i++)
+            dst_sum[i][gidy+block_num]=0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(gidy<block_num) // NOTE(review): no clamp to 128 here; looks like block_num <= 128 is assumed -- confirm
+    {
+        dst_sum[0][gidy] = dst_m00[gidy];
+        dst_sum[1][gidy] = dst_m10[gidy];
+        dst_sum[2][gidy] = dst_m01[gidy];
+        dst_sum[3][gidy] = dst_m20[gidy];
+        dst_sum[4][gidy] = dst_m11[gidy];
+        dst_sum[5][gidy] = dst_m02[gidy];
+        dst_sum[6][gidy] = dst_m30[gidy];
+        dst_sum[7][gidy] = dst_m21[gidy];
+        dst_sum[8][gidy] = dst_m12[gidy];
+        dst_sum[9][gidy] = dst_m03[gidy];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for(int lsize=64; lsize>0; lsize>>=1) // pairwise reduction: 128 -> 64 -> ... -> 1 slots
+    {
+        if(gidy<lsize)
+        {
+            int lsize2 = gidy + lsize; // partner slot in the upper half
+            for(int i=0; i<10; i++)
+                dst_sum[i][gidy] += dst_sum[i][lsize2];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if(gidy==0) // slot 0 now holds the totals; publish one value per moment
+        for(int i=0; i<10; i++)
+            sum[i] = dst_sum[i][0];
+}
+//#endif
+//#if defined (DOUBLE_SUPPORT)
+__kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, // per-tile raw moments of ushort data; the incoming tileSize_* values are overwritten below
+                           __global double* dst_m00, // each dst_m* buffer receives one value per tile at [wgidy*dst_cols+wgidx]
+                           __global double* dst_m10,
+                           __global double* dst_m01,
+                           __global double* dst_m20,
+                           __global double* dst_m11,
+                           __global double* dst_m02,
+                           __global double* dst_m30,
+                           __global double* dst_m21,
+                           __global double* dst_m12,
+                           __global double* dst_m03,
+                           int dst_cols, int dst_step,
+                           int type, int depth, int cn, int coi, int binary, const int TILE_SIZE) // coi > 0 selects one channel; binary != 0 thresholds pixels to 0/255
+{
+    ushort tmp_coi[8]; // get the coi data
+    ushort8 tmp[32]; // this work-item's tile row, 8 pixels per element
+    int VLEN_US = 8; // vector length of ushort
+    int gidy = get_global_id(0);
+    int gidx = get_global_id(1);
+    int wgidy = get_group_id(0);
+    int wgidx = get_group_id(1);
+    int lidy = get_local_id(0);
+    int lidx = get_local_id(1);
+    int y = wgidy*TILE_SIZE;  // real Y index of pixel
+    int x = wgidx*TILE_SIZE;  // real X index of pixel
+    int kcn = (cn==2)?2:4; // per-pixel channel stride used by the coi gather below
+    int rstep = min(src_step/2, TILE_SIZE); // src_step/2: the step is halved wherever ushort elements are addressed
+    tileSize_height = min(TILE_SIZE, src_rows - y); // clip the tile to the image bottom
+    tileSize_width = min(TILE_SIZE, src_cols -x); // clip the tile to the image right edge
+    if(src_cols > TILE_SIZE && tileSize_width < TILE_SIZE) // clipped tile: zero the elements after the valid columns (note: this writes into the source buffer)
+        for(int i=tileSize_width; i < rstep; i++ )
+            *((__global ushort*)src_data+(y+lidy)*src_step/2+x+i) = 0;
+    if( coi > 0 ) // channel of interest: gather channel coi-1 of 8 consecutive pixels
+        for(int i=0; i < tileSize_width; i+=VLEN_US)
+        {
+            for(int j=0; j<VLEN_US; j++)
+                tmp_coi[j] = *((__global ushort*)src_data+(y+lidy)*(int)src_step/2+(x+i+j)*kcn+coi-1);
+            tmp[i/VLEN_US] = (ushort8)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]);
+        }
+    else // no coi: load the row directly as ushort8 vectors
+        for(int i=0; i < tileSize_width; i+=VLEN_US)
+            tmp[i/VLEN_US] = *(src_data+(y+lidy)*src_step/(2*VLEN_US)+(x+i)/VLEN_US);
+    ushort8 zero = (ushort8)(0);
+    ushort8 full = (ushort8)(255);
+    if( binary ) // binary mode: every non-zero pixel becomes 255 (scaled back by 1/255 before output)
+        for(int i=0; i < tileSize_width; i+=VLEN_US)
+            tmp[i/VLEN_US] = (tmp[i/VLEN_US]!=zero)?full:zero;
+    double mom[10];
+    __local long m[10][128]; // exchange buffer for the row reduction: m[k][slot] holds partial sum k
+    if(lidy == 0) // clear the exchange buffer before anyone accumulates from it
+        for(int i=0; i<10; i++)
+            for(int j=0; j<128; j++)
+                m[i][j]=0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    long lm[10] = {0}; // this work-item's private partial sums, one per moment
+    int8 x0 = (int8)(0);
+    int8 x1 = (int8)(0);
+    int8 x2 = (int8)(0);
+    long8 x3 = (long8)(0); // the cubic term is accumulated in 64-bit lanes
+    for( int xt = 0 ; xt < tileSize_width; xt+=(VLEN_US) ) // accumulate sum(p), sum(x*p), sum(x^2*p), sum(x^3*p) per lane, x being the tile-local column
+    {
+        int8 v_xt = (int8)(xt, xt+1, xt+2, xt+3, xt+4, xt+5, xt+6, xt+7);
+        int8 p = convert_int8(tmp[xt/VLEN_US]);
+        int8 xp = v_xt * p, xxp = xp * v_xt;
+        x0 += p;
+        x1 += xp;
+        x2 += xxp;
+        x3 += convert_long8(xxp) *convert_long8(v_xt);
+    }
+    x0.s0 += x0.s1 + x0.s2 + x0.s3 + x0.s4 + x0.s5 + x0.s6 + x0.s7; // fold the 8 lanes into lane 0
+    x1.s0 += x1.s1 + x1.s2 + x1.s3 + x1.s4 + x1.s5 + x1.s6 + x1.s7;
+    x2.s0 += x2.s1 + x2.s2 + x2.s3 + x2.s4 + x2.s5 + x2.s6 + x2.s7;
+    x3.s0 += x3.s1 + x3.s2 + x3.s3 + x3.s4 + x3.s5 + x3.s6 + x3.s7;
+
+    int py = lidy * x0.s0, sy = lidy*lidy; // tile-local row index times the row sum, and the row index squared
+    int bheight = min(tileSize_height, TILE_SIZE/2);
+    if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) // rows in the upper half of a tall tile park their sums in m[] for the lower half to pick up
+    {
+        m[9][lidy-bheight] = ((long)py) * sy;  // m03
+        m[8][lidy-bheight] = ((long)x1.s0) * sy;  // m12
+        m[7][lidy-bheight] = ((long)x2.s0) * lidy;  // m21
+        m[6][lidy-bheight] = x3.s0;             // m30
+        m[5][lidy-bheight] = x0.s0 * sy;        // m02
+        m[4][lidy-bheight] = x1.s0 * lidy;         // m11
+        m[3][lidy-bheight] = x2.s0;             // m20
+        m[2][lidy-bheight] = py;             // m01
+        m[1][lidy-bheight] = x1.s0;             // m10
+        m[0][lidy-bheight] = x0.s0;             // m00
+    }
+    else if(lidy < bheight) // rows in the lower half keep their sums in private lm[]
+    {
+        lm[9] = ((long)py) * sy;  // m03
+        lm[8] = ((long)x1.s0) * sy;  // m12
+        lm[7] = ((long)x2.s0) * lidy;  // m21
+        lm[6] = x3.s0;             // m30
+        lm[5] = x0.s0 * sy;        // m02
+        lm[4] = x1.s0 * lidy;         // m11
+        lm[3] = x2.s0;             // m20
+        lm[2] = py;             // m01
+        lm[1] = x1.s0;             // m10
+        lm[0] = x0.s0;             // m00
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) // halving reduction over the work-items of the lower half-tile
+    {
+        if(lidy < j) // pull in the partial parked in this work-item's slot
+            for( int i = 0; i < 10; i++ )
+                lm[i] = lm[i] + m[i][lidy];
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lidy >= j/2&&lidy < j) // upper half of the active range hands its sum down to the lower half
+            for( int i = 0; i < 10; i++ )
+                m[i][lidy-j/2] = lm[i];
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if(lidy == 0&&lidx == 0) // work-item (0,0) now holds the tile totals in lm[]
+    {
+        for(int mt = 0; mt < 10; mt++ )
+            mom[mt] = (double)lm[mt];
+
+        if(binary)
+        {
+            double s = 1./255; // undo the 255 used for non-zero pixels in binary mode
+            for( int mt = 0; mt < 10; mt++ )
+                mom[mt] *= s;
+        }
+
+        double xm = x  *mom[0], ym = y * mom[0];
+
+        // accumulate moments computed in each tile
+
+        // + m00 ( = m00' )
+        dst_m00[wgidy*dst_cols+wgidx] = mom[0];
+
+        // + m10 ( = m10' + x*m00' )
+        dst_m10[wgidy*dst_cols+wgidx]  = mom[1] + xm;
+
+        // + m01 ( = m01' + y*m00' )
+        dst_m01[wgidy*dst_cols+wgidx]  = mom[2] + ym;
+
+        // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+        dst_m20[wgidy*dst_cols+wgidx]  = mom[3] + x * (mom[1] * 2 + xm);
+
+        // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+        dst_m11[wgidy*dst_cols+wgidx]  = mom[4] + x * (mom[2] + ym) + y * mom[1];
+
+        // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+        dst_m02[wgidy*dst_cols+wgidx]  = mom[5] + y * (mom[2] * 2 + ym);
+
+        // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+        dst_m30[wgidy*dst_cols+wgidx]  = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+
+        // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+        dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+
+        // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+        dst_m12[wgidy*dst_cols+wgidx]  = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+
+        // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+        dst_m03[wgidy*dst_cols+wgidx]  = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+    }
+}
+//#endif
+//#if defined (DOUBLE_SUPPORT)
+// CvMoments_D3 -- per-tile raw moments (m00 .. m03) of a 16-bit signed (short) image.
+//
+// Work decomposition used by the code: get_group_id(0)/(1) select the tile (origin
+// y = wgidy*TILE_SIZE, x = wgidx*TILE_SIZE) and get_local_id(0) selects the row inside
+// the tile. Every work-item accumulates the moments of its own row relative to the tile
+// origin, the rows are combined through the __local array m, and the work-item with
+// local id (0,0) shifts the tile moments to image coordinates and stores one double per
+// moment at index wgidy*dst_cols + wgidx of the dst_mXX arrays.
+//
+// Parameters:
+//   src_data                 source pixels; NOTE: written to as well (the part of the row
+//                            right of a clipped tile is overwritten with zeros).
+//   src_rows, src_cols       image size, used to clip the tile.
+//   src_step                 row pitch; divided by 2 (sizeof(short)) when indexing.
+//   tileSize_width/_height   overwritten inside the kernel; the incoming values are ignored.
+//   dst_m00 .. dst_m03       outputs, one element per tile.
+//   dst_cols                 row pitch, in elements, of the dst_mXX arrays.
+//   dst_step, type, depth    not used.
+//   cn                       only selects the pixel stride on the coi path (2 if cn==2, else 4).
+//   coi                      > 0: gather channel coi-1 of every pixel; otherwise load contiguously.
+//   binary                   non-zero: pixels are mapped to 0/255 and the moments scaled by 1/255.
+//   TILE_SIZE                tile edge; tmp[32] (short8) and m[10][128] bound it to at most 256.
+//
+// Fix relative to the previous revision: the x*x*p row sum, m02 and m11 are accumulated in
+// 64-bit integers. With 16-bit pixels and rows of up to 256 pixels these values exceed the
+// range of int (e.g. sum(x*x)*32767 ~ 1.8e11), which silently corrupted m20/m21/m02/m11.
+//
+// NOTE(review): rows with y+lidy >= src_rows are still loaded (and zero-padded) although
+// their results are discarded; presumably the host over-allocates the buffer -- confirm.
+__kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
+                           __global double* dst_m00,
+                           __global double* dst_m10,
+                           __global double* dst_m01,
+                           __global double* dst_m20,
+                           __global double* dst_m11,
+                           __global double* dst_m02,
+                           __global double* dst_m30,
+                           __global double* dst_m21,
+                           __global double* dst_m12,
+                           __global double* dst_m03,
+                           int dst_cols, int dst_step,
+                           int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
+{
+    short tmp_coi[8]; // scratch for the eight samples gathered on the coi path
+    short8 tmp[32];   // this work-item's row of the tile, eight pixels per element
+    int VLEN_S =8; // vector length of short
+    int wgidy = get_group_id(0);
+    int wgidx = get_group_id(1);
+    int lidy = get_local_id(0);
+    int lidx = get_local_id(1);
+    int y = wgidy*TILE_SIZE;  // real Y index of pixel
+    int x = wgidx*TILE_SIZE;  // real X index of pixel
+    int kcn = (cn==2)?2:4;
+    int rstep = min(src_step/2, TILE_SIZE);
+    // clip the tile against the image border
+    tileSize_height = min(TILE_SIZE, src_rows - y);
+    tileSize_width = min(TILE_SIZE, src_cols -x);
+    // zero the source row right of a clipped tile so that the vector loads below pick up zeros
+    if(tileSize_width < TILE_SIZE)
+        for(int i = tileSize_width; i < rstep; i++ )
+            *((__global short*)src_data+(y+lidy)*src_step/2+x+i) = 0;
+    if( coi > 0 )
+        for(int i=0; i < tileSize_width; i+=VLEN_S)
+        {
+            for(int j=0; j<VLEN_S; j++)
+                tmp_coi[j] = *((__global short*)src_data+(y+lidy)*src_step/2+(x+i+j)*kcn+coi-1);
+            tmp[i/VLEN_S] = (short8)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]);
+        }
+    else
+        for(int i=0; i < tileSize_width; i+=VLEN_S)
+            tmp[i/VLEN_S] = *(src_data+(y+lidy)*src_step/(2*VLEN_S)+(x+i)/VLEN_S);
+    short8 zero = (short8)(0);
+    short8 full = (short8)(255);
+    // binary mode: every non-zero pixel becomes 255
+    if( binary )
+        for(int i=0; i < tileSize_width; i+=(VLEN_S))
+            tmp[i/VLEN_S] = (tmp[i/VLEN_S]!=zero)?full:zero;
+
+    double mom[10];
+    __local long m[10][128];
+    // clear the exchange buffer before any row publishes into it
+    if(lidy == 0)
+        for(int i=0; i<10; i++)
+            for(int j=0; j<128; j++)
+                m[i][j]=0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    long lm[10] = {0};
+    int8 x0 = (int8)(0);   // sum p
+    int8 x1 = (int8)(0);   // sum x*p
+    long8 x2 = (long8)(0); // sum x*x*p   -- 64-bit: the row sum does not fit in int
+    long8 x3 = (long8)(0); // sum x*x*x*p
+    for( int xt = 0 ; xt < tileSize_width; xt+= (VLEN_S))
+    {
+        int8 v_xt = (int8)(xt, xt+1, xt+2, xt+3, xt+4, xt+5, xt+6, xt+7);
+        int8 p = convert_int8(tmp[xt/VLEN_S]);
+        // a single x*x*p term still fits in int (x < 256, |p| <= 32768)
+        int8 xp = v_xt * p, xxp = xp * v_xt;
+        x0 += p;
+        x1 += xp;
+        x2 += convert_long8(xxp);
+        x3 += convert_long8(xxp) * convert_long8(v_xt);
+    }
+    // horizontal sums: lane 0 ends up holding the row total
+    x0.s0 += x0.s1 + x0.s2 + x0.s3 + x0.s4 + x0.s5 + x0.s6 + x0.s7;
+    x1.s0 += x1.s1 + x1.s2 + x1.s3 + x1.s4 + x1.s5 + x1.s6 + x1.s7;
+    x2.s0 += x2.s1 + x2.s2 + x2.s3 + x2.s4 + x2.s5 + x2.s6 + x2.s7;
+    x3.s0 += x3.s1 + x3.s2 + x3.s3 + x3.s4 + x3.s5 + x3.s6 + x3.s7;
+
+    int py = lidy * x0.s0, sy = lidy*lidy;
+    int bheight = min(tileSize_height, TILE_SIZE/2);
+    // rows of the upper half of a tall tile publish their moments into m ...
+    if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
+    {
+        m[9][lidy-bheight] = ((long)py) * sy;  // m03
+        m[8][lidy-bheight] = ((long)x1.s0) * sy;  // m12
+        m[7][lidy-bheight] = x2.s0 * lidy;  // m21
+        m[6][lidy-bheight] = x3.s0;             // m30
+        m[5][lidy-bheight] = ((long)x0.s0) * sy;        // m02 (64-bit product)
+        m[4][lidy-bheight] = ((long)x1.s0) * lidy;         // m11 (64-bit product)
+        m[3][lidy-bheight] = x2.s0;             // m20
+        m[2][lidy-bheight] = py;             // m01
+        m[1][lidy-bheight] = x1.s0;             // m10
+        m[0][lidy-bheight] = x0.s0;             // m00
+    }
+    // ... rows of the lower half keep theirs in the private accumulator lm
+    else if(lidy < bheight)
+    {
+        lm[9] = ((long)py) * sy;  // m03
+        lm[8] = ((long)(x1.s0)) * sy;  // m12
+        lm[7] = x2.s0 * lidy;  // m21
+        lm[6] = x3.s0;             // m30
+        lm[5] = ((long)x0.s0) * sy;        // m02 (64-bit product)
+        lm[4] = ((long)x1.s0) * lidy;         // m11 (64-bit product)
+        lm[3] = x2.s0;             // m20
+        lm[2] = py;             // m01
+        lm[1] = x1.s0;             // m10
+        lm[0] = x0.s0;             // m00
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    // Pairwise reduction over rows: in each pass the first j rows add what was published
+    // in m, then rows [j/2, j) publish their partial sums for the next pass. After the
+    // last pass (j == 1) the tile totals are in lm of the work-item with lidy == 0.
+    for( int j = TILE_SIZE/2; j >=1; j = j/2 )
+    {
+        if(lidy < j)
+            for( int i = 0; i < 10; i++ )
+                lm[i] = lm[i] + m[i][lidy];
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lidy >= j/2&&lidy < j)
+            for( int i = 0; i < 10; i++ )
+                m[i][lidy-j/2] = lm[i];
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if(lidy ==0 &&lidx ==0)
+    {
+        for(int mt = 0; mt < 10; mt++ )
+            mom[mt] = (double)lm[mt];
+
+        if(binary)
+        {
+            double s = 1./255;
+            for( int mt = 0; mt < 10; mt++ )
+                mom[mt] *= s;
+        }
+
+        double xm = x * mom[0], ym = y*mom[0];
+
+        // shift the tile-relative moments (primed) to image coordinates and store them
+
+        // + m00 ( = m00' )
+        dst_m00[wgidy*dst_cols+wgidx]  = mom[0];
+
+        // + m10 ( = m10' + x*m00' )
+        dst_m10[wgidy*dst_cols+wgidx]  = mom[1] + xm;
+
+        // + m01 ( = m01' + y*m00' )
+        dst_m01[wgidy*dst_cols+wgidx]  = mom[2] + ym;
+
+        // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+        dst_m20[wgidy*dst_cols+wgidx]  = mom[3] + x * (mom[1] * 2 + xm);
+
+        // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+        dst_m11[wgidy*dst_cols+wgidx]   = mom[4] + x * (mom[2] + ym) + y * mom[1];
+
+        // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+        dst_m02[wgidy*dst_cols+wgidx]   = mom[5] + y * (mom[2] * 2 + ym);
+
+        // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+        dst_m30[wgidy*dst_cols+wgidx]  = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+
+        // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+        dst_m21[wgidy*dst_cols+wgidx]  = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+
+        // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+        dst_m12[wgidy*dst_cols+wgidx]  = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+
+        // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+        dst_m03[wgidy*dst_cols+wgidx]  = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+    }
+}
+//#endif
+//#if defined (DOUBLE_SUPPORT)
+__kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
+                            __global double* dst_m00,
+                            __global double* dst_m10,
+                            __global double* dst_m01,
+                            __global double* dst_m20,
+                            __global double* dst_m11,
+                            __global double* dst_m02,
+                            __global double* dst_m30,
+                            __global double* dst_m21,
+                            __global double* dst_m12,
+                            __global double* dst_m03,
+                            int dst_cols, int dst_step,
+                            int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
+{   /* CvMoments_D5: tile-wise moment accumulation for 32-bit float source data.
+     *
+     * get_group_id(0)/(1) select the tile (origin y = wgidy*TILE_SIZE, x = wgidx*TILE_SIZE)
+     * and get_local_id(0) selects the row inside the tile. The active code loads that row
+     * into tmp, optionally maps it to 0/255 (binary), and accumulates the per-row sums
+     * x0 = sum p, x1 = sum x*p, x2 = sum x*x*p, x3 = sum x*x*x*p with x relative to the tile.
+     *
+     * NOTE(review): the row reduction and the write-out to dst_m00 .. dst_m03 are inside the
+     * block comment at the end of this kernel, so as written nothing is stored to the dst_*
+     * arrays; the only global-memory writes are the zero-padding stores into src_data.
+     *
+     * src_step is divided by 4 (sizeof(float)) when indexing. tileSize_width/_height are
+     * overwritten below, so their incoming values are ignored. dst_step, type and depth are
+     * not used. cn only selects the pixel stride on the coi path.
+     */
+    float tmp_coi[4]; // scratch for the four samples gathered on the coi path
+    float4 tmp[64] ; // this work-item's row of the tile, four pixels per element
+    int VLEN_F = 4; // vector length of float
+    int gidy = get_global_id(0); // not used below
+    int gidx = get_global_id(1); // not used below
+    int wgidy = get_group_id(0);
+    int wgidx = get_group_id(1);
+    int lidy = get_local_id(0);
+    int lidx = get_local_id(1); // only referenced by the disabled code
+    int y = wgidy*TILE_SIZE;  // real Y index of pixel
+    int x = wgidx*TILE_SIZE;  // real X index of pixel
+    int kcn = (cn==2)?2:4; // element stride between pixels on the coi path
+    int rstep = min(src_step/4, TILE_SIZE);
+    tileSize_height = min(TILE_SIZE, src_rows - y); // clip the tile against the image border
+    tileSize_width = min(TILE_SIZE, src_cols -x);
+    if(tileSize_width < TILE_SIZE) // clipped tile: zero the source row right of the tile
+        for(int i = tileSize_width; i < rstep; i++ )
+            *((__global float*)src_data+(y+lidy)*src_step/4+x+i) = 0;
+    if( coi > 0 ) // gather channel coi-1 of four consecutive pixels at a time
+        for(int i=0; i < tileSize_width; i+=VLEN_F)
+        {
+            for(int j=0; j<4; j++)
+                tmp_coi[j] = *(src_data+(y+lidy)*src_step/4+(x+i+j)*kcn+coi-1);
+            tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
+        }
+    else // contiguous load, assembled element by element
+        for(int i=0; i < tileSize_width; i+=VLEN_F)
+            tmp[i/VLEN_F] = (float4)(*(src_data+(y+lidy)*src_step/4+x+i),*(src_data+(y+lidy)*src_step/4+x+i+1),*(src_data+(y+lidy)*src_step/4+x+i+2),*(src_data+(y+lidy)*src_step/4+x+i+3));
+    float4 zero = (float4)(0);
+    float4 full = (float4)(255);
+    if( binary ) // binary mode: every non-zero pixel becomes 255
+        for(int i=0; i < tileSize_width; i+=4)
+            tmp[i/VLEN_F] = (tmp[i/VLEN_F]!=zero)?full:zero;
+    double mom[10]; // only used by the disabled code
+    __local double m[10][128]; // row-exchange buffer; zeroed here, otherwise only used by the disabled code
+    if(lidy == 0)
+        for(int i = 0; i < 10; i ++)
+            for(int j = 0; j < 128; j ++)
+                m[i][j] = 0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    double lm[10] = {0}; // only used by the disabled code
+    double4 x0 = (double4)(0); // sum p
+    double4 x1 = (double4)(0); // sum x*p
+    double4 x2 = (double4)(0); // sum x*x*p
+    double4 x3 = (double4)(0); // sum x*x*x*p
+    for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_F )
+    {
+        double4 v_xt = (double4)(xt, xt+1, xt+2, xt+3); // tile-relative x of the four lanes
+        double4 p = convert_double4(tmp[xt/VLEN_F]);
+        double4 xp = v_xt * p, xxp = xp * v_xt;
+        x0 += p;
+        x1 += xp;
+        x2 += xxp;
+        x3 += xxp * v_xt;
+    }
+    x0.s0 += x0.s1 + x0.s2 + x0.s3; // horizontal sums: lane 0 ends up holding the row total
+    x1.s0 += x1.s1 + x1.s2 + x1.s3;
+    x2.s0 += x2.s1 + x2.s2 + x2.s3;
+    x3.s0 += x3.s1 + x3.s2 + x3.s3;
+/* NOTE(review): everything from here to the end of the kernel is disabled. It is the
+   row reduction through m/lm followed by the store of the ten tile moments, i.e. the
+   part that would produce this kernel's output. Confirm whether that is intended.
+
+    double py = lidy * x0.s0, sy = lidy*lidy;
+    int bheight = min(tileSize_height, TILE_SIZE/2);
+    if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height)
+    {
+        m[9][lidy-bheight] = ((double)py) * sy;  // m03
+        m[8][lidy-bheight] = ((double)x1.s0) * sy;  // m12
+        m[7][lidy-bheight] = ((double)x2.s0) * lidy;  // m21
+        m[6][lidy-bheight] = x3.s0;             // m30
+        m[5][lidy-bheight] = x0.s0 * sy;        // m02
+        m[4][lidy-bheight] = x1.s0 * lidy;         // m11
+        m[3][lidy-bheight] = x2.s0;             // m20
+        m[2][lidy-bheight] = py;             // m01
+        m[1][lidy-bheight] = x1.s0;             // m10
+        m[0][lidy-bheight] = x0.s0;             // m00
+    }
+    else if(lidy < bheight)
+    {
+        lm[9] = ((double)py) * sy;  // m03
+        lm[8] = ((double)x1.s0) * sy;  // m12
+        lm[7] = ((double)x2.s0) * lidy;  // m21
+        lm[6] = x3.s0;             // m30
+        lm[5] = x0.s0 * sy;        // m02
+        lm[4] = x1.s0 * lidy;         // m11
+        lm[3] = x2.s0;             // m20
+        lm[2] = py;             // m01
+        lm[1] = x1.s0;             // m10
+        lm[0] = x0.s0;             // m00
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for( int j = TILE_SIZE/2; j >= 1; j = j/2 )
+    {
+        if(lidy < j)
+            for( int i = 0; i < 10; i++ )
+                lm[i] = lm[i] + m[i][lidy];
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lidy >= j/2&&lidy < j)
+            for( int i = 0; i < 10; i++ )
+                m[i][lidy-j/2] = lm[i];
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if(lidy == 0&&lidx == 0)
+    {
+        for(int mt = 0; mt < 10; mt++ )
+            mom[mt] = (double)lm[mt];
+
+        if(binary)
+        {
+            double s = 1./255;
+            for( int mt = 0; mt < 10; mt++ )
+                mom[mt] *= s;
+        }
+
+        double xm = x * mom[0], ym = y * mom[0];
+
+        // accumulate moments computed in each tile
+
+        // + m00 ( = m00' )
+        dst_m00[wgidy*dst_cols+wgidx]= mom[0];
+
+        // + m10 ( = m10' + x*m00' )
+        dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm;
+
+        // + m01 ( = m01' + y*m00' )
+        dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym;
+
+        // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+        dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm);
+
+        // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+        dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1];
+
+        // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+        dst_m02[wgidy*dst_cols+wgidx]= mom[5] + y * (mom[2] * 2 + ym);
+
+        // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+        dst_m30[wgidy*dst_cols+wgidx]= mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+
+        // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+        dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+
+        // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+        dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+
+        // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+        dst_m03[wgidy*dst_cols+wgidx]= mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+    }*/
+}
+//#endif
+//#if defined (DOUBLE_SUPPORT)
+__kernel void CvMoments_D6(__global double* src_data,  int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height,
+                           __global double* dst_m00,
+                           __global double* dst_m10,
+                           __global double* dst_m01,
+                           __global double* dst_m20,
+                           __global double* dst_m11,
+                           __global double* dst_m02,
+                           __global double* dst_m30,
+                           __global double* dst_m21,
+                           __global double* dst_m12,
+                           __global double* dst_m03,
+                           int dst_cols, int dst_step,
+                           int type, int depth, int cn, int coi, int binary, const int TILE_SIZE)
+{   /* CvMoments_D6: per-tile raw moments (m00 .. m03) of a double-precision image.
+     *
+     * get_group_id(0)/(1) select the tile (origin y = wgidy*TILE_SIZE, x = wgidx*TILE_SIZE)
+     * and get_local_id(0) selects the row inside the tile. Every work-item accumulates the
+     * moments of its own row relative to the tile origin, the rows are combined through the
+     * __local array m, and the work-item with local id (0,0) shifts the tile moments to
+     * image coordinates and stores one double per moment at index wgidy*dst_cols + wgidx
+     * of the dst_mXX arrays.
+     *
+     * src_data is also written: the part of the row right of a clipped tile is set to 0.
+     * src_step is divided by 8 (sizeof(double)) when indexing. tileSize_width/_height are
+     * overwritten below, so their incoming values are ignored. dst_step, type and depth are
+     * not used. cn only selects the pixel stride on the coi path. With binary != 0 pixels
+     * are mapped to 0/255 and the resulting moments are scaled by 1/255.
+     * tmp[64] (double4) and m[10][128] bound TILE_SIZE to at most 256.
+     *
+     * NOTE(review): rows with y+lidy >= src_rows are still loaded (and zero-padded) although
+     * their results are discarded; presumably the host over-allocates the buffer -- confirm.
+     */
+    double tmp_coi[4]; // scratch for the four samples gathered on the coi path
+    double4 tmp[64]; // this work-item's row of the tile, four pixels per element
+    int VLEN_D = 4; // vector length of double
+    int gidy = get_global_id(0); // not used below
+    int gidx = get_global_id(1); // not used below
+    int wgidy = get_group_id(0);
+    int wgidx = get_group_id(1);
+    int lidy = get_local_id(0);
+    int lidx = get_local_id(1);
+    int y = wgidy*TILE_SIZE;  // real Y index of pixel
+    int x = wgidx*TILE_SIZE;  // real X index of pixel
+    int kcn = (cn==2)?2:4; // element stride between pixels on the coi path
+    int rstep = min(src_step/8, TILE_SIZE);
+    tileSize_height = min(TILE_SIZE,  src_rows - y); // clip the tile against the image border
+    tileSize_width = min(TILE_SIZE, src_cols - x);
+
+    if(tileSize_width < TILE_SIZE) // clipped tile: zero the source row right of the tile
+        for(int i = tileSize_width; i < rstep; i++ )
+            *((__global double*)src_data+(y+lidy)*src_step/8+x+i) = 0;
+    if( coi > 0 ) // gather channel coi-1 of four consecutive pixels at a time
+        for(int i=0; i < tileSize_width; i+=VLEN_D)
+        {
+            for(int j=0; j<4; j++)
+                tmp_coi[j] = *(src_data+(y+lidy)*src_step/8+(x+i+j)*kcn+coi-1);
+            tmp[i/VLEN_D] = (double4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
+        }
+    else // contiguous load, assembled element by element
+        for(int i=0; i < tileSize_width; i+=VLEN_D)
+            tmp[i/VLEN_D] = (double4)(*(src_data+(y+lidy)*src_step/8+x+i),*(src_data+(y+lidy)*src_step/8+x+i+1),*(src_data+(y+lidy)*src_step/8+x+i+2),*(src_data+(y+lidy)*src_step/8+x+i+3));
+    double4 zero = (double4)(0);
+    double4 full = (double4)(255);
+    if( binary ) // binary mode: every non-zero pixel becomes 255
+        for(int i=0; i < tileSize_width; i+=VLEN_D)
+            tmp[i/VLEN_D] = (tmp[i/VLEN_D]!=zero)?full:zero;
+    double mom[10]; // final tile moments, filled by the writing work-item only
+    __local double m[10][128]; // row-exchange buffer for the reduction below
+    if(lidy == 0) // clear the exchange buffer before any row publishes into it
+        for(int i=0; i<10; i++)
+            for(int j=0; j<128; j++)
+                m[i][j]=0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    double lm[10] = {0}; // private accumulator of the ten moments
+    double4 x0 = (double4)(0); // sum p
+    double4 x1 = (double4)(0); // sum x*p
+    double4 x2 = (double4)(0); // sum x*x*p
+    double4 x3 = (double4)(0); // sum x*x*x*p
+    for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_D )
+    {
+        double4 v_xt = (double4)(xt, xt+1, xt+2, xt+3); // tile-relative x of the four lanes
+        double4 p = tmp[xt/VLEN_D];
+        double4 xp = v_xt * p, xxp = xp * v_xt;
+        x0 += p;
+        x1 += xp;
+        x2 += xxp;
+        x3 += xxp *v_xt;
+    }
+    x0.s0 += x0.s1 + x0.s2 + x0.s3; // horizontal sums: lane 0 ends up holding the row total
+    x1.s0 += x1.s1 + x1.s2 + x1.s3;
+    x2.s0 += x2.s1 + x2.s2 + x2.s3;
+    x3.s0 += x3.s1 + x3.s2 + x3.s3;
+
+    double py = lidy * x0.s0, sy = lidy*lidy; // y*sum(p) and y*y, with y the row index inside the tile
+    int bheight = min(tileSize_height, TILE_SIZE/2);
+    if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) // upper-half rows of a tall tile publish into m
+    {
+        m[9][lidy-bheight] = ((double)py) * sy;  // m03
+        m[8][lidy-bheight] = ((double)x1.s0) * sy;  // m12
+        m[7][lidy-bheight] = ((double)x2.s0) * lidy;  // m21
+        m[6][lidy-bheight] = x3.s0;             // m30
+        m[5][lidy-bheight] = x0.s0 * sy;        // m02
+        m[4][lidy-bheight] = x1.s0 * lidy;         // m11
+        m[3][lidy-bheight] = x2.s0;             // m20
+        m[2][lidy-bheight] = py;             // m01
+        m[1][lidy-bheight] = x1.s0;             // m10
+        m[0][lidy-bheight] = x0.s0;             // m00
+    }
+
+    else if(lidy < bheight) // lower-half rows keep their moments in the private accumulator
+    {
+        lm[9] = ((double)py) * sy;  // m03
+        lm[8] = ((double)x1.s0) * sy;  // m12
+        lm[7] = ((double)x2.s0) * lidy;  // m21
+        lm[6] = x3.s0;             // m30
+        lm[5] = x0.s0 * sy;        // m02
+        lm[4] = x1.s0 * lidy;         // m11
+        lm[3] = x2.s0;             // m20
+        lm[2] = py;             // m01
+        lm[1] = x1.s0;             // m10
+        lm[0] = x0.s0;             // m00
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) // pairwise reduction over rows, halving the active range each pass
+    {
+        if(lidy < j) // the first j rows add what was published for them
+            for( int i = 0; i < 10; i++ )
+                lm[i] = lm[i] + m[i][lidy];
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lidy >= j/2&&lidy < j) // rows [j/2, j) publish their partial sums for the next pass
+            for( int i = 0; i < 10; i++ )
+                m[i][lidy-j/2] = lm[i];
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if(lidy == 0&&lidx == 0) // after the last pass the tile totals are in lm of row 0
+    {
+        for( int mt = 0; mt < 10; mt++ )
+            mom[mt] = (double)lm[mt];
+        if(binary)
+        {
+            double s = 1./255; // undo the 0/255 mapping applied to the pixels
+            for( int mt = 0; mt < 10; mt++ )
+                mom[mt] *= s;
+        }
+
+        double xm = x * mom[0], ym = y * mom[0];
+
+        // shift the tile-relative moments (primed) to image coordinates and store them
+
+        // + m00 ( = m00' )
+        dst_m00[wgidy*dst_cols+wgidx] = mom[0];
+
+        // + m10 ( = m10' + x*m00' )
+        dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm;
+
+        // + m01 ( = m01' + y*m00' )
+        dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym;
+
+        // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+        dst_m20[wgidy*dst_cols+wgidx]  = mom[3] + x * (mom[1] * 2 + xm);
+
+        // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+        dst_m11[wgidy*dst_cols+wgidx]  = mom[4] + x * (mom[2] + ym) + y * mom[1];
+
+        // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+        dst_m02[wgidy*dst_cols+wgidx]  = mom[5] + y * (mom[2] * 2 + ym);
+
+        // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+        dst_m30[wgidy*dst_cols+wgidx]  = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+
+        // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+        dst_m21[wgidy*dst_cols+wgidx]  = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+
+        // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+        dst_m12[wgidy*dst_cols+wgidx]  = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+
+        // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+        dst_m03[wgidy*dst_cols+wgidx]  = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+    }
+}
+//#endif
index e51b2d0..8cffe3d 100644 (file)
 //
 //M*/
 
-#pragma OPENCL EXTENSION cl_amd_printf : enable
 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
 #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
 
+// specialized for non-image2d_t supported platform, intel HD4000, for example
+#ifdef DISABLE_IMAGE2D
+#define IMAGE_INT32 __global uint  *
+#define IMAGE_INT8  __global uchar *
+#else
+#define IMAGE_INT32 image2d_t
+#define IMAGE_INT8  image2d_t
+#endif
+
+// Fetch one 32-bit element at integer position coord = (x, y).
+// Image path (default): returns the .x component read through sampler sam; rows, cols and
+// elemPerRow are not used.
+// Buffer path (DISABLE_IMAGE2D): img is a __global uint buffer with elemPerRow elements per
+// row; x is clamped to [0, cols] and y to [0, rows] before indexing, and sam is not used.
+uint read_sumTex(IMAGE_INT32 img, sampler_t sam, int2 coord, int rows, int cols, int elemPerRow)
+{
+#ifndef DISABLE_IMAGE2D
+    const uint4 texel = read_imageui(img, sam, coord);
+    return texel.x;
+#else
+    const int2 pos = clamp(coord, (int2)(0, 0), (int2)(cols, rows));
+    return img[pos.y * elemPerRow + pos.x];
+#endif
+}
+// Fetch one 8-bit element at floating-point position coord = (x, y).
+// Image path (default): returns the .x component read through sampler sam, cast to uchar;
+// rows, cols and elemPerRow are not used.
+// Buffer path (DISABLE_IMAGE2D): img is a __global uchar buffer with elemPerRow elements per
+// row; coord is rounded to nearest-even, then x is clamped to [0, cols - 1] and y to
+// [0, rows - 1] before indexing, and sam is not used.
+uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int cols, int elemPerRow)
+{
+#ifndef DISABLE_IMAGE2D
+    const uint4 texel = read_imageui(img, sam, coord);
+    return (uchar)texel.x;
+#else
+    const int2 pos = clamp(convert_int2_rte(coord), (int2)(0, 0), (int2)(cols - 1, rows - 1));
+    return img[pos.y * elemPerRow + pos.x];
+#endif
+}
+
 // dynamically change the precision used for floating type
 
-#if defined (__ATI__) || defined (__NVIDIA__)
+#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
 #define F double
 #else
 #define F float
 // Image read mode
 __constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
 
+#ifndef FLT_EPSILON
 #define FLT_EPSILON (1e-15)
-#define CV_PI_F 3.14159265f
+#endif
 
+#ifndef CV_PI_F
+#define CV_PI_F 3.14159265f
+#endif
 
 // Use integral image to calculate haar wavelets.
 // N = 2
 // for simple haar paatern
-float icvCalcHaarPatternSum_2(image2d_t sumTex, __constant float src[2][5], int oldSize, int newSize, int y, int x)
+float icvCalcHaarPatternSum_2(
+    IMAGE_INT32 sumTex, 
+    __constant float src[2][5], 
+    int oldSize, 
+    int newSize, 
+    int y, int x, 
+    int rows, int cols, int elemPerRow)
 {
 
     float ratio = (float)newSize / oldSize;
@@ -81,11 +125,10 @@ float icvCalcHaarPatternSum_2(image2d_t sumTex, __constant float src[2][5], int
         int dy2 = convert_int_rte(ratio * src[k][3]);
 
         F t = 0;
-        t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x;
-        t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x;
-        t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x;
-        t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x;
-
+        t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
+        t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
+        t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
+        t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
         d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
     }
 
@@ -93,7 +136,13 @@ float icvCalcHaarPatternSum_2(image2d_t sumTex, __constant float src[2][5], int
 }
 
 // N = 3
-float icvCalcHaarPatternSum_3(image2d_t sumTex, __constant float src[3][5], int oldSize, int newSize, int y, int x)
+float icvCalcHaarPatternSum_3(
+    IMAGE_INT32 sumTex, 
+    __constant float src[2][5], 
+    int oldSize, 
+    int newSize, 
+    int y, int x, 
+    int rows, int cols, int elemPerRow)
 {
 
     float ratio = (float)newSize / oldSize;
@@ -109,11 +158,10 @@ float icvCalcHaarPatternSum_3(image2d_t sumTex, __constant float src[3][5], int
         int dy2 = convert_int_rte(ratio * src[k][3]);
 
         F t = 0;
-        t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x;
-        t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x;
-        t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x;
-        t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x;
-
+        t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
+        t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
+        t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
+        t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
         d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
     }
 
@@ -121,7 +169,13 @@ float icvCalcHaarPatternSum_3(image2d_t sumTex, __constant float src[3][5], int
 }
 
 // N = 4
-float icvCalcHaarPatternSum_4(image2d_t sumTex, __constant float src[4][5], int oldSize, int newSize, int y, int x)
+float icvCalcHaarPatternSum_4(
+    IMAGE_INT32 sumTex, 
+    __constant float src[2][5], 
+    int oldSize, 
+    int newSize, 
+    int y, int x, 
+    int rows, int cols, int elemPerRow)
 {
 
     float ratio = (float)newSize / oldSize;
@@ -137,11 +191,10 @@ float icvCalcHaarPatternSum_4(image2d_t sumTex, __constant float src[4][5], int
         int dy2 = convert_int_rte(ratio * src[k][3]);
 
         F t = 0;
-        t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x;
-        t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x;
-        t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x;
-        t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x;
-
+        t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
+        t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
+        t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
+        t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
         d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
     }
 
@@ -172,7 +225,7 @@ __inline int calcSize(int octave, int layer)
 
 //calculate targeted layer per-pixel determinant and trace with an integral image
 __kernel void icvCalcLayerDetAndTrace(
-    image2d_t sumTex, // input integral image
+    IMAGE_INT32 sumTex, // input integral image
     __global float * det,      // output Determinant
     __global float * trace,    // output trace
     int det_step,     // the step of det in bytes
@@ -181,11 +234,13 @@ __kernel void icvCalcLayerDetAndTrace(
     int c_img_cols,
     int c_nOctaveLayers,
     int c_octave,
-    int c_layer_rows
+    int c_layer_rows,
+    int sumTex_step
     )
 {
     det_step   /= sizeof(*det);
     trace_step /= sizeof(*trace);
+    sumTex_step/= sizeof(uint);
     // Determine the indices
     const int gridDim_y  = get_num_groups(1) / (c_nOctaveLayers + 2);
     const int blockIdx_y = get_group_id(1) % gridDim_y;
@@ -205,12 +260,12 @@ __kernel void icvCalcLayerDetAndTrace(
 
     if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j)
     {
-        const float dx  = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave);
-        const float dy  = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave);
-        const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave);
+        const float dx  = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
+        const float dy  = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
+        const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
 
         det  [j + margin + det_step   * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy;
-        trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy;
+        trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy; 
     }
 }
 
@@ -220,7 +275,7 @@ __kernel void icvCalcLayerDetAndTrace(
 
 __constant float c_DM[5] = {0, 0, 9, 9, 1};
 
-bool within_check(image2d_t maskSumTex, int sum_i, int sum_j, int size)
+bool within_check(IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int rows, int cols, int step)
 {
     float ratio = (float)size / 9.0f;
 
@@ -233,10 +288,10 @@ bool within_check(image2d_t maskSumTex, int sum_i, int sum_j, int size)
 
     float t = 0;
 
-    t += read_imageui(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy1)).x;
-    t -= read_imageui(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy2)).x;
-    t -= read_imageui(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy1)).x;
-    t += read_imageui(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy2)).x;
+    t += read_sumTex(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy1), rows, cols, step);
+    t -= read_sumTex(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy2), rows, cols, step);
+    t -= read_sumTex(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy1), rows, cols, step);
+    t += read_sumTex(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy2), rows, cols, step);
 
     d += t * c_DM[4] / ((dx2 - dx1) * (dy2 - dy1));
 
@@ -246,10 +301,10 @@ bool within_check(image2d_t maskSumTex, int sum_i, int sum_j, int size)
 // Non-maximal suppression to further filtering the candidates from previous step
 __kernel
     void icvFindMaximaInLayer_withmask(
-    __global const float * det,
-    __global const float * trace,
-    __global int4 * maxPosBuffer,
-    volatile __global unsigned int* maxCounter,
+    __global const float * det, 
+    __global const float * trace, 
+    __global int4 * maxPosBuffer, 
+    volatile __global int* maxCounter,
     int counter_offset,
     int det_step,     // the step of det in bytes
     int trace_step,   // the step of trace in bytes
@@ -261,7 +316,8 @@ __kernel
     int c_layer_cols,
     int c_max_candidates,
     float c_hessianThreshold,
-    image2d_t maskSumTex
+    IMAGE_INT32 maskSumTex,
+    int mask_step
     )
 {
     volatile __local  float N9[768]; // threads.x * threads.y * 3
@@ -269,6 +325,7 @@ __kernel
     det_step   /= sizeof(*det);
     trace_step /= sizeof(*trace);
     maxCounter += counter_offset;
+    mask_step  /= sizeof(uint);
 
     // Determine the indices
     const int gridDim_y  = get_num_groups(1) / c_nOctaveLayers;
@@ -288,26 +345,26 @@ __kernel
     // Is this thread within the hessian buffer?
     const int zoff = get_local_size(0) * get_local_size(1);
     const int localLin = get_local_id(0) + get_local_id(1) * get_local_size(0) + zoff;
-    N9[localLin - zoff] =
-        det[det_step *
+    N9[localLin - zoff] = 
+        det[det_step * 
         (c_layer_rows * (layer - 1) + min(max(i, 0), c_img_rows - 1)) // y
         + min(max(j, 0), c_img_cols - 1)];                            // x
-    N9[localLin       ] =
-        det[det_step *
+    N9[localLin       ] = 
+        det[det_step * 
         (c_layer_rows * (layer    ) + min(max(i, 0), c_img_rows - 1)) // y
         + min(max(j, 0), c_img_cols - 1)];                            // x
-    N9[localLin + zoff] =
-        det[det_step *
+    N9[localLin + zoff] = 
+        det[det_step * 
         (c_layer_rows * (layer + 1) + min(max(i, 0), c_img_rows - 1)) // y
         + min(max(j, 0), c_img_cols - 1)];                            // x
 
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    if (i < c_layer_rows - margin
+    if (i < c_layer_rows - margin 
         && j < c_layer_cols - margin
-        && get_local_id(0) > 0
+        && get_local_id(0) > 0 
         && get_local_id(0) < get_local_size(0) - 1
-        && get_local_id(1) > 0
+        && get_local_id(1) > 0 
         && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA
         )
     {
@@ -321,7 +378,7 @@ __kernel
             const int sum_i = (i - ((size >> 1) >> c_octave)) << c_octave;
             const int sum_j = (j - ((size >> 1) >> c_octave)) << c_octave;
 
-            if (within_check(maskSumTex, sum_i, sum_j, size))
+            if (within_check(maskSumTex, sum_i, sum_j, size, c_img_rows, c_img_cols, mask_step))
             {
                 // Check to see if we have a max (in its 26 neighbours)
                 const bool condmax = val0 > N9[localLin - 1 - get_local_size(0) - zoff]
@@ -356,7 +413,7 @@ __kernel
 
                 if(condmax)
                 {
-                    unsigned int ind = atomic_inc(maxCounter);
+                    int ind = atomic_inc(maxCounter);
 
                     if (ind < c_max_candidates)
                     {
@@ -372,10 +429,10 @@ __kernel
 
 __kernel
     void icvFindMaximaInLayer(
-    __global float * det,
-    __global float * trace,
-    __global int4 * maxPosBuffer,
-    volatile __global unsigned int* maxCounter,
+    __global float * det, 
+    __global float * trace, 
+    __global int4 * maxPosBuffer, 
+    volatile __global  int* maxCounter,
     int counter_offset,
     int det_step,     // the step of det in bytes
     int trace_step,   // the step of trace in bytes
@@ -417,19 +474,19 @@ __kernel
     int l_x = min(max(j, 0), c_img_cols - 1);
     int l_y = c_layer_rows * layer + min(max(i, 0), c_img_rows - 1);
 
-    N9[localLin - zoff] =
+    N9[localLin - zoff] = 
         det[det_step * (l_y - c_layer_rows) + l_x];
-    N9[localLin       ] =
+    N9[localLin       ] = 
         det[det_step * (l_y               ) + l_x];
-    N9[localLin + zoff] =
+    N9[localLin + zoff] = 
         det[det_step * (l_y + c_layer_rows) + l_x];
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    if (i < c_layer_rows - margin
+    if (i < c_layer_rows - margin 
         && j < c_layer_cols - margin
-        && get_local_id(0) > 0
+        && get_local_id(0) > 0 
         && get_local_id(0) < get_local_size(0) - 1
-        && get_local_id(1) > 0
+        && get_local_id(1) > 0 
         && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA
         )
     {
@@ -473,7 +530,7 @@ __kernel
 
             if(condmax)
             {
-                unsigned int ind = atomic_inc(maxCounter);
+                 int ind = atomic_inc(maxCounter);
 
                 if (ind < c_max_candidates)
                 {
@@ -497,17 +554,17 @@ inline bool solve3x3_float(volatile __local  const float A[3][3], volatile __loc
     {
         F invdet = 1.0 / det;
 
-        x[0] = invdet *
+        x[0] = invdet * 
             (b[0]    * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
             A[0][1] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) +
             A[0][2] * (b[1]    * A[2][1] - A[1][1] * b[2]   ));
 
-        x[1] = invdet *
+        x[1] = invdet * 
             (A[0][0] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) -
             b[0]    * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
             A[0][2] * (A[1][0] * b[2]    - b[1]    * A[2][0]));
 
-        x[2] = invdet *
+        x[2] = invdet * 
             (A[0][0] * (A[1][1] * b[2]    - b[1]    * A[2][1]) -
             A[0][1] * (A[1][0] * b[2]    - b[1]    * A[2][0]) +
             b[0]    * (A[1][0] * A[2][1] - A[1][1] * A[2][0]));
@@ -528,12 +585,12 @@ inline bool solve3x3_float(volatile __local  const float A[3][3], volatile __loc
 
 ////////////////////////////////////////////////////////////////////////
 // INTERPOLATION
-__kernel
+__kernel 
     void icvInterpolateKeypoint(
-    __global const float * det,
+    __global const float * det, 
     __global const int4 * maxPosBuffer,
     __global float * keypoints,
-    volatile __global unsigned int * featureCounter,
+    volatile __global  int * featureCounter,
     int det_step,
     int keypoints_step,
     int c_img_rows,
@@ -560,7 +617,7 @@ __kernel
 
     volatile __local  float N9[3][3][3];
 
-    N9[get_local_id(2)][get_local_id(1)][get_local_id(0)] =
+    N9[get_local_id(2)][get_local_id(1)][get_local_id(0)] = 
         det[det_step * (c_layer_rows * layer + i) + j];
     barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -632,7 +689,7 @@ __kernel
                 if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
                 {
                     // Get a new feature index.
-                    unsigned int ind = atomic_inc(featureCounter);
+                     int ind = atomic_inc(featureCounter);
 
                     if (ind < c_max_features)
                     {
@@ -658,60 +715,62 @@ __kernel
 
 __constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6};
 __constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0};
-__constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448408f, 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f,
-    0.003238451667129993f, 0.002547456417232752f, 0.001707611023448408f, 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f,
-    0.00665318313986063f, 0.00720730796456337f, 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f,
-    0.002003900473937392f, 0.001707611023448408f, 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f,
-    0.01164754293859005f, 0.01261763460934162f, 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f,
-    0.0035081731621176f, 0.001707611023448408f, 0.002547456417232752f, 0.005233579315245152f, 0.009162282571196556f,
-    0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 0.01366852037608624f,
-    0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.003238451667129993f, 0.00665318313986063f,
-    0.01164754293859005f, 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f,
-    0.01737609319388866f, 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.001455130288377404f,
-    0.0035081731621176f, 0.00720730796456337f, 0.01261763460934162f, 0.0188232995569706f, 0.02392910048365593f,
-    0.02592208795249462f, 0.02392910048365593f, 0.0188232995569706f, 0.01261763460934162f, 0.00720730796456337f,
-    0.0035081731621176f, 0.001455130288377404f, 0.003238451667129993f, 0.00665318313986063f, 0.01164754293859005f,
-    0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 0.01737609319388866f,
+__constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448408f, 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, 
+    0.003238451667129993f, 0.002547456417232752f, 0.001707611023448408f, 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 
+    0.00665318313986063f, 0.00720730796456337f, 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 
+    0.002003900473937392f, 0.001707611023448408f, 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 
+    0.01164754293859005f, 0.01261763460934162f, 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, 
+    0.0035081731621176f, 0.001707611023448408f, 0.002547456417232752f, 0.005233579315245152f, 0.009162282571196556f, 
+    0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 0.01366852037608624f, 
+    0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.003238451667129993f, 0.00665318313986063f, 
+    0.01164754293859005f, 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 
+    0.01737609319388866f, 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.001455130288377404f, 
+    0.0035081731621176f, 0.00720730796456337f, 0.01261763460934162f, 0.0188232995569706f, 0.02392910048365593f, 
+    0.02592208795249462f, 0.02392910048365593f, 0.0188232995569706f, 0.01261763460934162f, 0.00720730796456337f, 
+    0.0035081731621176f, 0.001455130288377404f, 0.003238451667129993f, 0.00665318313986063f, 0.01164754293859005f, 
+    0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 0.01737609319388866f, 
     0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.002547456417232752f, 0.005233579315245152f,
-    0.009162282571196556f, 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f,
-    0.01366852037608624f, 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.001707611023448408f,
-    0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 0.01164754293859005f, 0.01261763460934162f,
+    0.009162282571196556f, 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 
+    0.01366852037608624f, 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.001707611023448408f, 
+    0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 0.01164754293859005f, 0.01261763460934162f, 
     0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, 0.0035081731621176f, 0.001707611023448408f,
-    0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 0.00665318313986063f, 0.00720730796456337f,
-    0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 0.002003900473937392f, 0.001707611023448408f,
+    0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 0.00665318313986063f, 0.00720730796456337f, 
+    0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 0.002003900473937392f, 0.001707611023448408f, 
     0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, 0.003238451667129993f, 0.002547456417232752f,
     0.001707611023448408f, 0.001455130288377404f};
 
 __constant float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
 __constant float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
 
-void reduce_32_sum(volatile __local  float * data, float partial_reduction, int tid)
+void reduce_32_sum(volatile __local  float * data, volatile float* partial_reduction, int tid) // additive reduction through data[]; the running sum is written back to both data[tid] and *partial_reduction
 {
-#define op(A, B) (A)+(B)
-    data[tid] = partial_reduction;
+#define op(A, B) (*A)+(B)
+    data[tid] = *partial_reduction; // publish this work-item's partial sum before the barrier
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    if (tid < 16)
+    if (tid < 16) // only tid 0..15 accumulate; they read data[tid + 16] .. data[tid + 1]
     {
-        data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
-        data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
-        data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
-        data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
-        data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
+        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 16]); // NOTE(review): no barrier between steps - presumably relies on these work-items running in lockstep; confirm
+        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 8 ]);
+        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 4 ]);
+        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 2 ]);
+        data[tid] = *partial_reduction = op(partial_reduction, data[tid + 1 ]); // presumably data[0] now holds the total of data[0..31] - TODO confirm
     }
 #undef op
 }
 
 __kernel
     void icvCalcOrientation(
-    image2d_t sumTex,
+    IMAGE_INT32 sumTex,
     __global float * keypoints,
     int keypoints_step,
     int c_img_rows,
-    int c_img_cols
+    int c_img_cols,
+    int sum_step
     )
 {
     keypoints_step /= sizeof(*keypoints);
+    sum_step       /= sizeof(uint);
     __global float* featureX    = keypoints + X_ROW * keypoints_step;
     __global float* featureY    = keypoints + Y_ROW * keypoints_step;
     __global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
@@ -754,8 +813,8 @@ __kernel
         if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
             x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
         {
-            X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x);
-            Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x);
+            X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
+            Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
 
             angle = atan2(Y, X);
 
@@ -777,7 +836,7 @@ __kernel
     {
         const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
 
-        float sumx = 0.0f, sumy = 0.0f;
+        volatile float sumx = 0.0f, sumy = 0.0f;
         int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
         if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
         {
@@ -802,8 +861,8 @@ __kernel
             sumx += s_X[get_local_id(0) + 96];
             sumy += s_Y[get_local_id(0) + 96];
         }
-        reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
-        reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
+        reduce_32_sum(s_sumx + get_local_id(1) * 32, &sumx, get_local_id(0));
+        reduce_32_sum(s_sumy + get_local_id(1) * 32, &sumy, get_local_id(0));
 
         const float temp_mod = sumx * sumx + sumy * sumy;
         if (temp_mod > best_mod)
@@ -846,6 +905,24 @@ __kernel
     }
 }
 
+
+__kernel // Sets the direction of every feature to a fixed 270 degrees (upright mode) in the ANGLE_ROW of keypoints.
+    void icvSetUpright(
+    __global float * keypoints, // keypoint matrix; the ANGLE_ROW row is written
+    int keypoints_step,         // the step of keypoints in bytes
+    int nFeatures               // number of features; valid indices are [0, nFeatures)
+    )
+{
+    keypoints_step /= sizeof(*keypoints); // bytes -> float elements
+    __global float* featureDir  = keypoints + ANGLE_ROW * keypoints_step;
+    // Strict, signed '<': work-item nFeatures would write one element past the last feature.
+    if((int)get_global_id(0) < nFeatures)
+    {
+        featureDir[get_global_id(0)] = 270.0f;
+    }
+}
+
+
 #undef ORI_SEARCH_INC
 #undef ORI_WIN
 #undef ORI_SAMPLES
@@ -881,20 +958,20 @@ __constant float c_DW[PATCH_SZ * PATCH_SZ] =
 
 // utility for linear filter
 inline uchar readerGet(
-    image2d_t src,
-    const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,
-    int i, int j
+    IMAGE_INT8 src, 
+    const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir, 
+    int i, int j, int rows, int cols, int elemPerRow // (i, j) = window row/column; rows, cols, elemPerRow are forwarded unchanged to read_imgTex
     )
 {
     float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
     float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
-    return (uchar)read_imageui(src, sampler, (float2)(pixel_x, pixel_y)).x;
+    return read_imgTex(src, sampler, (float2)(pixel_x, pixel_y), rows, cols, elemPerRow); // sample src at the window offset rotated by (cos_dir, sin_dir) about (centerX, centerY)
 }
 
 inline float linearFilter(
-    image2d_t src,
-    const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,
-    float y, float x
+    IMAGE_INT8 src, 
+    const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,  
+    float y, float x, int rows, int cols, int elemPerRow
     )
 {
     x -= 0.5f;
@@ -907,38 +984,43 @@ inline float linearFilter(
     const int x2 = x1 + 1;
     const int y2 = y1 + 1;
 
-    uchar src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x1);
+    uchar src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x1, rows, cols, elemPerRow);
     out = out + src_reg * ((x2 - x) * (y2 - y));
 
-    src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x2);
+    src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x2, rows, cols, elemPerRow);
     out = out + src_reg * ((x - x1) * (y2 - y));
 
-    src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x1);
+    src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x1, rows, cols, elemPerRow);
     out = out + src_reg * ((x2 - x) * (y - y1));
 
-    src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x2);
+    src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x2, rows, cols, elemPerRow);
     out = out + src_reg * ((x - x1) * (y - y1));
 
     return out;
 }
 
 void calc_dx_dy(
-    image2d_t imgTex,
+    IMAGE_INT8 imgTex,
     volatile __local  float s_dx_bin[25],
     volatile __local  float s_dy_bin[25],
     volatile __local  float s_PATCH[6][6],
-    __global const float* featureX,
-    __global const float* featureY,
-    __global const float* featureSize,
-    __global const float* featureDir
+    __global const float* featureX, 
+    __global const float* featureY, 
+    __global const float* featureSize, 
+    __global const float* featureDir,
+    int rows,
+    int cols,
+    int elemPerRow
     )
 {
     const float centerX = featureX[get_group_id(0)];
     const float centerY = featureY[get_group_id(0)];
     const float size = featureSize[get_group_id(0)];
     float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
-    if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
-        descriptor_dir = 0.f;
+    if(fabs(descriptor_dir - 360.0f) < FLT_EPSILON)
+    {
+        descriptor_dir = 0.0f;
+    }
     descriptor_dir *= (float)(CV_PI_F / 180.0f);
 
     /* The sampling intervals and wavelet sized for selecting an orientation
@@ -965,7 +1047,7 @@ void calc_dx_dy(
     const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
     const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
 
-    s_PATCH[get_local_id(1)][get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo);
+    s_PATCH[get_local_id(1)][get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo, rows, cols, elemPerRow);
 
     barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -976,26 +1058,26 @@ void calc_dx_dy(
         const float dw = c_DW[yIndex * PATCH_SZ + xIndex];
 
         const float vx = (
-            s_PATCH[get_local_id(1)    ][get_local_id(0) + 1] -
-            s_PATCH[get_local_id(1)    ][get_local_id(0)    ] +
-            s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] -
-            s_PATCH[get_local_id(1) + 1][get_local_id(0)    ])
+            s_PATCH[get_local_id(1)    ][get_local_id(0) + 1] - 
+            s_PATCH[get_local_id(1)    ][get_local_id(0)    ] + 
+            s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] - 
+            s_PATCH[get_local_id(1) + 1][get_local_id(0)    ]) 
             * dw;
         const float vy = (
-            s_PATCH[get_local_id(1) + 1][get_local_id(0)    ] -
-            s_PATCH[get_local_id(1)    ][get_local_id(0)    ] +
-            s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] -
-            s_PATCH[get_local_id(1)    ][get_local_id(0) + 1])
+            s_PATCH[get_local_id(1) + 1][get_local_id(0)    ] - 
+            s_PATCH[get_local_id(1)    ][get_local_id(0)    ] + 
+            s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] - 
+            s_PATCH[get_local_id(1)    ][get_local_id(0) + 1]) 
             * dw;
         s_dx_bin[tid] = vx;
         s_dy_bin[tid] = vy;
     }
 }
 void reduce_sum25(
-    volatile __local  float* sdata1,
-    volatile __local  float* sdata2,
-    volatile __local  float* sdata3,
-    volatile __local  float* sdata4,
+    volatile __local  float* sdata1, 
+    volatile __local  float* sdata2, 
+    volatile __local  float* sdata3, 
+    volatile __local  float* sdata4, 
     int tid
     )
 {
@@ -1033,18 +1115,20 @@ void reduce_sum25(
     }
 }
 
-__kernel
+__kernel 
     void compute_descriptors64(
-    image2d_t imgTex,
-    volatile __global float * descriptors,
+    IMAGE_INT8 imgTex,
+    volatile __global float * descriptors, 
     __global const float * keypoints,
     int descriptors_step,
-    int keypoints_step
+    int keypoints_step, 
+    int rows,
+    int cols,
+    int img_step
     )
 {
     descriptors_step /= sizeof(float);
     keypoints_step   /= sizeof(float);
-
     __global const float * featureX    = keypoints + X_ROW * keypoints_step;
     __global const float * featureY    = keypoints + Y_ROW * keypoints_step;
     __global const float * featureSize = keypoints + SIZE_ROW * keypoints_step;
@@ -1057,7 +1141,7 @@ __kernel
     volatile __local  float sdyabs[25];
     volatile __local  float s_PATCH[6][6];
 
-    calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir);
+    calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir, rows, cols, img_step);
     barrier(CLK_LOCAL_MEM_FENCE);
 
     const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
@@ -1066,11 +1150,15 @@ __kernel
     {
         sdxabs[tid] = fabs(sdx[tid]); // |dx| array
         sdyabs[tid] = fabs(sdy[tid]); // |dy| array
-        barrier(CLK_LOCAL_MEM_FENCE);
-
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 25)
+    {
         reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
-        barrier(CLK_LOCAL_MEM_FENCE);
-
+    }    
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 25)
+    {
         volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
 
         // write dx, dy, |dx|, |dy|
@@ -1083,13 +1171,16 @@ __kernel
         }
     }
 }
-__kernel
+__kernel 
     void compute_descriptors128(
-    image2d_t imgTex,
-    __global volatile float * descriptors,
+    IMAGE_INT8 imgTex,
+    __global volatile float * descriptors, 
     __global float * keypoints,
     int descriptors_step,
-    int keypoints_step
+    int keypoints_step,
+    int rows,
+    int cols,
+    int img_step
     )
 {
     descriptors_step /= sizeof(*descriptors);
@@ -1111,7 +1202,7 @@ __kernel
     volatile __local  float sdabs2[25];
     volatile __local  float s_PATCH[6][6];
 
-    calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir);
+    calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir, rows, cols, img_step);
     barrier(CLK_LOCAL_MEM_FENCE);
 
     const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
@@ -1132,10 +1223,10 @@ __kernel
             sd2[tid] = sdx[tid];
             sdabs2[tid] = fabs(sdx[tid]);
         }
-        barrier(CLK_LOCAL_MEM_FENCE);
+        //barrier(CLK_LOCAL_MEM_FENCE);
 
         reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
-        barrier(CLK_LOCAL_MEM_FENCE);
+        //barrier(CLK_LOCAL_MEM_FENCE);
 
         volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3);
 
@@ -1162,10 +1253,10 @@ __kernel
             sd2[tid] = sdy[tid];
             sdabs2[tid] = fabs(sdy[tid]);
         }
-        barrier(CLK_LOCAL_MEM_FENCE);
+        //barrier(CLK_LOCAL_MEM_FENCE);
 
         reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
-        barrier(CLK_LOCAL_MEM_FENCE);
+        //barrier(CLK_LOCAL_MEM_FENCE);
 
         // write dy (dx >= 0), |dy| (dx >= 0), dy (dx < 0), |dy| (dx < 0)
         if (tid == 0)
@@ -1178,7 +1269,7 @@ __kernel
     }
 }
 
-__kernel
+__kernel 
     void normalize_descriptors128(__global float * descriptors, int descriptors_step)
 {
     descriptors_step /= sizeof(*descriptors);
@@ -1219,7 +1310,7 @@ __kernel
     // normalize and store in output
     descriptor_base[get_local_id(0)] = lookup / len;
 }
-__kernel
+__kernel 
     void normalize_descriptors64(__global float * descriptors, int descriptors_step)
 {
     descriptors_step /= sizeof(*descriptors);
index 076c22c..db11ed1 100644 (file)
@@ -140,6 +140,10 @@ float reduce_smem(volatile __local float* smem, int size)
     if (tid < 32)
     {
         if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16)
+    {
         if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
         if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
         if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
@@ -224,6 +228,11 @@ __kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr
     {
         volatile __local float* smem = products;
         smem[tid] = product = product + smem[tid + 32];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16)
+    {
+        volatile __local float* smem = products;
         smem[tid] = product = product + smem[tid + 16];
         smem[tid] = product = product + smem[tid + 8];
         smem[tid] = product = product + smem[tid + 4];
diff --git a/modules/ocl/src/kernels/stereobm.cl b/modules/ocl/src/kernels/stereobm.cl
new file mode 100644 (file)
index 0000000..4edab86
--- /dev/null
@@ -0,0 +1,427 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define ROWSperTHREAD 21     // the number of rows a thread will process
+#define BLOCK_W       128    // the thread block width (464)
+#define N_DISPARITIES 8      // disparities scored per pass; MinSSD/StepDown/InitColSSD hard-code 8 slots
+
+#define STEREO_MIND 0                    // The minimum d range to check
+#define STEREO_DISP_STEP N_DISPARITIES   // the d step, must be <= 1 to avoid aliasing
+// Returns the square of a (a * a, computed in int).
+int SQ(int a)
+{
+    return a * a;
+}
+// Returns the sum of col_ssd[0 .. 2*radius]; the upper half is taken from col_ssd_cache when get_local_id(0) < BLOCK_W - radius.
+unsigned int CalcSSD(volatile __local unsigned int *col_ssd_cache, 
+                     volatile __local unsigned int *col_ssd, int radius)
+{      
+    unsigned int cache = 0;
+    unsigned int cache2 = 0;
+
+    for(int i = 1; i <= radius; i++) // partial sum of col_ssd[1 .. radius]
+        cache += col_ssd[i];
+
+    col_ssd_cache[0] = cache; // publish this partial sum in the caller-supplied cache slot
+
+    barrier(CLK_LOCAL_MEM_FENCE); // all cache slots must be written before any of them is read below
+
+    if (get_local_id(0) < BLOCK_W - radius)
+        cache2 = col_ssd_cache[radius]; // value stored 'radius' slots further along the cache (callers in this file pass cache + local id)
+    else
+        for(int i = radius + 1; i < (2 * radius + 1); i++) // otherwise sum col_ssd[radius+1 .. 2*radius] directly
+            cache2 += col_ssd[i];
+
+    return col_ssd[0] + cache + cache2;
+}
+// Runs CalcSSD on each of the 8 disparity planes of col_ssd and returns (smallest sum, index of the last plane that reaches it).
+uint2 MinSSD(volatile __local unsigned int *col_ssd_cache, 
+             volatile __local unsigned int *col_ssd, int radius)
+{
+    unsigned int ssd[N_DISPARITIES];
+
+    //See above:  #define COL_SSD_SIZE (BLOCK_W + 2 * radius)
+    ssd[0] = CalcSSD(col_ssd_cache, col_ssd + 0 * (BLOCK_W + 2 * radius), radius); // plane k starts k * (BLOCK_W + 2 * radius) entries in
+    barrier(CLK_LOCAL_MEM_FENCE); // col_ssd_cache is reused by the next call, so wait until every reader is done
+    ssd[1] = CalcSSD(col_ssd_cache, col_ssd + 1 * (BLOCK_W + 2 * radius), radius);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    ssd[2] = CalcSSD(col_ssd_cache, col_ssd + 2 * (BLOCK_W + 2 * radius), radius);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    ssd[3] = CalcSSD(col_ssd_cache, col_ssd + 3 * (BLOCK_W + 2 * radius), radius);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    ssd[4] = CalcSSD(col_ssd_cache, col_ssd + 4 * (BLOCK_W + 2 * radius), radius);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    ssd[5] = CalcSSD(col_ssd_cache, col_ssd + 5 * (BLOCK_W + 2 * radius), radius);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    ssd[6] = CalcSSD(col_ssd_cache, col_ssd + 6 * (BLOCK_W + 2 * radius), radius);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    ssd[7] = CalcSSD(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * radius), radius);
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    unsigned int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7]))); // minimum over all 8 sums
+
+    int bestIdx = 0;
+    for (int i = 0; i < N_DISPARITIES; i++) // no break: on ties the highest matching index wins
+    {
+        if (mssd == ssd[i])
+            bestIdx = i;
+    }
+
+    return (uint2)(mssd, bestIdx);
+}
+// Updates the 8 per-disparity column sums in place: adds the squared difference at idx2 and subtracts the one at idx1.
+void StepDown(int idx1, int idx2, __global unsigned char* imageL, 
+              __global unsigned char* imageR, int d, volatile  __local unsigned int *col_ssd, int radius)
+{
+    unsigned char leftPixel1;
+    unsigned char leftPixel2;
+    unsigned char rightPixel1[8];
+    unsigned char rightPixel2[8];
+    unsigned int diff1, diff2;
+
+    leftPixel1 = imageL[idx1];
+    leftPixel2 = imageL[idx2];
+
+    idx1 = idx1 - d; // right-image samples are read at offsets d .. d+7 to the left of the left-image pixel
+    idx2 = idx2 - d;
+
+    rightPixel1[7] = imageR[idx1 - 7];
+    rightPixel1[0] = imageR[idx1 - 0];
+    rightPixel1[1] = imageR[idx1 - 1];
+    rightPixel1[2] = imageR[idx1 - 2];
+    rightPixel1[3] = imageR[idx1 - 3];
+    rightPixel1[4] = imageR[idx1 - 4];
+    rightPixel1[5] = imageR[idx1 - 5];
+    rightPixel1[6] = imageR[idx1 - 6];
+
+    rightPixel2[7] = imageR[idx2 - 7];
+    rightPixel2[0] = imageR[idx2 - 0];
+    rightPixel2[1] = imageR[idx2 - 1];
+    rightPixel2[2] = imageR[idx2 - 2];
+    rightPixel2[3] = imageR[idx2 - 3];
+    rightPixel2[4] = imageR[idx2 - 4];
+    rightPixel2[5] = imageR[idx2 - 5];
+    rightPixel2[6] = imageR[idx2 - 6];
+
+    //See above:  #define COL_SSD_SIZE (BLOCK_W + 2 * radius)
+    diff1 = leftPixel1 - rightPixel1[0]; // may wrap as unsigned; SQ() takes int, so the value is converted back before squaring
+    diff2 = leftPixel2 - rightPixel2[0];
+    col_ssd[0 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1); // plane k of col_ssd holds the sum for disparity offset k
+
+    diff1 = leftPixel1 - rightPixel1[1];
+    diff2 = leftPixel2 - rightPixel2[1];
+    col_ssd[1 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
+
+    diff1 = leftPixel1 - rightPixel1[2];
+    diff2 = leftPixel2 - rightPixel2[2];
+    col_ssd[2 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
+
+    diff1 = leftPixel1 - rightPixel1[3];
+    diff2 = leftPixel2 - rightPixel2[3];
+    col_ssd[3 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
+
+    diff1 = leftPixel1 - rightPixel1[4];
+    diff2 = leftPixel2 - rightPixel2[4];
+    col_ssd[4 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
+
+    diff1 = leftPixel1 - rightPixel1[5];
+    diff2 = leftPixel2 - rightPixel2[5];
+    col_ssd[5 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
+
+    diff1 = leftPixel1 - rightPixel1[6];
+    diff2 = leftPixel2 - rightPixel2[6];
+    col_ssd[6 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
+
+    diff1 = leftPixel1 - rightPixel1[7];
+    diff2 = leftPixel2 - rightPixel2[7];
+    col_ssd[7 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
+}
+// Fills the 8 per-disparity column sums for column x_tex: squared differences summed over 2*radius+1 rows starting at y_tex.
+void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL, 
+                __global unsigned char* imageR, int d, 
+                volatile __local unsigned int *col_ssd, int radius)
+{
+    unsigned char leftPixel1;
+    int idx;
+    unsigned int diffa[] = {0, 0, 0, 0, 0, 0, 0, 0}; // one accumulator per disparity offset 0..7
+
+    for(int i = 0; i < (2 * radius + 1); i++) // one iteration per row of the window
+    {
+        idx = y_tex * im_pitch + x_tex;
+        leftPixel1 = imageL[idx];
+        idx = idx - d; // right-image samples are read at offsets d .. d+7 to the left
+
+        diffa[0] += SQ(leftPixel1 - imageR[idx - 0]);
+        diffa[1] += SQ(leftPixel1 - imageR[idx - 1]);
+        diffa[2] += SQ(leftPixel1 - imageR[idx - 2]);
+        diffa[3] += SQ(leftPixel1 - imageR[idx - 3]);
+        diffa[4] += SQ(leftPixel1 - imageR[idx - 4]);
+        diffa[5] += SQ(leftPixel1 - imageR[idx - 5]);
+        diffa[6] += SQ(leftPixel1 - imageR[idx - 6]);
+        diffa[7] += SQ(leftPixel1 - imageR[idx - 7]);
+
+        y_tex += 1;
+    }
+    //See above:  #define COL_SSD_SIZE (BLOCK_W + 2 * radius)
+    col_ssd[0 * (BLOCK_W + 2 * radius)] = diffa[0]; // plane k starts k * (BLOCK_W + 2 * radius) entries in
+    col_ssd[1 * (BLOCK_W + 2 * radius)] = diffa[1];
+    col_ssd[2 * (BLOCK_W + 2 * radius)] = diffa[2];
+    col_ssd[3 * (BLOCK_W + 2 * radius)] = diffa[3];
+    col_ssd[4 * (BLOCK_W + 2 * radius)] = diffa[4];
+    col_ssd[5 * (BLOCK_W + 2 * radius)] = diffa[5];
+    col_ssd[6 * (BLOCK_W + 2 * radius)] = diffa[6];
+    col_ssd[7 * (BLOCK_W + 2 * radius)] = diffa[7];
+}
+// stereoKernel: for every group of N_DISPARITIES candidate disparities, keeps the lowest window SSD in cminSSDImage and the matching disparity in disp.
+__kernel void stereoKernel(__global unsigned char *left, __global unsigned char *right,  
+                           __global unsigned int *cminSSDImage, int cminSSD_step,
+                           __global unsigned char *disp, int disp_step,int cwidth, int cheight,
+                           int img_step, int maxdisp, int radius,  
+                           __local unsigned int *col_ssd_cache)
+{
+
+    volatile __local unsigned int *col_ssd = col_ssd_cache + BLOCK_W + get_local_id(0); // this work-item's column slot, BLOCK_W entries past the cache area
+    volatile __local unsigned int *col_ssd_extra = get_local_id(0) < (2 * radius) ? col_ssd + BLOCK_W : 0;  
+
+    int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius; // output column handled by this work-item
+   // int Y = get_group_id(1) * ROWSperTHREAD + radius;
+
+    #define Y (get_group_id(1) * ROWSperTHREAD + radius)
+
+    volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
+    __global unsigned char* disparImage = disp + X + Y * disp_step;
+
+    int end_row = ROWSperTHREAD < (cheight - Y) ? ROWSperTHREAD:(cheight - Y); // min(ROWSperTHREAD, rows left below Y)
+    int y_tex;
+    int x_tex = X - radius;
+
+    if (x_tex >= cwidth) // NOTE(review): work-items leaving here never reach the barriers below - confirm this is safe on the target devices
+        return;
+
+    for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP)
+    {
+        y_tex = Y - radius;
+
+        InitColSSD(x_tex, y_tex, img_step, left, right, d, col_ssd, radius);
+        if (col_ssd_extra > 0) // only the first 2*radius work-items also fill a slot BLOCK_W further on
+            if (x_tex + BLOCK_W < cwidth)
+                InitColSSD(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra, radius);
+
+        barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function
+
+        if (X < cwidth - radius && Y < cheight - radius) // NOTE(review): MinSSD contains barriers and is called under a per-work-item condition
+        {
+            uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
+            if (minSSD.x < minSSDImage[0]) // keep the best score seen so far across disparity groups
+            {
+                disparImage[0] = (unsigned char)(d + minSSD.y);
+                minSSDImage[0] = minSSD.x;
+            }
+        }
+
+        for(int row = 1; row < end_row; row++)
+        {
+            int idx1 = y_tex * img_step + x_tex; // pixel whose contribution StepDown subtracts
+            int idx2 = (y_tex + (2 * radius + 1)) * img_step + x_tex; // pixel 2*radius+1 rows lower whose contribution StepDown adds
+
+            barrier(CLK_GLOBAL_MEM_FENCE); 
+            barrier(CLK_LOCAL_MEM_FENCE); 
+
+            StepDown(idx1, idx2, left, right, d, col_ssd, radius);
+            if (col_ssd_extra > 0)
+                if (x_tex + BLOCK_W < cwidth)
+                    StepDown(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra, radius); // same indices, base pointers shifted by BLOCK_W
+
+            y_tex += 1;
+
+            barrier(CLK_LOCAL_MEM_FENCE); 
+
+            if (X < cwidth - radius && row < cheight - radius - Y)
+            {
+                int idx = row * cminSSD_step;
+                uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
+                if (minSSD.x < minSSDImage[idx])
+                {
+                    disparImage[disp_step * row] = (unsigned char)(d + minSSD.y);
+                    minSSDImage[idx] = minSSD.x;
+                }
+            }
+        } // for row loop
+    } // for d loop
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////// Sobel Prefilter (single channel) ////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// prefilter_xsobel: writes a horizontal 3x3 Sobel response, clamped to [-prefilterCap, prefilterCap] and shifted by +prefilterCap, to output.
+__kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned char *output, 
+                               int rows, int cols, int prefilterCap)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < cols && y < rows) // NOTE(review): x-1/x+1/y-1/y+1 below are not clamped at the image border - confirm the buffer layout makes those reads valid
+    {
+        int cov = input[(y-1) * cols + (x-1)] * (-1) + input[(y-1) * cols + (x+1)] * (1) + 
+                  input[(y)   * cols + (x-1)] * (-2) + input[(y)   * cols + (x+1)] * (2) +
+                  input[(y+1) * cols + (x-1)] * (-1) + input[(y+1) * cols + (x+1)] * (1);
+
+        cov = min(min(max(-prefilterCap, cov), prefilterCap) + prefilterCap, 255); // clamp, bias to be non-negative, cap at 255
+        output[y * cols + x] = cov & 0xFF;
+    }
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////// Textureness filtering ////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns |horizontal 3x3 Sobel response| at (x, y) as float, or 0 when x >= cols or y >= rows.
+float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
+{
+    float conv = 0;
+    int y1 = y==0? 0 : y-1; // previous row, reusing row 0 when y == 0
+    int x1 = x==0? 0 : x-1; // previous column, reusing column 0 when x == 0
+    if(x < cols && y < rows) // NOTE(review): negative x/y and the x+1 / y+1 reads are not clamped here - confirm callers keep them in range
+    {
+        conv = (float)input[(y1)  * cols + (x1)] * (-1) + (float)input[(y1)  * cols + (x+1)] * (1) + 
+               (float)input[(y)   * cols + (x1)] * (-2) + (float)input[(y)   * cols + (x+1)] * (2) +
+               (float)input[(y+1) * cols + (x1)] * (-1) + (float)input[(y+1) * cols + (x+1)] * (1);
+    
+    }
+    return fabs(conv);
+}
+// Returns the sum of cols[0 .. winsz-1]; the upper part is taken from cols_cache when the local x id is below group size - winsz/2.
+float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
+{
+    float cache = 0;
+    float cache2 = 0;
+    int winsz2 = winsz/2;
+
+    int x = get_local_id(0);
+    int group_size_x = get_local_size(0);
+
+    for(int i = 1; i <= winsz2; i++) // partial sum of cols[1 .. winsz2]
+        cache += cols[i];
+
+    cols_cache[0] = cache; // publish this partial sum in the caller-supplied cache slot
+
+    barrier(CLK_LOCAL_MEM_FENCE); // all cache slots must be written before any of them is read below
+
+    if (x < group_size_x - winsz2)
+        cache2 = cols_cache[winsz2]; // value stored winsz2 slots further along the cache
+    else
+        for(int i = winsz2 + 1; i < winsz; i++) // otherwise sum cols[winsz2+1 .. winsz-1] directly
+            cache2 += cols[i];
+
+    return cols[0] + cache + cache2;
+}
+// textureness_kernel: zeroes disp pixels whose windowed sum of |Sobel| responses (times 255) is below threshold; each group covers RpT rows.
+#define RpT (2 * ROWSperTHREAD)  // got experimentally
+__kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, int disp_cols, 
+                                 int disp_step, __global unsigned char *input, int input_rows, 
+                                 int input_cols,int winsz, float threshold, 
+                                 __local float *cols_cache)
+{
+    int winsz2 = winsz/2;
+    int n_dirty_pixels = (winsz2) * 2;
+
+    int local_id_x = get_local_id(0);
+    int group_size_x = get_local_size(0);
+    int group_id_y = get_group_id(1);
+
+    __local float *cols = cols_cache + group_size_x + local_id_x; // this work-item's column slot, one group width past the cache area
+    __local float *cols_extra = local_id_x < n_dirty_pixels ? cols + group_size_x : 0; // first n_dirty_pixels work-items own a second slot
+
+    int x = get_global_id(0);
+    int beg_row = group_id_y * RpT;
+    int end_row = min(beg_row + RpT, disp_rows);
+
+ //   if (x < disp_cols)
+ //   {
+        int y = beg_row;
+
+        float sum = 0;
+        float sum_extra = 0;
+
+        for(int i = y - winsz2; i <= y + winsz2; ++i) // initial column sums over rows y-winsz2 .. y+winsz2
+        {
+            sum += sobel(input, x - winsz2, i, input_rows, input_cols);
+            if (cols_extra)
+                sum_extra += sobel(input, x + group_size_x - winsz2, i, input_rows, input_cols);
+        }
+        *cols = sum;
+        if (cols_extra)
+            *cols_extra = sum_extra;
+
+        barrier(CLK_LOCAL_MEM_FENCE); // column sums must be complete before CalcSums reads neighbouring slots
+
+        float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
+        if (sum_win < threshold) // NOTE(review): x is not checked against disp_cols (guard above is commented out) - confirm the launch size
+            disp[y * disp_step + x] = 0;
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        for(int y = beg_row + 1; y < end_row; ++y)
+        {
+            sum = sum - sobel(input, x - winsz2, y - winsz2 - 1, input_rows, input_cols) + // drop the row that left the window, add the new bottom row
+                  sobel(input, x - winsz2, y + winsz2, input_rows, input_cols);
+            *cols = sum;
+
+            if (cols_extra)
+            {
+                sum_extra = sum_extra - sobel(input, x + group_size_x - winsz2, y - winsz2 - 1,input_rows, input_cols) 
+                            + sobel(input, x + group_size_x - winsz2, y + winsz2, input_rows, input_cols);
+                *cols_extra = sum_extra;
+            }
+
+            barrier(CLK_LOCAL_MEM_FENCE);
+            float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
+            if (sum_win < threshold)
+                disp[y * disp_step + x] = 0;
+
+            barrier(CLK_LOCAL_MEM_FENCE); // keep the next iteration's writes from racing with this iteration's reads
+        }
+  //  }
+}
index f0e65f9..f859193 100644 (file)
@@ -69,6 +69,8 @@ namespace cv
         extern const char *operator_setTo;
         extern const char *operator_setToM;
         extern const char *convertC3C4;
+        extern DevMemType gDeviceMemType;
+        extern DevMemRW gDeviceMemRW;
     }
 }
 
@@ -912,8 +914,18 @@ oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
 
 }
 
+void cv::ocl::oclMat::createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type) // Size overload of createEx
+{
+    createEx(size.height, size.width, type, rw_type, mem_type); // forwards as (rows, cols) to the int overload
+}
+
 void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
 {
+    createEx(_rows, _cols, _type, gDeviceMemRW, gDeviceMemType);
+}
+
+void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type, DevMemType mem_type)
+{
     clCxt = Context::getContext();
     /* core logic */
     _type &= TYPE_MASK;
@@ -937,7 +949,7 @@ void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
         size_t esz = elemSize();
 
         void *dev_ptr;
-        openCLMallocPitch(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows);
+        openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type);
         //openCLMallocPitch(clCxt,&dev_ptr, &step, esz * cols, rows);
 
         if (esz * cols == step)
index dfbf7b1..2c13239 100644 (file)
@@ -221,6 +221,36 @@ namespace cv
         {
             openCLFree(texture);
         }
+
+        bool support_image2d(Context *clCxt)
+        {
+            static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}";
+            static bool _isTested = false;
+            static bool _support = false;
+            if(_isTested)
+            {
+                return _support;
+            }
+            try
+            {
+                cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
+                _support = true;
+            }
+            catch (const cv::Exception& e)
+            {
+                if(e.code == -217)
+                {
+                    _support = false;
+                }
+                else
+                {
+                    // throw e once again
+                    throw e;
+                }
+            }
+            _isTested = true;
+            return _support;
+        }
     }//namespace ocl
 
 }//namespace cv
index d1986b9..7f27451 100644 (file)
@@ -70,6 +70,10 @@ namespace cv
         //   2. for faster clamping, there is no buffer padding for the constructed texture
         cl_mem bindTexture(const oclMat &mat);
         void releaseTexture(cl_mem& texture);
+
+        // returns whether the current context supports image2d_t format or not
+        bool support_image2d(Context *clCxt = Context::getContext());
+
     }//namespace ocl
 
 }//namespace cv
diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp
new file mode 100644 (file)
index 0000000..4abca03
--- /dev/null
@@ -0,0 +1,370 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Sen Liu, sen@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "precomp.hpp"
+#include <iostream>
+namespace cv
+{
+namespace ocl
+{
+extern const char *moments;
+
+// The function calculates center of gravity and the central second order moments
+static void icvCompleteMomentState( CvMoments* moments ) // also fills inv_sqrt_m00 and the third-order central moments mu30..mu03
+{
+    double cx = 0, cy = 0;
+    double mu20, mu11, mu02;
+
+    assert( moments != 0 );
+    moments->inv_sqrt_m00 = 0;
+
+    if( fabs(moments->m00) > DBL_EPSILON ) // otherwise cx, cy and inv_sqrt_m00 stay 0
+    {
+        double inv_m00 = 1. / moments->m00;
+        cx = moments->m10 * inv_m00;
+        cy = moments->m01 * inv_m00;
+        moments->inv_sqrt_m00 = std::sqrt( fabs(inv_m00) );
+    }
+
+    // mu20 = m20 - m10*cx
+    mu20 = moments->m20 - moments->m10 * cx;
+    // mu11 = m11 - m10*cy
+    mu11 = moments->m11 - moments->m10 * cy;
+    // mu02 = m02 - m01*cy
+    mu02 = moments->m02 - moments->m01 * cy;
+
+    moments->mu20 = mu20;
+    moments->mu11 = mu11;
+    moments->mu02 = mu02;
+
+    // mu30 = m30 - cx*(3*mu20 + cx*m10)
+    moments->mu30 = moments->m30 - cx * (3 * mu20 + cx * moments->m10);
+    mu11 += mu11; // local mu11 now holds 2*mu11, which is the factor the two formulas below refer to
+    // mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
+    moments->mu21 = moments->m21 - cx * (mu11 + cx * moments->m01) - cy * mu20;
+    // mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
+    moments->mu12 = moments->m12 - cy * (mu11 + cy * moments->m10) - cx * mu02;
+    // mu03 = m03 - cy*(3*mu02 + cy*m01)
+    moments->mu03 = moments->m03 - cy * (3 * mu02 + cy * moments->m01);
+}
+
+// Computes spatial and central moments of a point sequence: runs the "icvContourMoments" kernel, sums its 10 output rows on the host, then scales.
+static void icvContourMoments( CvSeq* contour, CvMoments* mom )
+{
+    if( contour->total ) // an empty sequence leaves *mom untouched
+    {
+        CvSeqReader reader;
+        int lpt = contour->total;
+        double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03;
+
+        cvStartReadSeq( contour, &reader, 0 );
+
+        size_t reader_size = lpt << 1; // two floats (x, y) per point
+        cv::Mat reader_mat(1,reader_size,CV_32FC1);
+
+        bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
+
+        if (!cv::ocl::Context::getContext()->impl->double_support && is_float)
+        {
+            CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
+        }
+
+        if( is_float )
+        {
+            for(size_t i = 0; i < reader_size; ++i) // i advances twice per pass: x at even index, y at odd index
+            {
+                reader_mat.at<float>(0, i++) = ((CvPoint2D32f*)(reader.ptr))->x;
+                reader_mat.at<float>(0, i) = ((CvPoint2D32f*)(reader.ptr))->y;
+                CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
+            }
+        }
+        else
+        {
+            for(size_t i = 0; i < reader_size; ++i) // integer points are converted to float in the same interleaved layout
+            {
+                reader_mat.at<float>(0, i++) = ((CvPoint*)(reader.ptr))->x;
+                reader_mat.at<float>(0, i) = ((CvPoint*)(reader.ptr))->y;
+                CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
+            }
+        }
+
+        cv::ocl::oclMat dst_a(10, lpt, CV_64FC1); // one row per accumulated term (a00 .. a03), one column per point
+        cv::ocl::oclMat reader_oclmat(reader_mat);
+        int llength = std::min(lpt,128);
+        size_t localThreads[3]  = { llength, 1, 1};
+        size_t globalThreads[3] = { lpt, 1, 1};
+        vector<pair<size_t , const void *> > args;
+        args.push_back( make_pair( sizeof(cl_int) , (void *)&contour->total ));
+        args.push_back( make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
+        args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a.data ));
+        cl_int dst_step = (cl_int)dst_a.step;
+        args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step ));
+
+        openCLExecuteKernel(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
+
+        cv::Mat dst(dst_a); // copies the kernel output into a host matrix
+        a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
+        if (!cv::ocl::Context::getContext()->impl->double_support) // elements are read as cl_long here; presumably the kernel stores integers in that case - TODO confirm
+        {
+            for (int i = 0; i < contour->total; ++i)
+            {
+                a00 += dst.at<cl_long>(0, i);
+                a10 += dst.at<cl_long>(1, i);
+                a01 += dst.at<cl_long>(2, i);
+                a20 += dst.at<cl_long>(3, i);
+                a11 += dst.at<cl_long>(4, i);
+                a02 += dst.at<cl_long>(5, i);
+                a30 += dst.at<cl_long>(6, i);
+                a21 += dst.at<cl_long>(7, i);
+                a12 += dst.at<cl_long>(8, i);
+                a03 += dst.at<cl_long>(9, i);
+            }
+        }
+        else
+        {
+            a00 = cv::sum(dst.row(0))[0];
+            a10 = cv::sum(dst.row(1))[0];
+            a01 = cv::sum(dst.row(2))[0];
+            a20 = cv::sum(dst.row(3))[0];
+            a11 = cv::sum(dst.row(4))[0];
+            a02 = cv::sum(dst.row(5))[0];
+            a30 = cv::sum(dst.row(6))[0];
+            a21 = cv::sum(dst.row(7))[0];
+            a12 = cv::sum(dst.row(8))[0];
+            a03 = cv::sum(dst.row(9))[0];
+        }
+
+        double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
+        if( fabs(a00) > FLT_EPSILON ) // when |a00| is at most FLT_EPSILON, *mom is left untouched
+        {
+            if( a00 > 0 ) // scale factors 1/2 .. 1/60 take the sign of a00, so m00 ends up non-negative
+            {
+                db1_2 = 0.5;
+                db1_6 = 0.16666666666666666666666666666667;
+                db1_12 = 0.083333333333333333333333333333333;
+                db1_24 = 0.041666666666666666666666666666667;
+                db1_20 = 0.05;
+                db1_60 = 0.016666666666666666666666666666667;
+            }
+            else
+            {
+                db1_2 = -0.5;
+                db1_6 = -0.16666666666666666666666666666667;
+                db1_12 = -0.083333333333333333333333333333333;
+                db1_24 = -0.041666666666666666666666666666667;
+                db1_20 = -0.05;
+                db1_60 = -0.016666666666666666666666666666667;
+            }
+
+            // spatial moments
+            mom->m00 = a00 * db1_2;
+            mom->m10 = a10 * db1_6;
+            mom->m01 = a01 * db1_6;
+            mom->m20 = a20 * db1_12;
+            mom->m11 = a11 * db1_24;
+            mom->m02 = a02 * db1_12;
+            mom->m30 = a30 * db1_20;
+            mom->m21 = a21 * db1_60;
+            mom->m12 = a12 * db1_60;
+            mom->m03 = a03 * db1_20;
+
+            icvCompleteMomentState( mom ); // derive the central moments from the spatial ones
+        }
+    }
+}
+
+static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
+{
+    const int TILE_SIZE = 256;
+    int type, depth, cn, coi = 0;
+    CvMat stub, *mat = (CvMat*)array;
+    CvContour contourHeader;
+    CvSeq* contour = 0;
+    CvSeqBlock block;
+    if( CV_IS_SEQ( array ))
+    {
+        contour = (CvSeq*)array;
+        if( !CV_IS_SEQ_POINT_SET( contour ))
+            CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" );
+    }
+
+    if( !moments )
+        CV_Error( CV_StsNullPtr, "" );
+
+    memset( mom, 0, sizeof(*mom));
+
+    if( !contour )
+    {
+
+        mat = cvGetMat( mat, &stub, &coi );
+        type = CV_MAT_TYPE( mat->type );
+
+        if( type == CV_32SC2 || type == CV_32FC2 )
+        {
+            contour = cvPointSeqFromMat(
+                          CV_SEQ_KIND_CURVE | CV_SEQ_FLAG_CLOSED,
+                          mat, &contourHeader, &block );
+        }
+    }
+    if( contour )
+    {
+        icvContourMoments( contour, mom );
+        return;
+    }
+
+    type = CV_MAT_TYPE( mat->type );
+    depth = CV_MAT_DEPTH( type );
+    cn = CV_MAT_CN( type );
+
+    cv::Size size = cvGetMatSize( mat );
+    if( cn > 1 && coi == 0 )
+        CV_Error( CV_StsBadArg, "Invalid image type" );
+
+    if( size.width <= 0 || size.height <= 0 )
+        return;
+
+    cv::Mat src0(mat);
+    cv::ocl::oclMat src(src0);
+    cv::Size tileSize;
+    int blockx,blocky;
+    if(size.width%TILE_SIZE == 0)
+        blockx = size.width/TILE_SIZE;
+    else
+        blockx = size.width/TILE_SIZE + 1;
+    if(size.height%TILE_SIZE == 0)
+        blocky = size.height/TILE_SIZE;
+    else
+        blocky = size.height/TILE_SIZE + 1;
+    cv::ocl::oclMat dst_m00(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m10(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m01(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m20(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m11(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m02(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m30(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m21(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m12(blocky, blockx, CV_64FC1);
+    cv::ocl::oclMat dst_m03(blocky, blockx, CV_64FC1);
+    cl_mem sum = openCLCreateBuffer(src.clCxt,CL_MEM_READ_WRITE,10*sizeof(double));
+    int tile_width  = std::min(size.width,TILE_SIZE);
+    int tile_height = std::min(size.height,TILE_SIZE);
+    size_t localThreads[3]  = { tile_height, 1, 1};
+    size_t globalThreads[3] = { size.height, blockx, 1};
+    vector<pair<size_t , const void *> > args,args_sum;
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.width ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.height ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data ));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.cols ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.step ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&type ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&depth ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&cn ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&coi ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&binary ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
+    openCLExecuteKernel(dst_m00.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth);
+
+    size_t localThreadss[3]  = { 128, 1, 1};
+    size_t globalThreadss[3] = { 128, 1, 1};
+    args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
+    args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
+    args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_height ));
+    args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_width ));
+    args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&sum ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data ));
+    args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data ));
+    openCLExecuteKernel(dst_m00.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
+    double* dstsum = new double[10];
+    memset(dstsum,0,10*sizeof(double));
+    openCLReadBuffer(dst_m00.clCxt,sum,(void *)dstsum,10*sizeof(double));
+    mom->m00 = dstsum[0];
+    mom->m10 = dstsum[1];
+    mom->m01 = dstsum[2];
+    mom->m20 = dstsum[3];
+    mom->m11 = dstsum[4];
+    mom->m02 = dstsum[5];
+    mom->m30 = dstsum[6];
+    mom->m21 = dstsum[7];
+    mom->m12 = dstsum[8];
+    mom->m03 = dstsum[9];
+
+    icvCompleteMomentState( mom );
+}
+// Computes the moments of _array through ocl_cvMoments and returns them as a Moments value.
+Moments ocl_moments( InputArray _array, bool binaryImage )
+{
+    CvMoments om; // zeroed and filled by ocl_cvMoments
+    Mat arr = _array.getMat();
+    CvMat c_array = arr;
+    ocl_cvMoments(&c_array, &om, binaryImage);
+    return om; // converted from CvMoments to the declared Moments return type
+}
+
+}
+
+}
+
index f65621f..f4cdae1 100644 (file)
@@ -95,6 +95,8 @@ namespace cv
         ///////////////////////////OpenCL call wrappers////////////////////////////
         void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
                                size_t widthInBytes, size_t height);
+        void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
+                               size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
         void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
                             const void *src, size_t spitch,
                             size_t width, size_t height, enum openCLMemcpyKind kind, int channels = -1);
@@ -143,6 +145,7 @@ namespace cv
             //extra options to recognize vendor specific fp64 extensions
             char extra_options[512];
             string Binpath;
+            int unified_memory; //1 means integrated GPU, otherwise this value is 0
         };
     }
 }
index d4dbfd5..9214406 100644 (file)
@@ -574,8 +574,9 @@ static void lkSparse_run(oclMat &I, oclMat &J,
     Context  *clCxt = I.clCxt;
     int elemCntPerRow = I.step / I.elemSize();
     string kernelName = "lkSparse";
-    size_t localThreads[3]  = { 8, 8, 1 };
-    size_t globalThreads[3] = { 8 * ptcount, 8, 1};
+    bool isImageSupported = support_image2d();
+    size_t localThreads[3]  = { 8, isImageSupported ? 8 : 32, 1 };
+    size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1};
     int cn = I.oclchannels();
     char calcErr;
     if (level == 0)
@@ -588,8 +589,9 @@ static void lkSparse_run(oclMat &I, oclMat &J,
     }
 
     vector<pair<size_t , const void *> > args;
-    cl_mem ITex = bindTexture(I);
-    cl_mem JTex = bindTexture(J);
+
+    cl_mem ITex = isImageSupported ? bindTexture(I) : (cl_mem)I.data;
+    cl_mem JTex = isImageSupported ? bindTexture(J) : (cl_mem)J.data;
 
     args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex ));
@@ -602,6 +604,8 @@ static void lkSparse_run(oclMat &I, oclMat &J,
     args.push_back( make_pair( sizeof(cl_int), (void *)&level ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
+    if (!isImageSupported)     
+        args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
     args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&cn ));
@@ -610,19 +614,14 @@ static void lkSparse_run(oclMat &I, oclMat &J,
     args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
     args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
 
-    try
+    if(isImageSupported)
     {
         openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
-    }
-    catch(Exception&)
-    {
-        printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
         releaseTexture(ITex);
         releaseTexture(JTex);
-        ITex = (cl_mem)I.data;
-        JTex = (cl_mem)J.data;
-        localThreads[1] = globalThreads[1] = 32;
-        args.insert( args.begin()+11, make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
+    }
+    else
+    {
         openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
     }
 }
@@ -724,7 +723,7 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
                  oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
 {
     Context  *clCxt = I.clCxt;
-    bool isImageSupported = clCxt->impl->devName.find("Intel(R) HD Graphics") == string::npos;
+    bool isImageSupported = support_image2d();
     int elemCntPerRow = I.step / I.elemSize();
 
     string kernelName = "lkDense";
diff --git a/modules/ocl/src/stereobm.cpp b/modules/ocl/src/stereobm.cpp
new file mode 100644 (file)
index 0000000..57e14f9
--- /dev/null
@@ -0,0 +1,263 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//    Xiaopeng Fu, xiaopeng@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <vector>
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace std;
+
+
+namespace cv
+{
+namespace ocl
+{
+
+///////////////////////////OpenCL kernel strings///////////////////////////
+extern const char *stereobm;
+
+}
+}
+namespace cv
+{
+namespace ocl
+{
+namespace stereoBM
+{
+/////////////////////////////////////////////////////////////////////////
+//////////////////////////prefilter_xsobel///////////////////////////////
+////////////////////////////////////////////////////////////////////////
+/**
+ * Launches the "prefilter_xsobel" kernel of the stereobm program on `input`.
+ *
+ * @param input         source image; its OpenCL context is used for the launch
+ * @param output        buffer passed to the kernel as argument 1
+ * @param prefilterCap  passed to the kernel unchanged as argument 4
+ *
+ * The global work size is cols x rows x 1 with a 1x1x1 work-group. The function
+ * waits for the command queue to finish before returning; any OpenCL error is
+ * reported through openCLSafeCall.
+ */
+static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap)
+{
+    Context *clCxt = input.clCxt;
+
+    string kernelName = "prefilter_xsobel";
+    cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
+
+    size_t blockSize = 1;
+    // Explicit casts: int -> size_t inside a braced initializer is a narrowing
+    // conversion, which C++11 compilers reject.
+    size_t globalThreads[3] = { static_cast<size_t>(input.cols), static_cast<size_t>(input.rows), 1 };
+    size_t localThreads[3]  = { blockSize, blockSize, 1 };
+
+    openCLVerifyKernel(clCxt, kernel,  localThreads);
+    openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&input.data));
+    openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&output.data));
+    openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&input.rows));
+    openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols));
+    openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap));
+
+    openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL,
+                                          globalThreads, localThreads, 0, NULL, NULL));
+
+    // clFinish returns a status code; keep it instead of dropping it, but release
+    // the kernel first so a failed wait does not leak the kernel object.
+    cl_int finishStatus = clFinish(clCxt->impl->clCmdQueue);
+    openCLSafeCall(clReleaseKernel(kernel));
+    openCLSafeCall(finishStatus);
+}
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////common////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+// Factor applied to the (BLOCK_W + 2 * winsz2) term when stereo_bm sizes the
+// kernel's local-memory argument.
+#define N_DISPARITIES 8
+// Divisor applied to the image height when stereo_bm and postfilter_textureness
+// compute the second dimension of their global work size.
+#define ROWSperTHREAD 21
+// Work-group width (localThreads[0]) used by stereo_bm and postfilter_textureness.
+#define BLOCK_W 128
+// Integer division rounded up: for positive arguments, the number of
+// `grain`-sized pieces needed to cover `total`.
+static inline int divUp(int total, int grain)
+{
+    const int padded = total + grain - 1;
+    return padded / grain;
+}
+////////////////////////////////////////////////////////////////////////////
+///////////////////////////////stereoBM_GPU////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+/**
+ * Launches the "stereoKernel" kernel of the stereobm program.
+ *
+ * @param left        kernel argument 0; also supplies the OpenCL context, the
+ *                    image size and the row step passed to the kernel
+ * @param right       kernel argument 1
+ * @param disp        cleared to 0 here, then passed as kernel argument 4
+ * @param maxdisp     passed to the kernel as argument 9 and subtracted from the
+ *                    image width when sizing the global work range
+ * @param winSize     window size; half of it (winSize >> 1) is passed as argument 10
+ * @param minSSD_buf  filled with 0xFFFFFFFF here, then passed as kernel argument 2
+ *
+ * The function waits for the command queue to finish before returning; any
+ * OpenCL error is reported through openCLSafeCall.
+ */
+static void stereo_bm(const oclMat &left, const oclMat &right,  oclMat &disp,
+               int maxdisp, int winSize,  oclMat &minSSD_buf)
+{
+    int winsz2 = winSize >> 1;
+
+    // NOTE(review): this window-size validation was left disabled by the author.
+    //if(winsz2 == 0 || winsz2 >= calles_num)
+    //cv::ocl:error("Unsupported window size", __FILE__, __LINE__, __FUNCTION__);
+
+    Context *clCxt = left.clCxt;
+
+    string kernelName = "stereoKernel";
+    cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
+
+    disp.setTo(Scalar_<unsigned char>::all(0));
+    minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF));
+
+    // The kernel arguments below are declared to OpenCL as sizeof(cl_int). Copy the
+    // step values into real cl_int variables first: handing clSetKernelArg the
+    // address of a wider size_t only works by accident on little-endian hosts.
+    // (oclMat::step is declared outside this view and is assumed to be size_t;
+    // the casts are harmless either way.)
+    cl_int minssd_step = static_cast<cl_int>(minSSD_buf.step / minSSD_buf.elemSize());
+    cl_int disp_step   = static_cast<cl_int>(disp.step);
+    cl_int left_step   = static_cast<cl_int>(left.step);
+    size_t local_mem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * winsz2)) *
+                            sizeof(cl_uint);
+    size_t localThreads[]  = { BLOCK_W, 1,1};
+    // Explicit casts: int -> size_t inside a braced initializer is a narrowing
+    // conversion, which C++11 compilers reject.
+    size_t globalThreads[] = { static_cast<size_t>(divUp(left.cols - maxdisp - 2 * winsz2, BLOCK_W) *BLOCK_W),
+                               static_cast<size_t>(divUp(left.rows - 2 * winsz2, ROWSperTHREAD)),
+                               1
+                             };
+
+    openCLVerifyKernel(clCxt, kernel, localThreads);
+    openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&left.data));
+    openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&right.data));
+    openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&minSSD_buf.data));
+    openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&minssd_step));
+    openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&disp.data));
+    openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&disp_step));
+    openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols));
+    openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&left.rows));
+    openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&left_step));
+    openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&maxdisp));
+    openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2));
+    // A NULL value with a non-zero size reserves local memory for the kernel.
+    openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL));
+
+    openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL,
+                                          globalThreads, localThreads, 0, NULL, NULL));
+
+    // clFinish returns a status code; keep it instead of dropping it, but release
+    // the kernel first so a failed wait does not leak the kernel object.
+    cl_int finishStatus = clFinish(clCxt->impl->clCmdQueue);
+    openCLSafeCall(clReleaseKernel(kernel));
+    openCLSafeCall(finishStatus);
+}
+////////////////////////////////////////////////////////////////////////////
+///////////////////////////////postfilter_textureness///////////////////////
+////////////////////////////////////////////////////////////////////////////
+/**
+ * Launches the "textureness_kernel" kernel of the stereobm program.
+ *
+ * @param left                kernel argument 4; also supplies the OpenCL context
+ *                            and the image size used for the global work range
+ * @param winSize             passed to the kernel as argument 7 and used to size
+ *                            the local-memory argument
+ * @param avergeTexThreshold  passed to the kernel unchanged as argument 8
+ * @param disparity           kernel argument 0, together with its rows, cols and step
+ *
+ * The function waits for the command queue to finish before returning; any
+ * OpenCL error is reported through openCLSafeCall.
+ */
+static void postfilter_textureness(oclMat &left, int winSize,
+                            float avergeTexThreshold, oclMat &disparity)
+{
+    Context *clCxt = left.clCxt;
+
+    string kernelName = "textureness_kernel";
+    cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
+
+    size_t blockSize = 1;
+    size_t localThreads[]  = { BLOCK_W, blockSize ,1};
+    // Explicit casts: int -> size_t inside a braced initializer is a narrowing
+    // conversion, which C++11 compilers reject.
+    size_t globalThreads[] = { static_cast<size_t>(divUp(left.cols, BLOCK_W) *BLOCK_W),
+                               static_cast<size_t>(divUp(left.rows, 2 * ROWSperTHREAD)),
+                               1
+                             };
+
+    size_t local_mem_size = (localThreads[0] + localThreads[0] + (winSize / 2) * 2) * sizeof(float);
+
+    // Argument 3 is declared to OpenCL as sizeof(cl_int); pass a real cl_int rather
+    // than the address of the wider step field. (oclMat::step is declared outside
+    // this view and is assumed to be size_t; the cast is harmless either way.)
+    cl_int disparity_step = static_cast<cl_int>(disparity.step);
+
+    openCLVerifyKernel(clCxt, kernel,  localThreads);
+    openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&disparity.data));
+    openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_int), (void *)&disparity.rows));
+    openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&disparity.cols));
+    openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&disparity_step));
+    openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&left.data));
+    openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&left.rows));
+    openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols));
+    openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize));
+    openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold));
+    // A NULL value with a non-zero size reserves local memory for the kernel.
+    openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL));
+    openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL,
+                                          globalThreads, localThreads, 0, NULL, NULL));
+
+    // clFinish returns a status code; keep it instead of dropping it, but release
+    // the kernel first so a failed wait does not leak the kernel object.
+    cl_int finishStatus = clFinish(clCxt->impl->clCmdQueue);
+    openCLSafeCall(clReleaseKernel(kernel));
+    openCLSafeCall(finishStatus);
+}
+//////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////operator/////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/**
+ * Runs the block-matching steps in order: optional x-Sobel prefilter, the
+ * matching kernel, and the optional textureness post-filter.
+ *
+ * @param minSSD              scratch buffer, (re)created as CV_32SC1 with the size of `left`
+ * @param leBuf, riBuf        prefilter outputs; only created when the preset is PREFILTER_XSOBEL
+ * @param preset              compared against StereoBM_OCL::PREFILTER_XSOBEL
+ * @param ndisp               forwarded to stereo_bm as its maxdisp argument
+ * @param winSize             forwarded to stereo_bm and postfilter_textureness
+ * @param avergeTexThreshold  the post-filter runs only when this is non-zero
+ * @param left, right         input images; debug builds assert equal size and CV_8UC1 type
+ * @param disparity           output, (re)created as CV_8UC1 with the size of `left`
+ */
+static void operator_(oclMat &minSSD, oclMat &leBuf, oclMat &riBuf, int preset, int ndisp,
+               int winSize, float avergeTexThreshold, const oclMat &left,
+               const oclMat &right, oclMat &disparity)
+{
+    // Assertion expressions are kept verbatim: the macro stringifies them.
+    CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
+    CV_DbgAssert(left.type() == CV_8UC1);
+    CV_DbgAssert(right.type() == CV_8UC1);
+
+    disparity.create(left.size(), CV_8UC1);
+    minSSD.create(left.size(), CV_32SC1);
+
+    // Images actually handed to the matcher; start as header copies of the inputs.
+    oclMat matchLeft(left);
+    oclMat matchRight(right);
+
+    const bool wantXSobel = (preset == cv::ocl::StereoBM_OCL::PREFILTER_XSOBEL);
+    if (wantXSobel)
+    {
+        const int prefilterCap = 31;
+
+        leBuf.create( left.size(),  left.type());
+        riBuf.create(right.size(), right.type());
+
+        prefilter_xsobel( left, leBuf, prefilterCap);
+        prefilter_xsobel(right, riBuf, prefilterCap);
+
+        // Match on the filtered images instead of the raw inputs.
+        matchLeft  = leBuf;
+        matchRight = riBuf;
+    }
+
+    stereo_bm(matchLeft, matchRight, disparity, ndisp, winSize, minSSD);
+
+    if (avergeTexThreshold != 0.0f)
+        postfilter_textureness(matchLeft, winSize, avergeTexThreshold, disparity);
+}
+}
+}
+}
+// Initial value of avergeTexThreshold used by both StereoBM_OCL constructors.
+const float defaultAvgTexThreshold = 3;
+
+// Default construction: BASIC_PRESET, DEFAULT_NDISP disparities, DEFAULT_WINSZ window.
+cv::ocl::StereoBM_OCL::StereoBM_OCL()
+    : preset(BASIC_PRESET),
+      ndisp(DEFAULT_NDISP),
+      winSize(DEFAULT_WINSZ),
+      avergeTexThreshold(defaultAvgTexThreshold)
+{
+}
+
+// Constructs a matcher with explicit settings; avergeTexThreshold starts at
+// defaultAvgTexThreshold. CV_Assert rejects the arguments unless
+//   - 0 < ndisparities_ <= 256 (1 << 8, the bit width of unsigned char),
+//   - ndisparities_ is a multiple of 8,
+//   - winSize_ is odd.
+cv::ocl::StereoBM_OCL::StereoBM_OCL(int preset_, int ndisparities_, int winSize_)
+    : preset(preset_), ndisp(ndisparities_), winSize(winSize_),
+      avergeTexThreshold(defaultAvgTexThreshold)
+{
+    const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
+    CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp);
+    CV_Assert(ndisp % 8 == 0);
+    CV_Assert(winSize % 2 == 1);
+}
+
+// Always returns true: this implementation performs no device or workload check.
+bool cv::ocl::StereoBM_OCL::checkIfGpuCallReasonable()
+{
+    return true;
+}
+
+// Computes `disparity` for the left/right pair by forwarding to
+// cv::ocl::stereoBM::operator_ with this object's buffers (minSSD, leBuf, riBuf)
+// and settings (preset, ndisp, winSize, avergeTexThreshold).
+void cv::ocl::StereoBM_OCL::operator() ( const oclMat &left, const oclMat &right,
+        oclMat &disparity)
+{
+    cv::ocl::stereoBM::operator_(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity);
+}
+
index 65dc86d..9d1372b 100644 (file)
@@ -1,4 +1,4 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
+/*M/////////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
@@ -44,6 +44,7 @@
 //M*/
 #include <iomanip>
 #include "precomp.hpp"
+#include "mcwutil.hpp"
 //#include "opencv2/highgui/highgui.hpp"
 
 using namespace cv;
@@ -56,6 +57,21 @@ namespace cv
     {
         ///////////////////////////OpenCL kernel strings///////////////////////////
         extern const char *nonfree_surf;
+
+        const char* noImage2dOption = "-D DISABLE_IMAGE2D";
+
+        // Runs a SURF kernel through openCLExecuteKernel. When support_image2d()
+        // reports false, the extra build option noImage2dOption is passed as well;
+        // otherwise the call is made without any build option argument.
+        static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
+            size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels, int depth)
+        {
+            if(!support_image2d())
+            {
+                openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, noImage2dOption);
+                return;
+            }
+
+            openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth);
+        }
     }
 }
 
@@ -71,7 +87,7 @@ static inline int calcSize(int octave, int layer)
 
     /* Wavelet size increment between layers. This should be an even number,
     such that the wavelet sizes in an octave are either all even or all odd.
-    This ensures that when looking for the neighbours of a sample, the layers
+    This ensures that when looking for the neighbors of a sample, the layers
 
     above and below are aligned correctly. */
     const int HAAR_SIZE_INC = 6;
@@ -79,6 +95,7 @@ static inline int calcSize(int octave, int layer)
     return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
 }
 
+
 class SURF_OCL_Invoker
 {
 public:
@@ -88,26 +105,27 @@ public:
     //void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
     //void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
 
-    // kernel callers declearations
+    // kernel callers declarations
     void icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int layer_rows);
 
     void icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
                                   int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
 
-    void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
+    void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
                                     oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures);
 
     void icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures);
 
-    void compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures);
-    // end of kernel callers declearations
+    void icvSetUpright_gpu(const oclMat &keypoints, int nFeatures);
 
+    void compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures);
+    // end of kernel callers declarations
 
     SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
         surf_(surf),
         img_cols(img.cols), img_rows(img.rows),
-        use_mask(!mask.empty()),
-        imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
+        use_mask(!mask.empty()), counters(oclMat()),
+        imgTex(NULL), sumTex(NULL), maskSumTex(NULL), _img(img)
     {
         CV_Assert(!img.empty() && img.type() == CV_8UC1);
         CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
@@ -131,17 +149,18 @@ public:
         counters.create(1, surf_.nOctaves + 1, CV_32SC1);
         counters.setTo(Scalar::all(0));
 
-        //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
-
-        bindImgTex(img, imgTex);
-        integral(img, surf_.sum); // the two argumented integral version is incorrect
+        integral(img, surf_.sum);
+        if(support_image2d())
+        {
+            bindImgTex(img, imgTex);
+            bindImgTex(surf_.sum, sumTex);
+        }
 
-        bindImgTex(surf_.sum, sumTex);
         maskSumTex = 0;
 
         if (use_mask)
         {
-            throw std::exception();
+            CV_Error(CV_StsBadFunc, "Masked SURF detector is not implemented yet");
             //!FIXME
             // temp fix for missing min overload
             //oclMat temp(mask.size(), mask.type());
@@ -155,7 +174,7 @@ public:
     void detectKeypoints(oclMat &keypoints)
     {
         // create image pyramid buffers
-        // different layers have same sized buffers, but they are sampled from gaussin kernel.
+        // different layers have same sized buffers, but they are sampled from Gaussian kernel.
         ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.det);
         ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);
 
@@ -175,8 +194,8 @@ public:
             icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
                                      octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
 
-            unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
-            maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
+            int maxCounter = ((Mat)counters).at<int>(1 + octave);
+            maxCounter = std::min(maxCounter, static_cast<int>(maxCandidates));
 
             if (maxCounter > 0)
             {
@@ -184,15 +203,29 @@ public:
                                            keypoints, counters, octave, layer_rows, maxFeatures);
             }
         }
-        unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
-        featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
+        int featureCounter = Mat(counters).at<int>(0);
+        featureCounter = std::min(featureCounter, static_cast<int>(maxFeatures));
 
         keypoints.cols = featureCounter;
 
         if (surf_.upright)
-            keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
+        {
+            //keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
+            setUpright(keypoints);
+        }
         else
+        {
             findOrientation(keypoints);
+        }
+    }
+
+    // Calls icvSetUpright_gpu for the keypoints when there is at least one
+    // feature (keypoints.cols > 0); does nothing otherwise.
+    void setUpright(oclMat &keypoints)
+    {
+        const int featureCount = keypoints.cols;
+        if(featureCount <= 0)
+            return;
+
+        icvSetUpright_gpu(keypoints, featureCount);
     }
 
     void findOrientation(oclMat &keypoints)
@@ -222,7 +255,6 @@ public:
             openCLFree(sumTex);
         if(maskSumTex)
             openCLFree(maskSumTex);
-        additioalParamBuffer.release();
     }
 
 private:
@@ -236,13 +268,13 @@ private:
     int maxFeatures;
 
     oclMat counters;
-
+    
     // texture buffers
     cl_mem imgTex;
     cl_mem sumTex;
     cl_mem maskSumTex;
 
-    oclMat additioalParamBuffer;
+    const oclMat _img; // make a copy for non-image2d_t supported platform
 
     SURF_OCL_Invoker &operator= (const SURF_OCL_Invoker &right)
     {
@@ -362,11 +394,6 @@ void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat
 {
     if (!img.empty())
     {
-        if (img.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
-        {
-            cout << " Intel HD GPU device unsupported " << endl;
-            return;
-        }
         SURF_OCL_Invoker surf(*this, img, mask);
 
         surf.detectKeypoints(keypoints);
@@ -378,11 +405,6 @@ void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat
 {
     if (!img.empty())
     {
-        if (img.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
-        {
-            cout << " Intel HD GPU device unsupported " << endl;
-            return;
-        }
         SURF_OCL_Invoker surf(*this, img, mask);
 
         if (!useProvidedKeypoints)
@@ -443,74 +465,11 @@ void cv::ocl::SURF_OCL::releaseMemory()
 // bind source buffer to image oject.
 void SURF_OCL_Invoker::bindImgTex(const oclMat &img, cl_mem &texture)
 {
-    cl_image_format format;
-    int err;
-    int depth    = img.depth();
-    int channels = img.channels();
-
-    switch(depth)
-    {
-    case CV_8U:
-        format.image_channel_data_type = CL_UNSIGNED_INT8;
-        break;
-    case CV_32S:
-        format.image_channel_data_type = CL_UNSIGNED_INT32;
-        break;
-    case CV_32F:
-        format.image_channel_data_type = CL_FLOAT;
-        break;
-    default:
-        throw std::exception();
-        break;
-    }
-    switch(channels)
-    {
-    case 1:
-        format.image_channel_order     = CL_R;
-        break;
-    case 3:
-        format.image_channel_order     = CL_RGB;
-        break;
-    case 4:
-        format.image_channel_order     = CL_RGBA;
-        break;
-    default:
-        throw std::exception();
-        break;
-    }
     if(texture)
     {
         openCLFree(texture);
     }
-
-#ifdef CL_VERSION_1_2
-    cl_image_desc desc;
-    desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
-    desc.image_width      = img.step / img.elemSize();
-    desc.image_height     = img.rows;
-    desc.image_depth      = 0;
-    desc.image_array_size = 1;
-    desc.image_row_pitch  = 0;
-    desc.image_slice_pitch = 0;
-    desc.buffer           = NULL;
-    desc.num_mip_levels   = 0;
-    desc.num_samples      = 0;
-    texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
-#else
-    texture = clCreateImage2D(
-                  Context::getContext()->impl->clContext,
-                  CL_MEM_READ_WRITE,
-                  &format,
-                  img.step / img.elemSize(),
-                  img.rows,
-                  0,
-                  NULL,
-                  &err);
-#endif
-    size_t origin[] = { 0, 0, 0 };
-    size_t region[] = { img.step / img.elemSize(), img.rows, 1 };
-    clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
-    openCLSafeCall(err);
+    texture = bindTexture(img);
 }
 
 ////////////////////////////
@@ -525,7 +484,14 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
     string kernelName = "icvCalcLayerDetAndTrace";
     vector< pair<size_t, const void *> > args;
 
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex));
+    if(sumTex)
+    {
+        args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex));
+    }
+    else
+    {
+        args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported
+    }
     args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&det.step));
@@ -535,6 +501,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
     args.push_back( make_pair( sizeof(cl_int), (void *)&nOctaveLayers));
     args.push_back( make_pair( sizeof(cl_int), (void *)&octave));
     args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&surf_.sum.step));
 
     size_t localThreads[3]  = {16, 16, 1};
     size_t globalThreads[3] =
@@ -543,7 +510,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
         divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
         1
     };
-    openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
 }
 
 void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
@@ -573,19 +540,26 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
 
     if(use_mask)
     {
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&maskSumTex));
+        if(maskSumTex)
+        {
+            args.push_back( make_pair( sizeof(cl_mem), (void *)&maskSumTex));
+        }
+        else
+        {
+            args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.data));
+        }
+        args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.step));
     }
-
     size_t localThreads[3]  = {16, 16, 1};
     size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) *localThreads[0],
                                divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) *nLayers *localThreads[1],
                                1
                               };
 
-    openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
 }
 
-void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
+void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
         oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures)
 {
     Context *clCxt = det.clCxt;
@@ -607,7 +581,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa
     size_t localThreads[3]  = {3, 3, 3};
     size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
 
-    openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
 }
 
 void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
@@ -617,18 +591,44 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
 
     vector< pair<size_t, const void *> > args;
 
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex));
+    if(sumTex)
+    {
+        args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex));
+    }
+    else
+    {
+        args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported
+    }
     args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
     args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows));
     args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&surf_.sum.step));
 
     size_t localThreads[3]  = {32, 4, 1};
     size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
 
-    openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
 }
 
+// Launches the "icvSetUpright" kernel of the nonfree_surf program.
+//
+// keypoints  kernel argument 0 (buffer) and argument 1 (row step)
+// nFeatures  kernel argument 2; also used as the global work size, with a
+//            256-wide work-group
+//
+// The launch uses the OpenCL context of the `counters` member.
+void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
+{
+    Context *clCxt = counters.clCxt;
+    string kernelName = "icvSetUpright";
+
+    // The step argument is declared to OpenCL as sizeof(cl_int); pass a real cl_int
+    // rather than the address of the wider step field. (oclMat::step is declared
+    // outside this view and is assumed to be size_t; the cast is harmless either
+    // way.) The variable must stay alive until the kernel call below, because
+    // `args` stores only its address.
+    cl_int keypointsStep = static_cast<cl_int>(keypoints.step);
+
+    vector< pair<size_t, const void *> > args;
+
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&keypointsStep));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&nFeatures));
+
+    size_t localThreads[3]  = {256, 1, 1};
+    size_t globalThreads[3] = {saturate_cast<size_t>(nFeatures), 1, 1};
+
+    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+}
+
+
 void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures)
 {
     // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
@@ -649,12 +649,23 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
         globalThreads[1] = 16 * localThreads[1];
 
         args.clear();
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex));
+        if(imgTex)
+        {
+            args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex));
+        }
+        else
+        {
+            args.push_back( make_pair( sizeof(cl_mem), (void *)&_img.data));
+        }
         args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
         args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
         args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
-        openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+        args.push_back( make_pair( sizeof(cl_int), (void *)&_img.rows));
+        args.push_back( make_pair( sizeof(cl_int), (void *)&_img.cols));
+        args.push_back( make_pair( sizeof(cl_int), (void *)&_img.step));
+
+        openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
 
         kernelName = "normalize_descriptors64";
 
@@ -667,7 +678,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
         args.clear();
         args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
         args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
-        openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+
+        openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
     }
     else
     {
@@ -680,12 +692,23 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
         globalThreads[1] = 16 * localThreads[1];
 
         args.clear();
-        args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex));
+        if(imgTex)
+        {
+            args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex));
+        }
+        else
+        {
+            args.push_back( make_pair( sizeof(cl_mem), (void *)&_img.data));
+        }
         args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
         args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
         args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
         args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
-        openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+        args.push_back( make_pair( sizeof(cl_int), (void *)&_img.rows));
+        args.push_back( make_pair( sizeof(cl_int), (void *)&_img.cols));
+        args.push_back( make_pair( sizeof(cl_int), (void *)&_img.step));
+       
+        openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
 
         kernelName = "normalize_descriptors128";
 
@@ -698,7 +721,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
         args.clear();
         args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
         args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
-        openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+        
+        openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
     }
 }
-
index c630871..e8c1aaa 100644 (file)
@@ -70,7 +70,7 @@
 #include "opencv2/ts/ts.hpp"
 #include "opencv2/ts/ts_perf.hpp"
 #include "opencv2/ocl/ocl.hpp"
-//#include "opencv2/nonfree/nonfree.hpp"
+#include "opencv2/nonfree/nonfree.hpp"
 
 #include "utility.hpp"
 #include "interpolation.hpp"
diff --git a/modules/ocl/test/test_calib3d.cpp b/modules/ocl/test/test_calib3d.cpp
new file mode 100644 (file)
index 0000000..58dbcc2
--- /dev/null
@@ -0,0 +1,94 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <iomanip>
+
+#ifdef HAVE_OPENCL
+
+using namespace cv;
+
+extern std::string workdir;
+// Fixture parameterised on two ints that the Accuracy test forwards to the
+// ocl::StereoBM_OCL constructor as its second and third arguments.
+PARAM_TEST_CASE(StereoMatchBM, int, int)
+{
+    int n_disp;   // test parameter 0
+    int winSize;  // test parameter 1
+
+    // Copies the test parameters into the members above.
+    virtual void SetUp()
+    {
+        n_disp  = GET_PARAM(0);
+        winSize = GET_PARAM(1);
+    }
+};
+
+// Runs ocl::StereoBM_OCL on the aloe stereo pair and compares the downloaded
+// disparity map with the stored reference image via EXPECT_MAT_SIMILAR.
+TEST_P(StereoMatchBM, Accuracy)
+{
+    Mat left_image  = readImage(workdir + "../ocl/aloe-L.png", IMREAD_GRAYSCALE);
+    Mat right_image = readImage(workdir + "../ocl/aloe-R.png", IMREAD_GRAYSCALE);
+    Mat disp_gold   = readImage(workdir + "../ocl/aloe-disp.png", IMREAD_GRAYSCALE);
+
+    // Validate the inputs before any device buffer is sized from them.
+    ASSERT_FALSE(left_image.empty());
+    ASSERT_FALSE(right_image.empty());
+    ASSERT_FALSE(disp_gold.empty());
+
+    ocl::oclMat d_left, d_right;
+    ocl::oclMat d_disp(left_image.size(), CV_8U);
+    d_left.upload(left_image);
+    d_right.upload(right_image);
+
+    ocl::StereoBM_OCL bm(0, n_disp, winSize);
+    bm(d_left, d_right, d_disp);
+
+    Mat disp;
+    d_disp.download(disp);
+
+    EXPECT_MAT_SIMILAR(disp_gold, disp, 1e-3);
+}
+
+// Single configuration: parameter 0 (n_disp) = 128, parameter 1 (winSize) = 19.
+INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoMatchBM, testing::Combine(testing::Values(128),
+                                          testing::Values(19)));
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_moments.cpp b/modules/ocl/test/test_moments.cpp
new file mode 100644 (file)
index 0000000..715ad89
--- /dev/null
@@ -0,0 +1,72 @@
+#include "precomp.hpp"
+#include <iomanip>
+#include "opencv2/imgproc/imgproc_c.h"
+
+#ifdef HAVE_OPENCL
+
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+extern string workdir;
+// Fixture for the moments tests, parameterised on (matrix type, whether the
+// contour code path is exercised as well).
+PARAM_TEST_CASE(MomentsTestBase, MatType, bool)
+{
+    int type;
+    cv::Mat mat1;
+    bool test_contours;
+
+    // Reads the test parameters and fills mat1 with random data drawn from the
+    // test system's RNG.
+    virtual void SetUp()
+    {
+        type          = GET_PARAM(0);
+        test_contours = GET_PARAM(1);
+
+        cv::Size matSize(10*MWIDTH, 10*MHEIGHT);
+        cv::RNG &rng = TS::ptr()->get_rng();
+        mat1 = randomMat(rng, matSize, type, 5, 16, false);
+    }
+
+    // Compares two moment sets through their Hu invariants rather than the raw
+    // moments, using EXPECT_MAT_NEAR with .5 as its third argument.
+    void Compare(Moments& cpu, Moments& gpu)
+    {
+        Mat cpu_dst, gpu_dst;
+        HuMoments(cpu, cpu_dst);
+        HuMoments(gpu, gpu_dst);
+        EXPECT_MAT_NEAR(gpu_dst,cpu_dst, .5, "");
+    }
+
+};
+struct ocl_Moments : MomentsTestBase {};
+
+// Compares cv::moments with cv::ocl::ocl_moments on the random matrix mat1
+// and, when test_contours is set, on every contour found in pic3.png.
+// The whole comparison is repeated LOOP_TIMES times.
+TEST_P(ocl_Moments, Mat)
+{
+    bool binaryImage = false;
+    // NOTE(review): the test framework normally calls SetUp() before the body,
+    // so this call draws mat1 a second time; confirm it is intentional.
+    SetUp();
+
+    for(int j = 0; j < LOOP_TIMES; j++)
+    {
+        if(test_contours)
+        {
+            Mat src = imread( workdir + "../cpp/pic3.png", 1 );
+            // Fail with a clear message instead of handing an empty image to cvtColor.
+            ASSERT_FALSE(src.empty());
+            Mat src_gray, canny_output;
+            cvtColor( src, src_gray, CV_BGR2GRAY );
+            vector<vector<Point> > contours;
+            vector<Vec4i> hierarchy;
+            Canny( src_gray, canny_output, 100, 200, 3 );
+            findContours( canny_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) );
+            for( size_t i = 0; i < contours.size(); i++ )
+            {
+                Moments m = moments( contours[i], false );
+                Moments dm = ocl::ocl_moments( contours[i], false );
+                Compare(m, dm);
+            }
+        }
+        cv::_InputArray _array(mat1);
+        cv::Moments CvMom = cv::moments(_array, binaryImage);
+        cv::Moments oclMom = cv::ocl::ocl_moments(_array, binaryImage);
+
+        Compare(CvMom, oclMom);
+
+    }
+}
+// Every listed matrix type is combined with both values of test_contours.
+INSTANTIATE_TEST_CASE_P(Moments, ocl_Moments, Combine(
+                            Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_64FC1), Values(true,false)));
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/test/test_surf.cpp b/modules/ocl/test/test_surf.cpp
new file mode 100644 (file)
index 0000000..c4cf60f
--- /dev/null
@@ -0,0 +1,227 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+#ifdef HAVE_OPENCL
+
+extern std::string workdir;
+
+using namespace std;
+
+// Returns true when two keypoints agree within fixed tolerances: position
+// distance below 1.0, size difference below 1.0, angle difference below 2.0,
+// response difference below 0.1, and identical octave and class_id.
+static bool keyPointsEquals(const cv::KeyPoint& p1, const cv::KeyPoint& p2)
+{
+    const double maxPtDif = 1.0;
+    const double maxSizeDif = 1.0;
+    const double maxAngleDif = 2.0;
+    const double maxResponseDif = 0.1;
+
+    const double dist = cv::norm(p1.pt - p2.pt);
+
+    // fabs() is used for every floating-point difference: a bare abs() can
+    // resolve to the integer overload and truncate the value before the
+    // comparison, which would loosen the angle and response tolerances.
+    return dist < maxPtDif &&
+           fabs(p1.size - p2.size) < maxSizeDif &&
+           fabs(p1.angle - p2.angle) < maxAngleDif &&
+           fabs(p1.response - p2.response) < maxResponseDif &&
+           p1.octave == p2.octave &&
+           p1.class_id == p2.class_id;
+}
+
+
+// Orders keypoints by image position: first by pt.y, then by pt.x.
+struct KeyPointLess : std::binary_function<cv::KeyPoint, cv::KeyPoint, bool>
+{
+    bool operator()(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) const
+    {
+        if (kp1.pt.y != kp2.pt.y)
+            return kp1.pt.y < kp2.pt.y;
+        return kp1.pt.x < kp2.pt.x;
+    }
+};
+
+
+// Wraps EXPECT_PRED_FORMAT2 with the assertKeyPointsEquals predicate-formatter.
+// No trailing semicolon, so the macro behaves like a single statement at the call site.
+// NOTE(review): assertKeyPointsEquals is not defined in this file and the macro
+// is not used here; it must come from an included header to be usable. Confirm.
+#define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual)
+
+// Counts how many keypoints of `gold` and `actual` match pairwise after both
+// vectors have been sorted by position.
+// Side effect: both input vectors are sorted in place.
+// Only the first min(gold.size(), actual.size()) pairs are compared, so a
+// shorter `actual` can no longer be indexed out of range.
+static int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
+{
+    std::sort(actual.begin(), actual.end(), KeyPointLess());
+    std::sort(gold.begin(), gold.end(), KeyPointLess());
+
+    const size_t pairCount = gold.size() < actual.size() ? gold.size() : actual.size();
+
+    int validCount = 0;
+
+    for (size_t i = 0; i < pairCount; ++i)
+    {
+        if (keyPointsEquals(gold[i], actual[i]))
+            ++validCount;
+    }
+
+    return validCount;
+}
+
+// Counts the matches whose query keypoint (index queryIdx into keypoints1) and
+// train keypoint (index trainIdx into keypoints2) are equal under keyPointsEquals().
+static int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
+{
+    int validCount = 0;
+
+    for (std::vector<cv::DMatch>::const_iterator it = matches.begin(); it != matches.end(); ++it)
+    {
+        if (keyPointsEquals(keypoints1[it->queryIdx], keypoints2[it->trainIdx]))
+            ++validCount;
+    }
+
+    return validCount;
+}
+
+// Named parameter types for the SURF fixture below; each is constructed from a
+// value of the second macro argument's type in the INSTANTIATE_TEST_CASE_P call.
+// NOTE(review): IMPLEMENT_PARAM_CLASS is defined outside this file; presumably
+// it generates a thin wrapper class per name. Confirm.
+IMPLEMENT_PARAM_CLASS(SURF_HessianThreshold, double)
+IMPLEMENT_PARAM_CLASS(SURF_Octaves, int)
+IMPLEMENT_PARAM_CLASS(SURF_OctaveLayers, int)
+IMPLEMENT_PARAM_CLASS(SURF_Extended, bool)
+IMPLEMENT_PARAM_CLASS(SURF_Upright, bool)
+
+// Fixture shared by the SURF Detector and Descriptor tests. The five test
+// parameters are copied into plain members that the tests assign to both the
+// OpenCL and the CPU SURF objects.
+PARAM_TEST_CASE(SURF, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright)
+{
+    double hessianThreshold;
+    int nOctaves;
+    int nOctaveLayers;
+    bool extended;
+    bool upright;
+
+    // Unpacks the parameters in declaration order.
+    virtual void SetUp()
+    {
+        hessianThreshold = GET_PARAM(0);
+        nOctaves = GET_PARAM(1);
+        nOctaveLayers = GET_PARAM(2);
+        extended = GET_PARAM(3);
+        upright = GET_PARAM(4);
+    }
+};
+// Detects keypoints on fruits.jpg with cv::ocl::SURF_OCL and with the CPU
+// cv::SURF using the same parameters. Requires equal keypoint counts and more
+// than 95% of the position-sorted pairs to be equal under keyPointsEquals().
+TEST_P(SURF, Detector)
+{
+    cv::Mat image = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    cv::ocl::SURF_OCL surf;
+    surf.hessianThreshold = static_cast<float>(hessianThreshold);
+    surf.nOctaves = nOctaves;
+    surf.nOctaveLayers = nOctaveLayers;
+    surf.extended = extended;
+    surf.upright = upright;
+    surf.keypointsRatio = 0.05f;
+
+    std::vector<cv::KeyPoint> keypoints;
+    surf(cv::ocl::oclMat(image), cv::ocl::oclMat(), keypoints);
+
+    cv::SURF surf_gold;
+    surf_gold.hessianThreshold = hessianThreshold;
+    surf_gold.nOctaves = nOctaves;
+    surf_gold.nOctaveLayers = nOctaveLayers;
+    surf_gold.extended = extended;
+    surf_gold.upright = upright;
+
+    std::vector<cv::KeyPoint> keypoints_gold;
+    surf_gold(image, cv::noArray(), keypoints_gold);
+
+    ASSERT_EQ(keypoints_gold.size(), keypoints.size());
+    // The ratio below divides by the reference count; fail explicitly rather
+    // than evaluate 0/0 when no keypoints were found.
+    ASSERT_FALSE(keypoints_gold.empty());
+    int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
+    double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
+
+    EXPECT_GT(matchedRatio, 0.95);
+}
+
+// Detects keypoints with the CPU cv::SURF, then computes descriptors for that
+// same keypoint list with both cv::ocl::SURF_OCL and cv::SURF. The two
+// descriptor sets are brute-force matched with the L2 norm, and more than 35%
+// of the keypoints must be matched to an equal keypoint.
+// NOTE(review): the trailing `true` passed to both operators presumably means
+// "use the provided keypoints"; confirm against the SURF interfaces.
+TEST_P(SURF, Descriptor)
+{
+    cv::Mat image = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    cv::ocl::SURF_OCL surf;
+    surf.hessianThreshold = static_cast<float>(hessianThreshold);
+    surf.nOctaves = nOctaves;
+    surf.nOctaveLayers = nOctaveLayers;
+    surf.extended = extended;
+    surf.upright = upright;
+    surf.keypointsRatio = 0.05f;
+
+    cv::SURF surf_gold;
+    surf_gold.hessianThreshold = hessianThreshold;
+    surf_gold.nOctaves = nOctaves;
+    surf_gold.nOctaveLayers = nOctaveLayers;
+    surf_gold.extended = extended;
+    surf_gold.upright = upright;
+
+    std::vector<cv::KeyPoint> keypoints;
+    surf_gold(image, cv::noArray(), keypoints);
+    // The ratio below divides by the keypoint count; fail explicitly rather
+    // than evaluate 0/0 when no keypoints were found.
+    ASSERT_FALSE(keypoints.empty());
+
+    cv::ocl::oclMat descriptors;
+    surf(cv::ocl::oclMat(image), cv::ocl::oclMat(), keypoints, descriptors, true);
+
+    cv::Mat descriptors_gold;
+    surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
+
+    cv::BFMatcher matcher(cv::NORM_L2);
+    std::vector<cv::DMatch> matches;
+    matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
+
+    int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
+    double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
+
+    EXPECT_GT(matchedRatio, 0.35);
+}
+
+// All combinations of two Hessian thresholds, two octave counts, two layer
+// counts, extended on/off and upright on/off. The 100.0 threshold is disabled.
+INSTANTIATE_TEST_CASE_P(OCL_Features2D, SURF, testing::Combine(
+    testing::Values(/*SURF_HessianThreshold(100.0), */SURF_HessianThreshold(500.0), SURF_HessianThreshold(1000.0)),
+    testing::Values(SURF_Octaves(3), SURF_Octaves(4)),
+    testing::Values(SURF_OctaveLayers(2), SURF_OctaveLayers(3)),
+    testing::Values(SURF_Extended(false), SURF_Extended(true)),
+    testing::Values(SURF_Upright(false), SURF_Upright(true))));
+
+#endif
index 02e1b46..a650f23 100644 (file)
@@ -510,7 +510,7 @@ static bool pyopencv_to(PyObject* obj, double& value, const char* name = "<unkno
     (void)name;
     if(!obj || obj == Py_None)
         return true;
-    if(PyInt_CheckExact(obj))
+    if(!!PyInt_CheckExact(obj))
         value = (double)PyInt_AS_LONG(obj);
     else
         value = PyFloat_AsDouble(obj);
@@ -527,7 +527,7 @@ static bool pyopencv_to(PyObject* obj, float& value, const char* name = "<unknow
     (void)name;
     if(!obj || obj == Py_None)
         return true;
-    if(PyInt_CheckExact(obj))
+    if(!!PyInt_CheckExact(obj))
         value = (float)PyInt_AS_LONG(obj);
     else
         value = (float)PyFloat_AsDouble(obj);
@@ -623,7 +623,7 @@ static inline bool pyopencv_to(PyObject* obj, Point& p, const char* name = "<unk
     (void)name;
     if(!obj || obj == Py_None)
         return true;
-    if(PyComplex_CheckExact(obj))
+    if(!!PyComplex_CheckExact(obj))
     {
         Py_complex c = PyComplex_AsCComplex(obj);
         p.x = saturate_cast<int>(c.real);
@@ -638,7 +638,7 @@ static inline bool pyopencv_to(PyObject* obj, Point2f& p, const char* name = "<u
     (void)name;
     if(!obj || obj == Py_None)
         return true;
-    if(PyComplex_CheckExact(obj))
+    if(!!PyComplex_CheckExact(obj))
     {
         Py_complex c = PyComplex_AsCComplex(obj);
         p.x = saturate_cast<float>(c.real);
@@ -989,7 +989,7 @@ static bool pyopencv_to(PyObject *o, cv::flann::IndexParams& p, const char *name
                 const char* value = PyString_AsString(item);
                 p.setString(k, value);
             }
-            else if( PyBool_Check(item) )
+            else if( !!PyBool_Check(item) )
                 p.setBool(k, item == Py_True);
             else if( PyInt_Check(item) )
             {
index 09d3c47..06c37cb 100644 (file)
@@ -1158,7 +1158,7 @@ static PyObject* cvseq_map_getitem(PyObject *o, PyObject *item)
     if (i < 0)
       i += (int)cvseq_seq_length(o);
     return cvseq_seq_getitem(o, i);
-  } else if (PySlice_Check(item)) {
+  } else if (!!PySlice_Check(item)) {
     Py_ssize_t start, stop, step, slicelength, cur, i;
     PyObject* result;
 
@@ -1975,7 +1975,7 @@ struct dims
 
 static int convert_to_dim(PyObject *item, int i, dims *dst, CvArr *cva, const char *name = "no_name")
 {
-  if (PySlice_Check(item)) {
+  if (!!PySlice_Check(item)) {
     Py_ssize_t start, stop, step, slicelength;
     PySlice_GetIndicesEx((PySliceObject*)item, cvGetDimSize(cva, i), &start, &stop, &step, &slicelength);
     dst->i[i] = (int)start;
index 76c1984..2f94c10 100755 (executable)
@@ -37,10 +37,12 @@ if __name__ == "__main__":
     parser.add_option("", "--module", dest="module", default=None, metavar="NAME", help="module prefix for test names")
     parser.add_option("", "--columns", dest="columns", default=None, metavar="NAMES", help="comma-separated list of column aliases")
     parser.add_option("", "--no-relatives", action="store_false", dest="calc_relatives", default=True, help="do not output relative values")
-    parser.add_option("", "--with-cycles-reduction", action="store_true", dest="calc_cr", default=False, help="alos output cycle reduction percentages")
+    parser.add_option("", "--with-cycles-reduction", action="store_true", dest="calc_cr", default=False, help="output cycle reduction percentages")
+    parser.add_option("", "--with-score", action="store_true", dest="calc_score", default=False, help="output automatic classification of speedups")
     parser.add_option("", "--show-all", action="store_true", dest="showall", default=False, help="also include empty and \"notrun\" lines")
     parser.add_option("", "--match", dest="match", default=None)
     parser.add_option("", "--match-replace", dest="match_replace", default="")
+    parser.add_option("", "--regressions-only", dest="regressionsOnly", default=None, metavar="X-FACTOR", help="show only tests with performance regressions, i.e. rows where a compared value is below X-FACTOR")
     (options, args) = parser.parse_args()
 
     options.generateHtml = detectHtmlOutputType(options.format)
@@ -106,6 +108,7 @@ if __name__ == "__main__":
 
     # build table
     getter = metrix_table[options.metric][1]
+    getter_score = metrix_table["score"][1]
     if options.calc_relatives:
         getter_p = metrix_table[options.metric + "%"][1]
     if options.calc_cr:
@@ -129,6 +132,11 @@ if __name__ == "__main__":
         for set in metric_sets:
             tbl.newColumn(str(i) + "%", getSetName(set, i, options.columns) + "\nvs\n" + getSetName(test_sets[0], 0, options.columns) + "\n(x-factor)", align = "center", cssclass = "col_rel")
             i += 1
+    if options.calc_score:
+        i = 1
+        for set in metric_sets:
+            tbl.newColumn(str(i) + "S", getSetName(set, i, options.columns) + "\nvs\n" + getSetName(test_sets[0], 0, options.columns) + "\n(score)", align = "center", cssclass = "col_name")
+            i += 1
 
     # rows
     prevGroupName = None
@@ -157,6 +165,8 @@ if __name__ == "__main__":
                     tbl.newCell(str(i) + "%", "-")
                 if options.calc_cr and i > 0:
                     tbl.newCell(str(i) + "$", "-")
+                if options.calc_score and i > 0:
+                    tbl.newCell(str(i) + "S", "-")  # score columns are registered as "<i>S", not "<i>$"
             else:
                 status = case.get("status")
                 if status != "run":
@@ -167,6 +177,8 @@ if __name__ == "__main__":
                         tbl.newCell(str(i) + "%", "-", color = "red")
                     if options.calc_cr and i > 0:
                         tbl.newCell(str(i) + "$", "-", color = "red")
+                    if options.calc_score and i > 0:
+                        tbl.newCell(str(i) + "S", "-", color = "red")
                 else:
                     val = getter(case, cases[0], options.units)
                     if options.calc_relatives and i > 0 and val:
@@ -177,6 +189,10 @@ if __name__ == "__main__":
                         valcr = getter_cr(case, cases[0], options.units)
                     else:
                         valcr = None
+                    if options.calc_score and i > 0 and val:
+                        val_score = getter_score(case, cases[0], options.units)
+                    else:
+                        val_score = None
                     if not valp or i == 0:
                         color = None
                     elif valp > 1.05:
@@ -192,9 +208,23 @@ if __name__ == "__main__":
                         tbl.newCell(str(i) + "%", formatValue(valp, "%"), valp, color = color, bold = color)
                     if options.calc_cr and i > 0:
                         tbl.newCell(str(i) + "$", formatValue(valcr, "$"), valcr, color = color, bold = color)
+                    if options.calc_score and i > 0:
+                        tbl.newCell(str(i) + "S", formatValue(val_score, "S"), val_score, color = color, bold = color)
     if not needNewRow:
         tbl.trimLastRow()
 
+    if options.regressionsOnly:
+        for r in reversed(range(len(tbl.rows))):
+            delete = True
+            i = 1
+            for set in metric_sets:
+                val = tbl.rows[r].cells[len(tbl.rows[r].cells)-i].value
+                if val is not None and val < float(options.regressionsOnly):
+                    delete = False
+                i += 1
+            if (delete):
+                tbl.rows.pop(r)
+
     # output table
     if options.generateHtml:
         if options.format == "moinwiki":
@@ -205,3 +235,6 @@ if __name__ == "__main__":
             htmlPrintFooter(sys.stdout)
     else:
         tbl.consolePrintTable(sys.stdout)
+
+    if options.regressionsOnly:
+        sys.exit(len(tbl.rows))
index 598c9e9..9baff0f 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-import sys, re, os.path, cgi, stat
+import sys, re, os.path, cgi, stat, math
 from optparse import OptionParser
 from color import getColorizer
 
@@ -627,6 +627,21 @@ def getCycleReduction(test, test0, metric):
         return None
     return (1.0-float(val)/val0)*100
 
+def getScore(test, test0, metric):
+    """Return a score for how much `test` differs from the baseline `test0`.
+
+    The score is (ln(gmean of test) - ln(gmean of test0)) divided by
+    sqrt(gstddev_test**2 + gstddev_test0**2), so a positive value means
+    `test` has the larger gmean. `metric` is accepted for signature
+    compatibility with the other getters and is not used.
+
+    Returns None when either test is missing, when a gmean or gstddev value
+    is unavailable, when a gmean is not positive, or when the combined
+    deviation is zero.
+    """
+    if not test or not test0:
+        return None
+    raw = (test.get("gmean", None), test0.get("gmean", None),
+           test.get("gstddev", None), test0.get("gstddev", None))
+    # float(None) would raise TypeError, so bail out on missing statistics.
+    if any(v is None for v in raw):
+        return None
+    mean, mean0, dev, dev0 = [float(v) for v in raw]
+    # math.log() is only defined for positive values.
+    if mean <= 0 or mean0 <= 0:
+        return None
+    spread = math.sqrt(dev*dev + dev0*dev0)
+    if spread == 0:
+        return None
+    return (math.log(mean) - math.log(mean0))/spread
 
 metrix_table = \
 {
@@ -655,6 +670,8 @@ metrix_table = \
     "median$": ("Median (cycle reduction)", lambda test,test0,units: getCycleReduction(test, test0, "median")),
     "stddev$": ("Standard deviation (cycle reduction)", lambda test,test0,units: getCycleReduction(test, test0, "stddev")),
     "gstddev$": ("Standard deviation of Ln(time) (cycle reduction)", lambda test,test0,units: getCycleReduction(test, test0, "gstddev")),
+
+    "score": ("SCORE", lambda test,test0,units: getScore(test, test0, "gstddev")),
 }
 
 def formatValue(val, metric, units = None):
@@ -664,6 +681,18 @@ def formatValue(val, metric, units = None):
         return "%.2f" % val
     if metric.endswith("$"):
         return "%.2f%%" % val
+    if metric.endswith("S"):
+        if val > 3.5:
+            return "SLOWER"
+        if val < -3.5:
+            return "FASTER"
+        if val > -1.5 and val < 1.5:
+            return " "
+        if val < 0:
+            return "faster"
+        if val > 0:
+            return "slower"
+        #return "%.4f" % val
     return "%.3f %s" % (val, units)
 
 if __name__ == "__main__":
index f055697..88ade84 100644 (file)
@@ -68,7 +68,6 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
     private float                mBuff[];
     private Mat                  mRgbaInnerWindow;
     private Mat                  mGrayInnerWindow;
-    private Mat                  mBlurWindow;
     private Mat                  mZoomWindow;
     private Mat                  mZoomCorner;
     private Mat                  mSepiaKernel;
@@ -220,9 +219,6 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
         if (mGrayInnerWindow == null && !mGray.empty())
             mGrayInnerWindow = mGray.submat(top, top + height, left, left + width);
 
-        if (mBlurWindow == null)
-            mBlurWindow = mRgba.submat(0, rows, cols / 3, cols * 2 / 3);
-
         if (mZoomCorner == null)
             mZoomCorner = mRgba.submat(0, rows / 2 - rows / 10, 0, cols / 2 - cols / 10);
 
@@ -236,8 +232,6 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
             mZoomWindow.release();
         if (mZoomCorner != null)
             mZoomCorner.release();
-        if (mBlurWindow != null)
-            mBlurWindow.release();
         if (mGrayInnerWindow != null)
             mGrayInnerWindow.release();
         if (mRgbaInnerWindow != null)
@@ -254,7 +248,6 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
         mIntermediateMat = null;
         mRgbaInnerWindow = null;
         mGrayInnerWindow = null;
-        mBlurWindow = null;
         mZoomCorner = null;
         mZoomWindow = null;
     }
@@ -327,7 +320,9 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_SEPIA:
-            Core.transform(mRgba, mRgba, mSepiaKernel);
+            if ((mRgbaInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
+                CreateAuxiliaryMats();
+            Core.transform(mRgbaInnerWindow, mRgbaInnerWindow, mSepiaKernel);
             break;
 
         case ImageManipulationsActivity.VIEW_MODE_ZOOM:
index 79fef4e..a574905 100644 (file)
@@ -4,8 +4,8 @@
     android:layout_height="match_parent" >
 
     <org.opencv.samples.tutorial3.Tutorial3View
-        android:layout_width="fill_parent"
-        android:layout_height="fill_parent"
+        android:layout_width="match_parent"
+        android:layout_height="match_parent"
         android:visibility="gone"
         android:id="@+id/tutorial3_activity_java_surface_view" />
 
diff --git a/samples/c/smiledetect.cpp b/samples/c/smiledetect.cpp
new file mode 100644 (file)
index 0000000..c54c724
--- /dev/null
@@ -0,0 +1,247 @@
+#include "opencv2/objdetect/objdetect.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+
+#include <cctype>
+#include <iostream>
+#include <iterator>
+#include <stdio.h>
+
+using namespace std;
+using namespace cv;
+
+static void help()
+{
+    cout << "\nThis program demonstrates the smile detector.\n"
+            "Usage:\n"
+            "./smiledetect [--cascade=<cascade_path> this is the frontal face classifier]\n"
+            "   [--smile-cascade=[<smile_cascade_path>]]\n"
+            "   [--scale=<image scale greater or equal to 1, try 2.0 for example. The larger the faster the processing>]\n"
+            "   [--try-flip]\n"
+            "   [video_filename|camera_index]\n\n"
+            "Example:\n"
+            "./smiledetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --smile-cascade=\"../../data/haarcascades/haarcascade_smile.xml\" --scale=2.0\n\n"
+            "During execution:\n\tHit any key to quit.\n"
+            "\tUsing OpenCV version " << CV_VERSION << "\n" << endl;
+}
+
+void detectAndDraw( Mat& img, CascadeClassifier& cascade,
+                    CascadeClassifier& nestedCascade,
+                    double scale, bool tryflip );
+
+string cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml";
+string nestedCascadeName = "../../data/haarcascades/haarcascade_smile.xml";
+
+
+int main( int argc, const char** argv ) // parses options, loads both cascades, then runs detection on every captured frame
+{
+    CvCapture* capture = 0;
+    Mat frame, frameCopy;
+    const string scaleOpt = "--scale=";
+    size_t scaleOptLen = scaleOpt.length();
+    const string cascadeOpt = "--cascade=";
+    size_t cascadeOptLen = cascadeOpt.length();
+    const string nestedCascadeOpt = "--smile-cascade";
+    size_t nestedCascadeOptLen = nestedCascadeOpt.length();
+    const string tryFlipOpt = "--try-flip";
+    size_t tryFlipOptLen = tryFlipOpt.length();
+    string inputName;
+    bool tryflip = false;
+
+    help();
+
+    CascadeClassifier cascade, nestedCascade;
+    double scale = 1;
+
+    for( int i = 1; i < argc; i++ ) // options are matched by prefix; the first non-option argument is the input name
+    {
+        cout << "Processing " << i << " " <<  argv[i] << endl;
+        if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 )
+        {
+            cascadeName.assign( argv[i] + cascadeOptLen );
+            cout << "  from which we have cascadeName= " << cascadeName << endl;
+        }
+        else if( nestedCascadeOpt.compare( 0, nestedCascadeOptLen, argv[i], nestedCascadeOptLen ) == 0 )
+        {
+            if( argv[i][nestedCascadeOpt.length()] == '=' ) // a bare "--smile-cascade" keeps the default path
+                nestedCascadeName.assign( argv[i] + nestedCascadeOpt.length() + 1 );
+        }
+        else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 )
+        {
+            if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 )
+                scale = 1;
+            cout << " from which we read scale = " << scale << endl;
+        }
+        else if( tryFlipOpt.compare( 0, tryFlipOptLen, argv[i], tryFlipOptLen ) == 0 )
+        {
+            tryflip = true;
+            cout << " will try to flip image horizontally to detect assymetric objects\n";
+        }
+        else if( argv[i][0] == '-' )
+        {
+            cerr << "WARNING: Unknown option " << argv[i] << endl;
+        }
+        else
+            inputName.assign( argv[i] );
+    }
+
+    if( !cascade.load( cascadeName ) )
+    {
+        cerr << "ERROR: Could not load face cascade" << endl;
+        help();
+        return -1;
+    }
+    if( !nestedCascade.load( nestedCascadeName ) )
+    {
+        cerr << "ERROR: Could not load smile cascade" << endl;
+        help();
+        return -1;
+    }
+
+    if( inputName.empty() || (isdigit((unsigned char)inputName.c_str()[0]) && inputName.c_str()[1] == '\0') ) // cast: isdigit() on a negative char is undefined
+    {
+        int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0' ; // camera index: 0 by default, else the single digit given
+        capture = cvCaptureFromCAM( c );
+        if(!capture) cout << "Capture from CAM " <<  c << " didn't work" << endl;
+    }
+    else if( inputName.size() )
+    {
+        capture = cvCaptureFromAVI( inputName.c_str() );
+        if(!capture) cout << "Capture from AVI didn't work" << endl;
+    }
+
+    cvNamedWindow( "result", 1 );
+
+    if( capture )
+    {
+        cout << "In capture ..." << endl;
+        cout << endl << "NOTE: Smile intensity will only be valid after a first smile has been detected" << endl;
+
+        for(;;)
+        {
+            IplImage* iplImg = cvQueryFrame( capture );
+            frame = iplImg;
+            if( frame.empty() ) // no more frames
+                break;
+            if( iplImg->origin == IPL_ORIGIN_TL )
+                frame.copyTo( frameCopy );
+            else
+                flip( frame, frameCopy, 0 ); // bottom-left origin: flip vertically so the copy is top-down
+
+            detectAndDraw( frameCopy, cascade, nestedCascade, scale, tryflip );
+
+            if( waitKey( 10 ) >= 0 ) // any key quits
+                goto _cleanup_;
+        }
+
+        waitKey(0);
+
+_cleanup_:
+        cvReleaseCapture( &capture );
+    }
+    else
+    {
+        cerr << "ERROR: Could not initiate capture" << endl;
+        help();
+        return -1;
+    }
+
+    cvDestroyWindow("result");
+    return 0;
+}
+
+void detectAndDraw( Mat& img, CascadeClassifier& cascade, // finds faces in img, marks them, and draws a smile-intensity bar
+                    CascadeClassifier& nestedCascade,
+                    double scale, bool tryflip)
+{
+    int i = 0;
+    vector<Rect> faces, faces2;
+    const static Scalar colors[] =  { CV_RGB(0,0,255),
+        CV_RGB(0,128,255),
+        CV_RGB(0,255,255),
+        CV_RGB(0,255,0),
+        CV_RGB(255,128,0),
+        CV_RGB(255,255,0),
+        CV_RGB(255,0,0),
+        CV_RGB(255,0,255)} ;
+    Mat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+
+    cvtColor( img, gray, CV_BGR2GRAY );
+    resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); // detection runs on the image downscaled by 'scale'
+    equalizeHist( smallImg, smallImg );
+
+    cascade.detectMultiScale( smallImg, faces,
+        1.1, 2, 0
+        //|CV_HAAR_FIND_BIGGEST_OBJECT
+        //|CV_HAAR_DO_ROUGH_SEARCH
+        |CV_HAAR_SCALE_IMAGE
+        ,
+        Size(30, 30) );
+    if( tryflip )
+    {
+        flip(smallImg, smallImg, 1);
+        cascade.detectMultiScale( smallImg, faces2,
+                                 1.1, 2, 0
+                                 //|CV_HAAR_FIND_BIGGEST_OBJECT
+                                 //|CV_HAAR_DO_ROUGH_SEARCH
+                                 |CV_HAAR_SCALE_IMAGE
+                                 ,
+                                 Size(30, 30) );
+        for( vector<Rect>::const_iterator r = faces2.begin(); r != faces2.end(); r++ )
+        {
+            faces.push_back(Rect(smallImg.cols - r->x - r->width, r->y, r->width, r->height)); // mirror x back
+        }
+    }
+
+    for( vector<Rect>::iterator r = faces.begin(); r != faces.end(); r++, i++ )
+    {
+        Mat smallImgROI;
+        vector<Rect> nestedObjects;
+        Point center;
+        Scalar color = colors[i%8];
+        int radius;
+
+        double aspect_ratio = (double)r->width/r->height;
+        if( 0.75 < aspect_ratio && aspect_ratio < 1.3 ) // roughly square detections get a circle, others a rectangle
+        {
+            center.x = cvRound((r->x + r->width*0.5)*scale);
+            center.y = cvRound((r->y + r->height*0.5)*scale);
+            radius = cvRound((r->width + r->height)*0.25*scale);
+            circle( img, center, radius, color, 3, 8, 0 );
+        }
+        else
+            rectangle( img, cvPoint(cvRound(r->x*scale), cvRound(r->y*scale)),
+                       cvPoint(cvRound((r->x + r->width-1)*scale), cvRound((r->y + r->height-1)*scale)),
+                       color, 3, 8, 0);
+
+        const int half_height=cvRound((float)r->height/2);
+        r->y=r->y + half_height; // search for the smile in the lower part of the face only
+        r->height -= half_height; // keep the remainder so the ROI never extends below the face rect (odd heights)
+        smallImgROI = smallImg(*r);
+        nestedCascade.detectMultiScale( smallImgROI, nestedObjects,
+            1.1, 0, 0
+            //|CV_HAAR_FIND_BIGGEST_OBJECT
+            //|CV_HAAR_DO_ROUGH_SEARCH
+            //|CV_HAAR_DO_CANNY_PRUNING
+            |CV_HAAR_SCALE_IMAGE
+            ,
+            Size(30, 30) );
+
+        // The number of detected neighbors depends on image size (and also illumination, etc.). The
+        // following steps use a floating maximum and the first observed count as the minimum. The intensity
+        // estimated this way is accurate only after a first smile has been displayed by the user.
+        const int smile_neighbors = (int)nestedObjects.size();
+        static int max_neighbors=-1;
+        static int min_neighbors=-1;
+        if (min_neighbors == -1) min_neighbors = smile_neighbors;
+        max_neighbors = MAX(max_neighbors, smile_neighbors);
+
+        // Draw rectangle on the left side of the image reflecting smile intensity
+        float intensityZeroOne = ((float)smile_neighbors - min_neighbors) / (max_neighbors - min_neighbors + 1);
+        int rect_height = cvRound((float)img.rows * intensityZeroOne);
+        CvScalar col = CV_RGB((float)255 * intensityZeroOne, 0, 0);
+        rectangle(img, cvPoint(0, img.rows), cvPoint(img.cols/10, img.rows - rect_height), col, -1);
+    }
+
+    cv::imshow( "result", img );
+}
index 924af1f..d817aa6 100644 (file)
@@ -20,7 +20,7 @@
 
     <target name="compile">
         <mkdir dir="${classes.dir}"/>
-        <javac srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath"/>
+        <javac includeantruntime="false" srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath"/>
     </target>
 
     <target name="jar" depends="compile">
index 990536f..a0375c5 100644 (file)
@@ -1,12 +1,14 @@
+import org.opencv.core.Core;
 import org.opencv.core.Mat;
 import org.opencv.core.CvType;
 import org.opencv.core.Scalar;
 
 class SimpleSample {
 
-  static{ System.loadLibrary("opencv_java244"); }
+  static{ System.loadLibrary(Core.NATIVE_LIBRARY_NAME); }
 
   public static void main(String[] args) {
+    System.out.println("Welcome to OpenCV " + Core.VERSION);
     Mat m = new Mat(5, 10, CvType.CV_8UC1, new Scalar(0));
     System.out.println("OpenCV Mat: " + m);
     Mat mr1 = m.row(1);
index 0e9bb58..44f2bb0 100644 (file)
@@ -1,10 +1,12 @@
+import org.opencv.core.Core;
 import org.opencv.core.CvType;
 import org.opencv.core.Mat;
 
 public class Main {
 
     public static void main(String[] args) {
-        System.loadLibrary("opencv_java244");
+        System.out.println("Welcome to OpenCV " + Core.VERSION);
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
         Mat m  = Mat.eye(3, 3, CvType.CV_8UC1);
         System.out.println("m = " + m.dump());
     }
index 4a68d14..6f07aa1 100644 (file)
@@ -8,11 +8,14 @@
  * You're invited to submit your own examples, in any JVM language of
  * your choosing so long as you can get them to build.
  */
+
+import org.opencv.core.Core
+
 object Main extends App {
   // We must load the native library before using any OpenCV functions.
   // You must load this library _exactly once_ per Java invocation.
   // If you load it more than once, you will get a java.lang.UnsatisfiedLinkError.
-  System.loadLibrary("opencv_java")
+  System.loadLibrary(Core.NATIVE_LIBRARY_NAME)
 
   ScalaCorrespondenceMatchingDemo.run()
   ScalaDetectFaceDemo.run()
diff --git a/samples/ocl/aloe-L.png b/samples/ocl/aloe-L.png
new file mode 100644 (file)
index 0000000..4758766
Binary files /dev/null and b/samples/ocl/aloe-L.png differ
diff --git a/samples/ocl/aloe-R.png b/samples/ocl/aloe-R.png
new file mode 100644 (file)
index 0000000..5d11c57
Binary files /dev/null and b/samples/ocl/aloe-R.png differ
diff --git a/samples/ocl/aloe-disp.png b/samples/ocl/aloe-disp.png
new file mode 100644 (file)
index 0000000..dd4a499
Binary files /dev/null and b/samples/ocl/aloe-disp.png differ
diff --git a/samples/ocl/performance.cpp b/samples/ocl/performance.cpp
new file mode 100644 (file)
index 0000000..b2a6d85
--- /dev/null
@@ -0,0 +1,4396 @@
+#include <iomanip>
+#include <stdexcept>
+#include <string>
+#include <iostream>
+#include <cstdio>
+#include <vector>
+#include <numeric>
+#include "opencv2/core/core.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/video/video.hpp"
+#include "opencv2/nonfree/nonfree.hpp"
+#include "opencv2/objdetect/objdetect.hpp"
+#include "opencv2/features2d/features2d.hpp"
+#define USE_OPENCL
+#ifdef USE_OPENCL
+#include "opencv2/ocl/ocl.hpp"
+#endif
+
+#define TAB "    "
+
+using namespace std;
+using namespace cv;
+
+// This program tests most of the functions in the ocl module and generates a data matrix of x-factors in .csv files.
+// All images needed in this test are in the samples/gpu folder.
+// For the haar template, haarcascade_frontalface_alt.xml should be in the working directory.
+
+void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high);
+string abspath(const string &relpath);
+int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *);
+typedef struct
+{
+    short x;
+    short y;
+} COOR;
+COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep,
+                  cv::Size size, int sp, int sr, int maxIter, float eps, int *tab);
+void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
+                    int sp, int sr, cv::TermCriteria crit);
+
+class Runnable
+{
+public:
+    explicit Runnable(const std::string &runname): name_(runname) {}
+    virtual ~Runnable() {}
+
+    const std::string &name() const
+    {
+        return name_;
+    }
+
+    virtual void run() = 0;
+
+private:
+    std::string name_;
+};
+
+class TestSystem
+{
+public:
+    static TestSystem &instance()
+    {
+        static TestSystem me;
+        return me;
+    }
+
+    void setWorkingDir(const std::string &val)
+    {
+        working_dir_ = val;
+    }
+    const std::string &workingDir() const
+    {
+        return working_dir_;
+    }
+
+    void setTestFilter(const std::string &val)
+    {
+        test_filter_ = val;
+    }
+    const std::string &testFilter() const
+    {
+        return test_filter_;
+    }
+
+    void setNumIters(int num_iters)
+    {
+        num_iters_ = num_iters;
+    }
+    void setGPUWarmupIters(int num_iters)
+    {
+        gpu_warmup_iters_ = num_iters;
+    }
+    void setCPUIters(int num_iters)
+    {
+        cpu_num_iters_ = num_iters;
+    }
+
+    void setTopThreshold(double top)
+    {
+        top_ = top;
+    }
+    void setBottomThreshold(double bottom)
+    {
+        bottom_ = bottom;
+    }
+
+    void addInit(Runnable *init)
+    {
+        inits_.push_back(init);
+    }
+    void addTest(Runnable *test)
+    {
+        tests_.push_back(test);
+    }
+    void run();
+
+    // It's public because OpenCV callback uses it
+    void printError(const std::string &msg);
+
+    std::stringstream &startNewSubtest()
+    {
+        finishCurrentSubtest();
+        return cur_subtest_description_;
+    }
+
+    bool stop() const
+    {
+        return cur_iter_idx_ >= num_iters_;
+    }
+
+    bool cpu_stop() const
+    {
+        return cur_iter_idx_ >= cpu_num_iters_;
+    }
+
+    bool warmupStop()
+    {
+        return cur_warmup_idx_++ >= gpu_warmup_iters_;
+    }
+
+    void warmupComplete()
+    {
+        cur_warmup_idx_ = 0;
+    }
+
+    void cpuOn()
+    {
+        cpu_started_ = cv::getTickCount();
+    }
+    void cpuOff()
+    {
+        int64 delta = cv::getTickCount() - cpu_started_;
+        cpu_times_.push_back(delta);
+        ++cur_iter_idx_;
+    }
+    void cpuComplete()
+    {
+        cpu_elapsed_ += meanTime(cpu_times_);
+        cur_subtest_is_empty_ = false;
+        cur_iter_idx_ = 0;
+    }
+
+    void gpuOn()
+    {
+        gpu_started_ = cv::getTickCount();
+    }
+    void gpuOff()
+    {
+        int64 delta = cv::getTickCount() - gpu_started_;
+        gpu_times_.push_back(delta);
+        ++cur_iter_idx_;
+    }
+    void gpuComplete()
+    {
+        gpu_elapsed_ += meanTime(gpu_times_);
+        cur_subtest_is_empty_ = false;
+        cur_iter_idx_ = 0;
+    }
+
+    void gpufullOn()
+    {
+        gpu_full_started_ = cv::getTickCount();
+    }
+    void gpufullOff()
+    {
+        int64 delta = cv::getTickCount() - gpu_full_started_;
+        gpu_full_times_.push_back(delta);
+        ++cur_iter_idx_;
+    }
+    void gpufullComplete()
+    {
+        gpu_full_elapsed_ += meanTime(gpu_full_times_);
+        cur_subtest_is_empty_ = false;
+        cur_iter_idx_ = 0;
+    }
+
+    bool isListMode() const
+    {
+        return is_list_mode_;
+    }
+    void setListMode(bool value)
+    {
+        is_list_mode_ = value;
+    }
+
+    void setRecordName(const std::string &name)
+    {
+        recordname_ = name;
+    }
+
+    void setCurrentTest(const std::string &name)
+    {
+        itname_ = name;
+        itname_changed_ = true;
+    }
+
+private:
+    TestSystem(): // private: the single instance is the function-local static in instance()
+        cur_subtest_is_empty_(true), cpu_elapsed_(0),
+        gpu_elapsed_(0), gpu_full_elapsed_(0), speedup_total_(0.0), speedup_full_total_(0.0), // both totals are accumulated with +=
+        num_subtests_called_(0),
+        speedup_faster_count_(0), speedup_slower_count_(0), speedup_equal_count_(0),
+        speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false),
+        num_iters_(10), cpu_num_iters_(2),
+        gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
+        record_(0), recordname_("performance"), itname_changed_(true)
+    {
+        cpu_times_.reserve(num_iters_); // room for one timing per iteration
+        gpu_times_.reserve(num_iters_);
+        gpu_full_times_.reserve(num_iters_);
+    }
+
+    void finishCurrentSubtest();
+    void resetCurrentSubtest()
+    {
+        cpu_elapsed_ = 0;
+        gpu_elapsed_ = 0;
+        gpu_full_elapsed_ = 0;
+        cur_subtest_description_.str("");
+        cur_subtest_is_empty_ = true;
+        cur_iter_idx_ = 0;
+        cpu_times_.clear();
+        gpu_times_.clear();
+        gpu_full_times_.clear();
+    }
+
+    double meanTime(const std::vector<int64> &samples);
+
+    void printHeading();
+    void printSummary();
+    void printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup);
+
+    void writeHeading();
+    void writeSummary();
+    void writeMetrics(double cpu_time, double gpu_time, double gpu_full_time,
+                      double speedup, double fullspeedup,
+                      double gpu_min, double gpu_max, double std_dev);
+
+    std::string working_dir_;
+    std::string test_filter_;
+
+    std::vector<Runnable *> inits_;
+    std::vector<Runnable *> tests_;
+
+    std::stringstream cur_subtest_description_;
+    bool cur_subtest_is_empty_;
+
+    int64 cpu_started_;
+    int64 gpu_started_;
+    int64 gpu_full_started_;
+    double cpu_elapsed_;
+    double gpu_elapsed_;
+    double gpu_full_elapsed_;
+
+    double speedup_total_;
+    double speedup_full_total_;
+    int num_subtests_called_;
+
+    int speedup_faster_count_;
+    int speedup_slower_count_;
+    int speedup_equal_count_;
+
+    int speedup_full_faster_count_;
+    int speedup_full_slower_count_;
+    int speedup_full_equal_count_;
+
+    bool is_list_mode_;
+
+    double top_;
+    double bottom_;
+
+    int num_iters_;
+    int cpu_num_iters_;                //there's no need to set cpu running same times with gpu
+    int gpu_warmup_iters_;     //gpu warm up times, default is 1
+    int cur_iter_idx_;
+    int cur_warmup_idx_;       //current gpu warm up times
+    std::vector<int64> cpu_times_;
+    std::vector<int64> gpu_times_;
+    std::vector<int64> gpu_full_times_;
+
+    FILE *record_;
+    std::string recordname_;
+    std::string itname_;
+    bool itname_changed_;
+};
+
+
+#define GLOBAL_INIT(name) \
+    struct name##_init: Runnable { \
+        name##_init(): Runnable(#name) { \
+            TestSystem::instance().addInit(this); \
+        } \
+        void run(); \
+    } name##_init_instance; \
+    void name##_init::run()
+
+
+#define TEST(name) \
+    struct name##_test: Runnable { \
+        name##_test(): Runnable(#name) { \
+            TestSystem::instance().addTest(this); \
+        } \
+        void run(); \
+    } name##_test_instance; \
+    void name##_test::run()
+
+#define SUBTEST TestSystem::instance().startNewSubtest()
+
+#define CPU_ON \
+    while (!TestSystem::instance().cpu_stop()) { \
+        TestSystem::instance().cpuOn()
+#define CPU_OFF \
+        TestSystem::instance().cpuOff(); \
+    } TestSystem::instance().cpuComplete()
+
+#define GPU_ON \
+    while (!TestSystem::instance().stop()) { \
+        TestSystem::instance().gpuOn()
+#define GPU_OFF \
+        TestSystem::instance().gpuOff(); \
+    } TestSystem::instance().gpuComplete()
+
+#define GPU_FULL_ON \
+    while (!TestSystem::instance().stop()) { \
+        TestSystem::instance().gpufullOn()
+#define GPU_FULL_OFF \
+        TestSystem::instance().gpufullOff(); \
+    } TestSystem::instance().gpufullComplete()
+
+#define WARMUP_ON \
+    while (!TestSystem::instance().warmupStop()) {
+#define WARMUP_OFF \
+    } TestSystem::instance().warmupComplete()
+
+void TestSystem::run()
+{
+    if (is_list_mode_)
+    {
+        for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
+        {
+            cout << (*it)->name() << endl;
+        }
+
+        return;
+    }
+
+    // Run test initializers
+    for (vector<Runnable *>::iterator it = inits_.begin(); it != inits_.end(); ++it)
+    {
+        if ((*it)->name().find(test_filter_, 0) != string::npos)
+        {
+            (*it)->run();
+        }
+    }
+
+    printHeading();
+    writeHeading();
+
+    // Run tests
+    for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
+    {
+        try
+        {
+            if ((*it)->name().find(test_filter_, 0) != string::npos)
+            {
+                cout << endl << (*it)->name() << ":\n";
+
+                setCurrentTest((*it)->name());
+                //fprintf(record_,"%s\n",(*it)->name().c_str());
+
+                (*it)->run();
+                finishCurrentSubtest();
+            }
+        }
+        catch (const Exception &)
+        {
+            // Message is printed via callback
+            resetCurrentSubtest();
+        }
+        catch (const runtime_error &e)
+        {
+            printError(e.what());
+            resetCurrentSubtest();
+        }
+    }
+
+#ifdef USE_OPENCL
+    printSummary();
+    writeSummary();
+#endif
+}
+
+
+void TestSystem::finishCurrentSubtest() // reports the timings gathered for the current subtest, then resets it
+{
+    if (cur_subtest_is_empty_)
+        // There is no need to print subtest statistics
+    {
+        return;
+    }
+
+    double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; // ticks -> milliseconds
+    double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
+    double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0;
+
+    double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_); // max() avoids dividing by zero
+    speedup_total_ += speedup;
+
+    double fullspeedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_);
+    speedup_full_total_ += fullspeedup;
+
+    if (speedup > top_) // classify against the top_/bottom_ thresholds
+    {
+        speedup_faster_count_++;
+    }
+    else if (speedup < bottom_)
+    {
+        speedup_slower_count_++;
+    }
+    else
+    {
+        speedup_equal_count_++;
+    }
+
+    if (fullspeedup > top_)
+    {
+        speedup_full_faster_count_++;
+    }
+    else if (fullspeedup < bottom_)
+    {
+        speedup_full_slower_count_++;
+    }
+    else
+    {
+        speedup_full_equal_count_++;
+    }
+
+    // compute min, max and standard deviation of the GPU timings (all 0 when none were recorded)
+    std::sort(gpu_times_.begin(), gpu_times_.end());
+    double gpu_min = gpu_times_.empty() ? 0.0 : gpu_times_.front() / getTickFrequency() * 1000.0; // front()/back() on an empty vector is undefined
+    double gpu_max = gpu_times_.empty() ? 0.0 : gpu_times_.back() / getTickFrequency() * 1000.0;
+    double deviation = 0;
+
+    if (gpu_times_.size() > 1)
+    {
+        double sum = 0;
+
+        for (size_t i = 0; i < gpu_times_.size(); i++)
+        {
+            int64 diff = gpu_times_[i] - static_cast<int64>(gpu_elapsed_); // gpu_elapsed_ is used as the mean here
+            double diff_time = diff * 1000 / getTickFrequency();
+            sum += diff_time * diff_time;
+        }
+
+        deviation = std::sqrt(sum / gpu_times_.size());
+    }
+
+    printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
+    writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
+
+    num_subtests_called_++;
+    resetCurrentSubtest();
+}
+
+
+double TestSystem::meanTime(const vector<int64> &samples) // arithmetic mean of the samples; 0 for an empty set
+{
+    double sum = accumulate(samples.begin(), samples.end(), 0.);
+    return samples.empty() ? 0.0 : sum / samples.size(); // avoid 0/0 (NaN) when nothing was timed
+}
+
+
+void TestSystem::printHeading()
+{
+    cout << endl;
+    cout << setiosflags(ios_base::left);
+#ifdef USE_OPENCL
+    cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
+         << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP"
+         << "DESCRIPTION\n";
+#else
+    cout << TAB << setw(10) << "CPU, ms\n";
+#endif
+    cout << resetiosflags(ios_base::left);
+}
+
+void TestSystem::writeHeading() // opens the CSV record file on first use and writes the column header row
+{
+    if (!record_)
+    {
+#ifdef USE_OPENCL
+        recordname_ += "_OCL.csv";
+#else
+        recordname_ += "_CPU.csv";
+#endif
+        record_ = fopen(recordname_.c_str(), "w");
+    }
+    if (!record_) { printError("cannot open record file " + recordname_); return; } // fopen failed: skip CSV output
+#ifdef USE_OPENCL
+    fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
+#else
+    fprintf(record_, "NAME,DESCRIPTION,CPU (ms)\n");
+#endif
+    fflush(record_);
+}
+
+void TestSystem::printSummary()
+{
+    cout << setiosflags(ios_base::fixed);
+    cout << "\naverage GPU speedup: x"
+         << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
+         << endl;
+    cout << "\nGPU exceeded: "
+         << setprecision(3) << speedup_faster_count_
+         << "\nGPU passed: "
+         << setprecision(3) << speedup_equal_count_
+         << "\nGPU failed: "
+         << setprecision(3) << speedup_slower_count_
+         << endl;
+    cout << "\nGPU exceeded rate: "
+         << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100
+         << "%"
+         << "\nGPU passed rate: "
+         << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100
+         << "%"
+         << "\nGPU failed rate: "
+         << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100
+         << "%"
+         << endl;
+    cout << "\naverage GPUTOTAL speedup: x"
+         << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_)
+         << endl;
+    cout << "\nGPUTOTAL exceeded: "
+         << setprecision(3) << speedup_full_faster_count_
+         << "\nGPUTOTAL passed: "
+         << setprecision(3) << speedup_full_equal_count_
+         << "\nGPUTOTAL failed: "
+         << setprecision(3) << speedup_full_slower_count_
+         << endl;
+    cout << "\nGPUTOTAL exceeded rate: "
+         << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100
+         << "%"
+         << "\nGPUTOTAL passed rate: "
+         << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100
+         << "%"
+         << "\nGPUTOTAL failed rate: "
+         << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+         << "%"
+         << endl;
+    cout << resetiosflags(ios_base::fixed);
+}
+
+
+void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
+{
+    cout << TAB << setiosflags(ios_base::left);
+    stringstream stream;
+
+    stream << cpu_time;
+    cout << setw(10) << stream.str();
+#ifdef USE_OPENCL
+    stream.str("");
+    stream << gpu_time;
+    cout << setw(10) << stream.str();
+
+    stream.str("");
+    stream << "x" << setprecision(3) << speedup;
+    cout << setw(14) << stream.str();
+
+    stream.str("");
+    stream << gpu_full_time;
+    cout << setw(14) << stream.str();
+
+    stream.str("");
+    stream << "x" << setprecision(3) << fullspeedup;
+    cout << setw(14) << stream.str();
+#endif
+    cout << cur_subtest_description_.str();
+    cout << resetiosflags(ios_base::left) << endl;
+}
+
+void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev) // appends one CSV row for the current subtest
+{
+    if (!record_)
+    {
+        recordname_ += ".csv";
+        record_ = fopen(recordname_.c_str(), "w");
+    }
+    if (!record_) { printError("cannot open record file " + recordname_); return; } // fopen failed: skip CSV output
+#ifdef USE_OPENCL
+    fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
+            cur_subtest_description_.str().c_str(),
+            cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
+            gpu_min, gpu_max, std_dev);
+#else
+    fprintf(record_, "%s,%s,%.3f\n",
+            itname_changed_ ? itname_.c_str() : "", cur_subtest_description_.str().c_str(), cpu_time);
+#endif
+
+    if (itname_changed_) // the test name is written only on the first row of each test
+    {
+        itname_changed_ = false;
+    }
+
+    fflush(record_);
+}
+
+void TestSystem::writeSummary() // appends the average speedups and the exceeded/passed/failed counts to the CSV record
+{
+    if (!record_)
+    {
+        recordname_ += ".csv";
+        record_ = fopen(recordname_.c_str(), "w");
+    }
+    if (!record_) { printError("cannot open record file " + recordname_); return; } // fopen failed: skip CSV output
+    fprintf(record_, "\nAverage GPU speedup: %.3f\n"
+            "exceeded: %d (%.3f%%)\n"
+            "passed: %d (%.3f%%)\n"
+            "failed: %d (%.3f%%)\n"
+            "\nAverage GPUTOTAL speedup: %.3f\n"
+            "exceeded: %d (%.3f%%)\n"
+            "passed: %d (%.3f%%)\n"
+            "failed: %d (%.3f%%)\n",
+            speedup_total_ / std::max(1, num_subtests_called_), // max() guards against zero subtests
+            speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_full_total_ / std::max(1, num_subtests_called_),
+            speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
+            speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+           );
+    fflush(record_);
+}
+
+void TestSystem::printError(const std::string &msg)
+{
+    cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
+}
+
+void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
+{
+    mat.create(rows, cols, type);
+    RNG rng(0);
+    rng.fill(mat, RNG::UNIFORM, low, high);
+}
+
+
+string abspath(const string &relpath)
+{
+    return TestSystem::instance().workingDir() + relpath;
+}
+
+
+int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/,
+                             const char *err_msg, const char * /*file_name*/,
+                             int /*line*/, void * /*userdata*/)
+{
+    TestSystem::instance().printError(err_msg);
+    return 0;
+}
+
+/////////// matchTemplate ////////////////////////
+//void InitMatchTemplate()
+//{
+//     Mat src; gen(src, 500, 500, CV_32F, 0, 1);
+//     Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
+//#ifdef USE_OPENCL
+//     ocl::oclMat d_src(src), d_templ(templ), d_dst;
+//     ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
+//#endif
+//}
+TEST(matchTemplate)
+{
+    //InitMatchTemplate();
+
+    Mat src, templ, dst;
+    int templ_size = 5;
+
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        int all_type[] = {CV_32FC1, CV_32FC4};
+        std::string type_name[] = {"CV_32FC1", "CV_32FC4"};
+
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            for(templ_size = 5; templ_size <= 5; templ_size *= 5)
+            {
+                gen(src, size, size, all_type[j], 0, 1);
+
+                SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR";
+
+                gen(templ, templ_size, templ_size, all_type[j], 0, 1);
+
+                matchTemplate(src, templ, dst, CV_TM_CCORR);
+
+                CPU_ON;
+                matchTemplate(src, templ, dst, CV_TM_CCORR);
+                CPU_OFF;
+
+#ifdef USE_OPENCL
+                ocl::oclMat d_src(src), d_templ, d_dst;
+
+                d_templ.upload(templ);
+
+                WARMUP_ON;
+                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
+                WARMUP_OFF;
+
+                GPU_ON;
+                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
+                GPU_OFF;
+
+                GPU_FULL_ON;
+                d_src.upload(src);
+                d_templ.upload(templ);
+                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
+                d_dst.download(dst);
+                GPU_FULL_OFF;
+#endif
+            }
+        }
+
+        int all_type_8U[] = {CV_8UC1};
+        std::string type_name_8U[] = {"CV_8UC1"};
+
+        for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++)
+        {
+            for(templ_size = 5; templ_size <= 5; templ_size *= 5)
+            {
+                SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR_NORMED";
+
+                gen(src, size, size, all_type_8U[j], 0, 255);
+
+                gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255);
+
+                matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);
+
+                CPU_ON;
+                matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);
+                CPU_OFF;
+
+#ifdef USE_OPENCL
+                ocl::oclMat d_src(src);
+                ocl::oclMat d_templ(templ), d_dst;
+
+                WARMUP_ON;
+                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
+                WARMUP_OFF;
+
+                GPU_ON;
+                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
+                GPU_OFF;
+
+                GPU_FULL_ON;
+                d_src.upload(src);
+                d_templ.upload(templ);
+                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
+                d_dst.download(dst);
+                GPU_FULL_OFF;
+#endif
+            }
+        }
+    }
+}
+
+///////////// PyrLKOpticalFlow ////////////////////////
+TEST(PyrLKOpticalFlow)
+{
+    std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"};
+    std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"};
+
+    for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++)
+    {
+        Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE);
+
+        if (frame0.empty())
+        {
+            std::string errstr = "can't open " + images1[i];
+            throw runtime_error(errstr);
+        }
+
+        Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE);
+
+        if (frame1.empty())
+        {
+            std::string errstr = "can't open " + images2[i];
+            throw runtime_error(errstr);
+        }
+
+        Mat gray_frame;
+
+        if (i == 0)
+        {
+            cvtColor(frame0, gray_frame, COLOR_BGR2GRAY);
+        }
+
+        for (int points = 1000; points <= 4000; points *= 2)
+        {
+            if (i == 0)
+                SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points";
+            else
+                SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points";
+            Mat nextPts_cpu;
+            Mat status_cpu;
+
+            vector<Point2f> pts;
+            goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0);
+
+            vector<Point2f> nextPts;
+            vector<unsigned char> status;
+
+            vector<float> err;
+
+            calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
+
+            CPU_ON;
+            calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            ocl::PyrLKOpticalFlow d_pyrLK;
+
+            ocl::oclMat d_frame0(frame0);
+            ocl::oclMat d_frame1(frame1);
+
+            ocl::oclMat d_pts;
+            Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]);
+            d_pts.upload(pts_mat);
+
+            ocl::oclMat d_nextPts;
+            ocl::oclMat d_status;
+            ocl::oclMat d_err;
+
+            WARMUP_ON;
+            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
+            WARMUP_OFF;
+
+            GPU_ON;
+            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_frame0.upload(frame0);
+            d_frame1.upload(frame1);
+            d_pts.upload(pts_mat);
+            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
+
+            if (!d_nextPts.empty())
+            {
+                d_nextPts.download(nextPts_cpu);
+            }
+
+            if (!d_status.empty())
+            {
+                d_status.download(status_cpu);
+            }
+
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+
+///////////// pyrDown //////////////////////
+TEST(pyrDown)
+{
+    Mat src, dst;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            pyrDown(src, dst);
+
+            CPU_ON;
+            pyrDown(src, dst);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            ocl::oclMat d_src(src);
+            ocl::oclMat d_dst;
+
+            WARMUP_ON;
+            ocl::pyrDown(d_src, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::pyrDown(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::pyrDown(d_src, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+    }
+}
+
+///////////// pyrUp ////////////////////////
+TEST(pyrUp)
+{
+    Mat src, dst;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 500; size <= 2000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            pyrUp(src, dst);
+
+            CPU_ON;
+            pyrUp(src, dst);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            ocl::oclMat d_src(src);
+            ocl::oclMat d_dst;
+
+            WARMUP_ON;
+            ocl::pyrUp(d_src, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::pyrUp(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::pyrUp(d_src, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+    }
+}
+
+///////////// Canny ////////////////////////
+TEST(Canny)
+{
+    Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);
+
+    if (img.empty())
+    {
+        throw runtime_error("can't open aloeL.jpg");
+    }
+
+    SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";
+
+    Mat edges(img.size(), CV_8UC1);
+
+    CPU_ON;
+    Canny(img, edges, 50.0, 100.0);
+    CPU_OFF;
+
+#ifdef USE_OPENCL
+    ocl::oclMat d_img(img);
+    ocl::oclMat d_edges;
+    ocl::CannyBuf d_buf;
+
+    WARMUP_ON;
+    ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
+    WARMUP_OFF;
+
+    GPU_ON;
+    ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
+    GPU_OFF;
+
+    GPU_FULL_ON;
+    d_img.upload(img);
+    ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
+    d_edges.download(edges);
+    GPU_FULL_OFF;
+#endif
+}
+
+///////////// Haar ////////////////////////
+#ifdef USE_OPENCL
+namespace cv
+{
+namespace ocl
+{
+
+struct getRect
+{
+    Rect operator()(const CvAvgComp &e) const
+    {
+        return e.rect;
+    }
+};
+
+class CascadeClassifier_GPU : public OclCascadeClassifier
+{
+public:
+    void detectMultiScale(oclMat &image,
+                          CV_OUT std::vector<cv::Rect>& faces,
+                          double scaleFactor = 1.1,
+                          int minNeighbors = 3, int flags = 0,
+                          Size minSize = Size(),
+                          Size maxSize = Size())
+    {
+        (void)maxSize;
+        MemStorage storage(cvCreateMemStorage(0));
+        //CvMat img=image;
+        CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize);
+        vector<CvAvgComp> vecAvgComp;
+        Seq<CvAvgComp>(objs).copyTo(vecAvgComp);
+        faces.resize(vecAvgComp.size());
+        std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
+    }
+
+};
+
+}
+}
+#endif
+TEST(Haar)
+{
+    Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);
+
+    if (img.empty())
+    {
+        throw runtime_error("can't open basketball1.png");
+    }
+
+    CascadeClassifier faceCascadeCPU;
+
+    if (!faceCascadeCPU.load(abspath("haarcascade_frontalface_alt.xml")))
+    {
+        throw runtime_error("can't load haarcascade_frontalface_alt.xml");
+    }
+
+    vector<Rect> faces;
+
+    SUBTEST << img.cols << "x" << img.rows << "; scale image";
+    CPU_ON;
+    faceCascadeCPU.detectMultiScale(img, faces,
+                                    1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
+    CPU_OFF;
+
+#ifdef USE_OPENCL
+    ocl::CascadeClassifier_GPU faceCascade;
+
+    if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml")))
+    {
+        throw runtime_error("can't load haarcascade_frontalface_alt.xml");
+    }
+
+    ocl::oclMat d_img(img);
+
+    faces.clear();
+
+    WARMUP_ON;
+    faceCascade.detectMultiScale(d_img, faces,
+                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
+    WARMUP_OFF;
+
+    faces.clear();
+
+    GPU_ON;
+    faceCascade.detectMultiScale(d_img, faces,
+                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
+    GPU_OFF;
+
+    GPU_FULL_ON;
+    d_img.upload(img);
+    faceCascade.detectMultiScale(d_img, faces,
+                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
+    GPU_FULL_OFF;
+#endif
+}
+
+///////////// blend ////////////////////////
+template <typename T>
+void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold)
+{
+    result_gold.create(img1.size(), img1.type());
+
+    int cn = img1.channels();
+
+    for (int y = 0; y < img1.rows; ++y)
+    {
+        const float *weights1_row = weights1.ptr<float>(y);
+        const float *weights2_row = weights2.ptr<float>(y);
+        const T *img1_row = img1.ptr<T>(y);
+        const T *img2_row = img2.ptr<T>(y);
+        T *result_gold_row = result_gold.ptr<T>(y);
+
+        for (int x = 0; x < img1.cols * cn; ++x)
+        {
+            float w1 = weights1_row[x / cn];
+            float w2 = weights2_row[x / cn];
+            result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f));
+        }
+    }
+}
+TEST(blend)
+{
+    Mat src1, src2, weights1, weights2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1";
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(weights1, size, size, CV_32FC1, 0, 1);
+            gen(weights2, size, size, CV_32FC1, 0, 1);
+
+            blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
+
+            CPU_ON;
+            blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            d_weights1.upload(weights1);
+            d_weights2.upload(weights2);
+
+            WARMUP_ON;
+            ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            d_weights1.upload(weights1);
+            d_weights2.upload(weights2);
+            ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+    }
+}
+///////////// columnSum////////////////////////
+TEST(columnSum)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        SUBTEST << size << 'x' << size << "; CV_32FC1";
+
+        gen(src, size, size, CV_32FC1, 0, 256);
+
+        CPU_ON;
+        dst.create(src.size(), src.type());
+
+        for (int i = 1; i < src.rows; ++i)
+        {
+            for (int j = 0; j < src.cols; ++j)
+            {
+                dst.at<float>(i, j) = src.at<float>(i, j) += src.at<float>(i - 1, j);
+            }
+        }
+
+        CPU_OFF;
+
+#ifdef USE_OPENCL
+        d_src.upload(src);
+        WARMUP_ON;
+        ocl::columnSum(d_src, d_dst);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::columnSum(d_src, d_dst);
+        GPU_OFF;
+
+        GPU_FULL_ON;
+        d_src.upload(src);
+        ocl::columnSum(d_src, d_dst);
+        d_dst.download(dst);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+
+///////////// HOG////////////////////////
+TEST(HOG)
+{
+    Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE);
+
+    if (src.empty())
+    {
+        throw runtime_error("can't open road.png");
+    }
+
+
+    cv::HOGDescriptor hog;
+    hog.setSVMDetector(hog.getDefaultPeopleDetector());
+    std::vector<cv::Rect> found_locations;
+
+    SUBTEST << 768 << 'x' << 576 << "; road.png";
+
+    hog.detectMultiScale(src, found_locations);
+
+    CPU_ON;
+    hog.detectMultiScale(src, found_locations);
+    CPU_OFF;
+
+#ifdef USE_OPENCL
+    cv::ocl::HOGDescriptor ocl_hog;
+    ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
+    ocl::oclMat d_src;
+    d_src.upload(src);
+
+    WARMUP_ON;
+    ocl_hog.detectMultiScale(d_src, found_locations);
+    WARMUP_OFF;
+
+    GPU_ON;
+    ocl_hog.detectMultiScale(d_src, found_locations);
+    GPU_OFF;
+
+    GPU_FULL_ON;
+    d_src.upload(src);
+    ocl_hog.detectMultiScale(d_src, found_locations);
+    GPU_FULL_OFF;
+#endif
+}
+
+///////////// SURF ////////////////////////
+
+TEST(SURF)
+{
+    Mat keypoints_cpu;
+    Mat descriptors_cpu;
+
+    Mat src = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);
+
+    if (src.empty())
+    {
+        throw runtime_error("can't open aloeL.jpg");
+    }
+
+    SUBTEST << src.cols << "x" << src.rows << "; aloeL.jpg";
+    SURF surf;
+    vector<KeyPoint> keypoints;
+    Mat descriptors;
+
+    surf(src, Mat(), keypoints, descriptors);
+
+    CPU_ON;
+    keypoints.clear();
+    surf(src, Mat(), keypoints, descriptors);
+    CPU_OFF;
+
+#ifdef USE_OPENCL
+    ocl::SURF_OCL d_surf;
+    ocl::oclMat d_src(src);
+    ocl::oclMat d_keypoints;
+    ocl::oclMat d_descriptors;
+
+    WARMUP_ON;
+    d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors);
+    WARMUP_OFF;
+
+    GPU_ON;
+    d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors);
+    GPU_OFF;
+
+    GPU_FULL_ON;
+    d_src.upload(src);
+    d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors);
+
+    if (!d_keypoints.empty())
+    {
+        d_keypoints.download(keypoints_cpu);
+    }
+
+    if (!d_descriptors.empty())
+    {
+        d_descriptors.download(descriptors_cpu);
+    }
+
+    GPU_FULL_OFF;
+#endif
+}
+//////////////////// BruteForceMatch /////////////////
+TEST(BruteForceMatcher)
+{
+    Mat trainIdx_cpu;
+    Mat distance_cpu;
+    Mat allDist_cpu;
+    Mat nMatches_cpu;
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        // Init CPU matcher
+        int desc_len = 64;
+
+        BFMatcher matcher(NORM_L2);
+
+        Mat query;
+        gen(query, size, desc_len, CV_32F, 0, 1);
+
+        Mat train;
+        gen(train, size, desc_len, CV_32F, 0, 1);
+        // Output
+        vector< vector<DMatch> > matches(2);
+#ifdef USE_OPENCL
+        // Init GPU matcher
+        ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
+
+        ocl::oclMat d_query(query);
+        ocl::oclMat d_train(train);
+
+        ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches;
+#endif
+        SUBTEST << size << "; match";
+
+        matcher.match(query, train, matches[0]);
+
+        CPU_ON;
+        matcher.match(query, train, matches[0]);
+        CPU_OFF;
+
+#ifdef USE_OPENCL
+        WARMUP_ON;
+        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+        WARMUP_OFF;
+
+        GPU_ON;
+        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+        GPU_OFF;
+
+        GPU_FULL_ON;
+        d_query.upload(query);
+        d_train.upload(train);
+        d_matcher.match(d_query, d_train, matches[0]);
+        GPU_FULL_OFF;
+#endif
+
+        SUBTEST << size << "; knnMatch";
+
+        matcher.knnMatch(query, train, matches, 2);
+
+        CPU_ON;
+        matcher.knnMatch(query, train, matches, 2);
+        CPU_OFF;
+
+#ifdef USE_OPENCL
+        WARMUP_ON;
+        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
+        WARMUP_OFF;
+
+        GPU_ON;
+        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
+        GPU_OFF;
+
+        GPU_FULL_ON;
+        d_query.upload(query);
+        d_train.upload(train);
+        d_matcher.knnMatch(d_query, d_train, matches, 2);
+        GPU_FULL_OFF;
+#endif
+        SUBTEST << size << "; radiusMatch";
+
+        float max_distance = 2.0f;
+
+        matcher.radiusMatch(query, train, matches, max_distance);
+
+        CPU_ON;
+        matcher.radiusMatch(query, train, matches, max_distance);
+        CPU_OFF;
+
+#ifdef USE_OPENCL
+        d_trainIdx.release();
+
+        WARMUP_ON;
+        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
+        WARMUP_OFF;
+
+        GPU_ON;
+        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
+        GPU_OFF;
+
+        GPU_FULL_ON;
+        d_query.upload(query);
+        d_train.upload(train);
+        d_matcher.radiusMatch(d_query, d_train, matches, max_distance);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+///////////// Lut ////////////////////////
+TEST(lut)
+{
+    Mat src, lut, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_lut, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC3};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC3"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(src, size, size, all_type[j], 0, 256);
+            gen(lut, 1, 256, CV_8UC1, 0, 1);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+            LUT(src, lut, dst);
+
+            CPU_ON;
+            LUT(src, lut, dst);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            d_src.upload(src);
+            d_lut.upload(lut);
+
+            WARMUP_ON;
+            ocl::LUT(d_src, d_lut, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::LUT(d_src, d_lut, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            d_lut.upload(lut);
+            ocl::LUT(d_src, d_lut, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// Exp ////////////////////////
+TEST(Exp)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        SUBTEST << size << 'x' << size << "; CV_32FC1";
+
+        gen(src, size, size, CV_32FC1, 0, 256);
+        gen(dst, size, size, CV_32FC1, 0, 256);
+
+        exp(src, dst);
+
+        CPU_ON;
+        exp(src, dst);
+        CPU_OFF;
+#ifdef USE_OPENCL
+        d_src.upload(src);
+
+        WARMUP_ON;
+        ocl::exp(d_src, d_dst);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::exp(d_src, d_dst);
+        GPU_OFF;
+
+        GPU_FULL_ON;
+        d_src.upload(src);
+        ocl::exp(d_src, d_dst);
+        d_dst.download(dst);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+
+///////////// LOG ////////////////////////
+TEST(Log)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        SUBTEST << size << 'x' << size << "; 32F";
+
+        gen(src, size, size, CV_32F, 1, 10);
+
+        log(src, dst);
+
+        CPU_ON;
+        log(src, dst);
+        CPU_OFF;
+#ifdef USE_OPENCL
+        d_src.upload(src);
+
+        WARMUP_ON;
+        ocl::log(d_src, d_dst);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::log(d_src, d_dst);
+        GPU_OFF;
+
+        GPU_FULL_ON;
+        d_src.upload(src);
+        ocl::log(d_src, d_dst);
+        d_dst.download(dst);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+
+///////////// Add ////////////////////////
+
+TEST(Add)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(src1, size, size, all_type[j], 0, 1);
+            gen(src2, size, size, all_type[j], 0, 1);
+
+            add(src1, src2, dst);
+
+            CPU_ON;
+            add(src1, src2, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::add(d_src1, d_src2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::add(d_src1, d_src2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::add(d_src1, d_src2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// Mul ////////////////////////
+TEST(Mul)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            multiply(src1, src2, dst);
+
+            CPU_ON;
+            multiply(src1, src2, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::multiply(d_src1, d_src2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::multiply(d_src1, d_src2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::multiply(d_src1, d_src2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// Div ////////////////////////
+TEST(Div)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            divide(src1, src2, dst);
+
+            CPU_ON;
+            divide(src1, src2, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::divide(d_src1, d_src2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::divide(d_src1, d_src2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::divide(d_src1, d_src2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// Absdiff ////////////////////////
+TEST(Absdiff)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            absdiff(src1, src2, dst);
+
+            CPU_ON;
+            absdiff(src1, src2, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::absdiff(d_src1, d_src2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::absdiff(d_src1, d_src2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::absdiff(d_src1, d_src2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// CartToPolar ////////////////////////
+TEST(CartToPolar)
+{
+    Mat src1, src2, dst, dst1;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
+#endif
+    int all_type[] = {CV_32FC1};
+    std::string type_name[] = {"CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+            gen(dst1, size, size, all_type[j], 0, 256);
+
+
+            cartToPolar(src1, src2, dst, dst1, 1);
+
+            CPU_ON;
+            cartToPolar(src1, src2, dst, dst1, 1);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
+            d_dst.download(dst);
+            d_dst1.download(dst1);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// PolarToCart ////////////////////////
+TEST(PolarToCart)
+{
+    Mat src1, src2, dst, dst1;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
+#endif
+    int all_type[] = {CV_32FC1};
+    std::string type_name[] = {"CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+            gen(dst1, size, size, all_type[j], 0, 256);
+
+
+            polarToCart(src1, src2, dst, dst1, 1);
+
+            CPU_ON;
+            polarToCart(src1, src2, dst, dst1, 1);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
+            d_dst.download(dst);
+            d_dst1.download(dst1);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// Magnitude ////////////////////////
+TEST(magnitude)
+{
+    Mat x, y, mag;
+#ifdef USE_OPENCL
+    ocl::oclMat d_x, d_y, d_mag;
+#endif
+    int all_type[] = {CV_32FC1};
+    std::string type_name[] = {"CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(x, size, size, all_type[j], 0, 1);
+            gen(y, size, size, all_type[j], 0, 1);
+
+            magnitude(x, y, mag);
+
+            CPU_ON;
+            magnitude(x, y, mag);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_x.upload(x);
+            d_y.upload(y);
+
+            WARMUP_ON;
+            ocl::magnitude(d_x, d_y, d_mag);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::magnitude(d_x, d_y, d_mag);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_x.upload(x);
+            d_y.upload(y);
+            ocl::magnitude(d_x, d_y, d_mag);
+            d_mag.download(mag);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// Transpose ////////////////////////
+TEST(Transpose)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(src, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+            transpose(src, dst);
+
+            CPU_ON;
+            transpose(src, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::transpose(d_src, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::transpose(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::transpose(d_src, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// Flip ////////////////////////
+TEST(Flip)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; FLIP_BOTH";
+
+            gen(src, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+            flip(src, dst, 0);
+
+            CPU_ON;
+            flip(src, dst, 0);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::flip(d_src, d_dst, 0);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::flip(d_src, d_dst, 0);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::flip(d_src, d_dst, 0);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// minMax ////////////////////////
+TEST(minMax)
+{
+    Mat src;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src;
+#endif
+    double min_val, max_val;
+    Point min_loc, max_loc;
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            CPU_ON;
+            minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::minMax(d_src, &min_val, &max_val);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::minMax(d_src, &min_val, &max_val);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::minMax(d_src, &min_val, &max_val);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// minMaxLoc ////////////////////////
+TEST(minMaxLoc)
+{
+    Mat src;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src;
+#endif
+    double min_val, max_val;
+    Point min_loc, max_loc;
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 1);
+
+            CPU_ON;
+            minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// Sum ////////////////////////
+TEST(Sum)
+{
+    Mat src;
+    Scalar cpures, gpures;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src;
+#endif
+    int all_type[] = {CV_8UC1, CV_32SC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            cpures = sum(src);
+
+            CPU_ON;
+            cpures = sum(src);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            gpures = ocl::sum(d_src);
+            WARMUP_OFF;
+
+            GPU_ON;
+            gpures = ocl::sum(d_src);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            gpures = ocl::sum(d_src);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// countNonZero ////////////////////////
+TEST(countNonZero)
+{
+    Mat src;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src;
+#endif
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            countNonZero(src);
+
+            CPU_ON;
+            countNonZero(src);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::countNonZero(d_src);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::countNonZero(d_src);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::countNonZero(d_src);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// Phase ////////////////////////
+TEST(Phase)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_32FC1};
+    std::string type_name[] = {"CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            phase(src1, src2, dst, 1);
+
+            CPU_ON;
+            phase(src1, src2, dst, 1);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::phase(d_src1, d_src2, d_dst, 1);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::phase(d_src1, d_src2, d_dst, 1);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::phase(d_src1, d_src2, d_dst, 1);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// bitwise_and////////////////////////
+TEST(bitwise_and)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_32SC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            bitwise_and(src1, src2, dst);
+
+            CPU_ON;
+            bitwise_and(src1, src2, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::bitwise_and(d_src1, d_src2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::bitwise_and(d_src1, d_src2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::bitwise_and(d_src1, d_src2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// bitwise_or////////////////////////
+TEST(bitwise_or)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_32SC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            bitwise_or(src1, src2, dst);
+
+            CPU_ON;
+            bitwise_or(src1, src2, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::bitwise_or(d_src1, d_src2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::bitwise_or(d_src1, d_src2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::bitwise_or(d_src1, d_src2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// bitwise_xor////////////////////////
+TEST(bitwise_xor)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_32SC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            bitwise_xor(src1, src2, dst);
+
+            CPU_ON;
+            bitwise_xor(src1, src2, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::bitwise_xor(d_src1, d_src2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::bitwise_xor(d_src1, d_src2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::bitwise_xor(d_src1, d_src2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// bitwise_not////////////////////////
+TEST(bitwise_not)
+{
+    Mat src1, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_32SC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            bitwise_not(src1, dst);
+
+            CPU_ON;
+            bitwise_not(src1, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+
+            WARMUP_ON;
+            ocl::bitwise_not(d_src1, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::bitwise_not(d_src1, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            ocl::bitwise_not(d_src1, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// compare////////////////////////
+TEST(compare)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int CMP_EQ = 0;
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            compare(src1, src2, dst, CMP_EQ);
+
+            CPU_ON;
+            compare(src1, src2, dst, CMP_EQ);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// pow ////////////////////////
+TEST(pow)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int all_type[] = {CV_32FC1};
+    std::string type_name[] = {"CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 100);
+            gen(dst, size, size, all_type[j], 0, 100);
+
+            pow(src, -2.0, dst);
+
+            CPU_ON;
+            pow(src, -2.0, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+            d_dst.upload(dst);
+
+            WARMUP_ON;
+            ocl::pow(d_src, -2.0, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::pow(d_src, -2.0, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::pow(d_src, -2.0, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// MagnitudeSqr////////////////////////
+TEST(MagnitudeSqr)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    int all_type[] = {CV_32FC1};
+    std::string type_name[] = {"CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[t];
+
+            gen(src1, size, size, all_type[t], 0, 256);
+            gen(src2, size, size, all_type[t], 0, 256);
+            gen(dst, size, size, all_type[t], 0, 256);
+
+
+            for (int i = 0; i < src1.rows; ++i)
+
+                for (int j = 0; j < src1.cols; ++j)
+                {
+                    float val1 = src1.at<float>(i, j);
+                    float val2 = src2.at<float>(i, j);
+
+                    ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
+
+                }
+
+            CPU_ON;
+
+            for (int i = 0; i < src1.rows; ++i)
+                for (int j = 0; j < src1.cols; ++j)
+                {
+                    float val1 = src1.at<float>(i, j);
+                    float val2 = src2.at<float>(i, j);
+
+                    ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
+
+                }
+
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::magnitudeSqr(d_src1, d_src2, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::magnitudeSqr(d_src1, d_src2, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::magnitudeSqr(d_src1, d_src2, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// AddWeighted////////////////////////
+TEST(AddWeighted)
+{
+    Mat src1, src2, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_dst;
+#endif
+    double alpha = 2.0, beta = 1.0, gama = 3.0;
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(src2, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            addWeighted(src1, alpha, src2, beta, gama, dst);
+
+            CPU_ON;
+            addWeighted(src1, alpha, src2, beta, gama, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+
+            WARMUP_ON;
+            ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            d_src2.upload(src2);
+            ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// Blur////////////////////////
+TEST(Blur)
+{
+    Mat src1, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_dst;
+#endif
+    Size ksize = Size(3, 3);
+    int bordertype = BORDER_CONSTANT;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            blur(src1, dst, ksize, Point(-1, -1), bordertype);
+
+            CPU_ON;
+            blur(src1, dst, ksize, Point(-1, -1), bordertype);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+
+            WARMUP_ON;
+            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// Laplacian////////////////////////
+TEST(Laplacian)
+{
+    Mat src1, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_dst;
+#endif
+    int ksize = 3;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src1, size, size, all_type[j], 0, 256);
+            gen(dst, size, size, all_type[j], 0, 256);
+
+
+            Laplacian(src1, dst, -1, ksize, 1);
+
+            CPU_ON;
+            Laplacian(src1, dst, -1, ksize, 1);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src1.upload(src1);
+
+            WARMUP_ON;
+            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src1.upload(src1);
+            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// Erode ////////////////////
+TEST(Erode)
+{
+    Mat src, dst, ker;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(256));
+            ker = getStructuringElement(MORPH_RECT, Size(3, 3));
+
+            erode(src, dst, ker);
+
+            CPU_ON;
+            erode(src, dst, ker);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::erode(d_src, d_dst, ker);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::erode(d_src, d_dst, ker);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::erode(d_src, d_dst, ker);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// Sobel ////////////////////////
+TEST(Sobel)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int dx = 1;
+    int dy = 1;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            Sobel(src, dst, -1, dx, dy);
+
+            CPU_ON;
+            Sobel(src, dst, -1, dx, dy);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::Sobel(d_src, d_dst, -1, dx, dy);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::Sobel(d_src, d_dst, -1, dx, dy);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::Sobel(d_src, d_dst, -1, dx, dy);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// Scharr ////////////////////////
+TEST(Scharr)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int dx = 1;
+    int dy = 0;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            Scharr(src, dst, -1, dx, dy);
+
+            CPU_ON;
+            Scharr(src, dst, -1, dx, dy);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::Scharr(d_src, d_dst, -1, dx, dy);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::Scharr(d_src, d_dst, -1, dx, dy);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::Scharr(d_src, d_dst, -1, dx, dy);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// GaussianBlur ////////////////////////
+TEST(GaussianBlur)
+{
+    Mat src, dst;
+    int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            GaussianBlur(src, dst, Size(9, 9), 0);
+
+            CPU_ON;
+            GaussianBlur(src, dst, Size(9, 9), 0);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            ocl::oclMat d_src(src);
+            ocl::oclMat d_dst(src.size(), src.type());
+            ocl::oclMat d_buf;
+
+            WARMUP_ON;
+            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// equalizeHist ////////////////////////
+TEST(equalizeHist)
+{
+    Mat src, dst;
+    int all_type[] = {CV_8UC1};
+    std::string type_name[] = {"CV_8UC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            equalizeHist(src, dst);
+
+            CPU_ON;
+            equalizeHist(src, dst);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            ocl::oclMat d_src(src);
+            ocl::oclMat d_dst;
+            ocl::oclMat d_hist;
+            ocl::oclMat d_buf;
+
+            WARMUP_ON;
+            ocl::equalizeHist(d_src, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::equalizeHist(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::equalizeHist(d_src, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+/////////// CopyMakeBorder //////////////////////
+TEST(CopyMakeBorder)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_dst;
+#endif
+    int bordertype = BORDER_CONSTANT;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
+
+            CPU_ON;
+            copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
+            CPU_OFF;
+#ifdef USE_OPENCL
+            ocl::oclMat d_src(src);
+
+            WARMUP_ON;
+            ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// cornerMinEigenVal ////////////////////////
+TEST(cornerMinEigenVal)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_dst;
+#endif
+    int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4);
+    int borderType = BORDER_REFLECT;
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
+
+            CPU_ON;
+            cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            ocl::oclMat d_src(src);
+
+            WARMUP_ON;
+            ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// cornerHarris ////////////////////////
+TEST(cornerHarris)
+{
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; BORDER_REFLECT";
+
+            gen(src, size, size, all_type[j], 0, 1);
+
+            cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT);
+
+            CPU_ON;
+            cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+
+    }
+}
+///////////// integral ////////////////////////
+TEST(integral)
+{
+    Mat src, sum;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_sum, d_buf;
+#endif
+    int all_type[] = {CV_8UC1};
+    std::string type_name[] = {"CV_8UC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j]  ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            integral(src, sum);
+
+            CPU_ON;
+            integral(src, sum);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::integral(d_src, d_sum);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::integral(d_src, d_sum);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::integral(d_src, d_sum);
+            d_sum.download(sum);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// WarpAffine ////////////////////////
+TEST(WarpAffine) // Benchmark: warpAffine() vs ocl::warpAffine() with INTER_NEAREST on CV_8UC1 and CV_8UC4 square images.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    static const double coeffs[2][3] = // 2x3 matrix: rotation by 3.14/6 radians plus a (100, -100) translation column
+    {
+        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
+        {sin(3.14 / 6), cos(3.14 / 6), -100.0}
+    };
+    Mat M(2, 3, CV_64F, (void *)coeffs); // Mat header built over the coeffs array; the cast drops const
+    int interpolation = INTER_NEAREST;
+
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; // labels parallel to all_type
+
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256); // gen() is defined elsewhere; presumably random fill — TODO confirm
+            gen(dst, size, size, all_type[j], 0, 256);
+            Size size1 = Size(size, size); // output size equals the input size
+
+            warpAffine(src, dst, M, size1, interpolation); // extra call issued before CPU_ON
+
+            CPU_ON;
+            warpAffine(src, dst, M, size1, interpolation);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section additionally contains the upload and the download
+            d_src.upload(src);
+            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// WarpPerspective ////////////////////////
+TEST(WarpPerspective) // Benchmark: warpPerspective() vs ocl::warpPerspective() with INTER_NEAREST on CV_8UC1 and CV_8UC4 square images.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    static const double coeffs[3][3] = // 3x3 matrix: same rotation/translation rows as the affine test, last row {0, 0, 1}
+    {
+        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
+        {sin(3.14 / 6), cos(3.14 / 6), -100.0},
+        {0.0, 0.0, 1.0}
+    };
+    Mat M(3, 3, CV_64F, (void *)coeffs); // Mat header built over the coeffs array; the cast drops const
+    int interpolation = INTER_NEAREST;
+
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; // labels parallel to all_type
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256); // gen() is defined elsewhere; presumably random fill — TODO confirm
+            gen(dst, size, size, all_type[j], 0, 256);
+            Size size1 = Size(size, size); // output size equals the input size
+
+            warpPerspective(src, dst, M, size1, interpolation); // extra call issued before CPU_ON
+
+            CPU_ON;
+            warpPerspective(src, dst, M, size1, interpolation);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section additionally contains the upload and the download
+            d_src.upload(src);
+            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// resize ////////////////////////
+TEST(resize) // Benchmark: resize() vs ocl::resize() in two passes — scale factor 2.0 ("up") and 0.5 ("down").
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; // labels parallel to all_type
+
+    for (int size = 1000; size <= 4000; size *= 2) // pass 1: upscale by 2 in both directions
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; up";
+
+            gen(src, size, size, all_type[j], 0, 256); // gen() is defined elsewhere; presumably random fill — TODO confirm
+
+            resize(src, dst, Size(), 2.0, 2.0); // extra call before CPU_ON; empty Size() with explicit fx/fy factors
+
+            CPU_ON;
+            resize(src, dst, Size(), 2.0, 2.0);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section additionally contains the upload and the download
+            d_src.upload(src);
+            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+
+    for (int size = 1000; size <= 4000; size *= 2) // pass 2: downscale by 2 in both directions
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; down";
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            resize(src, dst, Size(), 0.5, 0.5); // extra call before CPU_ON
+
+            CPU_ON;
+            resize(src, dst, Size(), 0.5, 0.5);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section additionally contains the upload and the download
+            d_src.upload(src);
+            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// threshold////////////////////////
+TEST(threshold) // Benchmark: threshold() vs ocl::threshold() — THRESH_BINARY on CV_8U, then THRESH_TRUNC on CV_32FC1.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+
+    for (int size = 1000; size <= 4000; size *= 2) // pass 1: 8-bit single channel, binary threshold
+    {
+        SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY";
+
+        gen(src, size, size, CV_8U, 0, 100); // gen() is defined elsewhere; presumably random fill in the given range — TODO confirm
+
+        threshold(src, dst, 50.0, 0.0, THRESH_BINARY); // extra call before CPU_ON; threshold 50.0, max value 0.0
+
+        CPU_ON;
+        threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
+        CPU_OFF;
+#ifdef USE_OPENCL
+        d_src.upload(src);
+
+        WARMUP_ON;
+        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
+        GPU_OFF;
+
+        GPU_FULL_ON; // this section additionally contains the upload and the download
+        d_src.upload(src);
+        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
+        d_dst.download(dst);
+        GPU_FULL_OFF;
+#endif
+    }
+
+    for (int size = 1000; size <= 4000; size *= 2) // pass 2: 32-bit float single channel, truncating threshold
+    {
+        SUBTEST << size << 'x' << size << "; 32FC1; THRESH_TRUNC [NPP]"; // NOTE(review): the "[NPP]" tag looks unrelated to the OpenCL path — confirm before renaming
+
+        gen(src, size, size, CV_32FC1, 0, 100);
+
+        threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); // extra call before CPU_ON
+
+        CPU_ON;
+        threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
+        CPU_OFF;
+#ifdef USE_OPENCL
+        d_src.upload(src);
+
+        WARMUP_ON;
+        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
+        GPU_OFF;
+
+        GPU_FULL_ON; // this section additionally contains the upload and the download
+        d_src.upload(src);
+        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
+        d_dst.download(dst);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+///////////// meanShiftFiltering////////////////////////
+TEST(meanShiftFiltering) // Benchmark: pyrMeanShiftFiltering() on CV_8UC3 (CPU) vs ocl::meanShiftFiltering() on CV_8UC4 (OpenCL).
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    int sp = 10, sr = 10; // passed as the two radius arguments to both the CPU and the OpenCL call
+
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; // the label reflects that the two sides use different channel counts
+
+        gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); // gen() is defined elsewhere; presumably random fill — TODO confirm
+
+        pyrMeanShiftFiltering(src, dst, sp, sr); // extra call before CPU_ON
+
+        CPU_ON;
+        pyrMeanShiftFiltering(src, dst, sp, sr);
+        CPU_OFF;
+#ifdef USE_OPENCL
+        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); // src is regenerated as 4-channel for the OpenCL path
+
+        d_src.upload(src);
+
+        WARMUP_ON;
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
+        GPU_OFF;
+
+        GPU_FULL_ON; // this section additionally contains the upload and the download
+        d_src.upload(src);
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
+        d_dst.download(dst);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+///////////// meanShiftProc////////////////////////
+COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab) // Runs the mean-shift iteration for one pixel at (x0, y0): writes the converged color to dptr[0..3] and returns the converged position.
+{ // sptr/dptr point at the 4-byte source/destination pixel, sstep is the source row stride in bytes, tab is indexed by (channel difference + 255).
+
+    int isr2 = sr * sr; // squared color radius; compared against the sum of three tab[] entries
+    int c0, c1, c2, c3;
+    int iter;
+    uchar *ptr = NULL;
+    uchar *pstart = NULL;
+    int revx = 0, revy = 0; // shift of the window center produced by the previous iteration
+    c0 = sptr[0];
+    c1 = sptr[1];
+    c2 = sptr[2];
+    c3 = sptr[3]; // fourth channel is never averaged; it is copied to the output unchanged
+
+    // iterate meanshift procedure
+    for (iter = 0; iter < maxIter; iter++)
+    {
+        int count = 0; // number of window pixels accepted in this iteration
+        int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; // sums of channel values and of x / y coordinates of accepted pixels
+
+        //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
+        int minx = x0 - sp;
+        int miny = y0 - sp;
+        int maxx = x0 + sp;
+        int maxy = y0 + sp;
+
+        //deal with the image boundary
+        if (minx < 0)
+        {
+            minx = 0;
+        }
+
+        if (miny < 0)
+        {
+            miny = 0;
+        }
+
+        if (maxx >= size.width)
+        {
+            maxx = size.width - 1;
+        }
+
+        if (maxy >= size.height)
+        {
+            maxy = size.height - 1;
+        }
+
+        if (iter == 0)
+        {
+            pstart = sptr;
+        }
+        else
+        {
+            pstart = pstart + revy * sstep + (revx << 2); //point to the new position
+        }
+
+        ptr = pstart;
+        ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
+
+        for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) // the increment rewinds ptr to column minx of the next row
+        {
+            int rowCount = 0; // accepted pixels in this row
+            int x = minx;
+#if CV_ENABLE_UNROLLED
+
+            for (; x + 4 <= maxx; x += 4, ptr += 16) // four pixels (16 bytes) per step; the scalar loop below handles the remainder
+            {
+                int t0, t1, t2;
+                t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
+
+                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) // accept the pixel when the summed tab[] entries are within isr2
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x;
+                    rowCount++;
+                }
+
+                t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
+
+                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x + 1;
+                    rowCount++;
+                }
+
+                t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
+
+                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x + 2;
+                    rowCount++;
+                }
+
+                t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
+
+                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x + 3;
+                    rowCount++;
+                }
+            }
+
+#endif
+
+            for (; x <= maxx; x++, ptr += 4) // one pixel per step
+            {
+                int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
+
+                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x;
+                    rowCount++;
+                }
+            }
+
+            if (rowCount == 0)
+            {
+                continue; // nothing accepted in this row; the for-increment still advances ptr to the next row
+            }
+
+            count += rowCount;
+            sy += y * rowCount; // every accepted pixel in this row contributes the same y
+        }
+
+        if (count == 0)
+        {
+            break; // no pixel accepted: keep the current position and color
+        }
+
+        int x1 = sx / count; // integer mean of accepted coordinates and channel values
+        int y1 = sy / count;
+        s0 = s0 / count;
+        s1 = s1 / count;
+        s2 = s2 / count;
+
+        bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
+                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); // stop when the position is unchanged or the combined position/color shift is within eps
+
+        //revise the pointer corresponding to the new (y0,x0)
+        revx = x1 - x0;
+        revy = y1 - y0;
+
+        x0 = x1;
+        y0 = y1;
+        c0 = s0;
+        c1 = s1;
+        c2 = s2;
+
+        if (stopFlag)
+        {
+            break;
+        }
+    } //for iter
+
+    dptr[0] = (uchar)c0;
+    dptr[1] = (uchar)c1;
+    dptr[2] = (uchar)c2;
+    dptr[3] = (uchar)c3;
+
+    COOR coor; // COOR is declared outside this chunk; only its x and y members are set here
+    coor.x = static_cast<short>(x0);
+    coor.y = static_cast<short>(y0);
+    return coor;
+}
+
+void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
+{
+
+    if (src_roi.empty())
+    {
+        CV_Error(CV_StsBadArg, "The input image is empty");
+    }
+
+    if (src_roi.depth() != CV_8U || src_roi.channels() != 4)
+    {
+        CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
+    }
+
+    CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
+              (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
+    CV_Assert(!(dstCoor_roi.step & 0x3));
+
+    if (!(crit.type & cv::TermCriteria::MAX_ITER))
+    {
+        crit.maxCount = 5;
+    }
+
+    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
+    float eps;
+
+    if (!(crit.type & cv::TermCriteria::EPS))
+    {
+        eps = 1.f;
+    }
+
+    eps = (float)std::max(crit.epsilon, 0.0);
+
+    int tab[512];
+
+    for (int i = 0; i < 512; i++)
+    {
+        tab[i] = (i - 255) * (i - 255);
+    }
+
+    uchar *sptr = src_roi.data;
+    uchar *dptr = dst_roi.data;
+    short *dCoorptr = (short *)dstCoor_roi.data;
+    int sstep = (int)src_roi.step;
+    int dstep = (int)dst_roi.step;
+    int dCoorstep = (int)dstCoor_roi.step >> 1;
+    cv::Size size = src_roi.size();
+
+    for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
+            dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1))
+    {
+        for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2)
+        {
+            *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
+        }
+    }
+
+}
+TEST(meanShiftProc) // Benchmark: the CPU reference meanShiftProc_() vs ocl::meanShiftProc() on CV_8UC4 input with CV_16SC2 coordinate output.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst, dstCoor_roi;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst, d_dstCoor_roi;
+#endif
+    TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1); // both flags set: max count 5, epsilon 1
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 ";
+
+        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); // gen() is defined elsewhere; presumably random fill — TODO confirm
+        gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); // outputs are pre-sized because meanShiftProc_ asserts matching dimensions
+        gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256));
+
+        meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); // extra call before CPU_ON; sp = 5, sr = 6
+
+        CPU_ON;
+        meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
+        CPU_OFF;
+#ifdef USE_OPENCL
+        d_src.upload(src);
+
+        WARMUP_ON;
+        ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
+        GPU_OFF;
+
+        GPU_FULL_ON; // this section additionally contains the upload and both downloads
+        d_src.upload(src);
+        ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
+        d_dst.download(dst);
+        d_dstCoor_roi.download(dstCoor_roi);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+///////////// ConvertTo////////////////////////
+TEST(ConvertTo) // Benchmark: Mat::convertTo() vs oclMat::convertTo() with target type CV_32FC1 for CV_8UC1 and CV_8UC4 inputs.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; // labels parallel to all_type
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] << " to 32FC1";
+
+            gen(src, size, size, all_type[j], 0, 256); // gen() is defined elsewhere; presumably random fill — TODO confirm
+            //gen(dst, size, size, all_type[j], 0, 256);
+
+            //d_dst.upload(dst);
+
+            src.convertTo(dst, CV_32FC1); // extra call before CPU_ON
+
+            CPU_ON;
+            src.convertTo(dst, CV_32FC1);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            d_src.convertTo(d_dst, CV_32FC1);
+            WARMUP_OFF;
+
+            GPU_ON;
+            d_src.convertTo(d_dst, CV_32FC1);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section additionally contains the upload and the download
+            d_src.upload(src);
+            d_src.convertTo(d_dst, CV_32FC1);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// copyTo////////////////////////
+TEST(copyTo) // Benchmark: Mat::copyTo() vs oclMat::copyTo() for CV_8UC1 and CV_8UC4 square images.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; // labels parallel to all_type
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256); // gen() is defined elsewhere; presumably random fill — TODO confirm
+            //gen(dst, size, size, all_type[j], 0, 256);
+
+            //d_dst.upload(dst);
+
+            src.copyTo(dst); // extra call before CPU_ON
+
+            CPU_ON;
+            src.copyTo(dst);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            d_src.copyTo(d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            d_src.copyTo(d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section additionally contains the upload and the download
+            d_src.upload(src);
+            d_src.copyTo(d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// setTo////////////////////////
+TEST(setTo) // Benchmark: Mat::setTo() vs oclMat::setTo() with the scalar (1, 2, 3, 4) for CV_8UC1 and CV_8UC4 square images.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst; // dst is declared but not used in this test
+    Scalar val(1, 2, 3, 4);
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst; // d_dst is declared but not used in this test
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; // labels parallel to all_type
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256); // gen() is defined elsewhere; presumably random fill — TODO confirm
+
+            src.setTo(val); // extra call before CPU_ON
+
+            CPU_ON;
+            src.setTo(val);
+            CPU_OFF;
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            d_src.setTo(val);
+            WARMUP_OFF;
+
+            GPU_ON;
+            d_src.setTo(val);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section contains the upload but, unlike most tests here, no download
+            d_src.upload(src);
+            d_src.setTo(val);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// Merge////////////////////////
+TEST(Merge)
+{
+    Mat dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_dst;
+#endif
+    int channels = 4;
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+            Size size1 = Size(size, size);
+            std::vector<Mat> src(channels);
+
+            for (int i = 0; i < channels; ++i)
+            {
+                src[i] = Mat(size1, all_type[j], cv::Scalar::all(i));
+            }
+
+            merge(src, dst);
+
+            CPU_ON;
+            merge(src, dst);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            std::vector<ocl::oclMat> d_src(channels);
+
+            for (int i = 0; i < channels; ++i)
+            {
+                d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
+            }
+
+            WARMUP_ON;
+            ocl::merge(d_src, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::merge(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+
+            for (int i = 0; i < channels; ++i)
+            {
+                d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i));
+            }
+
+            ocl::merge(d_src, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// Split////////////////////////
+TEST(Split) // Benchmark: split() vs ocl::split() of a 4-channel image built from CV_8UC1 / CV_32FC1 base types.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    //int channels = 4;
+    int all_type[] = {CV_8UC1, CV_32FC1};
+    std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; // labels name the per-plane type, not the 4-channel source type
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j];
+            Size size1 = Size(size, size);
+
+            Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); // 4-channel source filled with the constant (1, 2, 3, 4)
+
+            std::vector<cv::Mat> dst;
+
+            split(src, dst); // extra call before CPU_ON
+
+            CPU_ON;
+            split(src, dst);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            ocl::oclMat d_src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); // device source constructed directly, not uploaded
+            std::vector<cv::ocl::oclMat> d_dst;
+
+            WARMUP_ON;
+            ocl::split(d_src, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::split(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section contains the upload but no download of the output planes
+            d_src.upload(src);
+            ocl::split(d_src, d_dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+
+///////////// norm////////////////////////
+TEST(norm)
+{
+    Mat src, buf;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_buf;
+#endif
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";
+
+        gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+        gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+
+        norm(src, NORM_INF);
+
+        CPU_ON;
+        norm(src, NORM_INF);
+        CPU_OFF;
+
+#ifdef USE_OPENCL
+        d_src.upload(src);
+        d_buf.upload(buf);
+
+        WARMUP_ON;
+        ocl::norm(d_src, d_buf, NORM_INF);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::norm(d_src, d_buf, NORM_INF);
+        GPU_OFF;
+
+        GPU_FULL_ON;
+        d_src.upload(src);
+        ocl::norm(d_src, d_buf, NORM_INF);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+///////////// remap////////////////////////
+TEST(remap) // Benchmark: remap() vs ocl::remap() with INTER_LINEAR / BORDER_CONSTANT and separate CV_32FC1 x/y maps.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst, xmap, ymap;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst, d_xmap, d_ymap;
+#endif
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; // labels parallel to all_type
+
+    int interpolation = INTER_LINEAR;
+    int borderMode = BORDER_CONSTANT;
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++)
+        {
+            SUBTEST << size << 'x' << size << "; src " << type_name[t] << "; map CV_32FC1";
+
+            gen(src, size, size, all_type[t], 0, 256); // gen() is defined elsewhere; presumably random fill — TODO confirm
+
+            xmap.create(size, size, CV_32FC1);
+            dst.create(size, size, CV_32FC1); // NOTE(review): dst is created as CV_32FC1 regardless of the source type — confirm this is intended
+            ymap.create(size, size, CV_32FC1);
+
+            for (int i = 0; i < size; ++i)
+            {
+                float *xmap_row = xmap.ptr<float>(i);
+                float *ymap_row = ymap.ptr<float>(i);
+
+                for (int j = 0; j < size; ++j)
+                {
+                    xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f; // maps scale coordinates by 0.75 about the image centre
+                    ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
+                }
+            }
+
+
+            remap(src, dst, xmap, ymap, interpolation, borderMode); // extra call before CPU_ON
+
+            CPU_ON;
+            remap(src, dst, xmap, ymap, interpolation, borderMode);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            d_src.upload(src);
+            d_dst.upload(dst);
+            d_xmap.upload(xmap);
+            d_ymap.upload(ymap);
+
+            WARMUP_ON;
+            ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section re-uploads only src (not the maps) and downloads dst
+            d_src.upload(src);
+            ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+///////////// cvtColor////////////////////////
+TEST(cvtColor) // Benchmark: cvtColor() vs ocl::cvtColor() with the CV_RGBA2GRAY conversion code on CV_8UC4 square images.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+    int all_type[] = {CV_8UC4};
+    std::string type_name[] = {"CV_8UC4"}; // labels parallel to all_type
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            gen(src, size, size, all_type[j], 0, 256); // gen() is defined elsewhere; here it runs before SUBTEST, unlike most tests in this file
+            SUBTEST << size << "x" << size << "; " << type_name[j] << " ; CV_RGBA2GRAY";
+
+            cvtColor(src, dst, CV_RGBA2GRAY, 4); // extra call before CPU_ON; the trailing 4 is passed as the fourth argument (presumably the destination channel count — TODO confirm)
+
+            CPU_ON;
+            cvtColor(src, dst, CV_RGBA2GRAY, 4);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section additionally contains the upload and the download
+            d_src.upload(src);
+            ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+
+    }
+
+
+}
+///////////// filter2D////////////////////////
+TEST(filter2D) // Benchmark: cv::filter2D() vs ocl::filter2D() with square CV_32FC1 kernels of size 3, 7 and 15.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src;
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        int all_type[] = {CV_8UC1, CV_8UC4};
+        std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; // labels parallel to all_type
+
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            gen(src, size, size, all_type[j], 0, 256); // one source image is shared by all kernel sizes; gen() is defined elsewhere
+
+            for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1) // ksize takes the values 3, 7, 15
+            {
+                SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ;
+
+                Mat kernel;
+                gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
+
+                Mat dst;
+                cv::filter2D(src, dst, -1, kernel); // extra call before CPU_ON; -1 is passed as the output depth argument
+
+                CPU_ON;
+                cv::filter2D(src, dst, -1, kernel);
+                CPU_OFF;
+#ifdef USE_OPENCL
+                ocl::oclMat d_src(src); // device source is constructed from the host Mat for every kernel size
+                ocl::oclMat d_dst;
+
+                WARMUP_ON;
+                ocl::filter2D(d_src, d_dst, -1, kernel);
+                WARMUP_OFF;
+
+                GPU_ON;
+                ocl::filter2D(d_src, d_dst, -1, kernel);
+                GPU_OFF;
+
+                GPU_FULL_ON; // this section additionally contains the upload and the download
+                d_src.upload(src);
+                ocl::filter2D(d_src, d_dst, -1, kernel);
+                d_dst.download(dst);
+                GPU_FULL_OFF;
+#endif
+            }
+
+        }
+
+
+    }
+}
+
+
+///////////// dft ////////////////////////
+TEST(dft) // Benchmark: dft() vs ocl::dft() on CV_32FC1 and CV_32FC2 square images.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src, d_dst;
+#endif
+
+    int all_type[] = {CV_32FC1, CV_32FC2};
+    std::string type_name[] = {"CV_32FC1", "CV_32FC2"}; // labels parallel to all_type
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex"; // NOTE(review): the same "complex-to-complex" label is used for the single-channel CV_32FC1 input — confirm
+
+            gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1)); // gen() is defined elsewhere; presumably random fill — TODO confirm
+
+            dft(src, dst); // extra call before CPU_ON
+
+            CPU_ON;
+            dft(src, dst);
+            CPU_OFF;
+
+#ifdef USE_OPENCL
+            d_src.upload(src);
+
+            WARMUP_ON;
+            ocl::dft(d_src, d_dst, Size(size, size)); // the OpenCL call takes an explicit size argument, unlike the CPU call
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::dft(d_src, d_dst, Size(size, size));
+            GPU_OFF;
+
+            GPU_FULL_ON; // this section additionally contains the upload and the download
+            d_src.upload(src);
+            ocl::dft(d_src, d_dst, Size(size, size));
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+#endif
+        }
+
+    }
+}
+
+///////////// gemm ////////////////////////
+TEST(gemm) // Benchmark: gemm() vs ocl::gemm() computing 1.0 * src1 * src2 + 1.0 * src3 on square CV_32FC1 matrices.
+{ // The harness macros used below are defined outside this chunk; presumably they bracket timed sections — TODO confirm.
+    Mat src1, src2, src3, dst;
+#ifdef USE_OPENCL
+    ocl::oclMat d_src1, d_src2, d_src3, d_dst;
+#endif
+
+    for (int size = 1000; size <= 4000; size *= 2)
+    {
+        SUBTEST << size << 'x' << size;
+
+        gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); // gen() is defined elsewhere; presumably random fill in the given range — TODO confirm
+        gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
+        gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
+
+        gemm(src1, src2, 1.0, src3, 1.0, dst); // extra call before CPU_ON
+
+        CPU_ON;
+        gemm(src1, src2, 1.0, src3, 1.0, dst);
+        CPU_OFF;
+
+#ifdef USE_OPENCL
+        d_src1.upload(src1);
+        d_src2.upload(src2);
+        d_src3.upload(src3);
+
+        WARMUP_ON;
+        ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
+        GPU_OFF;
+
+        GPU_FULL_ON; // this section additionally contains all three uploads and the download
+        d_src1.upload(src1);
+        d_src2.upload(src2);
+        d_src3.upload(src3);
+        ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
+        d_dst.download(dst);
+        GPU_FULL_OFF;
+#endif
+    }
+}
+
+int main(int argc, const char *argv[])
+{
+#ifdef USE_OPENCL
+    vector<ocl::Info> oclinfo;
+    int num_devices = getDevice(oclinfo);
+
+    if (num_devices < 1)
+    {
+        cerr << "no device found\n";
+        return -1;
+    }
+
+    int devidx = 0;
+
+    for (size_t i = 0; i < oclinfo.size(); i++)
+    {
+        for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++)
+        {
+            printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str());
+        }
+    }
+
+#endif
+    redirectError(cvErrorCallback);
+
+    const char *keys =
+        "{ h | help    | false | print help message }"
+        "{ f | filter  |       | filter for test }"
+        "{ w | workdir |       | set working directory }"
+        "{ l | list    | false | show all tests }"
+        "{ d | device  | 0     | device id }"
+        "{ i | iters   | 10    | iteration count }"
+        "{ m | warmup  | 1     | gpu warm up iteration count}"
+        "{ t | xtop    | 1.1     | xfactor top boundary}"
+        "{ b | xbottom | 0.9     | xfactor bottom boundary}"
+        "{ v | verify  | false | only run gpu once to verify if problems occur}";
+
+    CommandLineParser cmd(argc, argv, keys);
+
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Avaible options:" << endl;
+        cmd.printParams();
+        return 0;
+    }
+
+#ifdef USE_OPENCL
+    int device = cmd.get<int>("device");
+
+    if (device < 0 || device >= num_devices)
+    {
+        cerr << "Invalid device ID" << endl;
+        return -1;
+    }
+
+    if (cmd.get<bool>("verify"))
+    {
+        TestSystem::instance().setNumIters(1);
+        TestSystem::instance().setGPUWarmupIters(0);
+        TestSystem::instance().setCPUIters(0);
+    }
+
+    devidx = 0;
+
+    for (size_t i = 0; i < oclinfo.size(); i++)
+    {
+        for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++)
+        {
+            if (device == devidx)
+            {
+                ocl::setDevice(oclinfo[i], (int)j);
+                TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]);
+                printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str());
+                goto END_DEV;
+            }
+        }
+    }
+
+END_DEV:
+
+#endif
+    string filter = cmd.get<string>("filter");
+    string workdir = cmd.get<string>("workdir");
+    bool list = cmd.get<bool>("list");
+    int iters = cmd.get<int>("iters");
+    int wu_iters = cmd.get<int>("warmup");
+    double x_top = cmd.get<double>("xtop");
+    double x_bottom = cmd.get<double>("xbottom");
+
+    TestSystem::instance().setTopThreshold(x_top);
+    TestSystem::instance().setBottomThreshold(x_bottom);
+
+    if (!filter.empty())
+    {
+        TestSystem::instance().setTestFilter(filter);
+    }
+
+    if (!workdir.empty())
+    {
+        if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
+        {
+            workdir += '/';
+        }
+
+        TestSystem::instance().setWorkingDir(workdir);
+    }
+
+    if (list)
+    {
+        TestSystem::instance().setListMode(true);
+    }
+
+    TestSystem::instance().setNumIters(iters);
+    TestSystem::instance().setGPUWarmupIters(wu_iters);
+
+    TestSystem::instance().run();
+
+    return 0;
+}