Merge pull request #929 from dominikrose:mingw-libdc1394-2-windows
authorRoman Donchenko <roman.donchenko@itseez.com>
Fri, 14 Jun 2013 11:46:23 +0000 (15:46 +0400)
committerOpenCV Buildbot <buildbot@opencv.org>
Fri, 14 Jun 2013 11:46:25 +0000 (15:46 +0400)
309 files changed:
.gitattributes
CMakeLists.txt
CONTRIBUTING.md [deleted file]
README
android/android.toolchain.cmake
android/readme.txt [new file with mode: 0644]
android/scripts/build.cmd [deleted file]
android/scripts/cmake_android.cmd [deleted file]
android/scripts/cmake_android_armeabi.sh [deleted file]
android/scripts/cmake_android_mips.sh [deleted file]
android/scripts/cmake_android_neon.sh [deleted file]
android/scripts/cmake_android_service.sh [deleted file]
android/scripts/cmake_android_x86.sh [deleted file]
android/scripts/wincfg.cmd.tmpl [deleted file]
android/service/doc/Makefile [deleted file]
apps/traincascade/boost.cpp
cmake/OpenCVCompilerOptions.cmake
cmake/OpenCVDetectCUDA.cmake
cmake/OpenCVFindLibsGUI.cmake
cmake/OpenCVFindXimea.cmake
cmake/OpenCVGenConfig.cmake
cmake/OpenCVModule.cmake
cmake/OpenCVUtils.cmake
doc/CMakeLists.txt
doc/conf.py
doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst
index.rst
modules/CMakeLists.txt
modules/androidcamera/CMakeLists.txt
modules/calib3d/src/solvepnp.cpp
modules/calib3d/src/stereobm.cpp
modules/core/doc/basic_structures.rst
modules/core/include/opencv2/core/core.hpp
modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst
modules/features2d/src/detectors.cpp
modules/gpu/CMakeLists.txt
modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
modules/gpu/include/opencv2/gpu/device/functional.hpp
modules/gpu/include/opencv2/gpu/device/utility.hpp
modules/gpu/include/opencv2/gpu/device/vec_math.hpp
modules/gpu/perf/perf_filters.cpp
modules/gpu/perf/perf_video.cpp
modules/gpu/src/calib3d.cpp
modules/gpu/src/cascadeclassifier.cpp
modules/gpu/src/cuda/calib3d.cu
modules/gpu/src/cuda/canny.cu
modules/gpu/src/cuda/ccomponetns.cu
modules/gpu/src/cuda/element_operations.cu
modules/gpu/src/cuda/hough.cu
modules/gpu/src/error.cpp
modules/gpu/src/matrix_reductions.cpp
modules/gpu/src/precomp.hpp
modules/gpu/test/test_core.cpp
modules/gpu/test/test_optflow.cpp
modules/highgui/CMakeLists.txt
modules/highgui/src/cap_dshow.cpp
modules/highgui/src/cap_ximea.cpp
modules/highgui/src/window.cpp
modules/imgproc/doc/miscellaneous_transformations.rst
modules/imgproc/src/clahe.cpp [new file with mode: 0644]
modules/imgproc/src/color.cpp
modules/imgproc/src/distransform.cpp
modules/imgproc/src/histogram.cpp
modules/imgproc/src/morph.cpp
modules/java/generator/src/java/android+CameraBridgeViewBase.java
modules/java/generator/src/java/core+MatOfByte.java
modules/java/generator/src/java/core+MatOfDouble.java
modules/java/generator/src/java/core+MatOfFloat.java
modules/java/generator/src/java/core+MatOfFloat4.java
modules/java/generator/src/java/core+MatOfFloat6.java
modules/java/generator/src/java/core+MatOfInt.java
modules/java/generator/src/java/core+MatOfInt4.java
modules/java/generator/src/java/core+MatOfKeyPoint.java
modules/java/generator/src/java/core+MatOfPoint.java
modules/java/generator/src/java/core+MatOfPoint2f.java
modules/java/generator/src/java/core+MatOfPoint3.java
modules/java/generator/src/java/core+MatOfPoint3f.java
modules/java/generator/src/java/core+MatOfRect.java
modules/ml/src/ann_mlp.cpp
modules/ml/src/gbt.cpp
modules/ml/src/knearest.cpp
modules/ml/src/nbayes.cpp
modules/ml/src/svm.cpp
modules/nonfree/doc/background_subtraction.rst [deleted file]
modules/nonfree/doc/nonfree.rst
modules/nonfree/include/opencv2/nonfree/gpu.hpp
modules/nonfree/perf/perf_gpu.cpp
modules/nonfree/src/cuda/vibe.cu [deleted file]
modules/nonfree/src/sift.cpp
modules/nonfree/src/surf.cpp
modules/nonfree/src/surf.ocl.cpp
modules/nonfree/src/vibe_gpu.cpp [deleted file]
modules/nonfree/test/test_gpu.cpp
modules/objdetect/src/cascadedetect.cpp
modules/objdetect/src/latentsvm.cpp
modules/ocl/CMakeLists.txt
modules/ocl/include/opencv2/ocl/ocl.hpp
modules/ocl/include/opencv2/ocl/private/util.hpp
modules/ocl/perf/perf_arithm.cpp
modules/ocl/perf/perf_blend.cpp
modules/ocl/perf/perf_brute_force_matcher.cpp
modules/ocl/perf/perf_canny.cpp
modules/ocl/perf/perf_color.cpp
modules/ocl/perf/perf_columnsum.cpp
modules/ocl/perf/perf_fft.cpp
modules/ocl/perf/perf_filters.cpp
modules/ocl/perf/perf_gemm.cpp
modules/ocl/perf/perf_haar.cpp
modules/ocl/perf/perf_hog.cpp
modules/ocl/perf/perf_imgproc.cpp
modules/ocl/perf/perf_match_template.cpp
modules/ocl/perf/perf_matrix_operation.cpp
modules/ocl/perf/perf_norm.cpp
modules/ocl/perf/perf_opticalflow.cpp [moved from modules/ocl/perf/perf_pyrlk.cpp with 61% similarity]
modules/ocl/perf/perf_pyramid.cpp [moved from modules/ocl/perf/perf_pyrdown.cpp with 70% similarity]
modules/ocl/perf/perf_pyrup.cpp [deleted file]
modules/ocl/perf/perf_split_merge.cpp
modules/ocl/perf/precomp.cpp
modules/ocl/perf/precomp.hpp
modules/ocl/src/arithm.cpp
modules/ocl/src/brute_force_matcher.cpp
modules/ocl/src/canny.cpp
modules/ocl/src/filtering.cpp
modules/ocl/src/gfft.cpp [new file with mode: 0644]
modules/ocl/src/haar.cpp
modules/ocl/src/hog.cpp
modules/ocl/src/imgproc.cpp
modules/ocl/src/initialization.cpp
modules/ocl/src/mcwutil.cpp
modules/ocl/src/opencl/arithm_add.cl
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
modules/ocl/src/opencl/arithm_mul.cl
modules/ocl/src/opencl/filtering_laplacian.cl
modules/ocl/src/opencl/filtering_morph.cl
modules/ocl/src/opencl/haarobjectdetect.cl
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
modules/ocl/src/opencl/imgproc_calcHarris.cl
modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
modules/ocl/src/opencl/imgproc_canny.cl
modules/ocl/src/opencl/imgproc_clahe.cl [new file with mode: 0644]
modules/ocl/src/opencl/imgproc_gfft.cl [new file with mode: 0644]
modules/ocl/src/opencl/imgproc_threshold.cl
modules/ocl/src/opencl/pyr_up.cl
modules/ocl/src/opencl/pyrlk.cl
modules/ocl/src/opencl/tvl1flow.cl [new file with mode: 0644]
modules/ocl/src/precomp.hpp
modules/ocl/src/pyrlk.cpp
modules/ocl/src/safe_call.hpp
modules/ocl/src/tvl1flow.cpp [new file with mode: 0644]
modules/ocl/test/test_canny.cpp
modules/ocl/test/test_gemm.cpp
modules/ocl/test/test_haar.cpp
modules/ocl/test/test_imgproc.cpp
modules/ocl/test/test_optflow.cpp [moved from modules/ocl/test/test_pyrlk.cpp with 54% similarity]
modules/photo/src/denoising.cpp
modules/photo/src/fast_nlmeans_denoising_invoker.hpp
modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
modules/stitching/src/matchers.cpp
modules/stitching/src/motion_estimators.cpp
modules/ts/misc/run.py
modules/video/src/bgfg_gaussmix2.cpp
modules/video/src/lkpyramid.cpp
modules/video/src/lkpyramid.hpp
modules/video/src/video_init.cpp
modules/videostab/src/global_motion.cpp
platforms/android/README.android [moved from android/README.android with 100% similarity]
platforms/android/android.toolchain.cmake [new file with mode: 0644]
platforms/android/java.rst [moved from android/java.rst with 100% similarity]
platforms/android/libinfo/CMakeLists.txt [moved from android/libinfo/CMakeLists.txt with 100% similarity]
platforms/android/libinfo/info.c [moved from android/libinfo/info.c with 100% similarity]
platforms/android/package/AndroidManifest.xml [moved from android/package/AndroidManifest.xml with 100% similarity]
platforms/android/package/CMakeLists.txt [moved from android/package/CMakeLists.txt with 100% similarity]
platforms/android/package/res/drawable/icon.png [moved from android/service/engine/res/drawable/icon.png with 100% similarity]
platforms/android/package/res/values/strings.xml [moved from android/package/res/values/strings.xml with 100% similarity]
platforms/android/refman.rst [moved from android/refman.rst with 100% similarity]
platforms/android/service/CMakeLists.txt [moved from android/service/CMakeLists.txt with 100% similarity]
platforms/android/service/all.py [moved from android/service/all.py with 100% similarity]
platforms/android/service/device.conf [moved from android/service/device.conf with 100% similarity]
platforms/android/service/doc/AndroidAppUsageModel.dia [moved from android/service/doc/AndroidAppUsageModel.dia with 100% similarity]
platforms/android/service/doc/BaseLoaderCallback.rst [moved from android/service/doc/BaseLoaderCallback.rst with 100% similarity]
platforms/android/service/doc/InstallCallbackInterface.rst [moved from android/service/doc/InstallCallbackInterface.rst with 100% similarity]
platforms/android/service/doc/Intro.rst [moved from android/service/doc/Intro.rst with 100% similarity]
platforms/android/service/doc/JavaHelper.rst [moved from android/service/doc/JavaHelper.rst with 100% similarity]
platforms/android/service/doc/LibInstallAproved.dia [moved from android/service/doc/LibInstallAproved.dia with 100% similarity]
platforms/android/service/doc/LibInstallCanceled.dia [moved from android/service/doc/LibInstallCanceled.dia with 100% similarity]
platforms/android/service/doc/LibInstalled.dia [moved from android/service/doc/LibInstalled.dia with 100% similarity]
platforms/android/service/doc/LoaderCallbackInterface.rst [moved from android/service/doc/LoaderCallbackInterface.rst with 100% similarity]
platforms/android/service/doc/NoService.dia [moved from android/service/doc/NoService.dia with 100% similarity]
platforms/android/service/doc/Structure.dia [moved from android/service/doc/Structure.dia with 100% similarity]
platforms/android/service/doc/UseCases.rst [moved from android/service/doc/UseCases.rst with 100% similarity]
platforms/android/service/doc/build_uml.py [moved from android/service/doc/build_uml.py with 100% similarity]
platforms/android/service/doc/img/AndroidAppUsageModel.png [moved from android/service/doc/img/AndroidAppUsageModel.png with 100% similarity]
platforms/android/service/doc/img/LibInstallAproved.png [moved from android/service/doc/img/LibInstallAproved.png with 100% similarity]
platforms/android/service/doc/img/LibInstallCanceled.png [moved from android/service/doc/img/LibInstallCanceled.png with 100% similarity]
platforms/android/service/doc/img/LibInstalled.png [moved from android/service/doc/img/LibInstalled.png with 100% similarity]
platforms/android/service/doc/img/NoService.png [moved from android/service/doc/img/NoService.png with 100% similarity]
platforms/android/service/doc/img/Structure.png [moved from android/service/doc/img/Structure.png with 100% similarity]
platforms/android/service/doc/index.rst [moved from android/service/doc/index.rst with 100% similarity]
platforms/android/service/engine/.classpath [moved from android/service/engine/.classpath with 100% similarity]
platforms/android/service/engine/.project [moved from android/service/engine/.project with 100% similarity]
platforms/android/service/engine/AndroidManifest.xml [moved from android/service/engine/AndroidManifest.xml with 100% similarity]
platforms/android/service/engine/CMakeLists.txt [moved from android/service/engine/CMakeLists.txt with 97% similarity]
platforms/android/service/engine/build.xml [moved from android/service/engine/build.xml with 100% similarity]
platforms/android/service/engine/jni/Android.mk [moved from android/service/engine/jni/Android.mk with 100% similarity]
platforms/android/service/engine/jni/Application.mk [moved from android/service/engine/jni/Application.mk with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp [moved from android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h [moved from android/service/engine/jni/BinderComponent/BnOpenCVEngine.h with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp [moved from android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h [moved from android/service/engine/jni/BinderComponent/BpOpenCVEngine.h with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp [moved from android/service/engine/jni/BinderComponent/HardwareDetector.cpp with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h [moved from android/service/engine/jni/BinderComponent/HardwareDetector.h with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp [moved from android/service/engine/jni/BinderComponent/OpenCVEngine.cpp with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.h [moved from android/service/engine/jni/BinderComponent/OpenCVEngine.h with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/ProcReader.cpp [moved from android/service/engine/jni/BinderComponent/ProcReader.cpp with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/ProcReader.h [moved from android/service/engine/jni/BinderComponent/ProcReader.h with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp [moved from android/service/engine/jni/BinderComponent/StringUtils.cpp with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/StringUtils.h [moved from android/service/engine/jni/BinderComponent/StringUtils.h with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp [moved from android/service/engine/jni/BinderComponent/TegraDetector.cpp with 100% similarity]
platforms/android/service/engine/jni/BinderComponent/TegraDetector.h [moved from android/service/engine/jni/BinderComponent/TegraDetector.h with 100% similarity]
platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp [moved from android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp with 100% similarity]
platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h [moved from android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h with 100% similarity]
platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp [moved from android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp with 100% similarity]
platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h [moved from android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h with 100% similarity]
platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp [moved from android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp with 100% similarity]
platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h [moved from android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h with 100% similarity]
platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp [moved from android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp with 100% similarity]
platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h [moved from android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h with 100% similarity]
platforms/android/service/engine/jni/NativeClient/ClientMain.cpp [moved from android/service/engine/jni/NativeClient/ClientMain.cpp with 100% similarity]
platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp [moved from android/service/engine/jni/NativeService/CommonPackageManager.cpp with 100% similarity]
platforms/android/service/engine/jni/NativeService/CommonPackageManager.h [moved from android/service/engine/jni/NativeService/CommonPackageManager.h with 100% similarity]
platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp [moved from android/service/engine/jni/NativeService/NativePackageManager.cpp with 100% similarity]
platforms/android/service/engine/jni/NativeService/NativePackageManager.h [moved from android/service/engine/jni/NativeService/NativePackageManager.h with 100% similarity]
platforms/android/service/engine/jni/NativeService/PackageInfo.cpp [moved from android/service/engine/jni/NativeService/PackageInfo.cpp with 100% similarity]
platforms/android/service/engine/jni/NativeService/PackageInfo.h [moved from android/service/engine/jni/NativeService/PackageInfo.h with 100% similarity]
platforms/android/service/engine/jni/NativeService/ServiceMain.cpp [moved from android/service/engine/jni/NativeService/ServiceMain.cpp with 100% similarity]
platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp [moved from android/service/engine/jni/Tests/HardwareDetectionTest.cpp with 100% similarity]
platforms/android/service/engine/jni/Tests/OpenCVEngineTest.cpp [moved from android/service/engine/jni/Tests/OpenCVEngineTest.cpp with 100% similarity]
platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp [moved from android/service/engine/jni/Tests/PackageInfoTest.cpp with 99% similarity]
platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp [moved from android/service/engine/jni/Tests/PackageManagerStub.cpp with 100% similarity]
platforms/android/service/engine/jni/Tests/PackageManagerStub.h [moved from android/service/engine/jni/Tests/PackageManagerStub.h with 100% similarity]
platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp [moved from android/service/engine/jni/Tests/PackageManagmentTest.cpp with 99% similarity]
platforms/android/service/engine/jni/Tests/TestMain.cpp [moved from android/service/engine/jni/Tests/TestMain.cpp with 100% similarity]
platforms/android/service/engine/jni/Tests/Tests.mk [moved from android/service/engine/jni/Tests/Tests.mk with 100% similarity]
platforms/android/service/engine/jni/Tests/gtest/gtest-all.cpp [moved from android/service/engine/jni/Tests/gtest/gtest-all.cpp with 100% similarity]
platforms/android/service/engine/jni/Tests/gtest/gtest.h [moved from android/service/engine/jni/Tests/gtest/gtest.h with 100% similarity]
platforms/android/service/engine/jni/include/EngineCommon.h [moved from android/service/engine/jni/include/EngineCommon.h with 100% similarity]
platforms/android/service/engine/jni/include/IOpenCVEngine.h [moved from android/service/engine/jni/include/IOpenCVEngine.h with 100% similarity]
platforms/android/service/engine/jni/include/IPackageManager.h [moved from android/service/engine/jni/include/IPackageManager.h with 100% similarity]
platforms/android/service/engine/jni/include/OpenCVEngineHelper.h [moved from android/service/engine/jni/include/OpenCVEngineHelper.h with 100% similarity]
platforms/android/service/engine/project.properties [moved from android/service/engine_test/project.properties with 100% similarity]
platforms/android/service/engine/res/drawable/icon.png [moved from android/package/res/drawable/icon.png with 100% similarity]
platforms/android/service/engine/res/layout-small/info.xml [moved from android/service/engine/res/layout-small/info.xml with 100% similarity]
platforms/android/service/engine/res/layout-small/main.xml [moved from android/service/engine/res/layout-small/main.xml with 100% similarity]
platforms/android/service/engine/res/layout/info.xml [moved from android/service/engine/res/layout/info.xml with 100% similarity]
platforms/android/service/engine/res/layout/main.xml [moved from android/service/engine/res/layout/main.xml with 100% similarity]
platforms/android/service/engine/res/values/strings.xml [moved from android/service/engine/res/values/strings.xml with 100% similarity]
platforms/android/service/engine/src/org/opencv/engine/BinderConnector.java [moved from android/service/engine/src/org/opencv/engine/BinderConnector.java with 100% similarity]
platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java [moved from android/service/engine/src/org/opencv/engine/HardwareDetector.java with 100% similarity]
platforms/android/service/engine/src/org/opencv/engine/MarketConnector.java [moved from android/service/engine/src/org/opencv/engine/MarketConnector.java with 100% similarity]
platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl [moved from android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl with 100% similarity]
platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java [moved from android/service/engine/src/org/opencv/engine/OpenCVEngineService.java with 100% similarity]
platforms/android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java [moved from android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java with 100% similarity]
platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java [moved from android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java with 100% similarity]
platforms/android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java [moved from android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java with 100% similarity]
platforms/android/service/engine_test/.classpath [moved from android/service/engine_test/.classpath with 100% similarity]
platforms/android/service/engine_test/.project [moved from android/service/engine_test/.project with 100% similarity]
platforms/android/service/engine_test/AndroidManifest.xml [moved from android/service/engine_test/AndroidManifest.xml with 100% similarity]
platforms/android/service/engine_test/build.xml [moved from android/service/engine_test/build.xml with 100% similarity]
platforms/android/service/engine_test/project.properties [moved from android/service/engine/project.properties with 100% similarity]
platforms/android/service/engine_test/res/drawable-hdpi/ic_launcher.png [moved from android/service/engine_test/res/drawable-hdpi/ic_launcher.png with 100% similarity]
platforms/android/service/engine_test/res/drawable-ldpi/ic_launcher.png [moved from android/service/engine_test/res/drawable-ldpi/ic_launcher.png with 100% similarity]
platforms/android/service/engine_test/res/drawable-mdpi/ic_launcher.png [moved from android/service/engine_test/res/drawable-mdpi/ic_launcher.png with 100% similarity]
platforms/android/service/engine_test/res/layout/main.xml [moved from android/service/engine_test/res/layout/main.xml with 100% similarity]
platforms/android/service/engine_test/res/values/strings.xml [moved from android/service/engine_test/res/values/strings.xml with 100% similarity]
platforms/android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java [moved from android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java with 100% similarity]
platforms/android/service/push_native.py [moved from android/service/push_native.py with 100% similarity]
platforms/android/service/readme.txt [moved from android/service/readme.txt with 100% similarity]
platforms/android/service/test_native.py [moved from android/service/test_native.py with 99% similarity]
platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh [deleted file]
platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh [deleted file]
platforms/scripts/ABI_compat_generator.py [moved from android/scripts/ABI_compat_generator.py with 98% similarity]
platforms/scripts/camera_build.conf [moved from android/scripts/camera_build.conf with 100% similarity]
platforms/scripts/cmake_android_all_cameras.py [moved from android/scripts/cmake_android_all_cameras.py with 90% similarity]
platforms/scripts/cmake_android_arm.sh [moved from android/scripts/cmake_android.sh with 50% similarity]
platforms/scripts/cmake_android_mips.sh [new file with mode: 0755]
platforms/scripts/cmake_android_service.sh [new file with mode: 0755]
platforms/scripts/cmake_android_x86.sh [new file with mode: 0755]
platforms/scripts/cmake_arm_gnueabi_hardfp.sh [new file with mode: 0755]
platforms/scripts/cmake_arm_gnueabi_softfp.sh [new file with mode: 0755]
platforms/scripts/cmake_carma.sh [moved from platforms/linux/scripts/cmake_carma.sh with 100% similarity]
platforms/scripts/cmake_winrt.cmd [moved from platforms/winrt/scripts/cmake_winrt.cmd with 100% similarity]
samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java
samples/android/native-activity/.cproject
samples/android/native-activity/.project
samples/android/native-activity/jni/native.cpp
samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp
samples/gpu/bgfg_segm.cpp
samples/gpu/cascadeclassifier_nvidia_api.cpp
samples/gpu/driver_api_multi.cpp
samples/gpu/driver_api_stereo_multi.cpp
samples/ocl/aloe-L.png [deleted file]
samples/ocl/aloe-R.png [deleted file]
samples/ocl/aloe-disp.png [deleted file]
samples/ocl/facedetect.cpp
samples/ocl/hog.cpp
samples/ocl/pyrlk_optical_flow.cpp [new file with mode: 0644]
samples/ocl/stereo_match.cpp [new file with mode: 0644]
samples/ocl/surf_matcher.cpp
samples/python2/grabcut.py [new file with mode: 0644]

index af704cd..cd4359b 100644 (file)
@@ -33,7 +33,7 @@
 CMakeLists.txt  text whitespace=tabwidth=2
 
 *.png       binary
-*.jepg      binary
+*.jpeg      binary
 *.jpg       binary
 *.exr       binary
 *.ico       binary
index e941f59..f464b22 100644 (file)
@@ -299,6 +299,10 @@ set(OPENCV_CONFIG_FILE_INCLUDE_DIR "${CMAKE_BINARY_DIR}/" CACHE PATH "Where to c
 add_definitions(-DHAVE_CVCONFIG_H)
 ocv_include_directories(${OPENCV_CONFIG_FILE_INCLUDE_DIR})
 
+# ----------------------------------------------------------------------------
+#  Path for additional modules
+# ----------------------------------------------------------------------------
+set(OPENCV_EXTRA_MODULES_PATH "" CACHE PATH "Where to look for additional OpenCV modules")
 
 # ----------------------------------------------------------------------------
 #  Autodetect if we are in a GIT repository
@@ -415,7 +419,7 @@ if(ANDROID)
   if(NOT ANDROID_TOOLS_Pkg_Revision GREATER 13)
     message(WARNING "OpenCV requires Android SDK tools revision 14 or newer. Otherwise tests and samples will no be compiled.")
   endif()
-elseif(ANT_EXECUTABLE)
+else()
   find_package(JNI)
 endif()
 
@@ -469,15 +473,15 @@ if(BUILD_EXAMPLES OR BUILD_ANDROID_EXAMPLES OR INSTALL_PYTHON_EXAMPLES)
 endif()
 
 if(ANDROID)
-  add_subdirectory(android/service)
+  add_subdirectory(platforms/android/service)
 endif()
 
 if(BUILD_ANDROID_PACKAGE)
-  add_subdirectory(android/package)
+  add_subdirectory(platforms/android/package)
 endif()
 
 if (ANDROID)
-  add_subdirectory(android/libinfo)
+  add_subdirectory(platforms/android/libinfo)
 endif()
 
 # ----------------------------------------------------------------------------
@@ -843,7 +847,7 @@ status("    ant:"           ANT_EXECUTABLE      THEN "${ANT_EXECUTABLE} (ver ${A
 if(NOT ANDROID)
   status("    JNI:"         JNI_INCLUDE_DIRS    THEN "${JNI_INCLUDE_DIRS}"                                       ELSE NO)
 endif()
-status("    Java tests:"    BUILD_TESTS AND (NOT ANDROID OR CAN_BUILD_ANDROID_PROJECTS)                 THEN YES ELSE NO)
+status("    Java tests:"    BUILD_TESTS AND (CAN_BUILD_ANDROID_PROJECTS OR HAVE_opencv_java)            THEN YES ELSE NO)
 
 # ========================== documentation ==========================
 if(BUILD_DOCS)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 100644 (file)
index 8fc54b1..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-We greatly appreciate your support and contributions and they are always welcomed!
-
-Github pull requests are the convenient way to contribute to OpenCV project. Good pull requests have all of these attributes:
-
-* Are scoped to one specific issue
-* Include a test to demonstrate the correctness
-* Update the docs if relevant
-* Match the [coding style guidelines](http://code.opencv.org/projects/opencv/wiki/CodingStyleGuide)
-* Don't messed by "oops" commits
-
-You can find more detailes about contributing process on http://opencv.org/contribute.html
\ No newline at end of file
diff --git a/README b/README
index 9dd45a2..0799dff 100644 (file)
--- a/README
+++ b/README
@@ -4,3 +4,14 @@ Homepage:    http://opencv.org
 Online docs: http://docs.opencv.org
 Q&A forum:   http://answers.opencv.org
 Dev zone:    http://code.opencv.org
+
+Please read before starting work on a pull request:
+  http://code.opencv.org/projects/opencv/wiki/How_to_contribute
+
+Summary of guidelines:
+
+* One pull request per issue;
+* Choose the right base branch;
+* Include tests and documentation;
+* Clean up "oops" commits before submitting;
+* Follow the coding style guide.
index 0f7e340..9db174a 100644 (file)
@@ -1,3 +1,6 @@
+message(STATUS "Android toolchain was moved to platforms/android!")
+message(STATUS "This file is deprecated and will be removed!")
+
 # Copyright (c) 2010-2011, Ethan Rublee
 # Copyright (c) 2011-2013, Andrey Kamaev
 # All rights reserved.
 #   - March 2013
 #     [+] updated for NDK r8e (x86 version)
 #     [+] support x86_64 version of NDK
+#   - April 2013
+#     [+] support non-release NDK layouts (from Linaro git and Android git)
+#     [~] automatically detect if explicit link to crtbegin_*.o is needed
 # ------------------------------------------------------------------------------
 
 cmake_minimum_required( VERSION 2.6.3 )
@@ -516,24 +522,19 @@ if( NOT ANDROID_NDK )
   endif( ANDROID_NDK )
  endif( NOT ANDROID_STANDALONE_TOOLCHAIN )
 endif( NOT ANDROID_NDK )
+
 # remember found paths
 if( ANDROID_NDK )
  get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE )
- # try to detect change
- if( CMAKE_AR )
-  string( LENGTH "${ANDROID_NDK}" __length )
-  string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
-  if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK )
-   message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first.
-   " )
-  endif()
-  unset( __androidNdkPreviousPath )
-  unset( __length )
- endif()
  set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE )
  set( BUILD_WITH_ANDROID_NDK True )
- file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? )
- string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" )
+ if( EXISTS "${ANDROID_NDK}/RELEASE.TXT" )
+  file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? )
+  string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" )
+ else()
+  set( ANDROID_NDK_RELEASE "r1x" )
+  set( ANDROID_NDK_RELEASE_FULL "unreleased" )
+ endif()
 elseif( ANDROID_STANDALONE_TOOLCHAIN )
  get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE )
  # try to detect change
@@ -560,6 +561,51 @@ else()
       sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" )
 endif()
 
+# android NDK layout
+if( BUILD_WITH_ANDROID_NDK )
+ if( NOT DEFINED ANDROID_NDK_LAYOUT )
+  # try to automatically detect the layout
+  if( EXISTS "${ANDROID_NDK}/RELEASE.TXT")
+   set( ANDROID_NDK_LAYOUT "RELEASE" )
+  elseif( EXISTS "${ANDROID_NDK}/../../linux-x86/toolchain/" )
+   set( ANDROID_NDK_LAYOUT "LINARO" )
+  elseif( EXISTS "${ANDROID_NDK}/../../gcc/" )
+   set( ANDROID_NDK_LAYOUT "ANDROID" )
+  endif()
+ endif()
+ set( ANDROID_NDK_LAYOUT "${ANDROID_NDK_LAYOUT}" CACHE STRING "The inner layout of NDK" )
+ mark_as_advanced( ANDROID_NDK_LAYOUT )
+ if( ANDROID_NDK_LAYOUT STREQUAL "LINARO" )
+  set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
+  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../${ANDROID_NDK_HOST_SYSTEM_NAME}/toolchain" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
+ elseif( ANDROID_NDK_LAYOUT STREQUAL "ANDROID" )
+  set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
+  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../gcc/${ANDROID_NDK_HOST_SYSTEM_NAME}/arm" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
+ else() # ANDROID_NDK_LAYOUT STREQUAL "RELEASE"
+  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/toolchains" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME2}" )
+ endif()
+ get_filename_component( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK_TOOLCHAINS_PATH}" ABSOLUTE )
+
+ # try to detect change of NDK
+ if( CMAKE_AR )
+  string( LENGTH "${ANDROID_NDK_TOOLCHAINS_PATH}" __length )
+  string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
+  if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK_TOOLCHAINS_PATH )
+   message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first.
+   " )
+  endif()
+  unset( __androidNdkPreviousPath )
+  unset( __length )
+ endif()
+endif()
+
+
 # get all the details about standalone toolchain
 if( BUILD_WITH_STANDALONE_TOOLCHAIN )
  __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" )
@@ -587,17 +633,23 @@ if( BUILD_WITH_STANDALONE_TOOLCHAIN )
  endif()
 endif()
 
-macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __host_system_name )
+macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath )
  foreach( __toolchain ${${__availableToolchainsLst}} )
-  if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK}/toolchains/${__toolchain}/prebuilt/" )
+  if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" )
    string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" )
   else()
    set( __gcc_toolchain "${__toolchain}" )
   endif()
-  __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${__host_system_name}" )
+  __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK_TOOLCHAINS_PATH}/${__gcc_toolchain}${__toolchain_subpath}" )
   if( __machine )
-   string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?$" __version "${__gcc_toolchain}" )
-   string( REGEX MATCH "^[^-]+" __arch "${__gcc_toolchain}" )
+   string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9x]+)?$" __version "${__gcc_toolchain}" )
+   if( __machine MATCHES i686 )
+    set( __arch "x86" )
+   elseif( __machine MATCHES arm )
+    set( __arch "arm" )
+   elseif( __machine MATCHES mipsel )
+    set( __arch "mipsel" )
+   endif()
    list( APPEND __availableToolchainMachines "${__machine}" )
    list( APPEND __availableToolchainArchs "${__arch}" )
    list( APPEND __availableToolchainCompilerVersions "${__version}" )
@@ -615,29 +667,29 @@ if( BUILD_WITH_ANDROID_NDK )
  set( __availableToolchainMachines "" )
  set( __availableToolchainArchs "" )
  set( __availableToolchainCompilerVersions "" )
- if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK}/toolchains/${ANDROID_TOOLCHAIN_NAME}/" )
+ if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_TOOLCHAIN_NAME}/" )
   # do not go through all toolchains if we know the name
   set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" )
-  __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} )
-  if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 )
-   __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+  __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+  if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
+   __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
    if( __availableToolchains )
-    set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+    set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
    endif()
   endif()
  endif()
  if( NOT __availableToolchains )
-  file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" )
+  file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK_TOOLCHAINS_PATH}" "${ANDROID_NDK_TOOLCHAINS_PATH}/*" )
-  if( __availableToolchains )
+  if( __availableToolchainsLst )
    list(SORT __availableToolchainsLst) # we need clang to go after gcc
   endif()
   __LIST_FILTER( __availableToolchainsLst "^[.]" )
   __LIST_FILTER( __availableToolchainsLst "llvm" )
-  __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} )
-  if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 )
-   __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+  __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+  if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
+   __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
    if( __availableToolchains )
-    set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+    set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
    endif()
   endif()
  endif()
@@ -768,6 +820,7 @@ else()
   list( GET __availableToolchainArchs ${__idx} __toolchainArch )
   if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
    list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion )
+   string( REPLACE "x" "99" __toolchainVersion "${__toolchainVersion}")
    if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion )
     set( __toolchainMaxVersion "${__toolchainVersion}" )
     set( __toolchainIdx ${__idx} )
@@ -971,11 +1024,11 @@ if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" )
 elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" )
  string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}")
  string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
- if( NOT EXISTS "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}/bin/clang${TOOL_OS_SUFFIX}" )
+ if( NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}/bin/clang${TOOL_OS_SUFFIX}" )
   message( FATAL_ERROR "Could not find the Clang compiler driver" )
  endif()
  set( ANDROID_COMPILER_IS_CLANG 1 )
- set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+ set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
 else()
  set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
  unset( ANDROID_COMPILER_IS_CLANG CACHE )
@@ -989,7 +1042,7 @@ endif()
 
 # setup paths and STL for NDK
 if( BUILD_WITH_ANDROID_NDK )
- set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+ set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
  set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" )
 
  if( ANDROID_STL STREQUAL "none" )
@@ -1048,11 +1101,11 @@ if( BUILD_WITH_ANDROID_NDK )
  endif()
  # find libsupc++.a - rtti & exceptions
  if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" )
-  if( ANDROID_NDK_RELEASE STRGREATER "r8" ) # r8b
-   set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
-  elseif( NOT ANDROID_NDK_RELEASE STRLESS "r7" AND ANDROID_NDK_RELEASE STRLESS "r8b")
-   set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
-  else( ANDROID_NDK_RELEASE STRLESS "r7" )
+  set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r8b or newer
+  if( NOT EXISTS "${__libsupcxx}" )
+   set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r7-r8
+  endif()
+  if( NOT EXISTS "${__libsupcxx}" ) # before r7
    if( ARMEABI_V7A )
     if( ANDROID_FORCE_ARM_BUILD )
      set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" )
@@ -1102,7 +1155,7 @@ unset( _ndk_ccache )
 
 # setup the cross-compiler
 if( NOT CMAKE_C_COMPILER )
- if( NDK_CCACHE )
+ if( NDK_CCACHE AND NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
   set( CMAKE_C_COMPILER   "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" )
   set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" )
   if( ANDROID_COMPILER_IS_CLANG )
@@ -1174,11 +1227,25 @@ set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm )
 remove_definitions( -DANDROID )
 add_definitions( -DANDROID )
 
-if(ANDROID_SYSROOT MATCHES "[ ;\"]")
- set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
+if( ANDROID_SYSROOT MATCHES "[ ;\"]" )
+ if( CMAKE_HOST_WIN32 )
+  # try to convert path to 8.3 form
+  file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "@echo %~s1" )
+  execute_process( COMMAND "$ENV{ComSpec}" /c "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "${ANDROID_SYSROOT}"
+                   OUTPUT_VARIABLE __path OUTPUT_STRIP_TRAILING_WHITESPACE
+                   RESULT_VARIABLE __result ERROR_QUIET )
+  if( __result EQUAL 0 )
+   file( TO_CMAKE_PATH "${__path}" ANDROID_SYSROOT )
+   set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
+  else()
+   set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
+  endif()
+ else()
+  set( ANDROID_CXX_FLAGS "'--sysroot=${ANDROID_SYSROOT}'" )
+ endif()
  if( NOT _CMAKE_IN_TRY_COMPILE )
-  # quotes will break try_compile and compiler identification
-  message(WARNING "Your Android system root has non-alphanumeric symbols. It can break compiler features detection and the whole build.")
+  # quotes can break try_compile and compiler identification
+  message(WARNING "Path to your Android NDK (or toolchain) has non-alphanumeric symbols.\nThe build might be broken.\n")
  endif()
 else()
  set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
@@ -1249,22 +1316,18 @@ elseif( ARMEABI )
  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" )
 endif()
 
+if( ANDROID_STL MATCHES "gnustl" AND (EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}") )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE  "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_LINK_EXECUTABLE       "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+else()
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE  "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_LINK_EXECUTABLE       "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+endif()
+
 # STL
 if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" )
- if( ANDROID_STL MATCHES "gnustl" )
-  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
-  set( CMAKE_CXX_CREATE_SHARED_MODULE  "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
-  set( CMAKE_CXX_LINK_EXECUTABLE       "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
- else()
-  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
-  set( CMAKE_CXX_CREATE_SHARED_MODULE  "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
-  set( CMAKE_CXX_LINK_EXECUTABLE       "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
- endif()
- if ( X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" )
-  # workaround "undefined reference to `__dso_handle'" problem
-  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
-  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
- endif()
  if( EXISTS "${__libstl}" )
   set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" )
   set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" )
@@ -1283,9 +1346,12 @@ if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" )
   set( CMAKE_C_LINK_EXECUTABLE       "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
  endif()
  if( ANDROID_STL MATCHES "gnustl" )
-  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} -lm" )
-  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} -lm" )
-  set( CMAKE_CXX_LINK_EXECUTABLE       "${CMAKE_CXX_LINK_EXECUTABLE} -lm" )
+  if( NOT EXISTS "${ANDROID_LIBM_PATH}" )
+   set( ANDROID_LIBM_PATH -lm )
+  endif()
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} ${ANDROID_LIBM_PATH}" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} ${ANDROID_LIBM_PATH}" )
+  set( CMAKE_CXX_LINK_EXECUTABLE       "${CMAKE_CXX_LINK_EXECUTABLE} ${ANDROID_LIBM_PATH}" )
  endif()
 endif()
 
@@ -1321,7 +1387,14 @@ if( ARMEABI_V7A )
 endif()
 
 if( ANDROID_NO_UNDEFINED )
- set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" )
+ if( MIPS )
+  # there is some sysroot-related problem in mips linker...
+  if( NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
+   set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined -Wl,-rpath-link,${ANDROID_SYSROOT}/usr/lib" )
+  endif()
+ else()
+  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" )
+ endif()
 endif()
 
 if( ANDROID_SO_UNDEFINED )
@@ -1401,9 +1474,9 @@ set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FL
 set( CMAKE_EXE_LINKER_FLAGS    "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" )
 
 if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" )
- set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" )
- set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" )
- set( CMAKE_EXE_LINKER_FLAGS    "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" )
+ set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" )
+ set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" )
+ set( CMAKE_EXE_LINKER_FLAGS    "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" )
 endif()
 
 # configure rtti
@@ -1430,6 +1503,43 @@ endif()
 include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} )
 link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
 
+# detect whether crtbegin_so.o needs to be linked explicitly
+if( NOT DEFINED ANDROID_EXPLICIT_CRT_LINK )
+ set( __cmd "${CMAKE_CXX_CREATE_SHARED_LIBRARY}" )
+ string( REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_C_COMPILER>"   "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}"   __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_CXX_FLAGS>" "${CMAKE_CXX_FLAGS}" __cmd "${__cmd}" )
+ string( REPLACE "<LANGUAGE_COMPILE_FLAGS>" "" __cmd "${__cmd}" )
+ string( REPLACE "<LINK_FLAGS>" "${CMAKE_SHARED_LINKER_FLAGS}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS>" "-shared" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG>" "" __cmd "${__cmd}" )
+ string( REPLACE "<TARGET_SONAME>" "" __cmd "${__cmd}" )
+ string( REPLACE "<TARGET>" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain_crtlink_test.so" __cmd "${__cmd}" )
+ string( REPLACE "<OBJECTS>" "\"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" __cmd "${__cmd}" )
+ string( REPLACE "<LINK_LIBRARIES>" "" __cmd "${__cmd}" )
+ separate_arguments( __cmd )
+ foreach( __var ANDROID_NDK ANDROID_NDK_TOOLCHAINS_PATH ANDROID_STANDALONE_TOOLCHAIN )
+  if( ${__var} )
+   set( __tmp "${${__var}}" )
+   separate_arguments( __tmp )
+   string( REPLACE "${__tmp}" "${${__var}}" __cmd "${__cmd}")
+  endif()
+ endforeach()
+ string( REPLACE "'" "" __cmd "${__cmd}" )
+ string( REPLACE "\"" "" __cmd "${__cmd}" )
+ execute_process( COMMAND ${__cmd} RESULT_VARIABLE __cmd_result OUTPUT_QUIET ERROR_QUIET )
+ if( __cmd_result EQUAL 0 )
+  set( ANDROID_EXPLICIT_CRT_LINK ON )
+ else()
+  set( ANDROID_EXPLICIT_CRT_LINK OFF )
+ endif()
+endif()
+
+if( ANDROID_EXPLICIT_CRT_LINK )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+endif()
+
 # setup output directories
 set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" )
 set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" )
@@ -1521,6 +1631,7 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" )
  foreach( __var NDK_CCACHE  LIBRARY_OUTPUT_PATH_ROOT  ANDROID_FORBID_SYGWIN  ANDROID_SET_OBSOLETE_VARIABLES
                 ANDROID_NDK_HOST_X64
                 ANDROID_NDK
+                ANDROID_NDK_LAYOUT
                 ANDROID_STANDALONE_TOOLCHAIN
                 ANDROID_TOOLCHAIN_NAME
                 ANDROID_ABI
@@ -1534,6 +1645,8 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" )
                 ANDROID_GOLD_LINKER
                 ANDROID_NOEXECSTACK
                 ANDROID_RELRO
+                ANDROID_LIBM_PATH
+                ANDROID_EXPLICIT_CRT_LINK
                 )
   if( DEFINED ${__var} )
    if( "${__var}" MATCHES " ")
@@ -1577,6 +1690,7 @@ endif()
 #   ANDROID_STANDALONE_TOOLCHAIN
 #   ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain
 #   ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems)
+#   ANDROID_NDK_LAYOUT : the inner NDK structure (RELEASE, LINARO, ANDROID)
 #   LIBRARY_OUTPUT_PATH_ROOT : <any valid path>
 #   NDK_CCACHE : <path to your ccache executable>
 # Obsolete:
@@ -1622,6 +1736,7 @@ endif()
 #   ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime
 #   ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used
 #   ANDROID_CLANG_VERSION : version of clang compiler if clang is used
+#   ANDROID_LIBM_PATH : path to libm.so (set to something like $(TOP)/out/target/product/<product_name>/obj/lib/libm.so) to work around unresolved `sincos`
 #
 # Defaults:
 #   ANDROID_DEFAULT_NDK_API_LEVEL
diff --git a/android/readme.txt b/android/readme.txt
new file mode 100644 (file)
index 0000000..2d5f396
--- /dev/null
@@ -0,0 +1 @@
+All Android-specific sources have been moved to platforms/android.
\ No newline at end of file
diff --git a/android/scripts/build.cmd b/android/scripts/build.cmd
deleted file mode 100644 (file)
index 3e0f166..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-@ECHO OFF
-
-:: enable command extensions
-VERIFY BADVALUE 2>NUL
-SETLOCAL ENABLEEXTENSIONS || (ECHO Unable to enable command extensions. & EXIT \B)
-
-:: build environment
-SET SOURCE_DIR=%cd%
-IF EXIST .\android.toolchain.cmake (SET BUILD_OPENCV=1) ELSE (SET BUILD_OPENCV=0)
-IF EXIST .\jni\nul (SET BUILD_JAVA_PART=1) ELSE (SET BUILD_JAVA_PART=0)
-
-:: load configuration
-PUSHD %~dp0
-SET SCRIPTS_DIR=%cd%
-IF EXIST .\wincfg.cmd CALL .\wincfg.cmd
-POPD
-
-:: inherit old names
-IF NOT DEFINED CMAKE SET CMAKE=%CMAKE_EXE%
-IF NOT DEFINED MAKE SET MAKE=%MAKE_EXE%
-
-:: defaults
-IF NOT DEFINED BUILD_DIR SET BUILD_DIR=build
-IF NOT DEFINED ANDROID_ABI SET ANDROID_ABI=armeabi-v7a
-SET OPENCV_BUILD_DIR=%SCRIPTS_DIR%\..\%BUILD_DIR%
-
-:: check that all required variables defined
-PUSHD .
-IF NOT DEFINED ANDROID_NDK (ECHO. & ECHO You should set an environment variable ANDROID_NDK to the full path to your copy of Android NDK & GOTO end)
-(CD "%ANDROID_NDK%") || (ECHO. & ECHO Directory "%ANDROID_NDK%" specified by ANDROID_NDK variable does not exist & GOTO end)
-
-IF NOT EXIST "%CMAKE%" (ECHO. & ECHO You should set an environment variable CMAKE to the full path to cmake executable & GOTO end)
-IF NOT EXIST "%MAKE%" (ECHO. & ECHO You should set an environment variable MAKE to the full path to native port of make executable & GOTO end)
-
-IF NOT %BUILD_JAVA_PART%==1 GOTO required_variables_checked
-
-IF NOT DEFINED ANDROID_SDK (ECHO. & ECHO You should set an environment variable ANDROID_SDK to the full path to your copy of Android SDK & GOTO end)
-(CD "%ANDROID_SDK%" 2>NUL) || (ECHO. & ECHO Directory "%ANDROID_SDK%" specified by ANDROID_SDK variable does not exist & GOTO end)
-
-IF NOT DEFINED ANT_DIR (ECHO. & ECHO You should set an environment variable ANT_DIR to the full path to Apache Ant root & GOTO end)
-(CD "%ANT_DIR%" 2>NUL) || (ECHO. & ECHO Directory "%ANT_DIR%" specified by ANT_DIR variable does not exist & GOTO end)
-
-IF NOT DEFINED JAVA_HOME (ECHO. & ECHO You should set an environment variable JAVA_HOME to the full path to JDK & GOTO end)
-(CD "%JAVA_HOME%" 2>NUL) || (ECHO. & ECHO Directory "%JAVA_HOME%" specified by JAVA_HOME variable does not exist & GOTO end)
-
-:required_variables_checked
-POPD
-
-:: check for ninja
-echo "%MAKE%"|findstr /i ninja >nul:
-IF %errorlevel%==1 (SET BUILD_WITH_NINJA=0) ELSE (SET BUILD_WITH_NINJA=1)
-IF %BUILD_WITH_NINJA%==1 (SET CMAKE_GENERATOR=Ninja) ELSE (SET CMAKE_GENERATOR=MinGW Makefiles)
-
-:: create build dir
-IF DEFINED REBUILD rmdir /S /Q "%BUILD_DIR%" 2>NUL
-MKDIR "%BUILD_DIR%" 2>NUL
-PUSHD "%BUILD_DIR%" || (ECHO. & ECHO Directory "%BUILD_DIR%" is not found & GOTO end)
-
-:: run cmake
-ECHO. & ECHO Runnning cmake...
-ECHO ANDROID_ABI=%ANDROID_ABI%
-ECHO.
-IF NOT %BUILD_OPENCV%==1 GOTO other-cmake
-:opencv-cmake
-("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DCMAKE_TOOLCHAIN_FILE="%SOURCE_DIR%"\android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%\..") && GOTO cmakefin
-ECHO. & ECHO cmake failed &    GOTO end
-:other-cmake
-("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DOpenCV_DIR="%OPENCV_BUILD_DIR%" -DCMAKE_TOOLCHAIN_FILE="%OPENCV_BUILD_DIR%\..\android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%") && GOTO cmakefin
-ECHO. & ECHO cmake failed &    GOTO end
-:cmakefin
-
-:: run make
-ECHO. & ECHO Building native libs...
-IF %BUILD_WITH_NINJA%==0 ("%MAKE%" -j %NUMBER_OF_PROCESSORS% VERBOSE=%VERBOSE%) || (ECHO. & ECHO make failed & GOTO end)
-IF %BUILD_WITH_NINJA%==1 ("%MAKE%") || (ECHO. & ECHO ninja failed & GOTO end)
-
-IF NOT %BUILD_JAVA_PART%==1 GOTO end
-POPD && PUSHD %SOURCE_DIR%
-
-:: configure java part
-ECHO. & ECHO Updating Android project...
-(CALL "%ANDROID_SDK%\tools\android" update project --name %PROJECT_NAME% --path .) || (ECHO. & ECHO failed to update android project & GOTO end)
-
-:: compile java part
-ECHO. & ECHO Compiling Android project...
-(CALL "%ANT_DIR%\bin\ant" debug) || (ECHO. & ECHO failed to compile android project & GOTO end)
-
-:end
-POPD
-ENDLOCAL
diff --git a/android/scripts/cmake_android.cmd b/android/scripts/cmake_android.cmd
deleted file mode 100644 (file)
index 212c04b..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-@ECHO OFF
-
-PUSHD %~dp0..
-CALL .\scripts\build.cmd %* -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
-POPD
\ No newline at end of file
diff --git a/android/scripts/cmake_android_armeabi.sh b/android/scripts/cmake_android_armeabi.sh
deleted file mode 100755 (executable)
index 9c711d8..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_armeabi
-cd build_armeabi
-
-cmake -DANDROID_ABI=armeabi -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
diff --git a/android/scripts/cmake_android_mips.sh b/android/scripts/cmake_android_mips.sh
deleted file mode 100755 (executable)
index 17d2ff9..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_mips
-cd build_mips
-
-cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
diff --git a/android/scripts/cmake_android_neon.sh b/android/scripts/cmake_android_neon.sh
deleted file mode 100755 (executable)
index 5e85605..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_neon
-cd build_neon
-
-cmake -DANDROID_ABI="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
diff --git a/android/scripts/cmake_android_service.sh b/android/scripts/cmake_android_service.sh
deleted file mode 100755 (executable)
index 0dbd482..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_service
-cd build_service
-
-cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../..
diff --git a/android/scripts/cmake_android_x86.sh b/android/scripts/cmake_android_x86.sh
deleted file mode 100755 (executable)
index a01df2e..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-
-cd `dirname $0`/..
-
-mkdir -p build_x86
-cd build_x86
-
-cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
diff --git a/android/scripts/wincfg.cmd.tmpl b/android/scripts/wincfg.cmd.tmpl
deleted file mode 100644 (file)
index 166a5e7..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-:: variables required for OpenCV build ::
-:: Note: all pathes should be specified without tailing slashes!
-SET ANDROID_NDK=C:\full\path\to\your\copy\of\android\NDK\android-ndk-r7b
-SET CMAKE_EXE=C:\full\path\to\cmake\utility\cmake.exe
-SET MAKE_EXE=%ANDROID_NDK%\prebuilt\windows\bin\make.exe
-
-:: variables required for android-opencv build ::
-SET ANDROID_SDK=C:\full\path\to\your\copy\of\android\SDK\android-sdk-windows
-SET ANT_DIR=C:\full\path\to\ant\directory\apache-ant-1.8.2
-SET JAVA_HOME=C:\full\path\to\JDK\jdk1.6.0_25
-
-:: configuration options ::
-:::: general ARM-V7 settings
-SET ANDROID_ABI=armeabi-v7a
-SET BUILD_DIR=build
-
-:::: uncomment following lines to compile for old emulator or old device
-::SET ANDROID_ABI=armeabi
-::SET BUILD_DIR=build_armeabi
-
-:::: uncomment following lines to compile for ARM-V7 with NEON support
-::SET ANDROID_ABI=armeabi-v7a with NEON
-::SET BUILD_DIR=build_neon
-
-:::: uncomment following lines to compile for x86
-::SET ANDROID_ABI=x86
-::SET BUILD_DIR=build_x86
-
-:::: other options
-::SET ANDROID_NATIVE_API_LEVEL=8   &:: android-3 is enough for native part of OpenCV but android-8 is required for Java API
diff --git a/android/service/doc/Makefile b/android/service/doc/Makefile
deleted file mode 100644 (file)
index b8e7bba..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS    =
-SPHINXBUILD   = sphinx-build
-PAPER         =
-BUILDDIR      = _build
-
-# Internal variables.
-PAPEROPT_a4     = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
-
-help:
-       @echo "Please use \`make <target>' where <target> is one of"
-       @echo "  html      to make standalone HTML files"
-       @echo "  dirhtml   to make HTML files named index.html in directories"
-       @echo "  pickle    to make pickle files"
-       @echo "  json      to make JSON files"
-       @echo "  htmlhelp  to make HTML files and a HTML help project"
-       @echo "  qthelp    to make HTML files and a qthelp project"
-       @echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
-       @echo "  changes   to make an overview of all changed/added/deprecated items"
-       @echo "  linkcheck to check all external links for integrity"
-       @echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
-
-clean:
-       -rm -rf $(BUILDDIR)/*
-
-html:
-       $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
-       @echo
-       @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
-       $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
-       @echo
-       @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-pickle:
-       $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
-       @echo
-       @echo "Build finished; now you can process the pickle files."
-
-json:
-       $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
-       @echo
-       @echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
-       $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
-       @echo
-       @echo "Build finished; now you can run HTML Help Workshop with the" \
-             ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
-       $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
-       @echo
-       @echo "Build finished; now you can run "qcollectiongenerator" with the" \
-             ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
-       @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OpenCVEngine.qhcp"
-       @echo "To view the help file:"
-       @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OpenCVEngine.qhc"
-
-latex:
-       $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-       @echo
-       @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
-       @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
-             "run these through (pdf)latex."
-
-changes:
-       $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
-       @echo
-       @echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
-       $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
-       @echo
-       @echo "Link check complete; look for any errors in the above output " \
-             "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
-       $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
-       @echo "Testing of doctests in the sources finished, look at the " \
-             "results in $(BUILDDIR)/doctest/output.txt."
index 2d29f33..4f91d5a 100644 (file)
@@ -766,7 +766,7 @@ float CvCascadeBoostTrainData::getVarValue( int vi, int si )
 }
 
 
-struct FeatureIdxOnlyPrecalc
+struct FeatureIdxOnlyPrecalc : ParallelLoopBody
 {
     FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
     {
@@ -776,11 +776,11 @@ struct FeatureIdxOnlyPrecalc
         idst = _buf->data.i;
         is_buf_16u = _is_buf_16u;
     }
-    void operator()( const BlockedRange& range ) const
+    void operator()( const Range& range ) const
     {
         cv::AutoBuffer<float> valCache(sample_count);
         float* valCachePtr = (float*)valCache;
-        for ( int fi = range.begin(); fi < range.end(); fi++)
+        for ( int fi = range.start; fi < range.end; fi++)
         {
             for( int si = 0; si < sample_count; si++ )
             {
@@ -803,7 +803,7 @@ struct FeatureIdxOnlyPrecalc
     bool is_buf_16u;
 };
 
-struct FeatureValAndIdxPrecalc
+struct FeatureValAndIdxPrecalc : ParallelLoopBody
 {
     FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
     {
@@ -814,9 +814,9 @@ struct FeatureValAndIdxPrecalc
         idst = _buf->data.i;
         is_buf_16u = _is_buf_16u;
     }
-    void operator()( const BlockedRange& range ) const
+    void operator()( const Range& range ) const
     {
-        for ( int fi = range.begin(); fi < range.end(); fi++)
+        for ( int fi = range.start; fi < range.end; fi++)
         {
             for( int si = 0; si < sample_count; si++ )
             {
@@ -840,7 +840,7 @@ struct FeatureValAndIdxPrecalc
     bool is_buf_16u;
 };
 
-struct FeatureValOnlyPrecalc
+struct FeatureValOnlyPrecalc : ParallelLoopBody
 {
     FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
     {
@@ -848,9 +848,9 @@ struct FeatureValOnlyPrecalc
         valCache = _valCache;
         sample_count = _sample_count;
     }
-    void operator()( const BlockedRange& range ) const
+    void operator()( const Range& range ) const
     {
-        for ( int fi = range.begin(); fi < range.end(); fi++)
+        for ( int fi = range.start; fi < range.end; fi++)
             for( int si = 0; si < sample_count; si++ )
                 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
     }
@@ -864,12 +864,12 @@ void CvCascadeBoostTrainData::precalculate()
     int minNum = MIN( numPrecalcVal, numPrecalcIdx);
 
     double proctime = -TIME( 0 );
-    parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx),
-                  FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
-    parallel_for( BlockedRange(0, minNum),
-                  FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
-    parallel_for( BlockedRange(minNum, numPrecalcVal),
-                  FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
+    parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
+                   FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
+    parallel_for_( Range(0, minNum),
+                   FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
+    parallel_for_( Range(minNum, numPrecalcVal),
+                   FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
     cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
 }
 
index aeed112..7a91b18 100644 (file)
@@ -47,6 +47,9 @@ macro(add_extra_compiler_option option)
   endif()
 endmacro()
 
+# OpenCV fails some tests when 'char' is 'unsigned' by default
+add_extra_compiler_option(-fsigned-char)
+
 if(MINGW)
   # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40838
   # here we are trying to workaround the problem
index f3d101a..8db6677 100644 (file)
@@ -26,6 +26,15 @@ if(CUDA_FOUND)
     set(HAVE_CUBLAS 1)
   endif()
 
+  if(${CUDA_VERSION} VERSION_LESS "5.5")
+    find_cuda_helper_libs(npp)
+  else()
+    find_cuda_helper_libs(nppc)
+    find_cuda_helper_libs(nppi)
+    find_cuda_helper_libs(npps)
+    set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
+  endif()
+
   if(WITH_NVCUVID)
     find_cuda_helper_libs(nvcuvid)
     set(HAVE_NVCUVID 1)
@@ -136,8 +145,6 @@ if(CUDA_FOUND)
 
   mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)
 
-  find_cuda_helper_libs(npp)
-
   macro(ocv_cuda_compile VAR)
     foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
       set(${var}_backup_in_cuda_compile_ "${${var}}")
index 2ea864c..59ce1cd 100644 (file)
@@ -33,7 +33,7 @@ if(WITH_QT)
   endif()
 
   if(NOT HAVE_QT)
-    find_package(Qt4)
+    find_package(Qt4 REQUIRED QtCore QtGui QtTest)
     if(QT4_FOUND)
       set(HAVE_QT TRUE)
       add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work
index 5600275..27e2a78 100644 (file)
@@ -9,6 +9,7 @@
 #
 # Created: 5 Aug 2011 by Marian Zajko (marian.zajko@ximea.com)
 # Updated: 25 June 2012 by Igor Kuzmin (parafin@ximea.com)
+# Updated: 22 October 2012 by Marian Zajko (marian.zajko@ximea.com)
 #
 
 set(XIMEA_FOUND)
@@ -18,11 +19,15 @@ set(XIMEA_LIBRARY_DIR)
 if(WIN32)
   # Try to find the XIMEA API path in registry.
   GET_FILENAME_COMPONENT(XIMEA_PATH "[HKEY_CURRENT_USER\\Software\\XIMEA\\CamSupport\\API;Path]" ABSOLUTE)
-
-  if(EXISTS XIMEA_PATH)
+  if(EXISTS ${XIMEA_PATH})
     set(XIMEA_FOUND 1)
     # set LIB folders
-    set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86")
+    if(CMAKE_CL_64)
+      set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x64")
+    else()
+      set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86")
+    endif()
   else()
     set(XIMEA_FOUND 0)
   endif()
@@ -38,5 +43,4 @@ endif()
 
 mark_as_advanced(FORCE XIMEA_FOUND)
 mark_as_advanced(FORCE XIMEA_PATH)
-mark_as_advanced(FORCE XIMEA_LIBRARY_DIR)
-
+mark_as_advanced(FORCE XIMEA_LIBRARY_DIR)
\ No newline at end of file
index 705ccc8..c99cae7 100644 (file)
@@ -162,7 +162,7 @@ if(UNIX)
 endif()
 
 if(ANDROID)
-  install(FILES "${OpenCV_SOURCE_DIR}/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/)
+  install(FILES "${OpenCV_SOURCE_DIR}/platforms/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/)
 endif()
 
 # --------------------------------------------------------------------------------------------
index 8312845..81340bd 100644 (file)
@@ -303,7 +303,7 @@ macro(ocv_glob_modules)
   # collect modules
   set(OPENCV_INITIAL_PASS ON)
   foreach(__path ${ARGN})
-    ocv_get_real_path(__path "${__path}")
+    get_filename_component(__path "${__path}" ABSOLUTE)
 
     list(FIND __directories_observed "${__path}" __pathIdx)
     if(__pathIdx GREATER -1)
@@ -315,7 +315,7 @@ macro(ocv_glob_modules)
     if(__ocvmodules)
       list(SORT __ocvmodules)
       foreach(mod ${__ocvmodules})
-        ocv_get_real_path(__modpath "${__path}/${mod}")
+        get_filename_component(__modpath "${__path}/${mod}" ABSOLUTE)
         if(EXISTS "${__modpath}/CMakeLists.txt")
 
           list(FIND __directories_observed "${__modpath}" __pathIdx)
@@ -470,7 +470,8 @@ endmacro()
 #   ocv_create_module(<extra link dependencies>)
 #   ocv_create_module(SKIP_LINK)
 macro(ocv_create_module)
-  add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES})
+  add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES}
+    "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/opencv_modules.hpp")
 
   if(NOT "${ARGN}" STREQUAL "SKIP_LINK")
     target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
index e6fcda6..59366eb 100644 (file)
@@ -411,16 +411,6 @@ macro(ocv_regex_escape var regex)
 endmacro()
 
 
-# get absolute path with symlinks resolved
-macro(ocv_get_real_path VAR PATHSTR)
-  if(CMAKE_VERSION VERSION_LESS 2.8)
-    get_filename_component(${VAR} "${PATHSTR}" ABSOLUTE)
-  else()
-    get_filename_component(${VAR} "${PATHSTR}" REALPATH)
-  endif()
-endmacro()
-
-
 # convert list of paths to full paths
 macro(ocv_convert_to_full_paths VAR)
   if(${VAR})
index 0f2695f..70f4809 100644 (file)
@@ -53,8 +53,8 @@ if(BUILD_DOCS AND HAVE_SPHINX)
     endif()
   endforeach()
 
-  file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/android/service/doc/*.rst")
-  file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/android/service/doc/*.jpg")
+  file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.rst")
+  file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.jpg")
   list(APPEND OPENCV_FILES_REF ${_OPENCV_FILES_REF})
   list(APPEND OPENCV_FILES_REF_PICT ${_OPENCV_FILES_REF_PICT})
 
index 4c7a15c..f3f7aec 100755 (executable)
@@ -239,7 +239,7 @@ latex_documents = [
    u'', 'manual'),
   ('doc/tutorials/tutorials', 'opencv_tutorials.tex', u'The OpenCV Tutorials',
    u'', 'manual'),
-  ('android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
+  ('platforms/android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
    u'', 'manual'),
 ]
 
index 47eafed..54d2889 100644 (file)
@@ -85,7 +85,7 @@ This tutorial code's is shown lines below. You can also download it from `here <
      std::vector< DMatch > good_matches;
 
      for( int i = 0; i < descriptors_1.rows; i++ )
-     { if( matches[i].distance < 2*min_dist )
+     { if( matches[i].distance <= 2*min_dist )
        { good_matches.push_back( matches[i]); }
      }
 
@@ -127,6 +127,3 @@ Result
    .. image:: images/Feature_FlannMatcher_Keypoints_Result.jpg
       :align: center
       :height: 250pt
-
-
-
index 909bf90..5f50b66 100644 (file)
--- a/index.rst
+++ b/index.rst
@@ -10,7 +10,7 @@ Welcome to opencv documentation!
    :maxdepth: 2
 
    modules/refman.rst
-   android/refman.rst
+   platforms/android/refman.rst
    doc/user_guide/user_guide.rst
    doc/tutorials/tutorials.rst
 
index 4a6ed6d..3e1ad70 100644 (file)
@@ -2,4 +2,4 @@ if(NOT OPENCV_MODULES_PATH)
   set(OPENCV_MODULES_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
 endif()
 
-ocv_glob_modules(${OPENCV_MODULES_PATH})
+ocv_glob_modules(${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH})
index d54dd5d..8ac8ced 100644 (file)
@@ -6,7 +6,7 @@ set(the_description "Auxiliary module for Android native camera support")
 set(OPENCV_MODULE_TYPE STATIC)
 
 ocv_define_module(androidcamera INTERNAL opencv_core log dl)
-ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/android/service/engine/jni/include")
+ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/platforms/android/service/engine/jni/include")
 
 # Android source tree for native camera
 SET (ANDROID_SOURCE_TREE "ANDROID_SOURCE_TREE-NOTFOUND" CACHE PATH
index 25988be..3d2c0c2 100644 (file)
@@ -115,31 +115,6 @@ namespace cv
             transform(points, modif_points, transformation);
         }
 
-        class Mutex
-        {
-        public:
-            Mutex() {
-            }
-            void lock()
-            {
-#ifdef HAVE_TBB
-                resultsMutex.lock();
-#endif
-            }
-
-            void unlock()
-            {
-#ifdef HAVE_TBB
-                resultsMutex.unlock();
-#endif
-            }
-
-        private:
-#ifdef HAVE_TBB
-            tbb::mutex resultsMutex;
-#endif
-        };
-
         struct CameraParameters
         {
             void init(Mat _intrinsics, Mat _distCoeffs)
index 3251427..623883d 100644 (file)
@@ -699,7 +699,7 @@ struct PrefilterInvoker
 };
 
 
-struct FindStereoCorrespInvoker
+struct FindStereoCorrespInvoker : ParallelLoopBody
 {
     FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
                               Mat& _disp, CvStereoBMState* _state,
@@ -713,12 +713,12 @@ struct FindStereoCorrespInvoker
         validDisparityRect = _validDisparityRect;
     }
 
-    void operator()( const BlockedRange& range ) const
+    void operator()( const Range& range ) const
     {
         int cols = left->cols, rows = left->rows;
-        int _row0 = min(cvRound(range.begin() * rows / nstripes), rows);
-        int _row1 = min(cvRound(range.end() * rows / nstripes), rows);
-        uchar *ptr = state->slidingSumBuf->data.ptr + range.begin() * stripeBufSize;
+        int _row0 = min(cvRound(range.start * rows / nstripes), rows);
+        int _row1 = min(cvRound(range.end * rows / nstripes), rows);
+        uchar *ptr = state->slidingSumBuf->data.ptr + range.start * stripeBufSize;
         int FILTERED = (state->minDisparity - 1)*16;
 
         Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
@@ -871,14 +871,10 @@ static void findStereoCorrespondenceBM( const Mat& left0, const Mat& right0, Mat
     const bool useShorts = false;
 #endif
 
-#ifdef HAVE_TBB
     const double SAD_overhead_coeff = 10.0;
     double N0 = 8000000 / (useShorts ? 1 : 4);  // approx tbb's min number instructions reasonable for one thread
     double maxStripeSize = min(max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height);
     int nstripes = cvCeil(height / maxStripeSize);
-#else
-    const int nstripes = 1;
-#endif
 
     int bufSize = max(bufSize0 * nstripes, max(bufSize1 * 2, bufSize2));
 
@@ -898,9 +894,9 @@ static void findStereoCorrespondenceBM( const Mat& left0, const Mat& right0, Mat
                                               state->minDisparity, state->numberOfDisparities,
                                               state->SADWindowSize);
 
-    parallel_for(BlockedRange(0, nstripes),
-                 FindStereoCorrespInvoker(left, right, disp, state, nstripes,
-                                          bufSize0, useShorts, validDisparityRect));
+    parallel_for_(Range(0, nstripes),
+                  FindStereoCorrespInvoker(left, right, disp, state, nstripes,
+                                           bufSize0, useShorts, validDisparityRect));
 
     if( state->speckleRange >= 0 && state->speckleWindowSize > 0 )
     {
index ca9f5e2..acfbb91 100644 (file)
@@ -1691,7 +1691,7 @@ Returns the depth of a matrix element.
 
 .. ocv:function:: int Mat::depth() const
 
-The method returns the identifier of the matrix element depth (the type of each individual channel). For example, for a 16-bit signed 3-channel array, the method returns ``CV_16S`` . A complete list of matrix types contains the following values:
+The method returns the identifier of the matrix element depth (the type of each individual channel). For example, for an array of 16-bit signed elements, the method returns ``CV_16S``. A complete list of matrix depths contains the following values:
 
 * ``CV_8U``     - 8-bit unsigned integers ( ``0..255``     )
 
index 1c8e0e2..2b77919 100644 (file)
@@ -4813,6 +4813,9 @@ public:
     ~AutoLock() { mutex->unlock(); }
 protected:
     Mutex* mutex;
+private:
+    AutoLock(const AutoLock&);
+    AutoLock& operator = (const AutoLock&);
 };
 
 }
index 8596ae4..d7e5eb4 100644 (file)
@@ -217,7 +217,7 @@ For each query descriptor, finds the training descriptors not farther than the s
 
     :param compactResult: Parameter used when the mask (or masks) is not empty. If  ``compactResult``  is false, the  ``matches``  vector has the same size as  ``queryDescriptors``  rows. If  ``compactResult``  is true, the  ``matches``  vector does not contain matches for fully masked-out query descriptors.
 
-    :param maxDistance: Threshold for the distance between matched descriptors.
+    :param maxDistance: Threshold for the distance between matched descriptors. Distance here means the metric distance between descriptors (e.g. Hamming distance), not the distance between coordinates (which is measured in pixels).
 
 For each query descriptor, the methods find such training descriptors that the distance between the query descriptor and the training descriptor is equal or smaller than ``maxDistance``. Found matches are returned in the distance increasing order.
 
index 2efd5a6..a1e389a 100644 (file)
@@ -214,7 +214,7 @@ static void keepStrongest( int N, vector<KeyPoint>& keypoints )
 }
 
 namespace {
-class GridAdaptedFeatureDetectorInvoker
+class GridAdaptedFeatureDetectorInvoker : public ParallelLoopBody
 {
 private:
     int gridRows_, gridCols_;
@@ -223,29 +223,24 @@ private:
     const Mat& image_;
     const Mat& mask_;
     const Ptr<FeatureDetector>& detector_;
-#ifdef HAVE_TBB
-    tbb::mutex* kptLock_;
-#endif
+    Mutex* kptLock_;
 
     GridAdaptedFeatureDetectorInvoker& operator=(const GridAdaptedFeatureDetectorInvoker&); // to quiet MSVC
 
 public:
 
-    GridAdaptedFeatureDetectorInvoker(const Ptr<FeatureDetector>& detector, const Mat& image, const Mat& mask, vector<KeyPoint>& keypoints, int maxPerCell, int gridRows, int gridCols
-#ifdef HAVE_TBB
-        , tbb::mutex* kptLock
-#endif
-        ) : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell),
-            keypoints_(keypoints), image_(image), mask_(mask), detector_(detector)
-#ifdef HAVE_TBB
-            , kptLock_(kptLock)
-#endif
+    GridAdaptedFeatureDetectorInvoker(const Ptr<FeatureDetector>& detector, const Mat& image, const Mat& mask,
+                                      vector<KeyPoint>& keypoints, int maxPerCell, int gridRows, int gridCols,
+                                      cv::Mutex* kptLock)
+        : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell),
+          keypoints_(keypoints), image_(image), mask_(mask), detector_(detector),
+          kptLock_(kptLock)
     {
     }
 
-    void operator() (const BlockedRange& range) const
+    void operator() (const Range& range) const
     {
-        for (int i = range.begin(); i < range.end(); ++i)
+        for (int i = range.start; i < range.end; ++i)
         {
             int celly = i / gridCols_;
             int cellx = i - celly * gridCols_;
@@ -270,9 +265,8 @@ public:
                 it->pt.x += col_range.start;
                 it->pt.y += row_range.start;
             }
-#ifdef HAVE_TBB
-            tbb::mutex::scoped_lock join_keypoints(*kptLock_);
-#endif
+
+            cv::AutoLock join_keypoints(*kptLock_);
             keypoints_.insert( keypoints_.end(), sub_keypoints.begin(), sub_keypoints.end() );
         }
     }
@@ -289,13 +283,9 @@ void GridAdaptedFeatureDetector::detectImpl( const Mat& image, vector<KeyPoint>&
     keypoints.reserve(maxTotalKeypoints);
     int maxPerCell = maxTotalKeypoints / (gridRows * gridCols);
 
-#ifdef HAVE_TBB
-    tbb::mutex kptLock;
-    cv::parallel_for(cv::BlockedRange(0, gridRows * gridCols),
+    cv::Mutex kptLock;
+    cv::parallel_for_(cv::Range(0, gridRows * gridCols),
         GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols, &kptLock));
-#else
-    GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols)(cv::BlockedRange(0, gridRows * gridCols));
-#endif
 }
 
 /*
index 5509226..a471da0 100644 (file)
@@ -45,7 +45,7 @@ if(HAVE_CUDA)
   set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
 
   if(WITH_NVCUVID)
-    set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY})
+    set(cuda_link_libs ${cuda_link_libs} ${CUDA_CUDA_LIBRARY} ${CUDA_nvcuvid_LIBRARY})
   endif()
 
   if(WIN32)
index d02027f..5b42284 100644 (file)
@@ -120,11 +120,8 @@ namespace cv { namespace gpu { namespace device
                 return dst;
             }
 
-            __device__ __forceinline__ RGB2RGB()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-
-            __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_)
-                :unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ RGB2RGB() {}
+            __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
         };
 
         template <> struct RGB2RGB<uchar, 4, 4, 2> : unary_function<uint, uint>
@@ -141,8 +138,8 @@ namespace cv { namespace gpu { namespace device
                 return dst;
             }
 
-            __device__ __forceinline__ RGB2RGB():unary_function<uint, uint>(){}
-            __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_):unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ RGB2RGB() {}
+            __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
         };
     }
 
@@ -203,8 +200,8 @@ namespace cv { namespace gpu { namespace device
                 return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
             }
 
-            __device__ __forceinline__ RGB2RGB5x5():unary_function<uchar3, ushort>(){}
-            __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function<uchar3, ushort>(){}
+            __host__ __device__ __forceinline__ RGB2RGB5x5() {}
+            __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
         };
 
         template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx,green_bits> : unary_function<uint, ushort>
@@ -214,8 +211,8 @@ namespace cv { namespace gpu { namespace device
                 return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
             }
 
-            __device__ __forceinline__ RGB2RGB5x5():unary_function<uint, ushort>(){}
-            __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function<uint, ushort>(){}
+            __host__ __device__ __forceinline__ RGB2RGB5x5() {}
+            __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
         };
     }
 
@@ -282,8 +279,8 @@ namespace cv { namespace gpu { namespace device
                 RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
                 return dst;
             }
-            __device__ __forceinline__ RGB5x52RGB():unary_function<ushort, uchar3>(){}
-            __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function<ushort, uchar3>(){}
+            __host__ __device__ __forceinline__ RGB5x52RGB() {}
+            __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
 
         };
 
@@ -295,8 +292,8 @@ namespace cv { namespace gpu { namespace device
                 RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
                 return dst;
             }
-            __device__ __forceinline__ RGB5x52RGB():unary_function<ushort, uint>(){}
-            __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function<ushort, uint>(){}
+            __host__ __device__ __forceinline__ RGB5x52RGB() {}
+            __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
         };
     }
 
@@ -325,9 +322,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ Gray2RGB():unary_function<T, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_)
-                : unary_function<T, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ Gray2RGB() {}
+            __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
         };
 
         template <> struct Gray2RGB<uchar, 4> : unary_function<uchar, uint>
@@ -342,8 +338,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ Gray2RGB():unary_function<uchar, uint>(){}
-            __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_):unary_function<uchar, uint>(){}
+            __host__ __device__ __forceinline__ Gray2RGB() {}
+            __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
         };
     }
 
@@ -384,8 +380,8 @@ namespace cv { namespace gpu { namespace device
                 return Gray2RGB5x5Converter<green_bits>::cvt(src);
             }
 
-            __device__ __forceinline__ Gray2RGB5x5():unary_function<uchar, ushort>(){}
-            __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5& other_):unary_function<uchar, ushort>(){}
+            __host__ __device__ __forceinline__ Gray2RGB5x5() {}
+            __host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {}
         };
     }
 
@@ -426,8 +422,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return RGB5x52GrayConverter<green_bits>::cvt(src);
             }
-            __device__ __forceinline__ RGB5x52Gray() : unary_function<ushort, uchar>(){}
-            __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray& other_) : unary_function<ushort, uchar>(){}
+            __host__ __device__ __forceinline__ RGB5x52Gray() {}
+            __host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {}
         };
     }
 
@@ -467,9 +463,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return RGB2GrayConvert<bidx>(&src.x);
             }
-            __device__ __forceinline__ RGB2Gray() : unary_function<typename TypeVec<T, scn>::vec_type, T>(){}
-            __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, T>(){}
+            __host__ __device__ __forceinline__ RGB2Gray() {}
+            __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
         };
 
         template <int bidx> struct RGB2Gray<uchar, 4, bidx> : unary_function<uint, uchar>
@@ -478,8 +473,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return RGB2GrayConvert<bidx>(src);
             }
-            __device__ __forceinline__ RGB2Gray() : unary_function<uint, uchar>(){}
-            __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_) : unary_function<uint, uchar>(){}
+            __host__ __device__ __forceinline__ RGB2Gray() {}
+            __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
         };
     }
 
@@ -529,10 +524,8 @@ namespace cv { namespace gpu { namespace device
                 RGB2YUVConvert<bidx>(&src.x, dst);
                 return dst;
             }
-            __device__ __forceinline__ RGB2YUV()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ RGB2YUV(const RGB2YUV& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ RGB2YUV() {}
+            __host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {}
         };
     }
 
@@ -609,10 +602,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ YUV2RGB()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ YUV2RGB() {}
+            __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
         };
 
         template <int bidx> struct YUV2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
@@ -621,8 +612,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return YUV2RGBConvert<bidx>(src);
             }
-            __device__ __forceinline__ YUV2RGB() : unary_function<uint, uint>(){}
-            __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_) : unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ YUV2RGB() {}
+            __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
         };
     }
 
@@ -689,10 +680,8 @@ namespace cv { namespace gpu { namespace device
                 RGB2YCrCbConvert<bidx>(&src.x, dst);
                 return dst;
             }
-            __device__ __forceinline__ RGB2YCrCb()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ RGB2YCrCb() {}
+            __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
         };
 
         template <int bidx> struct RGB2YCrCb<uchar, 4, 4, bidx> : unary_function<uint, uint>
@@ -702,8 +691,8 @@ namespace cv { namespace gpu { namespace device
                 return RGB2YCrCbConvert<bidx>(src);
             }
 
-            __device__ __forceinline__ RGB2YCrCb() : unary_function<uint, uint>(){}
-            __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_) : unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ RGB2YCrCb() {}
+            __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
         };
     }
 
@@ -771,10 +760,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ YCrCb2RGB()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ YCrCb2RGB() {}
+            __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
         };
 
         template <int bidx> struct YCrCb2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
@@ -783,8 +770,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return YCrCb2RGBConvert<bidx>(src);
             }
-            __device__ __forceinline__ YCrCb2RGB() : unary_function<uint, uint>(){}
-            __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_) : unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ YCrCb2RGB() {}
+            __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
         };
     }
 
@@ -849,10 +836,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ RGB2XYZ()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ RGB2XYZ() {}
+            __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
         };
 
         template <int bidx> struct RGB2XYZ<uchar, 4, 4, bidx> : unary_function<uint, uint>
@@ -861,8 +846,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return RGB2XYZConvert<bidx>(src);
             }
-            __device__ __forceinline__ RGB2XYZ() : unary_function<uint, uint>(){}
-            __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_) : unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ RGB2XYZ() {}
+            __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
         };
     }
 
@@ -926,10 +911,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ XYZ2RGB()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ XYZ2RGB() {}
+            __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
         };
 
         template <int bidx> struct XYZ2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
@@ -938,8 +921,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return XYZ2RGBConvert<bidx>(src);
             }
-            __device__ __forceinline__ XYZ2RGB() : unary_function<uint, uint>(){}
-            __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_) : unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ XYZ2RGB() {}
+            __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
         };
     }
 
@@ -1066,10 +1049,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ RGB2HSV()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ RGB2HSV() {}
+            __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
         };
 
         template <int bidx, int hr> struct RGB2HSV<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
@@ -1078,8 +1059,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return RGB2HSVConvert<bidx, hr>(src);
             }
-            __device__ __forceinline__ RGB2HSV():unary_function<uint, uint>(){}
-            __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_):unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ RGB2HSV() {}
+            __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
         };
     }
 
@@ -1208,10 +1189,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ HSV2RGB()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ HSV2RGB() {}
+            __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
         };
 
         template <int bidx, int hr> struct HSV2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
@@ -1220,8 +1199,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return HSV2RGBConvert<bidx, hr>(src);
             }
-            __device__ __forceinline__ HSV2RGB():unary_function<uint, uint>(){}
-            __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_):unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ HSV2RGB() {}
+            __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
         };
     }
 
@@ -1343,10 +1322,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ RGB2HLS()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ RGB2HLS() {}
+            __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
         };
 
         template <int bidx, int hr> struct RGB2HLS<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
@@ -1355,8 +1332,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return RGB2HLSConvert<bidx, hr>(src);
             }
-            __device__ __forceinline__ RGB2HLS() : unary_function<uint, uint>(){}
-            __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_) : unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ RGB2HLS() {}
+            __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
         };
     }
 
@@ -1485,10 +1462,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ HLS2RGB()
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-            __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_)
-                : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+            __host__ __device__ __forceinline__ HLS2RGB() {}
+            __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
         };
 
         template <int bidx, int hr> struct HLS2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
@@ -1497,8 +1472,8 @@ namespace cv { namespace gpu { namespace device
             {
                 return HLS2RGBConvert<bidx, hr>(src);
             }
-            __device__ __forceinline__ HLS2RGB() : unary_function<uint, uint>(){}
-            __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_) : unary_function<uint, uint>(){}
+            __host__ __device__ __forceinline__ HLS2RGB() {}
+            __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
         };
     }
 
@@ -1651,8 +1626,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ RGB2Lab() {}
-            __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {}
+            __host__ __device__ __forceinline__ RGB2Lab() {}
+            __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
         };
         template <int scn, int dcn, bool srgb, int blueIdx>
         struct RGB2Lab<float, scn, dcn, srgb, blueIdx>
@@ -1666,8 +1641,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ RGB2Lab() {}
-            __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {}
+            __host__ __device__ __forceinline__ RGB2Lab() {}
+            __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
         };
     }
 
@@ -1764,8 +1739,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ Lab2RGB() {}
-            __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {}
+            __host__ __device__ __forceinline__ Lab2RGB() {}
+            __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
         };
         template <int scn, int dcn, bool srgb, int blueIdx>
         struct Lab2RGB<float, scn, dcn, srgb, blueIdx>
@@ -1779,8 +1754,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ Lab2RGB() {}
-            __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {}
+            __host__ __device__ __forceinline__ Lab2RGB() {}
+            __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
         };
     }
 
@@ -1863,8 +1838,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ RGB2Luv() {}
-            __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {}
+            __host__ __device__ __forceinline__ RGB2Luv() {}
+            __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
         };
         template <int scn, int dcn, bool srgb, int blueIdx>
         struct RGB2Luv<float, scn, dcn, srgb, blueIdx>
@@ -1878,8 +1853,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ RGB2Luv() {}
-            __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {}
+            __host__ __device__ __forceinline__ RGB2Luv() {}
+            __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
         };
     }
 
@@ -1964,8 +1939,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ Luv2RGB() {}
-            __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {}
+            __host__ __device__ __forceinline__ Luv2RGB() {}
+            __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
         };
         template <int scn, int dcn, bool srgb, int blueIdx>
         struct Luv2RGB<float, scn, dcn, srgb, blueIdx>
@@ -1979,8 +1954,8 @@ namespace cv { namespace gpu { namespace device
 
                 return dst;
             }
-            __device__ __forceinline__ Luv2RGB() {}
-            __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {}
+            __host__ __device__ __forceinline__ Luv2RGB() {}
+            __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
         };
     }
 
index 6064e8e..db26473 100644 (file)
@@ -63,8 +63,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a + b;
         }
-        __device__ __forceinline__ plus(const plus& other):binary_function<T,T,T>(){}
-        __device__ __forceinline__ plus():binary_function<T,T,T>(){}
+        __host__ __device__ __forceinline__ plus() {}
+        __host__ __device__ __forceinline__ plus(const plus&) {}
     };
 
     template <typename T> struct minus : binary_function<T, T, T>
@@ -74,8 +74,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a - b;
         }
-        __device__ __forceinline__ minus(const minus& other):binary_function<T,T,T>(){}
-        __device__ __forceinline__ minus():binary_function<T,T,T>(){}
+        __host__ __device__ __forceinline__ minus() {}
+        __host__ __device__ __forceinline__ minus(const minus&) {}
     };
 
     template <typename T> struct multiplies : binary_function<T, T, T>
@@ -85,8 +85,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a * b;
         }
-        __device__ __forceinline__ multiplies(const multiplies& other):binary_function<T,T,T>(){}
-        __device__ __forceinline__ multiplies():binary_function<T,T,T>(){}
+        __host__ __device__ __forceinline__ multiplies() {}
+        __host__ __device__ __forceinline__ multiplies(const multiplies&) {}
     };
 
     template <typename T> struct divides : binary_function<T, T, T>
@@ -96,8 +96,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a / b;
         }
-        __device__ __forceinline__ divides(const divides& other):binary_function<T,T,T>(){}
-        __device__ __forceinline__ divides():binary_function<T,T,T>(){}
+        __host__ __device__ __forceinline__ divides() {}
+        __host__ __device__ __forceinline__ divides(const divides&) {}
     };
 
     template <typename T> struct modulus : binary_function<T, T, T>
@@ -107,8 +107,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a % b;
         }
-        __device__ __forceinline__ modulus(const modulus& other):binary_function<T,T,T>(){}
-        __device__ __forceinline__ modulus():binary_function<T,T,T>(){}
+        __host__ __device__ __forceinline__ modulus() {}
+        __host__ __device__ __forceinline__ modulus(const modulus&) {}
     };
 
     template <typename T> struct negate : unary_function<T, T>
@@ -117,8 +117,8 @@ namespace cv { namespace gpu { namespace device
         {
             return -a;
         }
-        __device__ __forceinline__ negate(const negate& other):unary_function<T,T>(){}
-        __device__ __forceinline__ negate():unary_function<T,T>(){}
+        __host__ __device__ __forceinline__ negate() {}
+        __host__ __device__ __forceinline__ negate(const negate&) {}
     };
 
     // Comparison Operations
@@ -129,8 +129,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a == b;
         }
-        __device__ __forceinline__ equal_to(const equal_to& other):binary_function<T,T,bool>(){}
-        __device__ __forceinline__ equal_to():binary_function<T,T,bool>(){}
+        __host__ __device__ __forceinline__ equal_to() {}
+        __host__ __device__ __forceinline__ equal_to(const equal_to&) {}
     };
 
     template <typename T> struct not_equal_to : binary_function<T, T, bool>
@@ -140,8 +140,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a != b;
         }
-        __device__ __forceinline__ not_equal_to(const not_equal_to& other):binary_function<T,T,bool>(){}
-        __device__ __forceinline__ not_equal_to():binary_function<T,T,bool>(){}
+        __host__ __device__ __forceinline__ not_equal_to() {}
+        __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
     };
 
     template <typename T> struct greater : binary_function<T, T, bool>
@@ -151,8 +151,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a > b;
         }
-        __device__ __forceinline__ greater(const greater& other):binary_function<T,T,bool>(){}
-        __device__ __forceinline__ greater():binary_function<T,T,bool>(){}
+        __host__ __device__ __forceinline__ greater() {}
+        __host__ __device__ __forceinline__ greater(const greater&) {}
     };
 
     template <typename T> struct less : binary_function<T, T, bool>
@@ -162,8 +162,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a < b;
         }
-        __device__ __forceinline__ less(const less& other):binary_function<T,T,bool>(){}
-        __device__ __forceinline__ less():binary_function<T,T,bool>(){}
+        __host__ __device__ __forceinline__ less() {}
+        __host__ __device__ __forceinline__ less(const less&) {}
     };
 
     template <typename T> struct greater_equal : binary_function<T, T, bool>
@@ -173,8 +173,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a >= b;
         }
-        __device__ __forceinline__ greater_equal(const greater_equal& other):binary_function<T,T,bool>(){}
-        __device__ __forceinline__ greater_equal():binary_function<T,T,bool>(){}
+        __host__ __device__ __forceinline__ greater_equal() {}
+        __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
     };
 
     template <typename T> struct less_equal : binary_function<T, T, bool>
@@ -184,8 +184,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a <= b;
         }
-        __device__ __forceinline__ less_equal(const less_equal& other):binary_function<T,T,bool>(){}
-        __device__ __forceinline__ less_equal():binary_function<T,T,bool>(){}
+        __host__ __device__ __forceinline__ less_equal() {}
+        __host__ __device__ __forceinline__ less_equal(const less_equal&) {}
     };
 
     // Logical Operations
@@ -196,8 +196,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a && b;
         }
-        __device__ __forceinline__ logical_and(const logical_and& other):binary_function<T,T,bool>(){}
-        __device__ __forceinline__ logical_and():binary_function<T,T,bool>(){}
+        __host__ __device__ __forceinline__ logical_and() {}
+        __host__ __device__ __forceinline__ logical_and(const logical_and&) {}
     };
 
     template <typename T> struct logical_or : binary_function<T, T, bool>
@@ -207,8 +207,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a || b;
         }
-        __device__ __forceinline__ logical_or(const logical_or& other):binary_function<T,T,bool>(){}
-        __device__ __forceinline__ logical_or():binary_function<T,T,bool>(){}
+        __host__ __device__ __forceinline__ logical_or() {}
+        __host__ __device__ __forceinline__ logical_or(const logical_or&) {}
     };
 
     template <typename T> struct logical_not : unary_function<T, bool>
@@ -217,8 +217,8 @@ namespace cv { namespace gpu { namespace device
         {
             return !a;
         }
-        __device__ __forceinline__ logical_not(const logical_not& other):unary_function<T,bool>(){}
-        __device__ __forceinline__ logical_not():unary_function<T,bool>(){}
+        __host__ __device__ __forceinline__ logical_not() {}
+        __host__ __device__ __forceinline__ logical_not(const logical_not&) {}
     };
 
     // Bitwise Operations
@@ -229,8 +229,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a & b;
         }
-        __device__ __forceinline__ bit_and(const bit_and& other):binary_function<T,T,T>(){}
-        __device__ __forceinline__ bit_and():binary_function<T,T,T>(){}
+        __host__ __device__ __forceinline__ bit_and() {}
+        __host__ __device__ __forceinline__ bit_and(const bit_and&) {}
     };
 
     template <typename T> struct bit_or : binary_function<T, T, T>
@@ -240,8 +240,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a | b;
         }
-        __device__ __forceinline__ bit_or(const bit_or& other):binary_function<T,T,T>(){}
-        __device__ __forceinline__ bit_or():binary_function<T,T,T>(){}
+        __host__ __device__ __forceinline__ bit_or() {}
+        __host__ __device__ __forceinline__ bit_or(const bit_or&) {}
     };
 
     template <typename T> struct bit_xor : binary_function<T, T, T>
@@ -251,8 +251,8 @@ namespace cv { namespace gpu { namespace device
         {
             return a ^ b;
         }
-        __device__ __forceinline__ bit_xor(const bit_xor& other):binary_function<T,T,T>(){}
-        __device__ __forceinline__ bit_xor():binary_function<T,T,T>(){}
+        __host__ __device__ __forceinline__ bit_xor() {}
+        __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
     };
 
     template <typename T> struct bit_not : unary_function<T, T>
@@ -261,8 +261,8 @@ namespace cv { namespace gpu { namespace device
         {
             return ~v;
         }
-        __device__ __forceinline__ bit_not(const bit_not& other):unary_function<T,T>(){}
-        __device__ __forceinline__ bit_not():unary_function<T,T>(){}
+        __host__ __device__ __forceinline__ bit_not() {}
+        __host__ __device__ __forceinline__ bit_not(const bit_not&) {}
     };
 
     // Generalized Identity Operations
@@ -272,8 +272,8 @@ namespace cv { namespace gpu { namespace device
         {
             return x;
         }
-        __device__ __forceinline__ identity(const identity& other):unary_function<T,T>(){}
-        __device__ __forceinline__ identity():unary_function<T,T>(){}
+        __host__ __device__ __forceinline__ identity() {}
+        __host__ __device__ __forceinline__ identity(const identity&) {}
     };
 
     template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
@@ -282,8 +282,8 @@ namespace cv { namespace gpu { namespace device
         {
             return lhs;
         }
-        __device__ __forceinline__ project1st(const project1st& other):binary_function<T1,T2,T1>(){}
-        __device__ __forceinline__ project1st():binary_function<T1,T2,T1>(){}
+        __host__ __device__ __forceinline__ project1st() {}
+        __host__ __device__ __forceinline__ project1st(const project1st&) {}
     };
 
     template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
@@ -292,8 +292,8 @@ namespace cv { namespace gpu { namespace device
         {
             return rhs;
         }
-        __device__ __forceinline__ project2nd(const project2nd& other):binary_function<T1,T2,T2>(){}
-        __device__ __forceinline__ project2nd():binary_function<T1,T2,T2>(){}
+        __host__ __device__ __forceinline__ project2nd() {}
+        __host__ __device__ __forceinline__ project2nd(const project2nd&) {}
     };
 
     // Min/Max Operations
@@ -302,8 +302,8 @@ namespace cv { namespace gpu { namespace device
     template <> struct name<type> : binary_function<type, type, type> \
     { \
         __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
-        __device__ __forceinline__ name() {}\
-        __device__ __forceinline__ name(const name&) {}\
+        __host__ __device__ __forceinline__ name() {}\
+        __host__ __device__ __forceinline__ name(const name&) {}\
     };
 
     template <typename T> struct maximum : binary_function<T, T, T>
@@ -312,8 +312,8 @@ namespace cv { namespace gpu { namespace device
         {
             return max(lhs, rhs);
         }
-        __device__ __forceinline__ maximum() {}
-        __device__ __forceinline__ maximum(const maximum&) {}
+        __host__ __device__ __forceinline__ maximum() {}
+        __host__ __device__ __forceinline__ maximum(const maximum&) {}
     };
 
     OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max)
@@ -332,8 +332,8 @@ namespace cv { namespace gpu { namespace device
         {
             return min(lhs, rhs);
         }
-        __device__ __forceinline__ minimum() {}
-        __device__ __forceinline__ minimum(const minimum&) {}
+        __host__ __device__ __forceinline__ minimum() {}
+        __host__ __device__ __forceinline__ minimum(const minimum&) {}
     };
 
     OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min)
@@ -349,7 +349,6 @@ namespace cv { namespace gpu { namespace device
 #undef OPENCV_GPU_IMPLEMENT_MINMAX
 
     // Math functions
-///bound=========================================
 
     template <typename T> struct abs_func : unary_function<T, T>
     {
@@ -358,8 +357,8 @@ namespace cv { namespace gpu { namespace device
             return abs(x);
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
     {
@@ -368,8 +367,8 @@ namespace cv { namespace gpu { namespace device
             return x;
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<signed char> : unary_function<signed char, signed char>
     {
@@ -378,8 +377,8 @@ namespace cv { namespace gpu { namespace device
             return ::abs((int)x);
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<char> : unary_function<char, char>
     {
@@ -388,8 +387,8 @@ namespace cv { namespace gpu { namespace device
             return ::abs((int)x);
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
     {
@@ -398,8 +397,8 @@ namespace cv { namespace gpu { namespace device
             return x;
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<short> : unary_function<short, short>
     {
@@ -408,8 +407,8 @@ namespace cv { namespace gpu { namespace device
             return ::abs((int)x);
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
     {
@@ -418,8 +417,8 @@ namespace cv { namespace gpu { namespace device
             return x;
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<int> : unary_function<int, int>
     {
@@ -428,8 +427,8 @@ namespace cv { namespace gpu { namespace device
             return ::abs(x);
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<float> : unary_function<float, float>
     {
@@ -438,8 +437,8 @@ namespace cv { namespace gpu { namespace device
             return ::fabsf(x);
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
     template <> struct abs_func<double> : unary_function<double, double>
     {
@@ -448,8 +447,8 @@ namespace cv { namespace gpu { namespace device
             return ::fabs(x);
         }
 
-        __device__ __forceinline__ abs_func() {}
-        __device__ __forceinline__ abs_func(const abs_func&) {}
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
     };
 
 #define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \
@@ -459,8 +458,8 @@ namespace cv { namespace gpu { namespace device
         { \
             return func ## f(v); \
         } \
-        __device__ __forceinline__ name ## _func() {} \
-        __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
     }; \
     template <> struct name ## _func<double> : unary_function<double, double> \
     { \
@@ -468,8 +467,8 @@ namespace cv { namespace gpu { namespace device
         { \
             return func(v); \
         } \
-        __device__ __forceinline__ name ## _func() {} \
-        __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
     };
 
 #define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \
@@ -479,6 +478,8 @@ namespace cv { namespace gpu { namespace device
         { \
             return func ## f(v1, v2); \
         } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
     }; \
     template <> struct name ## _func<double> : binary_function<double, double, double> \
     { \
@@ -486,6 +487,8 @@ namespace cv { namespace gpu { namespace device
         { \
             return func(v1, v2); \
         } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
     };
 
     OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
@@ -522,8 +525,8 @@ namespace cv { namespace gpu { namespace device
         {
             return src1 * src1 + src2 * src2;
         }
-        __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func& other) : binary_function<T, T, float>(){}
-        __device__ __forceinline__ hypot_sqr_func() : binary_function<T, T, float>(){}
+        __host__ __device__ __forceinline__ hypot_sqr_func() {}
+        __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
     };
 
     // Saturate Cast Functor
@@ -533,8 +536,8 @@ namespace cv { namespace gpu { namespace device
         {
             return saturate_cast<D>(v);
         }
-        __device__ __forceinline__ saturate_cast_func(const saturate_cast_func& other):unary_function<T, D>(){}
-        __device__ __forceinline__ saturate_cast_func():unary_function<T, D>(){}
+        __host__ __device__ __forceinline__ saturate_cast_func() {}
+        __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
     };
 
     // Threshold Functors
@@ -547,10 +550,9 @@ namespace cv { namespace gpu { namespace device
             return (src > thresh) * maxVal;
         }
 
-        __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
-            : unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
-
-        __device__ __forceinline__ thresh_binary_func():unary_function<T, T>(){}
+        __host__ __device__ __forceinline__ thresh_binary_func() {}
+        __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
+            : thresh(other.thresh), maxVal(other.maxVal) {}
 
         const T thresh;
         const T maxVal;
@@ -565,10 +567,9 @@ namespace cv { namespace gpu { namespace device
             return (src <= thresh) * maxVal;
         }
 
-        __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
-            : unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
-
-        __device__ __forceinline__ thresh_binary_inv_func():unary_function<T, T>(){}
+        __host__ __device__ __forceinline__ thresh_binary_inv_func() {}
+        __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
+            : thresh(other.thresh), maxVal(other.maxVal) {}
 
         const T thresh;
         const T maxVal;
@@ -583,10 +584,9 @@ namespace cv { namespace gpu { namespace device
             return minimum<T>()(src, thresh);
         }
 
-        __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
-            : unary_function<T, T>(), thresh(other.thresh){}
-
-        __device__ __forceinline__ thresh_trunc_func():unary_function<T, T>(){}
+        __host__ __device__ __forceinline__ thresh_trunc_func() {}
+        __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
+            : thresh(other.thresh) {}
 
         const T thresh;
     };
@@ -599,10 +599,10 @@ namespace cv { namespace gpu { namespace device
         {
             return (src > thresh) * src;
         }
-        __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
-            : unary_function<T, T>(), thresh(other.thresh){}
 
-        __device__ __forceinline__ thresh_to_zero_func():unary_function<T, T>(){}
+        __host__ __device__ __forceinline__ thresh_to_zero_func() {}
+       __host__  __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
+            : thresh(other.thresh) {}
 
         const T thresh;
     };
@@ -615,14 +615,14 @@ namespace cv { namespace gpu { namespace device
         {
             return (src <= thresh) * src;
         }
-        __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
-            : unary_function<T, T>(), thresh(other.thresh){}
 
-        __device__ __forceinline__ thresh_to_zero_inv_func():unary_function<T, T>(){}
+        __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
+        __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
+            : thresh(other.thresh) {}
 
         const T thresh;
     };
-//bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============>
+
     // Function Object Adaptors
     template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
     {
@@ -633,8 +633,8 @@ namespace cv { namespace gpu { namespace device
           return !pred(x);
       }
 
-        __device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function<typename Predicate::argument_type, bool>(){}
-        __device__ __forceinline__ unary_negate() : unary_function<typename Predicate::argument_type, bool>(){}
+      __host__ __device__ __forceinline__ unary_negate() {}
+      __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
 
       const Predicate pred;
     };
@@ -653,11 +653,9 @@ namespace cv { namespace gpu { namespace device
         {
             return !pred(x,y);
         }
-        __device__ __forceinline__ binary_negate(const binary_negate& other)
-        : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
 
-        __device__ __forceinline__ binary_negate() :
-        binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
+        __host__ __device__ __forceinline__ binary_negate() {}
+        __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
 
         const Predicate pred;
     };
@@ -676,8 +674,8 @@ namespace cv { namespace gpu { namespace device
             return op(arg1, a);
         }
 
-        __device__ __forceinline__ binder1st(const binder1st& other) :
-        unary_function<typename Op::second_argument_type, typename Op::result_type>(){}
+        __host__ __device__ __forceinline__ binder1st() {}
+        __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
 
         const Op op;
         const typename Op::first_argument_type arg1;
@@ -697,8 +695,8 @@ namespace cv { namespace gpu { namespace device
             return op(a, arg2);
         }
 
-         __device__ __forceinline__ binder2nd(const binder2nd& other) :
-        unary_function<typename Op::first_argument_type, typename Op::result_type>(), op(other.op), arg2(other.arg2){}
+        __host__ __device__ __forceinline__ binder2nd() {}
+        __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
 
         const Op op;
         const typename Op::second_argument_type arg2;
index 83eaaa2..85e81ac 100644 (file)
@@ -124,8 +124,8 @@ namespace cv { namespace gpu { namespace device
 
     struct WithOutMask
     {
-        __device__ __forceinline__ WithOutMask(){}
-        __device__ __forceinline__ WithOutMask(const WithOutMask& mask){}
+        __host__ __device__ __forceinline__ WithOutMask(){}
+        __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
 
         __device__ __forceinline__ void next() const
         {
index 1c46dc0..a6cb43a 100644 (file)
 #ifndef __OPENCV_GPU_VECMATH_HPP__
 #define __OPENCV_GPU_VECMATH_HPP__
 
-#include "saturate_cast.hpp"
 #include "vec_traits.hpp"
-#include "functional.hpp"
+#include "saturate_cast.hpp"
 
 namespace cv { namespace gpu { namespace device
 {
-    namespace vec_math_detail
+
+// saturate_cast
+
+namespace vec_math_detail
+{
+    template <int cn, typename VecD> struct SatCastHelper;
+    template <typename VecD> struct SatCastHelper<1, VecD>
     {
-        template <int cn, typename VecD> struct SatCastHelper;
-        template <typename VecD> struct SatCastHelper<1, VecD>
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
         {
-            template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
-            {
-                typedef typename VecTraits<VecD>::elem_type D;
-                return VecTraits<VecD>::make(saturate_cast<D>(v.x));
-            }
-        };
-        template <typename VecD> struct SatCastHelper<2, VecD>
-        {
-            template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
-            {
-                typedef typename VecTraits<VecD>::elem_type D;
-                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
-            }
-        };
-        template <typename VecD> struct SatCastHelper<3, VecD>
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<2, VecD>
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
         {
-            template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
-            {
-                typedef typename VecTraits<VecD>::elem_type D;
-                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
-            }
-        };
-        template <typename VecD> struct SatCastHelper<4, VecD>
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<3, VecD>
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
         {
-            template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
-            {
-                typedef typename VecTraits<VecD>::elem_type D;
-                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
-            }
-        };
-
-        template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v)
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<4, VecD>
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
         {
-            return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
         }
+    };
+
+    template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
+    {
+        return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
     }
+}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
 
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-
-#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
-    __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
-    { \
-        func<type> f; \
-        return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x)); \
+// unary operators
+
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
+    { \
+        return VecTraits<output_type ## 1>::make(op (a.x)); \
     } \
-    __device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a) \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
     { \
-        func<type> f; \
-        return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \
+        return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
     } \
-    __device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a) \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
     { \
-        func<type> f; \
-        return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \
+        return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
     } \
-    __device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a) \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
     { \
-        func<type> f; \
-        return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
+        return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
     }
 
-    namespace vec_math_detail
-    {
-        template <typename T1, typename T2> struct BinOpTraits
-        {
-            typedef int argument_type;
-        };
-        template <typename T> struct BinOpTraits<T, T>
-        {
-            typedef T argument_type;
-        };
-        template <typename T> struct BinOpTraits<T, double>
-        {
-            typedef double argument_type;
-        };
-        template <typename T> struct BinOpTraits<double, T>
-        {
-            typedef double argument_type;
-        };
-        template <> struct BinOpTraits<double, double>
-        {
-            typedef double argument_type;
-        };
-        template <typename T> struct BinOpTraits<T, float>
-        {
-            typedef float argument_type;
-        };
-        template <typename T> struct BinOpTraits<float, T>
-        {
-            typedef float argument_type;
-        };
-        template <> struct BinOpTraits<float, float>
-        {
-            typedef float argument_type;
-        };
-        template <> struct BinOpTraits<double, float>
-        {
-            typedef double argument_type;
-        };
-        template <> struct BinOpTraits<float, double>
-        {
-            typedef double argument_type;
-        };
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
+
+// unary functions
+
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (a.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
     }
 
-#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
-    __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
+
+// binary operators (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
     { \
-        func<type> f; \
-        return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
+        return VecTraits<output_type ## 1>::make(a.x op b.x); \
     } \
-    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
     { \
-        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
+        return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
     } \
-    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
     { \
-        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
+        return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
     } \
-    __device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
     { \
-        func<type> f; \
-        return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
+        return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
+
+// binary operators (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op s); \
+    } \
+    __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(s op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
+    }
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
+
+// binary function (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
     } \
-    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
     { \
-        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
+        return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
     } \
-    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
     { \
-        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
+        return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
+
+// binary function (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
     } \
-    __device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
     { \
-        func<type> f; \
-        return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
+        return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
     } \
-    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
+    __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
     { \
-        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
+        return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
     } \
-    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
     { \
-        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
+        return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
     } \
-    __device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
+    __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
     { \
-        func<type> f; \
-        return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
+        return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
     } \
-    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
     { \
-        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
+        return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
     } \
-    template <typename T> \
-    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
+    __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
     { \
-        func<typename vec_math_detail::BinOpTraits<T, type>::argument_type> f; \
-        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
+        return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
     }
 
-#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, abs, abs_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func)
-
-#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \
-    OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \
-    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \
-    OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not)
-
-    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar)
-    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char)
-    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort)
-    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short)
-    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int)
-    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint)
-    OPENCV_GPU_IMPLEMENT_VEC_OP(float)
-    OPENCV_GPU_IMPLEMENT_VEC_OP(double)
-
-    #undef OPENCV_GPU_IMPLEMENT_VEC_UNOP
-    #undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
-    #undef OPENCV_GPU_IMPLEMENT_VEC_OP
-    #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
+
 }}} // namespace cv { namespace gpu { namespace device
 
 #endif // __OPENCV_GPU_VECMATH_HPP__
index 40d88aa..adfc294 100644 (file)
@@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
 
         TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));
 
-        GPU_SANITY_CHECK(dst);
+        GPU_SANITY_CHECK(dst, 1);
     }
     else
     {
index 1ab01a7..672d657 100644 (file)
@@ -103,7 +103,7 @@ PERF_TEST_P(ImagePair, Video_InterpolateFrames,
 
         TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
 
-        GPU_SANITY_CHECK(newFrame);
+        GPU_SANITY_CHECK(newFrame, 1e-4);
     }
     else
     {
@@ -142,7 +142,7 @@ PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap,
 
         TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
 
-        GPU_SANITY_CHECK(vertex);
+        GPU_SANITY_CHECK(vertex, 1e-6);
         GPU_SANITY_CHECK(colors);
     }
     else
@@ -219,8 +219,8 @@ PERF_TEST_P(ImagePair, Video_BroxOpticalFlow,
 
         TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);
 
-        GPU_SANITY_CHECK(u);
-        GPU_SANITY_CHECK(v);
+        GPU_SANITY_CHECK(u, 1e-1);
+        GPU_SANITY_CHECK(v, 1e-1);
     }
     else
     {
index e83213f..b84f09d 100644 (file)
@@ -151,7 +151,7 @@ namespace
     }
 
     // Computes rotation, translation pair for small subsets if the input data
-    class TransformHypothesesGenerator
+    class TransformHypothesesGenerator : public ParallelLoopBody
     {
     public:
         TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
@@ -161,7 +161,7 @@ namespace
                   num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
                   transl_vectors(transl_vectors_) {}
 
-        void operator()(const BlockedRange& range) const
+        void operator()(const Range& range) const
         {
             // Input data for generation of the current hypothesis
             vector<int> subset_indices(subset_size);
@@ -173,7 +173,7 @@ namespace
             Mat rot_mat(3, 3, CV_64F);
             Mat transl_vec(1, 3, CV_64F);
 
-            for (int iter = range.begin(); iter < range.end(); ++iter)
+            for (int iter = range.start; iter < range.end; ++iter)
             {
                 selectRandom(subset_size, num_points, subset_indices);
                 for (int i = 0; i < subset_size; ++i)
@@ -239,7 +239,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
     // Generate set of hypotheses using small subsets of the input data
     TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
                                       num_points, subset_size, rot_matrices, transl_vectors);
-    parallel_for(BlockedRange(0, num_iters), body);
+    parallel_for_(Range(0, num_iters), body);
 
     // Compute scores (i.e. number of inliers) for each hypothesis
     GpuMat d_object(object);
index 814a96b..7b95b69 100644 (file)
@@ -406,7 +406,7 @@ public:
         GpuMat dclassified(1, 1, CV_32S);
         cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
 
-        PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize);
+        PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize);
 
         while (level.isFeasible(maxObjectSize))
         {
index 0fd482c..f29471f 100644 (file)
@@ -67,8 +67,8 @@ namespace cv { namespace gpu { namespace device
                         crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
                         crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
             }
-            __device__ __forceinline__ TransformOp() {}
-            __device__ __forceinline__ TransformOp(const TransformOp&) {}
+            __host__ __device__ __forceinline__ TransformOp() {}
+            __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {}
         };
 
         void call(const PtrStepSz<float3> src, const float* rot,
@@ -106,8 +106,8 @@ namespace cv { namespace gpu { namespace device
                         (cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
                         (cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
             }
-            __device__ __forceinline__ ProjectOp() {}
-            __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
+            __host__ __device__ __forceinline__ ProjectOp() {}
+            __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
         };
 
         void call(const PtrStepSz<float3> src, const float* rot,
index 1afcddc..aab922f 100644 (file)
@@ -62,8 +62,8 @@ namespace canny
             return ::abs(x) + ::abs(y);
         }
 
-        __device__ __forceinline__ L1() {}
-        __device__ __forceinline__ L1(const L1&) {}
+        __host__ __device__ __forceinline__ L1() {}
+        __host__ __device__ __forceinline__ L1(const L1&) {}
     };
     struct L2 : binary_function<int, int, float>
     {
@@ -72,8 +72,8 @@ namespace canny
             return ::sqrtf(x * x + y * y);
         }
 
-        __device__ __forceinline__ L2() {}
-        __device__ __forceinline__ L2(const L2&) {}
+        __host__ __device__ __forceinline__ L2() {}
+        __host__ __device__ __forceinline__ L2(const L2&) {}
     };
 }
 
@@ -470,8 +470,8 @@ namespace canny
             return (uchar)(-(e >> 1));
         }
 
-        __device__ __forceinline__ GetEdges() {}
-        __device__ __forceinline__ GetEdges(const GetEdges&) {}
+        __host__ __device__ __forceinline__ GetEdges() {}
+        __host__ __device__ __forceinline__ GetEdges(const GetEdges&) {}
     };
 }
 
index 7f3d4ae..c4d79bd 100644 (file)
@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace device
 
             template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
             {
-                I d = a - b;
+                I d = saturate_cast<I>(a - b);
                 return lo.x <= d.x && d.x <= hi.x &&
                        lo.y <= d.y && d.y <= hi.y &&
                        lo.z <= d.z && d.z <= hi.z;
@@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace device
 
             template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
             {
-                I d = a - b;
+                I d = saturate_cast<I>(a - b);
                 return lo.x <= d.x && d.x <= hi.x &&
                        lo.y <= d.y && d.y <= hi.y &&
                        lo.z <= d.z && d.z <= hi.z &&
index e9397e5..876d4ad 100644 (file)
@@ -162,8 +162,8 @@ namespace arithm
             return vadd4(a, b);
         }
 
-        __device__ __forceinline__ VAdd4() {}
-        __device__ __forceinline__ VAdd4(const VAdd4& other) {}
+        __host__ __device__ __forceinline__ VAdd4() {}
+        __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
     };
 
     ////////////////////////////////////
@@ -175,8 +175,8 @@ namespace arithm
             return vadd2(a, b);
         }
 
-        __device__ __forceinline__ VAdd2() {}
-        __device__ __forceinline__ VAdd2(const VAdd2& other) {}
+        __host__ __device__ __forceinline__ VAdd2() {}
+        __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
     };
 
     ////////////////////////////////////
@@ -188,8 +188,8 @@ namespace arithm
             return saturate_cast<D>(a + b);
         }
 
-        __device__ __forceinline__ AddMat() {}
-        __device__ __forceinline__ AddMat(const AddMat& other) {}
+        __host__ __device__ __forceinline__ AddMat() {}
+        __host__ __device__ __forceinline__ AddMat(const AddMat&) {}
     };
 }
 
@@ -397,8 +397,8 @@ namespace arithm
             return vsub4(a, b);
         }
 
-        __device__ __forceinline__ VSub4() {}
-        __device__ __forceinline__ VSub4(const VSub4& other) {}
+        __host__ __device__ __forceinline__ VSub4() {}
+        __host__ __device__ __forceinline__ VSub4(const VSub4&) {}
     };
 
     ////////////////////////////////////
@@ -410,8 +410,8 @@ namespace arithm
             return vsub2(a, b);
         }
 
-        __device__ __forceinline__ VSub2() {}
-        __device__ __forceinline__ VSub2(const VSub2& other) {}
+        __host__ __device__ __forceinline__ VSub2() {}
+        __host__ __device__ __forceinline__ VSub2(const VSub2&) {}
     };
 
     ////////////////////////////////////
@@ -423,8 +423,8 @@ namespace arithm
             return saturate_cast<D>(a - b);
         }
 
-        __device__ __forceinline__ SubMat() {}
-        __device__ __forceinline__ SubMat(const SubMat& other) {}
+        __host__ __device__ __forceinline__ SubMat() {}
+        __host__ __device__ __forceinline__ SubMat(const SubMat&) {}
     };
 }
 
@@ -617,8 +617,8 @@ namespace arithm
             return res;
         }
 
-        __device__ __forceinline__ Mul_8uc4_32f() {}
-        __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
+        __host__ __device__ __forceinline__ Mul_8uc4_32f() {}
+        __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
     };
 
     struct Mul_16sc4_32f : binary_function<short4, float, short4>
@@ -629,8 +629,8 @@ namespace arithm
                                saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
         }
 
-        __device__ __forceinline__ Mul_16sc4_32f() {}
-        __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
+        __host__ __device__ __forceinline__ Mul_16sc4_32f() {}
+        __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
     };
 
     template <typename T, typename D> struct Mul : binary_function<T, T, D>
@@ -640,8 +640,8 @@ namespace arithm
             return saturate_cast<D>(a * b);
         }
 
-        __device__ __forceinline__ Mul() {}
-        __device__ __forceinline__ Mul(const Mul& other) {}
+        __host__ __device__ __forceinline__ Mul() {}
+        __host__ __device__ __forceinline__ Mul(const Mul&) {}
     };
 
     template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
@@ -888,8 +888,8 @@ namespace arithm
             return b != 0 ? saturate_cast<D>(a / b) : 0;
         }
 
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
     };
     template <typename T> struct Div<T, float> : binary_function<T, T, float>
     {
@@ -898,8 +898,8 @@ namespace arithm
             return b != 0 ? static_cast<float>(a) / b : 0;
         }
 
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
     };
     template <typename T> struct Div<T, double> : binary_function<T, T, double>
     {
@@ -908,8 +908,8 @@ namespace arithm
             return b != 0 ? static_cast<double>(a) / b : 0;
         }
 
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
     };
 
     template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
@@ -1196,8 +1196,8 @@ namespace arithm
             return vabsdiff4(a, b);
         }
 
-        __device__ __forceinline__ VAbsDiff4() {}
-        __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
+        __host__ __device__ __forceinline__ VAbsDiff4() {}
+        __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
     };
 
     ////////////////////////////////////
@@ -1209,8 +1209,8 @@ namespace arithm
             return vabsdiff2(a, b);
         }
 
-        __device__ __forceinline__ VAbsDiff2() {}
-        __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
+        __host__ __device__ __forceinline__ VAbsDiff2() {}
+        __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
     };
 
     ////////////////////////////////////
@@ -1235,8 +1235,8 @@ namespace arithm
             return saturate_cast<T>(_abs(a - b));
         }
 
-        __device__ __forceinline__ AbsDiffMat() {}
-        __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
+        __host__ __device__ __forceinline__ AbsDiffMat() {}
+        __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
     };
 }
 
@@ -1370,8 +1370,8 @@ namespace arithm
             return saturate_cast<T>(x * x);
         }
 
-        __device__ __forceinline__ Sqr() {}
-        __device__ __forceinline__ Sqr(const Sqr& other) {}
+        __host__ __device__ __forceinline__ Sqr() {}
+        __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
     };
 }
 
@@ -1466,8 +1466,8 @@ namespace arithm
             return saturate_cast<T>(f(x));
         }
 
-        __device__ __forceinline__ Exp() {}
-        __device__ __forceinline__ Exp(const Exp& other) {}
+        __host__ __device__ __forceinline__ Exp() {}
+        __host__ __device__ __forceinline__ Exp(const Exp&) {}
     };
 }
 
@@ -1507,8 +1507,8 @@ namespace arithm
             return vcmpeq4(a, b);
         }
 
-        __device__ __forceinline__ VCmpEq4() {}
-        __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
+        __host__ __device__ __forceinline__ VCmpEq4() {}
+        __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
     };
     struct VCmpNe4 : binary_function<uint, uint, uint>
     {
@@ -1517,8 +1517,8 @@ namespace arithm
             return vcmpne4(a, b);
         }
 
-        __device__ __forceinline__ VCmpNe4() {}
-        __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
+        __host__ __device__ __forceinline__ VCmpNe4() {}
+        __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
     };
     struct VCmpLt4 : binary_function<uint, uint, uint>
     {
@@ -1527,8 +1527,8 @@ namespace arithm
             return vcmplt4(a, b);
         }
 
-        __device__ __forceinline__ VCmpLt4() {}
-        __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
+        __host__ __device__ __forceinline__ VCmpLt4() {}
+        __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
     };
     struct VCmpLe4 : binary_function<uint, uint, uint>
     {
@@ -1537,8 +1537,8 @@ namespace arithm
             return vcmple4(a, b);
         }
 
-        __device__ __forceinline__ VCmpLe4() {}
-        __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
+        __host__ __device__ __forceinline__ VCmpLe4() {}
+        __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
     };
 
     ////////////////////////////////////
@@ -2008,8 +2008,8 @@ namespace arithm
             return vmin4(a, b);
         }
 
-        __device__ __forceinline__ VMin4() {}
-        __device__ __forceinline__ VMin4(const VMin4& other) {}
+        __host__ __device__ __forceinline__ VMin4() {}
+        __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
     };
 
     ////////////////////////////////////
@@ -2021,8 +2021,8 @@ namespace arithm
             return vmin2(a, b);
         }
 
-        __device__ __forceinline__ VMin2() {}
-        __device__ __forceinline__ VMin2(const VMin2& other) {}
+        __host__ __device__ __forceinline__ VMin2() {}
+        __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
     };
 }
 
@@ -2100,8 +2100,8 @@ namespace arithm
             return vmax4(a, b);
         }
 
-        __device__ __forceinline__ VMax4() {}
-        __device__ __forceinline__ VMax4(const VMax4& other) {}
+        __host__ __device__ __forceinline__ VMax4() {}
+        __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
     };
 
     ////////////////////////////////////
@@ -2113,8 +2113,8 @@ namespace arithm
             return vmax2(a, b);
         }
 
-        __device__ __forceinline__ VMax2() {}
-        __device__ __forceinline__ VMax2(const VMax2& other) {}
+        __host__ __device__ __forceinline__ VMax2() {}
+        __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
     };
 }
 
index faec89b..59eba26 100644 (file)
@@ -48,6 +48,7 @@
 #include "opencv2/gpu/device/common.hpp"
 #include "opencv2/gpu/device/emulation.hpp"
 #include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/functional.hpp"
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/dynamic_smem.hpp"
 
@@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace device
 
             const int ind = ::atomicAdd(r_sizes + n, 1);
             if (ind < maxSize)
-                r_table(n, ind) = p - templCenter;
+                r_table(n, ind) = saturate_cast<short2>(p - templCenter);
         }
 
         void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
@@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace device
 
             for (int j = 0; j < r_row_size; ++j)
             {
-                short2 c = p - r_row[j];
+                int2 c = p - r_row[j];
 
                 c.x = __float2int_rn(c.x * idp);
                 c.y = __float2int_rn(c.y * idp);
index c155aa8..7f5d5f3 100644 (file)
@@ -81,48 +81,90 @@ namespace
 
     const ErrorEntry npp_errors [] =
     {
-        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
-        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
-        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
-
 #if defined (_MSC_VER)
         error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
 #endif
 
+#if NPP_VERSION < 5500
         error_entry( NPP_BAD_ARG_ERROR ),
-        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
-        error_entry( NPP_TEXTURE_BIND_ERROR ),
         error_entry( NPP_COEFF_ERROR ),
         error_entry( NPP_RECT_ERROR ),
         error_entry( NPP_QUAD_ERROR ),
-        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
-        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
-        error_entry( NPP_INTERPOLATION_ERROR ),
-        error_entry( NPP_RESIZE_FACTOR_ERROR ),
-        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
         error_entry( NPP_MEMFREE_ERR ),
         error_entry( NPP_MEMSET_ERR ),
-        error_entry( NPP_MEMCPY_ERROR ),
         error_entry( NPP_MEM_ALLOC_ERR ),
         error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
         error_entry( NPP_MIRROR_FLIP_ERR ),
         error_entry( NPP_INVALID_INPUT ),
+        error_entry( NPP_POINTER_ERROR ),
+        error_entry( NPP_WARNING ),
+        error_entry( NPP_ODD_ROI_WARNING ),
+#else
+        error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
+        error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
+        error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
+        error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
+        error_entry( NPP_MEMFREE_ERROR ),
+        error_entry( NPP_MEMSET_ERROR ),
+        error_entry( NPP_QUALITY_INDEX_ERROR ),
+        error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
+        error_entry( NPP_CHANNEL_ORDER_ERROR ),
+        error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
+        error_entry( NPP_QUADRANGLE_ERROR ),
+        error_entry( NPP_RECTANGLE_ERROR ),
+        error_entry( NPP_COEFFICIENT_ERROR ),
+        error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
+        error_entry( NPP_COI_ERROR ),
+        error_entry( NPP_DIVISOR_ERROR ),
+        error_entry( NPP_CHANNEL_ERROR ),
+        error_entry( NPP_STRIDE_ERROR ),
+        error_entry( NPP_ANCHOR_ERROR ),
+        error_entry( NPP_MASK_SIZE_ERROR ),
+        error_entry( NPP_MIRROR_FLIP_ERROR ),
+        error_entry( NPP_MOMENT_00_ZERO_ERROR ),
+        error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
+        error_entry( NPP_THRESHOLD_ERROR ),
+        error_entry( NPP_CONTEXT_MATCH_ERROR ),
+        error_entry( NPP_FFT_FLAG_ERROR ),
+        error_entry( NPP_FFT_ORDER_ERROR ),
+        error_entry( NPP_SCALE_RANGE_ERROR ),
+        error_entry( NPP_DATA_TYPE_ERROR ),
+        error_entry( NPP_OUT_OFF_RANGE_ERROR ),
+        error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
+        error_entry( NPP_MEMORY_ALLOCATION_ERR ),
+        error_entry( NPP_RANGE_ERROR ),
+        error_entry( NPP_BAD_ARGUMENT_ERROR ),
+        error_entry( NPP_NO_MEMORY_ERROR ),
+        error_entry( NPP_ERROR_RESERVED ),
+        error_entry( NPP_NO_OPERATION_WARNING ),
+        error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
+        error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
+#endif
+
+        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
+        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
+        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
+        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
+        error_entry( NPP_TEXTURE_BIND_ERROR ),
+        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
+        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
+        error_entry( NPP_INTERPOLATION_ERROR ),
+        error_entry( NPP_RESIZE_FACTOR_ERROR ),
+        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
+        error_entry( NPP_MEMCPY_ERROR ),
         error_entry( NPP_ALIGNMENT_ERROR ),
         error_entry( NPP_STEP_ERROR ),
         error_entry( NPP_SIZE_ERROR ),
-        error_entry( NPP_POINTER_ERROR ),
         error_entry( NPP_NULL_POINTER_ERROR ),
         error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
         error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
         error_entry( NPP_ERROR ),
         error_entry( NPP_NO_ERROR ),
         error_entry( NPP_SUCCESS ),
-        error_entry( NPP_WARNING ),
         error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
         error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
         error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
-        error_entry( NPP_DOUBLE_SIZE_WARNING ),
-        error_entry( NPP_ODD_ROI_WARNING )
+        error_entry( NPP_DOUBLE_SIZE_WARNING )
     };
 
     const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
index 761abb5..056e5ef 100644 (file)
@@ -187,10 +187,20 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
     CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
 
-    typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
-        NppiSize oSizeROI, Npp64f* pRetVal);
+#if CUDA_VERSION < 5050
+    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
 
-    static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+#else
+    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
+        NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
+
+    typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
+
+    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+
+    static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
+#endif
 
     NppiSize sz;
     sz.width  = src1.cols;
@@ -202,7 +212,16 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
 
     DeviceBuffer dbuf;
 
-    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#if CUDA_VERSION < 5050
+    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#else
+    int bufSize;
+    buf_size_funcs[funcIdx](sz, &bufSize);
+
+    GpuMat buf(1, bufSize, CV_8UC1);
+
+    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
+#endif
 
     cudaSafeCall( cudaDeviceSynchronize() );
 
index f219089..06d5386 100644 (file)
     #define CUDART_MINIMUM_REQUIRED_VERSION 4010
     #define NPP_MINIMUM_REQUIRED_VERSION 4100
 
+    #define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
+
     #if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
         #error "Insufficient Cuda Runtime library version, please update it."
     #endif
 
-    #if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
+    #if (NPP_VERSION < NPP_MINIMUM_REQUIRED_VERSION)
         #error "Insufficient NPP version, please update it."
     #endif
 
index 1bc952c..b622ad8 100644 (file)
@@ -352,7 +352,7 @@ GPU_TEST_P(Add_Scalar, WithOutMask)
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::add(mat, val, dst_gold, cv::noArray(), depth.second);
 
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
     }
 }
 
@@ -383,7 +383,7 @@ GPU_TEST_P(Add_Scalar, WithMask)
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::add(mat, val, dst_gold, mask, depth.second);
 
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
     }
 }
 
@@ -567,7 +567,7 @@ GPU_TEST_P(Subtract_Scalar, WithOutMask)
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);
 
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
     }
 }
 
@@ -598,7 +598,7 @@ GPU_TEST_P(Subtract_Scalar, WithMask)
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::subtract(mat, val, dst_gold, mask, depth.second);
 
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
     }
 }
 
@@ -2148,7 +2148,7 @@ GPU_TEST_P(Min, Scalar)
 
         cv::Mat dst_gold = cv::min(src, val);
 
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
     }
 }
 
@@ -2231,7 +2231,7 @@ GPU_TEST_P(Max, Scalar)
 
         cv::Mat dst_gold = cv::max(src, val);
 
-        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
     }
 }
 
index 9e30b92..53b93a0 100644 (file)
@@ -102,8 +102,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
     for (int i = 0; i < v_gold.rows; ++i)
         f.read(v_gold.ptr<char>(i), v_gold.cols * sizeof(float));
 
-    EXPECT_MAT_NEAR(u_gold, u, 0);
-    EXPECT_MAT_NEAR(v_gold, v, 0);
+    EXPECT_MAT_SIMILAR(u_gold, u, 1e-3);
+    EXPECT_MAT_SIMILAR(v_gold, v, 1e-3);
 #else
     std::ofstream f(fname.c_str(), std::ios_base::binary);
 
index 4c60867..05ab99a 100644 (file)
@@ -95,14 +95,10 @@ elseif(HAVE_QT)
   endif()
   include(${QT_USE_FILE})
 
-  if(QT_INCLUDE_DIR)
-    ocv_include_directories(${QT_INCLUDE_DIR})
-  endif()
-
   QT4_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc)
   QT4_WRAP_CPP(_MOC_OUTFILES src/window_QT.h)
 
-  list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES} ${QT_QTTEST_LIBRARY})
+  list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES})
   list(APPEND highgui_srcs src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES})
   ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag)
   if(${_have_flag})
@@ -183,7 +179,11 @@ if(HAVE_XIMEA)
   if(XIMEA_LIBRARY_DIR)
     link_directories(${XIMEA_LIBRARY_DIR})
   endif()
-  list(APPEND HIGHGUI_LIBRARIES m3api)
+  if(CMAKE_CL_64)
+    list(APPEND HIGHGUI_LIBRARIES m3apiX64)
+  else()
+    list(APPEND HIGHGUI_LIBRARIES m3api)
+  endif()
 endif(HAVE_XIMEA)
 
 if(HAVE_FFMPEG)
index 21fb947..b7cfbd9 100644 (file)
@@ -3195,8 +3195,10 @@ IplImage* CvCaptureCAM_DShow::retrieveFrame(int)
         frame = cvCreateImage( cvSize(w,h), 8, 3 );
     }
 
-    VI.getPixels( index, (uchar*)frame->imageData, false, true );
-    return frame;
+    if (VI.getPixels( index, (uchar*)frame->imageData, false, true ))
+        return frame;
+    else
+        return NULL;
 }
 
 double CvCaptureCAM_DShow::getProperty( int property_id )
index dbb8f58..5acf2c0 100644 (file)
@@ -20,25 +20,24 @@ public:
     virtual IplImage* retrieveFrame(int);
     virtual int getCaptureDomain() { return CV_CAP_XIAPI; } // Return the type of the capture object: CV_CAP_VFW, etc...
 
-protected:
+private:
     void init();
     void errMsg(const char* msg, int errNum);
+    void resetCvImage();
+    int  getBpp();
     IplImage* frame;
 
     HANDLE    hmv;
     DWORD     numDevices;
-    XI_IMG    image;
-    int       width;
-    int       height;
-    int       format;
     int       timeout;
+    XI_IMG    image;
 };
 
 /**********************************************************************************/
 
 CvCapture* cvCreateCameraCapture_XIMEA( int index )
 {
-     CvCaptureCAM_XIMEA* capture = new CvCaptureCAM_XIMEA;
+    CvCaptureCAM_XIMEA* capture = new CvCaptureCAM_XIMEA;
 
     if( capture->open( index ))
         return capture;
@@ -79,18 +78,19 @@ bool CvCaptureCAM_XIMEA::open( int wIndex )
     // always use auto white ballance
     mvret = xiSetParamInt( hmv, XI_PRM_AUTO_WB, 1);
     if(mvret != XI_OK) goto error;
+    
+    // default image format RGB24
+    mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, XI_RGB24);
+    if(mvret != XI_OK) goto error;
 
+    int width = 0;
     mvret = xiGetParamInt( hmv, XI_PRM_WIDTH, &width);
     if(mvret != XI_OK) goto error;
 
+    int height = 0;
     mvret = xiGetParamInt( hmv, XI_PRM_HEIGHT, &height);
     if(mvret != XI_OK) goto error;
 
-    // default image format RGB24
-    format = XI_RGB24;
-    mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, format);
-    if(mvret != XI_OK) goto error;
-
     // allocate frame buffer for RGB24 image
     frame = cvCreateImage(cvSize( width, height), IPL_DEPTH_8U, 3);
 
@@ -103,10 +103,10 @@ bool CvCaptureCAM_XIMEA::open( int wIndex )
         errMsg("StartAcquisition XI_DEVICE failed", mvret);
         goto error;
     }
-
     return true;
 
 error:
+    errMsg("Open XI_DEVICE failed", mvret);
     xiCloseDevice(hmv);
     hmv = NULL;
     return false;
@@ -116,18 +116,19 @@ error:
 
 void CvCaptureCAM_XIMEA::close()
 {
-    if(hmv)
-    {
-        xiStopAcquisition(hmv);
-        xiCloseDevice(hmv);
-        hmv = NULL;
-    }
+    if(frame)
+        cvReleaseImage(&frame);
+
+    xiStopAcquisition(hmv);
+    xiCloseDevice(hmv);
+    hmv = NULL;
 }
 
 /**********************************************************************************/
 
 bool CvCaptureCAM_XIMEA::grabFrame()
 {
+    memset(&image, 0, sizeof(XI_IMG));
     image.size = sizeof(XI_IMG);
     int mvret = xiGetImage( hmv, timeout, &image);
 
@@ -151,31 +152,18 @@ bool CvCaptureCAM_XIMEA::grabFrame()
 IplImage* CvCaptureCAM_XIMEA::retrieveFrame(int)
 {
     // update cvImage after format has changed
-    if( (int)image.width != width || (int)image.height != height || image.frm != (XI_IMG_FORMAT)format)
-    {
-        cvReleaseImage(&frame);
-        switch( image.frm)
-        {
-        case XI_MONO8  : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 1); break;
-        case XI_MONO16 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_16U, 1); break;
-        case XI_RGB24  : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 3); break;
-        case XI_RGB32  : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 4); break;
-        default :
-            return frame;
-        }
-        // update global image format
-        format = image.frm;
-        width = image.width;
-        height = image.height;
-    }
-
+    resetCvImage();
+    
     // copy pixel data
     switch( image.frm)
     {
-    case XI_MONO8  : memcpy( frame->imageData, image.bp, image.width*image.height); break;
-    case XI_MONO16 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break;
-    case XI_RGB24  : memcpy( frame->imageData, image.bp, image.width*image.height*3); break;
-    case XI_RGB32  : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(DWORD)); break;
+    case XI_MONO8       : 
+    case XI_RAW8        : memcpy( frame->imageData, image.bp, image.width*image.height); break;
+    case XI_MONO16      :
+    case XI_RAW16       : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break;
+    case XI_RGB24       :
+    case XI_RGB_PLANAR  : memcpy( frame->imageData, image.bp, image.width*image.height*3); break;
+    case XI_RGB32       : memcpy( frame->imageData, image.bp, image.width*image.height*4); break;
     default: break;
     }
     return frame;
@@ -183,6 +171,35 @@ IplImage* CvCaptureCAM_XIMEA::retrieveFrame(int)
 
 /**********************************************************************************/
 
+// Make sure `frame` is able to hold the XI_IMG last filled by grabFrame().
+//
+// The IplImage is (re)allocated whenever it is missing or when its size,
+// depth or channel count differs from what image.width, image.height and
+// image.frm require, and it is then cleared.  The check is made against the
+// buffer that is really allocated: comparing the image with the device's
+// current settings would keep an old, smaller buffer after a successful
+// format/size change and let the memcpy in retrieveFrame() run past its end.
+//
+// For a pixel format that has no mapping below, the buffer is released and
+// `frame` is left NULL, so retrieveFrame() returns NULL.
+void CvCaptureCAM_XIMEA::resetCvImage()
+{
+    int depth = 0, channels = 0;
+
+    // pixel layout needed for the grabbed image
+    switch( image.frm)
+    {
+    case XI_MONO8       :
+    case XI_RAW8        : depth = IPL_DEPTH_8U;  channels = 1; break;
+    case XI_MONO16      :
+    case XI_RAW16       : depth = IPL_DEPTH_16U; channels = 1; break;
+    case XI_RGB24       :
+    case XI_RGB_PLANAR  : depth = IPL_DEPTH_8U;  channels = 3; break;
+    case XI_RGB32       : depth = IPL_DEPTH_8U;  channels = 4; break;
+    default :
+        // unsupported format: there is nothing valid to copy into a frame
+        if(frame) cvReleaseImage(&frame);
+        frame = NULL;
+        return;
+    }
+
+    if( !frame ||
+        frame->width != (int)image.width || frame->height != (int)image.height ||
+        frame->depth != depth || frame->nChannels != channels)
+    {
+        if(frame) cvReleaseImage(&frame);
+        frame = cvCreateImage(cvSize( image.width, image.height), depth, channels);
+    }
+
+    // frame is non-NULL here: either it already matched or it was just created
+    cvZero(frame);
+}
+/**********************************************************************************/
+
 double CvCaptureCAM_XIMEA::getProperty( int property_id )
 {
     if(hmv == NULL)
@@ -238,20 +255,14 @@ bool CvCaptureCAM_XIMEA::setProperty( int property_id, double value )
     switch(property_id)
     {
     // OCV parameters
-    case CV_CAP_PROP_FRAME_WIDTH  : mvret = xiSetParamInt( hmv, XI_PRM_WIDTH, ival);
-        if(mvret == XI_OK) width = ival;
-        break;
-    case CV_CAP_PROP_FRAME_HEIGHT : mvret = xiSetParamInt( hmv, XI_PRM_HEIGHT, ival);
-        if(mvret == XI_OK) height = ival;
-        break;
+    case CV_CAP_PROP_FRAME_WIDTH  : mvret = xiSetParamInt( hmv, XI_PRM_WIDTH, ival); break;
+    case CV_CAP_PROP_FRAME_HEIGHT : mvret = xiSetParamInt( hmv, XI_PRM_HEIGHT, ival); break;
     case CV_CAP_PROP_FPS          : mvret = xiSetParamFloat( hmv, XI_PRM_FRAMERATE, fval); break;
     case CV_CAP_PROP_GAIN         : mvret = xiSetParamFloat( hmv, XI_PRM_GAIN, fval); break;
     case CV_CAP_PROP_EXPOSURE     : mvret = xiSetParamInt( hmv, XI_PRM_EXPOSURE, ival); break;
     // XIMEA camera properties
     case CV_CAP_PROP_XI_DOWNSAMPLING  : mvret = xiSetParamInt( hmv, XI_PRM_DOWNSAMPLING, ival); break;
-    case CV_CAP_PROP_XI_DATA_FORMAT   : mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, ival);
-        if(mvret == XI_OK) format = ival;
-        break;
+    case CV_CAP_PROP_XI_DATA_FORMAT   : mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, ival); break;
     case CV_CAP_PROP_XI_OFFSET_X      : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_X, ival); break;
     case CV_CAP_PROP_XI_OFFSET_Y      : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_Y, ival); break;
     case CV_CAP_PROP_XI_TRG_SOURCE    : mvret = xiSetParamInt( hmv, XI_PRM_TRG_SOURCE, ival); break;
@@ -288,7 +299,7 @@ bool CvCaptureCAM_XIMEA::setProperty( int property_id, double value )
 void CvCaptureCAM_XIMEA::errMsg(const char* msg, int errNum)
 {
 #if defined WIN32 || defined _WIN32
-    char buf[512];
+    char buf[512]="";
     sprintf( buf, "%s : %d\n", msg, errNum);
     OutputDebugString(buf);
 #else
@@ -296,4 +307,22 @@ void CvCaptureCAM_XIMEA::errMsg(const char* msg, int errNum)
 #endif
 }
 
+/**********************************************************************************/
+
+// Per-pixel size of the current XI_IMG format: 1, 2, 3 or 4 (the same
+// multipliers retrieveFrame() uses when copying pixel data), or 0 for a
+// format that is not listed.
+int  CvCaptureCAM_XIMEA::getBpp()
+{
+    int bpp = 0;
+
+    switch( image.frm)
+    {
+    case XI_MONO8:
+    case XI_RAW8:
+        bpp = 1;
+        break;
+    case XI_MONO16:
+    case XI_RAW16:
+        bpp = 2;
+        break;
+    case XI_RGB24:
+    case XI_RGB_PLANAR:
+        bpp = 3;
+        break;
+    case XI_RGB32:
+        bpp = 4;
+        break;
+    default:
+        break;
+    }
+
+    return bpp;
+}
+
 /**********************************************************************************/
\ No newline at end of file
index 6d29534..1e47bf6 100644 (file)
@@ -256,12 +256,17 @@ namespace
 
 void cv::imshow( const string& winname, InputArray _img )
 {
+    const Size size = _img.size();
 #ifndef HAVE_OPENGL
-    Mat img = _img.getMat();
-    CvMat c_img = img;
-    cvShowImage(winname.c_str(), &c_img);
+    CV_Assert(size.width>0 && size.height>0);
+    {
+        Mat img = _img.getMat();
+        CvMat c_img = img;
+        cvShowImage(winname.c_str(), &c_img);
+    }
 #else
     const double useGl = getWindowProperty(winname, WND_PROP_OPENGL);
+    CV_Assert(size.width>0 && size.height>0);
 
     if (useGl <= 0)
     {
@@ -275,7 +280,6 @@ void cv::imshow( const string& winname, InputArray _img )
 
         if (autoSize > 0)
         {
-            Size size = _img.size();
             resizeWindow(winname, size.width, size.height);
         }
 
index 4ebf6d5..9fd8df5 100644 (file)
@@ -116,6 +116,7 @@ If you use ``cvtColor`` with 8-bit images, the conversion will have some informa
 The function can do the following transformations:
 
 *
+    RGB :math:`\leftrightarrow` GRAY ( ``CV_BGR2GRAY, CV_RGB2GRAY, CV_GRAY2BGR, CV_GRAY2RGB``     )
     Transformations within RGB space like adding/removing the alpha channel, reversing the channel order, conversion to/from 16-bit RGB color (R5:G6:B5 or R5:G5:B5), as well as conversion to/from grayscale using:
 
     .. math::
@@ -765,7 +766,7 @@ Runs the GrabCut algorithm.
 
         * **GC_PR_BGD** defines a possible background pixel.
 
-        * **GC_PR_BGD** defines a possible foreground pixel.
+        * **GC_PR_FGD** defines a possible foreground pixel.
 
     :param rect: ROI containing a segmented object. The pixels outside of the ROI are marked as "obvious background". The parameter is only used when  ``mode==GC_INIT_WITH_RECT`` .
 
diff --git a/modules/imgproc/src/clahe.cpp b/modules/imgproc/src/clahe.cpp
new file mode 100644 (file)
index 0000000..4ce4797
--- /dev/null
@@ -0,0 +1,334 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+// ----------------------------------------------------------------------
+// CLAHE
+
+namespace
+{
+    // Loop body for cv::parallel_for_ whose operator() computes the lookup
+    // tables of a range of tiles (one row of `lut` per tile).
+    class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
+    {
+    public:
+        CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize,
+                           int tilesX, int tilesY, int clipLimit, float lutScale)
+            : src_(src),
+              lut_(lut),
+              tileSize_(tileSize),
+              tilesX_(tilesX),
+              tilesY_(tilesY),
+              clipLimit_(clipLimit),
+              lutScale_(lutScale)
+        {
+        }
+
+        void operator ()(const cv::Range& range) const;
+
+    private:
+        cv::Mat src_;           // image the tiles are cut from
+        mutable cv::Mat lut_;   // written from the const operator(), hence mutable
+
+        cv::Size tileSize_;     // tile width / height in pixels
+        int tilesX_;            // number of tiles along x
+        int tilesY_;            // number of tiles along y
+        int clipLimit_;         // histogram bins are clipped to this count when > 0
+        float lutScale_;        // factor applied to the cumulative histogram
+    };
+
+    // Builds the lookup table of every tile whose linear index
+    // (ty * tilesX_ + tx) lies in [range.start, range.end):
+    // histogram -> optional clipping with redistribution -> scaled
+    // cumulative sum written to the tile's row of lut_.
+    void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const
+    {
+        const int histSize = 256;
+
+        for (int tileIdx = range.start; tileIdx < range.end; ++tileIdx)
+        {
+            // row `tileIdx` of the LUT matrix receives this tile's table
+            uchar* const tileLut = lut_.ptr(tileIdx);
+
+            // cut the tile out of the source image
+            const cv::Rect tileROI((tileIdx % tilesX_) * tileSize_.width,
+                                   (tileIdx / tilesX_) * tileSize_.height,
+                                   tileSize_.width,
+                                   tileSize_.height);
+
+            const cv::Mat tile = src_(tileROI);
+
+            // histogram of the tile, four pixels per iteration plus a tail loop
+            int tileHist[histSize] = {0, };
+
+            const int unrolledEnd = tileROI.width - 3;
+            for (int row = 0; row < tileROI.height; ++row)
+            {
+                const uchar* pixels = tile.ptr<uchar>(row);
+
+                int col = 0;
+                for (; col < unrolledEnd; col += 4)
+                {
+                    ++tileHist[pixels[col]];
+                    ++tileHist[pixels[col + 1]];
+                    ++tileHist[pixels[col + 2]];
+                    ++tileHist[pixels[col + 3]];
+                }
+
+                for (; col < tileROI.width; ++col)
+                    ++tileHist[pixels[col]];
+            }
+
+            if (clipLimit_ > 0)
+            {
+                // cut every bin down to the limit and count what was removed
+                int excess = 0;
+                for (int bin = 0; bin < histSize; ++bin)
+                {
+                    const int over = tileHist[bin] - clipLimit_;
+                    if (over > 0)
+                    {
+                        excess += over;
+                        tileHist[bin] = clipLimit_;
+                    }
+                }
+
+                // hand the removed counts back: an equal share for every bin,
+                // the remainder one by one to the lowest bins
+                const int perBin = excess / histSize;
+                const int leftover = excess % histSize;
+
+                for (int bin = 0; bin < histSize; ++bin)
+                    tileHist[bin] += perBin + (bin < leftover ? 1 : 0);
+            }
+
+            // LUT entry = scaled cumulative histogram
+            int cumulative = 0;
+            for (int bin = 0; bin < histSize; ++bin)
+            {
+                cumulative += tileHist[bin];
+                tileLut[bin] = cv::saturate_cast<uchar>(cumulative * lutScale_);
+            }
+        }
+    }
+
+    // Loop body for cv::parallel_for_ whose operator() maps a range of source
+    // rows to the destination through the per-tile lookup tables.
+    class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
+    {
+    public:
+        CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut,
+                                 cv::Size tileSize, int tilesX, int tilesY)
+            : src_(src),
+              dst_(dst),
+              lut_(lut),
+              tileSize_(tileSize),
+              tilesX_(tilesX),
+              tilesY_(tilesY)
+        {
+        }
+
+        void operator ()(const cv::Range& range) const;
+
+    private:
+        cv::Mat src_;           // input image
+        mutable cv::Mat dst_;   // written from the const operator(), hence mutable
+        cv::Mat lut_;           // one row of lookup values per tile
+
+        cv::Size tileSize_;     // tile width / height in pixels
+        int tilesX_;            // number of tiles along x
+        int tilesY_;            // number of tiles along y
+    };
+
+    // Writes rows [range.start, range.end) of dst_: every source pixel is
+    // looked up in the tables of the (up to) four surrounding tiles and the
+    // four results are blended with bilinear weights.
+    void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const
+    {
+        const size_t lut_step = lut_.step;
+
+        for (int y = range.start; y < range.end; ++y)
+        {
+            const uchar* srcRow = src_.ptr<uchar>(y);
+            uchar* dstRow = dst_.ptr<uchar>(y);
+
+            // row position in tile units, shifted by half a tile
+            const float tyf = (static_cast<float>(y) / tileSize_.height) - 0.5f;
+
+            int ty1 = cvFloor(tyf);
+            int ty2 = ty1 + 1;
+
+            // vertical weight of ty2, taken before the indices are clamped
+            const float ya = tyf - ty1;
+
+            // clamp to the grid; at the top/bottom border both indices end up
+            // on the same tile row
+            ty1 = std::max(ty1, 0);
+            ty2 = std::min(ty2, tilesY_ - 1);
+
+            // LUT row of the first tile in tile rows ty1 / ty2; the tile at
+            // column tx is reached by adding tx * lut_step below
+            const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_);
+            const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_);
+
+            for (int x = 0; x < src_.cols; ++x)
+            {
+                // same computation as for y, along the horizontal axis
+                const float txf = (static_cast<float>(x) / tileSize_.width) - 0.5f;
+
+                int tx1 = cvFloor(txf);
+                int tx2 = tx1 + 1;
+
+                const float xa = txf - tx1;
+
+                tx1 = std::max(tx1, 0);
+                tx2 = std::min(tx2, tilesX_ - 1);
+
+                const int srcVal = srcRow[x];
+
+                // offsets of entry srcVal in the tables of tile columns tx1 / tx2
+                const size_t ind1 = tx1 * lut_step + srcVal;
+                const size_t ind2 = tx2 * lut_step + srcVal;
+
+                float res = 0;
+
+                // bilinear blend of the four table values
+                res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya));
+                res += lutPlane1[ind2] * ((xa) * (1.0f - ya));
+                res += lutPlane2[ind1] * ((1.0f - xa) * (ya));
+                res += lutPlane2[ind2] * ((xa) * (ya));
+
+                dstRow[x] = cv::saturate_cast<uchar>(res);
+            }
+        }
+    }
+
+    // cv::CLAHE implementation: stores the clip limit and the tile grid, plus
+    // the padded-source and LUT matrices that apply() fills and that
+    // collectGarbage() releases.
+    class CLAHE_Impl : public cv::CLAHE
+    {
+    public:
+        CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
+
+        cv::AlgorithmInfo* info() const;
+
+        void apply(cv::InputArray src, cv::OutputArray dst);
+
+        // parameter accessors
+        double getClipLimit() const;
+        void setClipLimit(double clipLimit);
+
+        cv::Size getTilesGridSize() const;
+        void setTilesGridSize(cv::Size tileGridSize);
+
+        void collectGarbage();
+
+    private:
+        double clipLimit_;  // clipping is skipped by apply() when not positive
+        int tilesX_;        // number of tiles along x
+        int tilesY_;        // number of tiles along y
+
+        cv::Mat srcExt_;    // bordered copy of the source, used when it does not divide evenly
+        cv::Mat lut_;       // tilesX_ * tilesY_ rows of 256 lookup values
+    };
+
+    // Stores the parameters only; the work matrices are created by apply().
+    CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY)
+        : clipLimit_(clipLimit),
+          tilesX_(tilesX),
+          tilesY_(tilesY)
+    {}
+
+    // Algorithm registration under the name "CLAHE": exposes clipLimit_,
+    // tilesX_ and tilesY_ as the named parameters "clipLimit", "tilesX" and
+    // "tilesY".  NOTE(review): presumably this macro also provides the
+    // definition of CLAHE_Impl::info() declared in the class - confirm
+    // against the macro's definition.
+    CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE",
+        obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
+        obj.info()->addParam(obj, "tilesX", obj.tilesX_);
+        obj.info()->addParam(obj, "tilesY", obj.tilesY_))
+
+    // Equalizes `_src` into `_dst` (created with the size and type of the
+    // source).
+    //
+    // The source must be CV_8UC1 and the tile grid must be positive in both
+    // directions; either violation raises through CV_Assert.
+    //
+    // Steps: (1) compute one 256-entry lookup table per tile in parallel,
+    // (2) remap every source row in parallel, interpolating between the
+    // tables of neighbouring tiles.
+    void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
+    {
+        cv::Mat src = _src.getMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+
+        // setTilesGridSize() accepts any value; a zero grid dimension would
+        // make the modulo and divisions below fault, a negative one would
+        // produce meaningless tile sizes
+        CV_Assert( tilesX_ > 0 && tilesY_ > 0 );
+
+        _dst.create( src.size(), src.type() );
+        cv::Mat dst = _dst.getMat();
+
+        const int histSize = 256;
+
+        // one LUT row per tile
+        lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
+
+        cv::Size tileSize;
+        cv::Mat srcForLut;
+
+        if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
+        {
+            tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
+            srcForLut = src;
+        }
+        else
+        {
+            // extend the image at the bottom/right so that it splits into
+            // equal tiles; only the LUT computation sees the extended copy.
+            // NOTE: a dimension that already divides evenly still receives a
+            // full tilesN_ of extra rows/columns here.
+            cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
+
+            tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
+            srcForLut = srcExt_;
+        }
+
+        const int tileSizeTotal = tileSize.area();
+        const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
+
+        // convert the relative clip limit into an absolute per-bin count
+        // (at least 1); 0 disables clipping in the LUT body
+        int clipLimit = 0;
+        if (clipLimit_ > 0.0)
+        {
+            clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
+            clipLimit = std::max(clipLimit, 1);
+        }
+
+        CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
+        cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
+
+        CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_);
+        cv::parallel_for_(cv::Range(0, src.rows), interpolationBody);
+    }
+
+    // Stores the clip limit read by later apply() calls; apply() performs no
+    // clipping when the value is not positive.
+    void CLAHE_Impl::setClipLimit(double clipLimit)
+    {
+        this->clipLimit_ = clipLimit;
+    }
+
+    // Returns the clip limit as it was last set (or given to the constructor).
+    double CLAHE_Impl::getClipLimit() const
+    {
+        const double current = clipLimit_;
+        return current;
+    }
+
+    // Sets the tile grid: width = tiles along x, height = tiles along y.
+    // The values are stored as given, without validation.
+    void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
+    {
+        tilesY_ = tileGridSize.height;
+        tilesX_ = tileGridSize.width;
+    }
+
+    // Returns the tile grid as (tiles along x, tiles along y).
+    cv::Size CLAHE_Impl::getTilesGridSize() const
+    {
+        const cv::Size grid(tilesX_, tilesY_);
+        return grid;
+    }
+
+    // Releases the work matrices kept between apply() calls.
+    void CLAHE_Impl::collectGarbage()
+    {
+        lut_.release();
+        srcExt_.release();
+    }
+}
+
+// Factory: creates a CLAHE_Impl with the given clip limit and a grid of
+// tileGridSize.width x tileGridSize.height tiles, returned as cv::Ptr<cv::CLAHE>.
+cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
+{
+    const int tilesX = tileGridSize.width;
+    const int tilesY = tileGridSize.height;
+
+    return cv::Ptr<cv::CLAHE>(new CLAHE_Impl(clipLimit, tilesX, tilesY));
+}
index 3799d43..41ca2db 100644 (file)
@@ -2755,7 +2755,7 @@ const int ITUR_BT_601_CGV = -385875;
 const int ITUR_BT_601_CBV = -74448;
 
 template<int bIdx, int uIdx>
-struct YUV420sp2RGB888Invoker
+struct YUV420sp2RGB888Invoker : ParallelLoopBody
 {
     Mat* dst;
     const uchar* my1, *muv;
@@ -2764,10 +2764,10 @@ struct YUV420sp2RGB888Invoker
     YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
         : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        int rangeBegin = range.begin() * 2;
-        int rangeEnd = range.end() * 2;
+        int rangeBegin = range.start * 2;
+        int rangeEnd = range.end * 2;
 
         //R = 1.164(Y - 16) + 1.596(V - 128)
         //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
@@ -2824,7 +2824,7 @@ struct YUV420sp2RGB888Invoker
 };
 
 template<int bIdx, int uIdx>
-struct YUV420sp2RGBA8888Invoker
+struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
 {
     Mat* dst;
     const uchar* my1, *muv;
@@ -2833,10 +2833,10 @@ struct YUV420sp2RGBA8888Invoker
     YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
         : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        int rangeBegin = range.begin() * 2;
-        int rangeEnd = range.end() * 2;
+        int rangeBegin = range.start * 2;
+        int rangeEnd = range.end * 2;
 
         //R = 1.164(Y - 16) + 1.596(V - 128)
         //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
@@ -2897,7 +2897,7 @@ struct YUV420sp2RGBA8888Invoker
 };
 
 template<int bIdx>
-struct YUV420p2RGB888Invoker
+struct YUV420p2RGB888Invoker : ParallelLoopBody
 {
     Mat* dst;
     const uchar* my1, *mu, *mv;
@@ -2907,19 +2907,19 @@ struct YUV420p2RGB888Invoker
     YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
         : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        const int rangeBegin = range.begin() * 2;
-        const int rangeEnd = range.end() * 2;
+        const int rangeBegin = range.start * 2;
+        const int rangeEnd = range.end * 2;
 
         size_t uvsteps[2] = {width/2, stride - width/2};
         int usIdx = ustepIdx, vsIdx = vstepIdx;
 
         const uchar* y1 = my1 + rangeBegin * stride;
-        const uchar* u1 = mu + (range.begin() / 2) * stride;
-        const uchar* v1 = mv + (range.begin() / 2) * stride;
+        const uchar* u1 = mu + (range.start / 2) * stride;
+        const uchar* v1 = mv + (range.start / 2) * stride;
 
-        if(range.begin() % 2 == 1)
+        if(range.start % 2 == 1)
         {
             u1 += uvsteps[(usIdx++) & 1];
             v1 += uvsteps[(vsIdx++) & 1];
@@ -2965,7 +2965,7 @@ struct YUV420p2RGB888Invoker
 };
 
 template<int bIdx>
-struct YUV420p2RGBA8888Invoker
+struct YUV420p2RGBA8888Invoker : ParallelLoopBody
 {
     Mat* dst;
     const uchar* my1, *mu, *mv;
@@ -2975,19 +2975,19 @@ struct YUV420p2RGBA8888Invoker
     YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
         : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        int rangeBegin = range.begin() * 2;
-        int rangeEnd = range.end() * 2;
+        int rangeBegin = range.start * 2;
+        int rangeEnd = range.end * 2;
 
         size_t uvsteps[2] = {width/2, stride - width/2};
         int usIdx = ustepIdx, vsIdx = vstepIdx;
 
         const uchar* y1 = my1 + rangeBegin * stride;
-        const uchar* u1 = mu + (range.begin() / 2) * stride;
-        const uchar* v1 = mv + (range.begin() / 2) * stride;
+        const uchar* u1 = mu + (range.start / 2) * stride;
+        const uchar* v1 = mv + (range.start / 2) * stride;
 
-        if(range.begin() % 2 == 1)
+        if(range.start % 2 == 1)
         {
             u1 += uvsteps[(usIdx++) & 1];
             v1 += uvsteps[(vsIdx++) & 1];
@@ -3042,48 +3042,40 @@ template<int bIdx, int uIdx>
 inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
 {
     YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1,  _uv);
-#ifdef HAVE_TBB
     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
-        parallel_for(BlockedRange(0, _dst.rows/2), converter);
+        parallel_for_(Range(0, _dst.rows/2), converter);
     else
-#endif
-        converter(BlockedRange(0, _dst.rows/2));
+        converter(Range(0, _dst.rows/2));
 }
 
 template<int bIdx, int uIdx>
 inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
 {
     YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1,  _uv);
-#ifdef HAVE_TBB
     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
-        parallel_for(BlockedRange(0, _dst.rows/2), converter);
+        parallel_for_(Range(0, _dst.rows/2), converter);
     else
-#endif
-        converter(BlockedRange(0, _dst.rows/2));
+        converter(Range(0, _dst.rows/2));
 }
 
 template<int bIdx>
 inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
 {
     YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1,  _u, _v, ustepIdx, vstepIdx);
-#ifdef HAVE_TBB
     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
-        parallel_for(BlockedRange(0, _dst.rows/2), converter);
+        parallel_for_(Range(0, _dst.rows/2), converter);
     else
-#endif
-        converter(BlockedRange(0, _dst.rows/2));
+        converter(Range(0, _dst.rows/2));
 }
 
 template<int bIdx>
 inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
 {
     YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1,  _u, _v, ustepIdx, vstepIdx);
-#ifdef HAVE_TBB
     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
-        parallel_for(BlockedRange(0, _dst.rows/2), converter);
+        parallel_for_(Range(0, _dst.rows/2), converter);
     else
-#endif
-        converter(BlockedRange(0, _dst.rows/2));
+        converter(Range(0, _dst.rows/2));
 }
 
 ///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
@@ -3167,7 +3159,7 @@ static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
 ///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
 
 template<int bIdx, int uIdx, int yIdx>
-struct YUV422toRGB888Invoker
+struct YUV422toRGB888Invoker : ParallelLoopBody
 {
     Mat* dst;
     const uchar* src;
@@ -3176,10 +3168,10 @@ struct YUV422toRGB888Invoker
     YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
         : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        int rangeBegin = range.begin();
-        int rangeEnd = range.end();
+        int rangeBegin = range.start;
+        int rangeEnd = range.end;
 
         const int uidx = 1 - yIdx + uIdx * 2;
         const int vidx = (2 + uidx) % 4;
@@ -3213,7 +3205,7 @@ struct YUV422toRGB888Invoker
 };
 
 template<int bIdx, int uIdx, int yIdx>
-struct YUV422toRGBA8888Invoker
+struct YUV422toRGBA8888Invoker : ParallelLoopBody
 {
     Mat* dst;
     const uchar* src;
@@ -3222,10 +3214,10 @@ struct YUV422toRGBA8888Invoker
     YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
         : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        int rangeBegin = range.begin();
-        int rangeEnd = range.end();
+        int rangeBegin = range.start;
+        int rangeEnd = range.end;
 
         const int uidx = 1 - yIdx + uIdx * 2;
         const int vidx = (2 + uidx) % 4;
@@ -3266,24 +3258,20 @@ template<int bIdx, int uIdx, int yIdx>
 inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
 {
     YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
-#ifdef HAVE_TBB
     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
-        parallel_for(BlockedRange(0, _dst.rows), converter);
+        parallel_for_(Range(0, _dst.rows), converter); // at or above the size threshold: pass range [0, rows) to parallel_for_
     else
-#endif
-        converter(BlockedRange(0, _dst.rows));
+        converter(Range(0, _dst.rows)); // below the threshold: invoke the body directly on the same range
 }
 
 template<int bIdx, int uIdx, int yIdx>
 inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
 {
     YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
-#ifdef HAVE_TBB
     if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
-        parallel_for(BlockedRange(0, _dst.rows), converter);
+        parallel_for_(Range(0, _dst.rows), converter); // at or above the size threshold: pass range [0, rows) to parallel_for_
     else
-#endif
-        converter(BlockedRange(0, _dst.rows));
+        converter(Range(0, _dst.rows)); // below the threshold: invoke the body directly on the same range
 }
 
 /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
index 89d3a55..d3e6f90 100644 (file)
@@ -443,7 +443,7 @@ icvGetDistanceTransformMask( int maskType, float *metrics )
 namespace cv
 {
 
-struct DTColumnInvoker
+struct DTColumnInvoker : ParallelLoopBody
 {
     DTColumnInvoker( const CvMat* _src, CvMat* _dst, const int* _sat_tab, const float* _sqr_tab)
     {
@@ -453,9 +453,9 @@ struct DTColumnInvoker
         sqr_tab = _sqr_tab;
     }
 
-    void operator()( const BlockedRange& range ) const
+    void operator()( const Range& range ) const
     {
-        int i, i1 = range.begin(), i2 = range.end();
+        int i, i1 = range.start, i2 = range.end;
         int m = src->rows;
         size_t sstep = src->step, dstep = dst->step/sizeof(float);
         AutoBuffer<int> _d(m);
@@ -490,7 +490,7 @@ struct DTColumnInvoker
 };
 
 
-struct DTRowInvoker
+struct DTRowInvoker : ParallelLoopBody
 {
     DTRowInvoker( CvMat* _dst, const float* _sqr_tab, const float* _inv_tab )
     {
@@ -499,10 +499,10 @@ struct DTRowInvoker
         inv_tab = _inv_tab;
     }
 
-    void operator()( const BlockedRange& range ) const
+    void operator()( const Range& range ) const
     {
         const float inf = 1e15f;
-        int i, i1 = range.begin(), i2 = range.end();
+        int i, i1 = range.start, i2 = range.end;
         int n = dst->cols;
         AutoBuffer<uchar> _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int));
         float* f = (float*)(uchar*)_buf;
@@ -586,7 +586,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst )
     for( ; i <= m*3; i++ )
         sat_tab[i] = i - shift;
 
-    cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab));
+    cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab));
 
     // stage 2: compute modified distance transform for each row
     float* inv_tab = sqr_tab + n;
@@ -598,7 +598,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst )
         sqr_tab[i] = (float)(i*i);
     }
 
-    cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));
+    cv::parallel_for_(cv::Range(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));
 }
 
 
index 22dd9be..bfcdee5 100644 (file)
@@ -2986,29 +2986,23 @@ cvCalcProbDensity( const CvHistogram* hist, const CvHistogram* hist_mask,
     }
 }
 
-class EqualizeHistCalcHist_Invoker
+class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody
 {
 public:
     enum {HIST_SZ = 256};
 
-#ifdef HAVE_TBB
-    typedef tbb::mutex* MutextPtr;
-#else
-    typedef void* MutextPtr;
-#endif
-
-    EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, MutextPtr histogramLock)
+    EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock)
         : src_(src), globalHistogram_(histogram), histogramLock_(histogramLock)
     { }
 
-    void operator()( const cv::BlockedRange& rowRange ) const
+    void operator()( const cv::Range& rowRange ) const
     {
         int localHistogram[HIST_SZ] = {0, };
 
         const size_t sstep = src_.step;
 
         int width = src_.cols;
-        int height = rowRange.end() - rowRange.begin();
+        int height = rowRange.end - rowRange.start;
 
         if (src_.isContinuous())
         {
@@ -3016,7 +3010,7 @@ public:
             height = 1;
         }
 
-        for (const uchar* ptr = src_.ptr<uchar>(rowRange.begin()); height--; ptr += sstep)
+        for (const uchar* ptr = src_.ptr<uchar>(rowRange.start); height--; ptr += sstep)
         {
             int x = 0;
             for (; x <= width - 4; x += 4)
@@ -3031,9 +3025,7 @@ public:
                 localHistogram[ptr[x]]++;
         }
 
-#ifdef HAVE_TBB
-        tbb::mutex::scoped_lock lock(*histogramLock_);
-#endif
+        cv::AutoLock lock(*histogramLock_);
 
         for( int i = 0; i < HIST_SZ; i++ )
             globalHistogram_[i] += localHistogram[i];
@@ -3041,12 +3033,7 @@ public:
 
     static bool isWorthParallel( const cv::Mat& src )
     {
-#ifdef HAVE_TBB
         return ( src.total() >= 640*480 );
-#else
-        (void)src;
-        return false;
-#endif
     }
 
 private:
@@ -3054,10 +3041,10 @@ private:
 
     cv::Mat& src_;
     int* globalHistogram_;
-    MutextPtr histogramLock_;
+    cv::Mutex* histogramLock_;
 };
 
-class EqualizeHistLut_Invoker
+class EqualizeHistLut_Invoker : public cv::ParallelLoopBody
 {
 public:
     EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut )
@@ -3066,13 +3053,13 @@ public:
           lut_(lut)
     { }
 
-    void operator()( const cv::BlockedRange& rowRange ) const
+    void operator()( const cv::Range& rowRange ) const
     {
         const size_t sstep = src_.step;
         const size_t dstep = dst_.step;
 
         int width = src_.cols;
-        int height = rowRange.end() - rowRange.begin();
+        int height = rowRange.end - rowRange.start;
         int* lut = lut_;
 
         if (src_.isContinuous() && dst_.isContinuous())
@@ -3081,8 +3068,8 @@ public:
             height = 1;
         }
 
-        const uchar* sptr = src_.ptr<uchar>(rowRange.begin());
-        uchar* dptr = dst_.ptr<uchar>(rowRange.begin());
+        const uchar* sptr = src_.ptr<uchar>(rowRange.start);
+        uchar* dptr = dst_.ptr<uchar>(rowRange.start);
 
         for (; height--; sptr += sstep, dptr += dstep)
         {
@@ -3111,12 +3098,7 @@ public:
 
     static bool isWorthParallel( const cv::Mat& src )
     {
-#ifdef HAVE_TBB
         return ( src.total() >= 640*480 );
-#else
-        (void)src;
-        return false;
-#endif
     }
 
 private:
@@ -3143,23 +3125,18 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst )
     if(src.empty())
         return;
 
-#ifdef HAVE_TBB
-    tbb::mutex histogramLockInstance;
-    EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = &histogramLockInstance;
-#else
-    EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = 0;
-#endif
+    Mutex histogramLockInstance;
 
     const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ;
     int hist[hist_sz] = {0,};
     int lut[hist_sz];
 
-    EqualizeHistCalcHist_Invoker calcBody(src, hist, histogramLock);
+    EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance);
     EqualizeHistLut_Invoker      lutBody(src, dst, lut);
-    cv::BlockedRange heightRange(0, src.rows);
+    cv::Range heightRange(0, src.rows);
 
     if(EqualizeHistCalcHist_Invoker::isWorthParallel(src))
-        parallel_for(heightRange, calcBody);
+        parallel_for_(heightRange, calcBody);
     else
         calcBody(heightRange);
 
@@ -3183,304 +3160,12 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst )
     }
 
     if(EqualizeHistLut_Invoker::isWorthParallel(src))
-        parallel_for(heightRange, lutBody);
+        parallel_for_(heightRange, lutBody);
     else
         lutBody(heightRange);
 }
 
 // ----------------------------------------------------------------------
-// CLAHE
-
-namespace
-{
-    class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
-    {
-    public:
-        CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) :
-            src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale)
-        {
-        }
-
-        void operator ()(const cv::Range& range) const;
-
-    private:
-        cv::Mat src_;
-        mutable cv::Mat lut_;
-
-        cv::Size tileSize_;
-        int tilesX_;
-        int tilesY_;
-        int clipLimit_;
-        float lutScale_;
-    };
-
-    void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const
-    {
-        const int histSize = 256;
-
-        uchar* tileLut = lut_.ptr(range.start);
-        const size_t lut_step = lut_.step;
-
-        for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
-        {
-            const int ty = k / tilesX_;
-            const int tx = k % tilesX_;
-
-            // retrieve tile submatrix
-
-            cv::Rect tileROI;
-            tileROI.x = tx * tileSize_.width;
-            tileROI.y = ty * tileSize_.height;
-            tileROI.width = tileSize_.width;
-            tileROI.height = tileSize_.height;
-
-            const cv::Mat tile = src_(tileROI);
-
-            // calc histogram
-
-            int tileHist[histSize] = {0, };
-
-            int height = tileROI.height;
-            const size_t sstep = tile.step;
-            for (const uchar* ptr = tile.ptr<uchar>(0); height--; ptr += sstep)
-            {
-                int x = 0;
-                for (; x <= tileROI.width - 4; x += 4)
-                {
-                    int t0 = ptr[x], t1 = ptr[x+1];
-                    tileHist[t0]++; tileHist[t1]++;
-                    t0 = ptr[x+2]; t1 = ptr[x+3];
-                    tileHist[t0]++; tileHist[t1]++;
-                }
-
-                for (; x < tileROI.width; ++x)
-                    tileHist[ptr[x]]++;
-            }
-
-            // clip histogram
-
-            if (clipLimit_ > 0)
-            {
-                // how many pixels were clipped
-                int clipped = 0;
-                for (int i = 0; i < histSize; ++i)
-                {
-                    if (tileHist[i] > clipLimit_)
-                    {
-                        clipped += tileHist[i] - clipLimit_;
-                        tileHist[i] = clipLimit_;
-                    }
-                }
-
-                // redistribute clipped pixels
-                int redistBatch = clipped / histSize;
-                int residual = clipped - redistBatch * histSize;
-
-                for (int i = 0; i < histSize; ++i)
-                    tileHist[i] += redistBatch;
-
-                for (int i = 0; i < residual; ++i)
-                    tileHist[i]++;
-            }
-
-            // calc Lut
-
-            int sum = 0;
-            for (int i = 0; i < histSize; ++i)
-            {
-                sum += tileHist[i];
-                tileLut[i] = cv::saturate_cast<uchar>(sum * lutScale_);
-            }
-        }
-    }
-
-    class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
-    {
-    public:
-        CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) :
-            src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
-        {
-        }
-
-        void operator ()(const cv::Range& range) const;
-
-    private:
-        cv::Mat src_;
-        mutable cv::Mat dst_;
-        cv::Mat lut_;
-
-        cv::Size tileSize_;
-        int tilesX_;
-        int tilesY_;
-    };
-
-    void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const
-    {
-        const size_t lut_step = lut_.step;
-
-        for (int y = range.start; y < range.end; ++y)
-        {
-            const uchar* srcRow = src_.ptr<uchar>(y);
-            uchar* dstRow = dst_.ptr<uchar>(y);
-
-            const float tyf = (static_cast<float>(y) / tileSize_.height) - 0.5f;
-
-            int ty1 = cvFloor(tyf);
-            int ty2 = ty1 + 1;
-
-            const float ya = tyf - ty1;
-
-            ty1 = std::max(ty1, 0);
-            ty2 = std::min(ty2, tilesY_ - 1);
-
-            const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_);
-            const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_);
-
-            for (int x = 0; x < src_.cols; ++x)
-            {
-                const float txf = (static_cast<float>(x) / tileSize_.width) - 0.5f;
-
-                int tx1 = cvFloor(txf);
-                int tx2 = tx1 + 1;
-
-                const float xa = txf - tx1;
-
-                tx1 = std::max(tx1, 0);
-                tx2 = std::min(tx2, tilesX_ - 1);
-
-                const int srcVal = srcRow[x];
-
-                const size_t ind1 = tx1 * lut_step + srcVal;
-                const size_t ind2 = tx2 * lut_step + srcVal;
-
-                float res = 0;
-
-                res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya));
-                res += lutPlane1[ind2] * ((xa) * (1.0f - ya));
-                res += lutPlane2[ind1] * ((1.0f - xa) * (ya));
-                res += lutPlane2[ind2] * ((xa) * (ya));
-
-                dstRow[x] = cv::saturate_cast<uchar>(res);
-            }
-        }
-    }
-
-    class CLAHE_Impl : public cv::CLAHE
-    {
-    public:
-        CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
-
-        cv::AlgorithmInfo* info() const;
-
-        void apply(cv::InputArray src, cv::OutputArray dst);
-
-        void setClipLimit(double clipLimit);
-        double getClipLimit() const;
-
-        void setTilesGridSize(cv::Size tileGridSize);
-        cv::Size getTilesGridSize() const;
-
-        void collectGarbage();
-
-    private:
-        double clipLimit_;
-        int tilesX_;
-        int tilesY_;
-
-        cv::Mat srcExt_;
-        cv::Mat lut_;
-    };
-
-    CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
-        clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
-    {
-    }
-
-    CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE",
-        obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
-        obj.info()->addParam(obj, "tilesX", obj.tilesX_);
-        obj.info()->addParam(obj, "tilesY", obj.tilesY_))
-
-    void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
-    {
-        cv::Mat src = _src.getMat();
-
-        CV_Assert( src.type() == CV_8UC1 );
-
-        _dst.create( src.size(), src.type() );
-        cv::Mat dst = _dst.getMat();
-
-        const int histSize = 256;
-
-        lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
-
-        cv::Size tileSize;
-        cv::Mat srcForLut;
-
-        if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
-        {
-            tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
-            srcForLut = src;
-        }
-        else
-        {
-            cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
-
-            tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
-            srcForLut = srcExt_;
-        }
-
-        const int tileSizeTotal = tileSize.area();
-        const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
-
-        int clipLimit = 0;
-        if (clipLimit_ > 0.0)
-        {
-            clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
-            clipLimit = std::max(clipLimit, 1);
-        }
-
-        CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
-        cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
-
-        CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_);
-        cv::parallel_for_(cv::Range(0, src.rows), interpolationBody);
-    }
-
-    void CLAHE_Impl::setClipLimit(double clipLimit)
-    {
-        clipLimit_ = clipLimit;
-    }
-
-    double CLAHE_Impl::getClipLimit() const
-    {
-        return clipLimit_;
-    }
-
-    void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
-    {
-        tilesX_ = tileGridSize.width;
-        tilesY_ = tileGridSize.height;
-    }
-
-    cv::Size CLAHE_Impl::getTilesGridSize() const
-    {
-        return cv::Size(tilesX_, tilesY_);
-    }
-
-    void CLAHE_Impl::collectGarbage()
-    {
-        srcExt_.release();
-        lut_.release();
-    }
-}
-
-cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
-{
-    return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
-}
-
-// ----------------------------------------------------------------------
 
 /* Implementation of RTTI and Generic Functions for CvHistogram */
 #define CV_TYPE_NAME_HIST "opencv-hist"
index a63e08f..53d2347 100644 (file)
@@ -1081,7 +1081,7 @@ cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor)
 namespace cv
 {
 
-class MorphologyRunner
+class MorphologyRunner : public ParallelLoopBody
 {
 public:
     MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations,
@@ -1102,14 +1102,14 @@ public:
         columnBorderType = _columnBorderType;
     }
 
-    void operator () ( const BlockedRange& range ) const
+    void operator () ( const Range& range ) const
     {
-        int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows);
-        int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows);
+        int row0 = min(cvRound(range.start * src.rows / nStripes), src.rows);
+        int row1 = min(cvRound(range.end * src.rows / nStripes), src.rows);
 
         /*if(0)
             printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
-                   src.rows, src.cols, range.begin(), range.end(), row0, row1);*/
+                   src.rows, src.cols, range.start, range.end, row0, row1);*/
 
         Mat srcStripe = src.rowRange(row0, row1);
         Mat dstStripe = dst.rowRange(row0, row1);
@@ -1173,15 +1173,15 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
     }
 
     int nStripes = 1;
-#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION
+#if defined HAVE_TEGRA_OPTIMIZATION
     if (src.data != dst.data && iterations == 1 &&  //NOTE: threads are not used for inplace processing
         (borderType & BORDER_ISOLATED) == 0 && //TODO: check border types
         src.rows >= 64 ) //NOTE: just heuristics
         nStripes = 4;
 #endif
 
-    parallel_for(BlockedRange(0, nStripes),
-                 MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
+    parallel_for_(Range(0, nStripes),
+                  MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
 
     //Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(),
     //                                             kernel, anchor, borderType, borderType, borderValue );
index 6c5c329..c0c9f5b 100644 (file)
@@ -81,6 +81,14 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
         styledAttrs.recycle();
     }
 
+    /**
+     * Sets the camera index by storing it in {@code mCameraIndex}.
+     * @param cameraIndex new camera index; stored as given, no validation is done here
+     */
+    public void setCameraIndex(int cameraIndex) {
+        this.mCameraIndex = cameraIndex;
+    }
+
     public interface CvCameraViewListener {
         /**
          * This method is invoked when camera preview has started. After this method is invoked
index 0ebdb66..b3fe569 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfByte extends Mat {
 
     protected MatOfByte(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfByte extends Mat {
 
     public MatOfByte(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index cca5251..4eb7cbc 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfDouble extends Mat {
 
     protected MatOfDouble(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfDouble extends Mat {
 
     public MatOfDouble(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index ce73b6f..96bbeab 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfFloat extends Mat {
 
     protected MatOfFloat(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfFloat extends Mat {
 
     public MatOfFloat(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 8a3e510..aaa97b7 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfFloat4 extends Mat {
 
     protected MatOfFloat4(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfFloat4 extends Mat {
 
     public MatOfFloat4(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 1e23101..68e6249 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfFloat6 extends Mat {
 
     protected MatOfFloat6(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfFloat6 extends Mat {
 
     public MatOfFloat6(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 80c5b3a..33e5124 100644 (file)
@@ -15,7 +15,7 @@ public class MatOfInt extends Mat {
 
     protected MatOfInt(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -26,7 +26,7 @@ public class MatOfInt extends Mat {
 
     public MatOfInt(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 6027710..c924233 100644 (file)
@@ -15,7 +15,7 @@ public class MatOfInt4 extends Mat {
 
     protected MatOfInt4(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -26,7 +26,7 @@ public class MatOfInt4 extends Mat {
 
     public MatOfInt4(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index b91fedc..b402fe1 100644 (file)
@@ -16,7 +16,7 @@ public class MatOfKeyPoint extends Mat {
 
     protected MatOfKeyPoint(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -27,7 +27,7 @@ public class MatOfKeyPoint extends Mat {
 
     public MatOfKeyPoint(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 23eeed0..6d23ed1 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfPoint extends Mat {
 
     protected MatOfPoint(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfPoint extends Mat {
 
     public MatOfPoint(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index ba4be4a..0c69607 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfPoint2f extends Mat {
 
     protected MatOfPoint2f(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfPoint2f extends Mat {
 
     public MatOfPoint2f(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 16e2130..0c8374f 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfPoint3 extends Mat {
 
     protected MatOfPoint3(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfPoint3 extends Mat {
 
     public MatOfPoint3(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 97e2a95..b0d50d4 100644 (file)
@@ -14,7 +14,7 @@ public class MatOfPoint3f extends Mat {
 
     protected MatOfPoint3f(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -25,7 +25,7 @@ public class MatOfPoint3f extends Mat {
 
     public MatOfPoint3f(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 2e58bfe..3844d9d 100644 (file)
@@ -15,7 +15,7 @@ public class MatOfRect extends Mat {
 
     protected MatOfRect(long addr) {
         super(addr);
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
@@ -26,7 +26,7 @@ public class MatOfRect extends Mat {
 
     public MatOfRect(Mat m) {
         super(m, Range.all());
-        if(checkVector(_channels, _depth) < 0 )
+        if( !empty() && checkVector(_channels, _depth) < 0 ) // an empty Mat bypasses the checkVector test
             throw new IllegalArgumentException("Incomatible Mat");
         //FIXME: do we need release() here?
     }
index 438872a..bf85425 100644 (file)
 
 #include "precomp.hpp"
 
-#ifdef HAVE_TBB
-#include <tbb/tbb.h>
-#endif
-
 CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
 {
     term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
@@ -1022,7 +1018,7 @@ int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
     return iter;
 }
 
-struct rprop_loop {
+struct rprop_loop : cv::ParallelLoopBody {
   rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0,
      int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count,
      CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz)
@@ -1063,7 +1059,7 @@ struct rprop_loop {
   int buf_sz;
 
 
-  void operator()( const cv::BlockedRange& range ) const
+  void operator()( const cv::Range& range ) const
   {
     double* buf_ptr;
     double** x = 0;
@@ -1084,7 +1080,7 @@ struct rprop_loop {
         buf_ptr += (df[i] - x[i])*2;
     }
 
-    for(int si = range.begin(); si < range.end(); si++ )
+    for(int si = range.start; si < range.end; si++ )
     {
         if (si % dcount0 != 0) continue;
         int n1, n2, k;
@@ -1170,36 +1166,33 @@ struct rprop_loop {
             }
 
         // backward pass, update dEdw
-        #ifdef HAVE_TBB
-        static tbb::spin_mutex mutex;
-        tbb::spin_mutex::scoped_lock lock;
-        #endif
+        static cv::Mutex mutex;
+
         for(int i = l_count-1; i > 0; i-- )
         {
             n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
             cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
             cvMul( grad1, &_df, grad1 );
-            #ifdef HAVE_TBB
-            lock.acquire(mutex);
-            #endif
-            cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
-            cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
-            cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
-
-            // update bias part of dEdw
-           for( k = 0; k < dcount; k++ )
-           {
-               double* dst = _dEdw.data.db + n1*n2;
-               const double* src = grad1->data.db + k*n2;
-               for(int j = 0; j < n2; j++ )
-                   dst[j] += src[j];
+
+            {
+                cv::AutoLock lock(mutex);
+                cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
+                cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
+                cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
+
+                // update bias part of dEdw
+                for( k = 0; k < dcount; k++ )
+                {
+                    double* dst = _dEdw.data.db + n1*n2;
+                    const double* src = grad1->data.db + k*n2;
+                    for(int j = 0; j < n2; j++ )
+                        dst[j] += src[j];
+                }
+
+                if (i > 1)
+                    cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
            }
 
-           if (i > 1)
-               cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
-           #ifdef HAVE_TBB
-           lock.release();
-           #endif
            cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
            if( i > 1 )
                cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
@@ -1297,7 +1290,7 @@ int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
         double E = 0;
 
         // first, iterate through all the samples and compute dEdw
-        cv::parallel_for(cv::BlockedRange(0, count),
+        cv::parallel_for_(cv::Range(0, count),
             rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes,
                        ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz)
         );
index 6671a34..b52ffbe 100644 (file)
@@ -900,7 +900,7 @@ float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing,
 }
 
 
-class Tree_predictor
+class Tree_predictor : public cv::ParallelLoopBody
 {
 private:
     pCvSeq* weak;
@@ -910,9 +910,7 @@ private:
     const CvMat* missing;
     const float shrinkage;
 
-#ifdef HAVE_TBB
-    static tbb::spin_mutex SumMutex;
-#endif
+    static cv::Mutex SumMutex;
 
 
 public:
@@ -931,14 +929,11 @@ public:
     Tree_predictor& operator=( const Tree_predictor& )
     { return *this; }
 
-    virtual void operator()(const cv::BlockedRange& range) const
+    virtual void operator()(const cv::Range& range) const
     {
-#ifdef HAVE_TBB
-        tbb::spin_mutex::scoped_lock lock;
-#endif
         CvSeqReader reader;
-        int begin = range.begin();
-        int end = range.end();
+        int begin = range.start;
+        int end = range.end;
 
         int weak_count = end - begin;
         CvDTree* tree;
@@ -956,13 +951,11 @@ public:
                     tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
                 }
             }
-#ifdef HAVE_TBB
-            lock.acquire(SumMutex);
-            sum[i] += tmp_sum;
-            lock.release();
-#else
-            sum[i] += tmp_sum;
-#endif
+
+            {
+                cv::AutoLock lock(SumMutex);
+                sum[i] += tmp_sum;
+            }
         }
     } // Tree_predictor::operator()
 
@@ -970,11 +963,7 @@ public:
 
 }; // class Tree_predictor
 
-
-#ifdef HAVE_TBB
-tbb::spin_mutex Tree_predictor::SumMutex;
-#endif
-
+cv::Mutex Tree_predictor::SumMutex;
 
 
 float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
@@ -992,12 +981,7 @@ float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
         Tree_predictor predictor = Tree_predictor(weak_seq, class_count,
                                     params.shrinkage, _sample, _missing, sum);
 
-//#ifdef HAVE_TBB
-//      tbb::parallel_for(cv::BlockedRange(begin, end), predictor,
-//                          tbb::auto_partitioner());
-//#else
-        cv::parallel_for(cv::BlockedRange(begin, end), predictor);
-//#endif
+        cv::parallel_for_(cv::Range(begin, end), predictor);
 
         for (int i=0; i<class_count; ++i)
             sum[i] = sum[i] /** params.shrinkage*/ + base_value;
@@ -1228,7 +1212,7 @@ void CvGBTrees::read( CvFileStorage* fs, CvFileNode* node )
 
 //===========================================================================
 
-class Sample_predictor
+class Sample_predictor : public cv::ParallelLoopBody
 {
 private:
     const CvGBTrees* gbt;
@@ -1258,10 +1242,10 @@ public:
     {}
 
 
-    virtual void operator()(const cv::BlockedRange& range) const
+    virtual void operator()(const cv::Range& range) const
     {
-        int begin = range.begin();
-        int end = range.end();
+        int begin = range.start;
+        int end = range.end;
 
         CvMat x;
         CvMat miss;
@@ -1317,11 +1301,7 @@ CvGBTrees::calc_error( CvMLData* _data, int type, std::vector<float> *resp )
     Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(),
             _data->get_missing(), _sample_idx);
 
-//#ifdef HAVE_TBB
-//    tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner());
-//#else
-    cv::parallel_for(cv::BlockedRange(0,n), predictor);
-//#endif
+    cv::parallel_for_(cv::Range(0,n), predictor);
 
     int* sidx = _sample_idx ? _sample_idx->data.i : 0;
     int r_step = CV_IS_MAT_CONT(response->type) ?
index 3c2f9eb..6b6f5e6 100644 (file)
@@ -306,7 +306,7 @@ float CvKNearest::write_results( int k, int k1, int start, int end,
     return result;
 }
 
-struct P1 {
+struct P1 : cv::ParallelLoopBody {
   P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
      int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
   {
@@ -333,10 +333,10 @@ struct P1 {
   float* result;
   int buf_sz;
 
-  void operator()( const cv::BlockedRange& range ) const
+  void operator()( const cv::Range& range ) const
   {
     cv::AutoBuffer<float> buf(buf_sz);
-    for(int i = range.begin(); i < range.end(); i += 1 )
+    for(int i = range.start; i < range.end; i += 1 )
     {
         float* neighbor_responses = &buf[0];
         float* dist = neighbor_responses + 1*k;
@@ -410,8 +410,8 @@ float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
     int k1 = get_sample_count();
     k1 = MIN( k1, k );
 
-    cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
-                                                    _results, _neighbor_responses, _dist, &result)
+    cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
+                                             _results, _neighbor_responses, _dist, &result)
     );
 
     return result;
index 15146d6..f1f7a24 100644 (file)
@@ -277,7 +277,7 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res
     return result;
 }
 
-struct predict_body {
+struct predict_body : cv::ParallelLoopBody {
   predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
      const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
      CvMat* _results, float* _value, int _var_count1
@@ -307,7 +307,7 @@ struct predict_body {
   float* value;
   int var_count1;
 
-  void operator()( const cv::BlockedRange& range ) const
+  void operator()( const cv::Range& range ) const
   {
 
     int cls = -1;
@@ -324,7 +324,7 @@ struct predict_body {
     cv::AutoBuffer<double> buffer(nclasses + var_count1);
     CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
 
-    for(int k = range.begin(); k < range.end(); k += 1 )
+    for(int k = range.start; k < range.end; k += 1 )
     {
         int ival;
         double opt = FLT_MAX;
@@ -397,9 +397,9 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c
 
     const int* vidx = var_idx ? var_idx->data.i : 0;
 
-    cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
-                                                                      vidx, cls_labels, results, &value, var_count
-    ));
+    cv::parallel_for_(cv::Range(0, samples->rows),
+                      predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
+                                   vidx, cls_labels, results, &value, var_count));
 
     return value;
 }
index 9752848..2e1b2e3 100644 (file)
@@ -2143,7 +2143,7 @@ float CvSVM::predict( const CvMat* sample, bool returnDFVal ) const
     return result;
 }
 
-struct predict_body_svm {
+struct predict_body_svm : ParallelLoopBody {
     predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results)
     {
         pointer = _pointer;
@@ -2157,9 +2157,9 @@ struct predict_body_svm {
     const CvMat* samples;
     CvMat* results;
 
-    void operator()( const cv::BlockedRange& range ) const
+    void operator()( const cv::Range& range ) const
     {
-        for(int i = range.begin(); i < range.end(); i++ )
+        for(int i = range.start; i < range.end; i++ )
         {
             CvMat sample;
             cvGetRow( samples, &sample, i );
@@ -2175,7 +2175,7 @@ struct predict_body_svm {
 float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const
 {
     float result = 0;
-    cv::parallel_for(cv::BlockedRange(0, samples->rows),
+    cv::parallel_for_(cv::Range(0, samples->rows),
              predict_body_svm(this, &result, samples, results)
     );
     return result;
diff --git a/modules/nonfree/doc/background_subtraction.rst b/modules/nonfree/doc/background_subtraction.rst
deleted file mode 100644 (file)
index 11603ca..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-Background Subtraction
-======================
-
-.. highlight:: cpp
-
-
-
-gpu::VIBE_GPU
--------------
-.. ocv:class:: gpu::VIBE_GPU
-
-Class used for background/foreground segmentation. ::
-
-    class VIBE_GPU
-    {
-    public:
-        explicit VIBE_GPU(unsigned long rngSeed = 1234567);
-
-        void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
-
-        void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
-
-        void release();
-
-        ...
-    };
-
-The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [VIBE2011]_.
-
-
-
-gpu::VIBE_GPU::VIBE_GPU
------------------------
-The constructor.
-
-.. ocv:function:: gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed = 1234567)
-
-    :param rngSeed: Value used to initiate a random sequence.
-
-Default constructor sets all parameters to default values.
-
-
-
-gpu::VIBE_GPU::initialize
--------------------------
-Initialize background model and allocates all inner buffers.
-
-.. ocv:function:: void gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null())
-
-    :param firstFrame: First frame from video sequence.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::VIBE_GPU::operator()
--------------------------
-Updates the background model and returns the foreground mask
-
-.. ocv:function:: void gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null())
-
-    :param frame: Next video frame.
-
-    :param fgmask: The output foreground mask as an 8-bit binary image.
-
-    :param stream: Stream for the asynchronous version.
-
-
-
-gpu::VIBE_GPU::release
-----------------------
-Releases all inner buffer's memory.
-
-.. ocv:function:: void gpu::VIBE_GPU::release()
-
-
-
-
-.. [VIBE2011] O. Barnich and M. Van D Roogenbroeck. *ViBe: A universal background subtraction algorithm for video sequences*. IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011
index f8fa1d6..e524ea8 100644 (file)
@@ -8,4 +8,3 @@ The module contains algorithms that may be patented in some countries or have so
     :maxdepth: 2
 
     feature_detection
-    background_subtraction
index c8a24e0..3cb0b47 100644 (file)
@@ -125,41 +125,6 @@ public:
     GpuMat maxPosBuffer;
 };
 
-/*!
- * The class implements the following algorithm:
- * "ViBe: A universal background subtraction algorithm for video sequences"
- * O. Barnich and M. Van D Roogenbroeck
- * IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011
- */
-class CV_EXPORTS VIBE_GPU
-{
-public:
-    //! the default constructor
-    explicit VIBE_GPU(unsigned long rngSeed = 1234567);
-
-    //! re-initiaization method
-    void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
-
-    //! the update operator
-    void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
-
-    //! releases all inner buffers
-    void release();
-
-    int nbSamples;         // number of samples per pixel
-    int reqMatches;        // #_min
-    int radius;            // R
-    int subsamplingFactor; // amount of random subsampling
-
-private:
-    Size frameSize_;
-
-    unsigned long rngSeed_;
-    GpuMat randStates_;
-
-    GpuMat samples_;
-};
-
 } // namespace gpu
 
 } // namespace cv
index aa8516b..9f451de 100644 (file)
@@ -50,18 +50,6 @@ using namespace std;
 using namespace testing;
 using namespace perf;
 
-#if defined(HAVE_XINE)         || \
-    defined(HAVE_GSTREAMER)    || \
-    defined(HAVE_QUICKTIME)    || \
-    defined(HAVE_AVFOUNDATION) || \
-    defined(HAVE_FFMPEG)       || \
-    defined(WIN32) /* assume that we have ffmpeg */
-
-#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
-#else
-#  define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
-#endif
-
 //////////////////////////////////////////////////////////////////////
 // SURF
 
@@ -108,75 +96,4 @@ PERF_TEST_P(Image, GPU_SURF,
     }
 }
 
-//////////////////////////////////////////////////////
-// VIBE
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-DEF_PARAM_TEST(Video_Cn, string, int);
-
-PERF_TEST_P(Video_Cn, GPU_VIBE,
-            Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
-                    GPU_CHANNELS_1_3_4))
-{
-    const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
-    const int cn = GET_PARAM(1);
-
-    cv::VideoCapture cap(inputFile);
-    ASSERT_TRUE(cap.isOpened());
-
-    cv::Mat frame;
-    cap >> frame;
-    ASSERT_FALSE(frame.empty());
-
-    if (cn != 3)
-    {
-        cv::Mat temp;
-        if (cn == 1)
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-        else
-            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-        cv::swap(temp, frame);
-    }
-
-    if (PERF_RUN_GPU())
-    {
-        cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::VIBE_GPU vibe;
-        cv::gpu::GpuMat foreground;
-
-        vibe(d_frame, foreground);
-
-        for (int i = 0; i < 10; ++i)
-        {
-            cap >> frame;
-            ASSERT_FALSE(frame.empty());
-
-            if (cn != 3)
-            {
-                cv::Mat temp;
-                if (cn == 1)
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
-                else
-                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
-                cv::swap(temp, frame);
-            }
-
-            d_frame.upload(frame);
-
-            startTimer(); next();
-            vibe(d_frame, foreground);
-            stopTimer();
-        }
-
-        GPU_SANITY_CHECK(foreground);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-#endif
-
 #endif
diff --git a/modules/nonfree/src/cuda/vibe.cu b/modules/nonfree/src/cuda/vibe.cu
deleted file mode 100644 (file)
index ba678ab..0000000
+++ /dev/null
@@ -1,271 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "opencv2/opencv_modules.hpp"
-
-#ifdef HAVE_OPENCV_GPU
-
-#include "opencv2/gpu/device/common.hpp"
-
-namespace cv { namespace gpu { namespace device
-{
-    namespace vibe
-    {
-        void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);
-
-        void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
-
-        void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
-    }
-}}}
-
-namespace cv { namespace gpu { namespace device
-{
-    namespace vibe
-    {
-        __constant__ int c_nbSamples;
-        __constant__ int c_reqMatches;
-        __constant__ int c_radius;
-        __constant__ int c_subsamplingFactor;
-
-        void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor)
-        {
-            cudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(int)) );
-        }
-
-        __device__ __forceinline__ uint nextRand(uint& state)
-        {
-            const unsigned int CV_RNG_COEFF = 4164903690U;
-            state = state * CV_RNG_COEFF + (state >> 16);
-            return state;
-        }
-
-        __constant__ int c_xoff[9] = {-1,  0,  1, -1, 1, -1, 0, 1, 0};
-        __constant__ int c_yoff[9] = {-1, -1, -1,  0, 0,  1, 1, 1, 0};
-
-        __device__ __forceinline__ int2 chooseRandomNeighbor(int x, int y, uint& randState, int count = 8)
-        {
-            int idx = nextRand(randState) % count;
-
-            return make_int2(x + c_xoff[idx], y + c_yoff[idx]);
-        }
-
-        __device__ __forceinline__ uchar cvt(uchar val)
-        {
-            return val;
-        }
-        __device__ __forceinline__ uchar4 cvt(const uchar3& val)
-        {
-            return make_uchar4(val.x, val.y, val.z, 0);
-        }
-        __device__ __forceinline__ uchar4 cvt(const uchar4& val)
-        {
-            return val;
-        }
-
-        template <typename SrcT, typename SampleT>
-        __global__ void init(const PtrStepSz<SrcT> frame, PtrStep<SampleT> samples, PtrStep<uint> randStates)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= frame.cols || y >= frame.rows)
-                return;
-
-            uint localState = randStates(y, x);
-
-            for (int k = 0; k < c_nbSamples; ++k)
-            {
-                int2 np = chooseRandomNeighbor(x, y, localState, 9);
-
-                np.x = ::max(0, ::min(np.x, frame.cols - 1));
-                np.y = ::max(0, ::min(np.y, frame.rows - 1));
-
-                SrcT pix = frame(np.y, np.x);
-
-                samples(k * frame.rows + y, x) = cvt(pix);
-            }
-
-            randStates(y, x) = localState;
-        }
-
-        template <typename SrcT, typename SampleT>
-        void init_caller(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );
-
-            init<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, (PtrStepSz<SampleT>) samples, randStates);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
-        {
-            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
-            static const func_t funcs[] =
-            {
-                0, init_caller<uchar, uchar>, 0, init_caller<uchar3, uchar4>, init_caller<uchar4, uchar4>
-            };
-
-            funcs[cn](frame, samples, randStates, stream);
-        }
-
-        __device__ __forceinline__ int calcDist(uchar a, uchar b)
-        {
-            return ::abs(a - b);
-        }
-        __device__ __forceinline__ int calcDist(const uchar3& a, const uchar4& b)
-        {
-            return (::abs(a.x - b.x) + ::abs(a.y - b.y) + ::abs(a.z - b.z)) / 3;
-        }
-        __device__ __forceinline__ int calcDist(const uchar4& a, const uchar4& b)
-        {
-            return (::abs(a.x - b.x) + ::abs(a.y - b.y) + ::abs(a.z - b.z)) / 3;
-        }
-
-        template <typename SrcT, typename SampleT>
-        __global__ void update(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStep<SampleT> samples, PtrStep<uint> randStates)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= frame.cols || y >= frame.rows)
-                return;
-
-            uint localState = randStates(y, x);
-
-            SrcT imgPix = frame(y, x);
-
-            // comparison with the model
-
-            int count = 0;
-            for (int k = 0; (count < c_reqMatches) && (k < c_nbSamples); ++k)
-            {
-                SampleT samplePix = samples(k * frame.rows + y, x);
-
-                int distance = calcDist(imgPix, samplePix);
-
-                if (distance < c_radius)
-                    ++count;
-            }
-
-            // pixel classification according to reqMatches
-
-            fgmask(y, x) = (uchar) (-(count < c_reqMatches));
-
-            if (count >= c_reqMatches)
-            {
-                // the pixel belongs to the background
-
-                // gets a random number between 0 and subsamplingFactor-1
-                int randomNumber = nextRand(localState) % c_subsamplingFactor;
-
-                // update of the current pixel model
-                if (randomNumber == 0)
-                {
-                    // random subsampling
-
-                    int k = nextRand(localState) % c_nbSamples;
-
-                    samples(k * frame.rows + y, x) = cvt(imgPix);
-                }
-
-                // update of a neighboring pixel model
-                randomNumber = nextRand(localState) % c_subsamplingFactor;
-
-                if (randomNumber == 0)
-                {
-                    // random subsampling
-
-                    // chooses a neighboring pixel randomly
-                    int2 np = chooseRandomNeighbor(x, y, localState);
-
-                    np.x = ::max(0, ::min(np.x, frame.cols - 1));
-                    np.y = ::max(0, ::min(np.y, frame.rows - 1));
-
-                    // chooses the value to be replaced randomly
-                    int k = nextRand(localState) % c_nbSamples;
-
-                    samples(k * frame.rows + np.y, np.x) = cvt(imgPix);
-                }
-            }
-
-            randStates(y, x) = localState;
-        }
-
-        template <typename SrcT, typename SampleT>
-        void update_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-
-            cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );
-
-            update<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, (PtrStepSz<SampleT>) samples, randStates);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
-        {
-            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
-            static const func_t funcs[] =
-            {
-                0, update_caller<uchar, uchar>, 0, update_caller<uchar3, uchar4>, update_caller<uchar4, uchar4>
-            };
-
-            funcs[cn](frame, fgmask, samples, randStates, stream);
-        }
-    }
-}}}
-
-#endif /* HAVE_OPENCV_GPU */
index 58ebd31..5a7fd89 100644 (file)
@@ -774,9 +774,6 @@ void SIFT::operator()(InputArray _image, InputArray _mask,
         findScaleSpaceExtrema(gpyr, dogpyr, keypoints);
         KeyPointsFilter::removeDuplicated( keypoints );
 
-        if( !mask.empty() )
-            KeyPointsFilter::runByPixelsMask( keypoints, mask );
-
         if( nfeatures > 0 )
             KeyPointsFilter::retainBest(keypoints, nfeatures);
         //t = (double)getTickCount() - t;
@@ -791,6 +788,9 @@ void SIFT::operator()(InputArray _image, InputArray _mask,
                 kpt.pt *= scale;
                 kpt.size *= scale;
             }
+
+        if( !mask.empty() )
+            KeyPointsFilter::runByPixelsMask( keypoints, mask );
     }
     else
     {
index bb6d53e..2fc459f 100644 (file)
@@ -258,7 +258,7 @@ interpolateKeypoint( float N9[3][9], int dx, int dy, int ds, KeyPoint& kpt )
 }
 
 // Multi-threaded construction of the scale-space pyramid
-struct SURFBuildInvoker
+struct SURFBuildInvoker : ParallelLoopBody
 {
     SURFBuildInvoker( const Mat& _sum, const vector<int>& _sizes,
                       const vector<int>& _sampleSteps,
@@ -271,9 +271,9 @@ struct SURFBuildInvoker
         traces = &_traces;
     }
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        for( int i=range.begin(); i<range.end(); i++ )
+        for( int i=range.start; i<range.end; i++ )
             calcLayerDetAndTrace( *sum, (*sizes)[i], (*sampleSteps)[i], (*dets)[i], (*traces)[i] );
     }
 
@@ -285,7 +285,7 @@ struct SURFBuildInvoker
 };
 
 // Multi-threaded search of the scale-space pyramid for keypoints
-struct SURFFindInvoker
+struct SURFFindInvoker : ParallelLoopBody
 {
     SURFFindInvoker( const Mat& _sum, const Mat& _mask_sum,
                      const vector<Mat>& _dets, const vector<Mat>& _traces,
@@ -310,9 +310,9 @@ struct SURFFindInvoker
                    const vector<int>& sizes, vector<KeyPoint>& keypoints,
                    int octave, int layer, float hessianThreshold, int sampleStep );
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        for( int i=range.begin(); i<range.end(); i++ )
+        for( int i=range.start; i<range.end; i++ )
         {
             int layer = (*middleIndices)[i];
             int octave = i / nOctaveLayers;
@@ -333,14 +333,10 @@ struct SURFFindInvoker
     int nOctaveLayers;
     float hessianThreshold;
 
-#ifdef HAVE_TBB
-    static tbb::mutex findMaximaInLayer_m;
-#endif
+    static Mutex findMaximaInLayer_m;
 };
 
-#ifdef HAVE_TBB
-tbb::mutex SURFFindInvoker::findMaximaInLayer_m;
-#endif
+Mutex SURFFindInvoker::findMaximaInLayer_m;
 
 
 /*
@@ -437,9 +433,7 @@ void SURFFindInvoker::findMaximaInLayer( const Mat& sum, const Mat& mask_sum,
                     if( interp_ok  )
                     {
                         /*printf( "KeyPoint %f %f %d\n", point.pt.x, point.pt.y, point.size );*/
-#ifdef HAVE_TBB
-                        tbb::mutex::scoped_lock lock(findMaximaInLayer_m);
-#endif
+                        cv::AutoLock lock(findMaximaInLayer_m);
                         keypoints.push_back(kpt);
                     }
                 }
@@ -505,20 +499,20 @@ static void fastHessianDetector( const Mat& sum, const Mat& mask_sum, vector<Key
     }
 
     // Calculate hessian determinant and trace samples in each layer
-    parallel_for( BlockedRange(0, nTotalLayers),
-                      SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
+    parallel_for_( Range(0, nTotalLayers),
+                   SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
 
     // Find maxima in the determinant of the hessian
-    parallel_for( BlockedRange(0, nMiddleLayers),
-                      SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
-                                      sampleSteps, middleIndices, keypoints,
-                                      nOctaveLayers, hessianThreshold) );
+    parallel_for_( Range(0, nMiddleLayers),
+                   SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
+                                   sampleSteps, middleIndices, keypoints,
+                                   nOctaveLayers, hessianThreshold) );
 
     std::sort(keypoints.begin(), keypoints.end(), KeypointGreater());
 }
 
 
-struct SURFInvoker
+struct SURFInvoker : ParallelLoopBody
 {
     enum { ORI_RADIUS = 6, ORI_WIN = 60, PATCH_SZ = 20 };
 
@@ -566,7 +560,7 @@ struct SURFInvoker
         }
     }
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
         /* X and Y gradient wavelet data */
         const int NX=2, NY=2;
@@ -587,7 +581,7 @@ struct SURFInvoker
 
         int dsize = extended ? 128 : 64;
 
-        int k, k1 = range.begin(), k2 = range.end();
+        int k, k1 = range.start, k2 = range.end;
         float maxSize = 0;
         for( k = k1; k < k2; k++ )
         {
@@ -954,7 +948,7 @@ void SURF::operator()(InputArray _img, InputArray _mask,
 
         // we call SURFInvoker in any case, even if we do not need descriptors,
         // since it computes orientation of each feature.
-        parallel_for(BlockedRange(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
+        parallel_for_(Range(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
 
         // remove keypoints that were marked for deletion
         for( i = j = 0; i < N; i++ )
index acc188e..de7cac2 100644 (file)
@@ -60,27 +60,24 @@ namespace cv
 
         const char noImage2dOption [] = "-D DISABLE_IMAGE2D";
 
-        static char SURF_OPTIONS [1024] = ""; 
-        static bool USE_IMAGE2d = false;
+        static bool use_image2d = false;
+
         static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
             size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels, int depth)
         {
-            char * pSURF_OPTIONS = SURF_OPTIONS;
-            static bool OPTION_INIT = false;
-            if(!OPTION_INIT)
+            char optBuf [100] = {0};
+            char * optBufPtr = optBuf;
+            if( !use_image2d )
             {
-                if( !USE_IMAGE2d )
-                {
-                    strcat(pSURF_OPTIONS, noImage2dOption);
-                    pSURF_OPTIONS += strlen(noImage2dOption);
-                }
-
-                size_t wave_size = 0;
-                queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
-                std::sprintf(pSURF_OPTIONS, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
-                OPTION_INIT = true;
+                strcat(optBufPtr, noImage2dOption);
+                optBufPtr += strlen(noImage2dOption);
             }
-            openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS);
+            cl_kernel kernel;
+            kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
+            size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
+            CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
+            sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
+            openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
         }
     }
 }
@@ -161,22 +158,12 @@ public:
         counters.setTo(Scalar::all(0));
 
         integral(img, surf_.sum);
-        if(support_image2d())
+        use_image2d = support_image2d();
+        if(use_image2d)
         {
-            try
-            {
-                bindImgTex(img, imgTex);
-                bindImgTex(surf_.sum, sumTex);
-                USE_IMAGE2d = true;
-            }
-            catch (const cv::Exception& e)
-            {
-                USE_IMAGE2d = false;
-                if(e.code != CL_IMAGE_FORMAT_NOT_SUPPORTED && e.code != -217)
-                {
-                    throw e;
-                }
-            }
+            bindImgTex(img, imgTex);
+            bindImgTex(surf_.sum, sumTex);
+            finish();
         }
 
         maskSumTex = 0;
diff --git a/modules/nonfree/src/vibe_gpu.cpp b/modules/nonfree/src/vibe_gpu.cpp
deleted file mode 100644 (file)
index e348627..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-#if defined(HAVE_OPENCV_GPU)
-
-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-
-cv::gpu::VIBE_GPU::VIBE_GPU(unsigned long) { throw_nogpu(); }
-void cv::gpu::VIBE_GPU::initialize(const GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::VIBE_GPU::operator()(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::VIBE_GPU::release() {}
-
-#else
-
-namespace cv { namespace gpu { namespace device
-{
-    namespace vibe
-    {
-        void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);
-
-        void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
-
-        void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
-    }
-}}}
-
-namespace
-{
-    const int defaultNbSamples = 20;
-    const int defaultReqMatches = 2;
-    const int defaultRadius = 20;
-    const int defaultSubsamplingFactor = 16;
-}
-
-cv::gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed) :
-    frameSize_(0, 0), rngSeed_(rngSeed)
-{
-    nbSamples = defaultNbSamples;
-    reqMatches = defaultReqMatches;
-    radius = defaultRadius;
-    subsamplingFactor = defaultSubsamplingFactor;
-}
-
-void cv::gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& s)
-{
-    using namespace cv::gpu::device::vibe;
-
-    CV_Assert(firstFrame.type() == CV_8UC1 || firstFrame.type() == CV_8UC3 || firstFrame.type() == CV_8UC4);
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    loadConstants(nbSamples, reqMatches, radius, subsamplingFactor);
-
-    frameSize_ = firstFrame.size();
-
-    if (randStates_.size() != frameSize_)
-    {
-        cv::RNG rng(rngSeed_);
-        cv::Mat h_randStates(frameSize_, CV_8UC4);
-        rng.fill(h_randStates, cv::RNG::UNIFORM, 0, 255);
-        randStates_.upload(h_randStates);
-    }
-
-    int ch = firstFrame.channels();
-    int sample_ch = ch == 1 ? 1 : 4;
-
-    samples_.create(nbSamples * frameSize_.height, frameSize_.width, CV_8UC(sample_ch));
-
-    init_gpu(firstFrame, ch, samples_, randStates_, stream);
-}
-
-void cv::gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& s)
-{
-    using namespace cv::gpu::device::vibe;
-
-    CV_Assert(frame.depth() == CV_8U);
-
-    int ch = frame.channels();
-    int sample_ch = ch == 1 ? 1 : 4;
-
-    if (frame.size() != frameSize_ || sample_ch != samples_.channels())
-        initialize(frame);
-
-    fgmask.create(frameSize_, CV_8UC1);
-
-    update_gpu(frame, ch, fgmask, samples_, randStates_, StreamAccessor::getStream(s));
-}
-
-void cv::gpu::VIBE_GPU::release()
-{
-    frameSize_ = Size(0, 0);
-
-    randStates_.release();
-
-    samples_.release();
-}
-
-#endif
-
-#endif // defined(HAVE_OPENCV_GPU)
index 30aec35..3f63eed 100644 (file)
@@ -191,42 +191,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
     testing::Values(SURF_Extended(false), SURF_Extended(true)),
     testing::Values(SURF_Upright(false), SURF_Upright(true))));
 
-//////////////////////////////////////////////////////
-// VIBE
-
-PARAM_TEST_CASE(VIBE, cv::Size, MatType, UseRoi)
-{
-};
-
-GPU_TEST_P(VIBE, Accuracy)
-{
-    const cv::Size size = GET_PARAM(0);
-    const int type = GET_PARAM(1);
-    const bool useRoi = GET_PARAM(2);
-
-    const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255));
-
-    cv::Mat frame = randomMat(size, type, 0.0, 100);
-    cv::gpu::GpuMat d_frame = loadMat(frame, useRoi);
-
-    cv::gpu::VIBE_GPU vibe;
-    cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi);
-    vibe.initialize(d_frame);
-
-    for (int i = 0; i < 20; ++i)
-        vibe(d_frame, d_fgmask);
-
-    frame = randomMat(size, type, 160, 255);
-    d_frame = loadMat(frame, useRoi);
-    vibe(d_frame, d_fgmask);
-
-    // now fgmask should be entirely foreground
-    ASSERT_MAT_NEAR(fullfg, d_fgmask, 0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine(
-    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)),
-    WHOLE_SUBMAT));
-
 #endif
index 46a232e..9e78dce 100644 (file)
@@ -1141,7 +1141,7 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& object
 
         Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) );
         Size scaledImageSize( cvRound( grayImage.cols/factor ), cvRound( grayImage.rows/factor ) );
-        Size processingRectSize( scaledImageSize.width - originalWindowSize.width + 1, scaledImageSize.height - originalWindowSize.height + 1 );
+        Size processingRectSize( scaledImageSize.width - originalWindowSize.width, scaledImageSize.height - originalWindowSize.height );
 
         if( processingRectSize.width <= 0 || processingRectSize.height <= 0 )
             break;
@@ -1165,15 +1165,10 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& object
 
         int stripCount, stripSize;
 
-    #ifdef HAVE_TBB
         const int PTS_PER_THREAD = 1000;
         stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
         stripCount = std::min(std::max(stripCount, 1), 100);
         stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
-    #else
-        stripCount = 1;
-        stripSize = processingRectSize.height;
-    #endif
 
         if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates,
             rejectLevels, levelWeights, outputRejectLevels ) )
index 521f0fd..5a45965 100644 (file)
@@ -582,7 +582,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H,
     // For each component perform searching
     for (i = 0; i < kComponents; i++)
     {
-#ifdef HAVE_TBB
         int error = searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
             b[i], maxXBorder, maxYBorder, scoreThreshold,
             &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
@@ -598,13 +597,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H,
             free(partsDisplacementArr);
             return LATENT_SVM_SEARCH_OBJECT_FAILED;
         }
-#else
-    (void)numThreads;
-        searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
-            b[i], maxXBorder, maxYBorder, scoreThreshold,
-            &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
-            &(scoreArr[i]), &(partsDisplacementArr[i]));
-#endif
         estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i],
             filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i]));
         componentIndex += (kPartFilters[i] + 1);
index a7cd3a0..05b28b8 100644 (file)
@@ -3,5 +3,5 @@ if(NOT HAVE_OPENCL)
 endif()
 
 set(the_description "OpenCL-accelerated Computer Vision")
-ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video)
+ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d)
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
index 6e34d27..2902127 100644 (file)
@@ -122,8 +122,9 @@ namespace cv
         CV_EXPORTS  void setBinpath(const char *path);
 
         //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
+        //returns cl_context * 
         CV_EXPORTS void* getoclContext();
-
+        //returns cl_command_queue *
         CV_EXPORTS void* getoclCommandQueue();
 
         //explicit call clFinish. The global command queue will be used.
@@ -133,6 +134,9 @@ namespace cv
         //getDevice also need to be called before this function
         CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
 
+        //returns true when global OpenCL context is initialized
+        CV_EXPORTS bool initialized();
+
         //////////////////////////////// Error handling ////////////////////////
         CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
 
@@ -143,7 +147,7 @@ namespace cv
         protected:
             Context();
             friend class auto_ptr<Context>;
-
+            friend bool initialized();
         private:
             static auto_ptr<Context> clCxt;
             static int val;
@@ -407,6 +411,9 @@ namespace cv
         //! computes element-wise product of the two arrays (c = a * b)
         // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
         CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
+        //! multiplies matrix to a number (dst = scalar * src)
+        // supports CV_32FC1 only
+        CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
         //! computes element-wise quotient of the two arrays (c = a / b)
         // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
         CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
@@ -458,6 +465,7 @@ namespace cv
         // support all C1 types
 
         CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+        CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
 
         //! finds global minimum and maximum array elements and returns their values with locations
         // support all C1 types
@@ -478,6 +486,23 @@ namespace cv
         CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
         //! only 8UC1 and 256 bins is supported now
         CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
+        
+        //! only 8UC1 is supported now
+        class CV_EXPORTS CLAHE
+        {
+        public:
+            virtual void apply(const oclMat &src, oclMat &dst) = 0;
+
+            virtual void setClipLimit(double clipLimit) = 0;
+            virtual double getClipLimit() const = 0;
+
+            virtual void setTilesGridSize(Size tileGridSize) = 0;
+            virtual Size getTilesGridSize() const = 0;
+
+            virtual void collectGarbage() = 0;
+        };
+        CV_EXPORTS Ptr<cv::ocl::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+        
         //! bilateralFilter
         // supports 8UC1 8UC4
         CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
@@ -684,6 +709,8 @@ namespace cv
         }
 
         //! applies non-separable 2D linear filter to the image
+        //  Note, at the moment this function only works when anchor point is in the kernel center
+        //  and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result
         CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
 
@@ -786,7 +813,11 @@ namespace cv
         CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
         CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
         CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+        CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+            int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
         CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+        CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+            int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
@@ -808,7 +839,7 @@ namespace cv
             OclCascadeClassifierBuf() :
                 m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
 
-            ~OclCascadeClassifierBuf() {}
+            ~OclCascadeClassifierBuf() { release(); }
 
             void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
                                   double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
@@ -866,7 +897,6 @@ namespace cv
             std::vector<oclMat> image_sqsums;
         };
 
-
         //! computes the proximity map for the raster template and the image where the template is searched for
         // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
@@ -877,71 +907,36 @@ namespace cv
         // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
         CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
 
-
-
         ///////////////////////////////////////////// Canny /////////////////////////////////////////////
-
         struct CV_EXPORTS CannyBuf;
-
-
-
         //! compute edges of the input image using Canny operator
-
         // Support CV_8UC1 only
-
         CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-
         CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-
         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
-
         CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
 
-
-
         struct CV_EXPORTS CannyBuf
-
         {
-
             CannyBuf() : counter(NULL) {}
-
             ~CannyBuf()
             {
                 release();
             }
-
             explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
-
             {
-
                 create(image_size, apperture_size);
-
             }
-
             CannyBuf(const oclMat &dx_, const oclMat &dy_);
 
-
-
             void create(const Size &image_size, int apperture_size = 3);
-
-
-
             void release();
-
-
-
             oclMat dx, dy;
-
             oclMat dx_buf, dy_buf;
-
             oclMat edgeBuf;
-
             oclMat trackBuf1, trackBuf2;
-
             void *counter;
-
             Ptr<FilterEngine_GPU> filterDX, filterDY;
-
         };
 
         ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
@@ -966,159 +961,69 @@ namespace cv
                              const oclMat &src3, double beta, oclMat &dst, int flags = 0);
 
         //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
-
         struct CV_EXPORTS HOGDescriptor
-
         {
-
             enum { DEFAULT_WIN_SIGMA = -1 };
-
             enum { DEFAULT_NLEVELS = 64 };
-
             enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
-
-
-
             HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
-
                           Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
-
                           int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
-
                           double threshold_L2hys = 0.2, bool gamma_correction = true,
-
                           int nlevels = DEFAULT_NLEVELS);
 
-
-
             size_t getDescriptorSize() const;
-
             size_t getBlockHistogramSize() const;
-
-
-
             void setSVMDetector(const vector<float> &detector);
-
-
-
             static vector<float> getDefaultPeopleDetector();
-
             static vector<float> getPeopleDetector48x96();
-
             static vector<float> getPeopleDetector64x128();
-
-
-
             void detect(const oclMat &img, vector<Point> &found_locations,
-
                         double hit_threshold = 0, Size win_stride = Size(),
-
                         Size padding = Size());
-
-
-
             void detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
-
                                   double hit_threshold = 0, Size win_stride = Size(),
-
                                   Size padding = Size(), double scale0 = 1.05,
-
                                   int group_threshold = 2);
-
-
-
             void getDescriptors(const oclMat &img, Size win_stride,
-
                                 oclMat &descriptors,
-
                                 int descr_format = DESCR_FORMAT_COL_BY_COL);
-
-
-
             Size win_size;
-
             Size block_size;
-
             Size block_stride;
-
             Size cell_size;
 
             int nbins;
-
             double win_sigma;
-
             double threshold_L2hys;
-
             bool gamma_correction;
-
             int nlevels;
 
-
-
         protected:
-
             // initialize buffers; only need to do once in case of multiscale detection
-
             void init_buffer(const oclMat &img, Size win_stride);
-
-
-
             void computeBlockHistograms(const oclMat &img);
-
             void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
-
-
-
             double getWinSigma() const;
-
             bool checkDetectorSize() const;
 
-
-
             static int numPartsWithin(int size, int part_size, int stride);
-
             static Size numPartsWithin(Size size, Size part_size, Size stride);
 
-
-
             // Coefficients of the separating plane
-
             float free_coef;
-
             oclMat detector;
-
-
-
             // Results of the last classification step
-
             oclMat labels;
-
             Mat labels_host;
-
-
-
             // Results of the last histogram evaluation step
-
             oclMat block_hists;
-
-
-
             // Gradients conputation results
-
             oclMat grad, qangle;
-
-
-
             // scaled image
-
             oclMat image_scale;
-
-
-
             // effect size of input image (might be different from original size after scaling)
-
             Size effect_size;
-
         };
 
 
@@ -1126,13 +1031,11 @@ namespace cv
         /****************************************************************************************\
         *                                      Distance                                          *
         \****************************************************************************************/
-
         template<typename T>
         struct CV_EXPORTS Accumulator
         {
             typedef T Type;
         };
-
         template<> struct Accumulator<unsigned char>
         {
             typedef float Type;
@@ -1206,469 +1109,276 @@ namespace cv
         {
         public:
             enum DistType {L1Dist = 0, L2Dist, HammingDist};
-
             explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
-
-
-
             // Add descriptors to train descriptor collection
-
             void add(const std::vector<oclMat> &descCollection);
-
-
-
             // Get train descriptors collection
-
             const std::vector<oclMat> &getTrainDescriptors() const;
-
-
-
             // Clear train descriptors collection
-
             void clear();
-
-
-
             // Return true if there are not train descriptors in collection
-
             bool empty() const;
 
-
-
             // Return true if the matcher supports mask in match methods
-
             bool isMaskSupported() const;
 
-
-
             // Find one best match for each query descriptor
-
             void matchSingle(const oclMat &query, const oclMat &train,
-
                              oclMat &trainIdx, oclMat &distance,
-
                              const oclMat &mask = oclMat());
 
-
-
             // Download trainIdx and distance and convert it to CPU vector with DMatch
-
             static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
-
             // Convert trainIdx and distance to vector with DMatch
-
             static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
 
-
-
             // Find one best match for each query descriptor
-
             void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
 
-
-
             // Make gpu collection of trains and masks in suitable format for matchCollection function
-
             void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
 
 
-
             // Find one best match from train collection for each query descriptor
-
             void matchCollection(const oclMat &query, const oclMat &trainCollection,
-
                                  oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-
                                  const oclMat &masks = oclMat());
 
-
-
             // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
-
             static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
-
             // Convert trainIdx, imgIdx and distance to vector with DMatch
-
             static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
 
-
-
             // Find one best match from train collection for each query descriptor.
-
             void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
 
-
-
             // Find k best matches for each query descriptor (in increasing order of distances)
-
             void knnMatchSingle(const oclMat &query, const oclMat &train,
-
                                 oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
-
                                 const oclMat &mask = oclMat());
 
-
-
             // Download trainIdx and distance and convert it to vector with DMatch
-
             // compactResult is used when mask is not empty. If compactResult is false matches
-
             // vector will have the same size as queryDescriptors rows. If compactResult is true
-
             // matches vector will not contain matches for fully masked out query descriptors.
-
             static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
-
                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
 
             // Convert trainIdx and distance to vector with DMatch
-
             static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
-
                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
 
-
-
             // Find k best matches for each query descriptor (in increasing order of distances).
-
             // compactResult is used when mask is not empty. If compactResult is false matches
-
             // vector will have the same size as queryDescriptors rows. If compactResult is true
-
             // matches vector will not contain matches for fully masked out query descriptors.
-
             void knnMatch(const oclMat &query, const oclMat &train,
-
                           std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
-
                           bool compactResult = false);
 
-
-
             // Find k best matches from train collection for each query descriptor (in increasing order of distances)
-
             void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
-
                                      oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-
                                      const oclMat &maskCollection = oclMat());
 
-
-
             // Download trainIdx and distance and convert it to vector with DMatch
-
             // compactResult is used when mask is not empty. If compactResult is false matches
-
             // vector will have the same size as queryDescriptors rows. If compactResult is true
-
             // matches vector will not contain matches for fully masked out query descriptors.
-
             static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
-
                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
 
             // Convert trainIdx and distance to vector with DMatch
-
             static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
-
                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
 
-
-
             // Find k best matches  for each query descriptor (in increasing order of distances).
-
             // compactResult is used when mask is not empty. If compactResult is false matches
-
             // vector will have the same size as queryDescriptors rows. If compactResult is true
-
             // matches vector will not contain matches for fully masked out query descriptors.
-
             void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
-
                           const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
 
-
-
             // Find best matches for each query descriptor which have distance less than maxDistance.
-
             // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
-
             // Be careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
-
             // because it didn't have enough memory.
-
             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
-
             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
-
             // Matches are not sorted.
-
             void radiusMatchSingle(const oclMat &query, const oclMat &train,
-
                                    oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
-
                                    const oclMat &mask = oclMat());
 
-
-
             // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
-
             // matches will be sorted in increasing order of distances.
-
             // compactResult is used when mask is not empty. If compactResult is false matches
-
             // vector will have the same size as queryDescriptors rows. If compactResult is true
-
             // matches vector will not contain matches for fully masked out query descriptors.
-
             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
-
                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
             // Convert trainIdx, nMatches and distance to vector with DMatch.
-
             static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
-
                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-
-
             // Find best matches for each query descriptor which have distance less than maxDistance
-
             // (in increasing order of distances).
-
             void radiusMatch(const oclMat &query, const oclMat &train,
-
                              std::vector< std::vector<DMatch> > &matches, float maxDistance,
-
                              const oclMat &mask = oclMat(), bool compactResult = false);
-
-
-
             // Find best matches for each query descriptor which have distance less than maxDistance.
-
             // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
-
             // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
-
             // Matches are not sorted.
-
             void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
-
                                        const std::vector<oclMat> &masks = std::vector<oclMat>());
-
-
-
             // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
-
             // matches will be sorted in increasing order of distances.
-
             // compactResult is used when mask is not empty. If compactResult is false matches
-
             // vector will have the same size as queryDescriptors rows. If compactResult is true
-
             // matches vector will not contain matches for fully masked out query descriptors.
-
             static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
-
                                             std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
             // Convert trainIdx, imgIdx, nMatches and distance to vector with DMatch.
-
             static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
-
                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-
-
             // Find best matches from train collection for each query descriptor which have distance less than
-
             // maxDistance (in increasing order of distances).
-
             void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
-
                              const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
-
-
-
             DistType distType;
-
-
-
         private:
-
             std::vector<oclMat> trainDescCollection;
-
         };
 
-
-
         template <class Distance>
-
         class CV_EXPORTS BruteForceMatcher_OCL;
 
-
-
         template <typename T>
-
         class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
-
         {
-
         public:
-
             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
-
             explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
-
         };
 
         template <typename T>
-
         class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
-
         {
-
         public:
-
             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
-
             explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
-
         };
 
         template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
-
         {
-
         public:
-
             explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
-
             explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
+        };
 
+        class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
+        {
+        public:
+            explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
         };
 
+        class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
+        {
+        public:
+            explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+                int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
 
+            //! returns a single-row matrix of type CV_32FC2
+            void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
+            //! download points of type Point2f to a vector. the vector's content will be erased
+            void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
 
-        /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+            int maxCorners;
+            double qualityLevel;
+            double minDistance;
 
-        class CV_EXPORTS PyrLKOpticalFlow
+            int blockSize;
+            bool useHarrisDetector;
+            double harrisK;
+            void releaseMemory()
+            {
+                Dx_.release();
+                Dy_.release();
+                eig_.release();
+                minMaxbuf_.release();
+                tmpCorners_.release();
+            }
+        private:
+            oclMat Dx_;
+            oclMat Dy_;
+            oclMat eig_;
+            oclMat minMaxbuf_;
+            oclMat tmpCorners_;
+        };
 
+        inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
+            int blockSize_, bool useHarrisDetector_, double harrisK_)
         {
+            maxCorners = maxCorners_;
+            qualityLevel = qualityLevel_;
+            minDistance = minDistance_;
+            blockSize = blockSize_;
+            useHarrisDetector = useHarrisDetector_;
+            harrisK = harrisK_;
+        }
 
+        /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+        class CV_EXPORTS PyrLKOpticalFlow
+        {
         public:
-
             PyrLKOpticalFlow()
-
             {
-
                 winSize = Size(21, 21);
-
                 maxLevel = 3;
-
                 iters = 30;
-
                 derivLambda = 0.5;
-
                 useInitialFlow = false;
-
                 minEigThreshold = 1e-4f;
-
                 getMinEigenVals = false;
-
                 isDeviceArch11_ = false;
-
             }
 
-
-
             void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
-
                         oclMat &status, oclMat *err = 0);
-
-
-
             void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
-
-
-
             Size winSize;
-
             int maxLevel;
-
             int iters;
-
             double derivLambda;
-
             bool useInitialFlow;
-
             float minEigThreshold;
-
             bool getMinEigenVals;
-
-
-
             void releaseMemory()
-
             {
-
                 dx_calcBuf_.release();
-
                 dy_calcBuf_.release();
 
-
-
                 prevPyr_.clear();
-
                 nextPyr_.clear();
 
-
-
                 dx_buf_.release();
-
                 dy_buf_.release();
-
             }
-
-
-
         private:
-
             void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
-
-
-
             void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
 
-
-
             oclMat dx_calcBuf_;
-
             oclMat dy_calcBuf_;
 
-
-
             vector<oclMat> prevPyr_;
-
             vector<oclMat> nextPyr_;
 
-
-
             oclMat dx_buf_;
-
             oclMat dy_buf_;
-
-
-
             oclMat uPyr_[2];
-
             oclMat vPyr_[2];
-
-
-
             bool isDeviceArch11_;
-
         };
         //////////////// build warping maps ////////////////////
         //! builds plane warping maps
@@ -1739,6 +1449,7 @@ namespace cv
         private:
             oclMat minSSD, leBuf, riBuf;
         };
+
         class CV_EXPORTS StereoBeliefPropagation
         {
         public:
@@ -1769,6 +1480,7 @@ namespace cv
             std::vector<oclMat> datas;
             oclMat out;
         };
+
         class CV_EXPORTS StereoConstantSpaceBP
         {
         public:
@@ -1807,6 +1519,94 @@ namespace cv
             oclMat temp;
             oclMat out;
         };
+
+        // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
+        //
+        // see reference:
+        //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+        //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+        class CV_EXPORTS OpticalFlowDual_TVL1_OCL
+        {
+        public:
+            OpticalFlowDual_TVL1_OCL();
+
+            void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
+
+            void collectGarbage();
+
+            /**
+            * Time step of the numerical scheme.
+            */
+            double tau;
+
+            /**
+            * Weight parameter for the data term, attachment parameter.
+            * This is the most relevant parameter, which determines the smoothness of the output.
+            * The smaller this parameter is, the smoother the solutions we obtain.
+            * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
+            */
+            double lambda;
+
+            /**
+            * Weight parameter for (u - v)^2, tightness parameter.
+            * It serves as a link between the attachment and the regularization terms.
+            * In theory, it should have a small value in order to maintain both parts in correspondence.
+            * The method is stable for a large range of values of this parameter.
+            */
+            double theta;
+
+            /**
+            * Number of scales used to create the pyramid of images.
+            */
+            int nscales;
+
+            /**
+            * Number of warpings per scale.
+            * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
+            * This is a parameter that assures the stability of the method.
+            * It also affects the running time, so it is a compromise between speed and accuracy.
+            */
+            int warps;
+
+            /**
+            * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
+            * A small value will yield more accurate solutions at the expense of a slower convergence.
+            */
+            double epsilon;
+
+            /**
+            * Stopping criterion iterations number used in the numerical scheme.
+            */
+            int iterations;
+
+            bool useInitialFlow;
+
+        private:
+            void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
+
+            std::vector<oclMat> I0s;
+            std::vector<oclMat> I1s;
+            std::vector<oclMat> u1s;
+            std::vector<oclMat> u2s;
+
+            oclMat I1x_buf;
+            oclMat I1y_buf;
+
+            oclMat I1w_buf;
+            oclMat I1wx_buf;
+            oclMat I1wy_buf;
+
+            oclMat grad_buf;
+            oclMat rho_c_buf;
+
+            oclMat p11_buf;
+            oclMat p12_buf;
+            oclMat p21_buf;
+            oclMat p22_buf;
+
+            oclMat diff_buf;
+            oclMat norm_buf;
+        };
     }
 }
 #if defined _MSC_VER && _MSC_VER >= 1200
index 081d234..634f2f2 100644 (file)
@@ -49,7 +49,7 @@
 #include "opencv2/ocl/ocl.hpp"
 
 #if defined __APPLE__
-#include <OpenCL/OpenCL.h>
+#include <OpenCL/opencl.h>
 #else
 #include <CL/opencl.h>
 #endif
@@ -120,6 +120,33 @@ namespace cv
         cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
         void CV_EXPORTS releaseTexture(cl_mem& texture);
 
+        //Represents an image texture object
+        class CV_EXPORTS TextureCL
+        {
+        public:
+            TextureCL(cl_mem tex, int r, int c, int t)
+                : tex_(tex), rows(r), cols(c), type(t) {}
+            ~TextureCL()
+            {
+                openCLFree(tex_);
+            }
+            operator cl_mem() 
+            {
+                return tex_;
+            }
+            cl_mem const tex_;
+            const int rows;
+            const int cols;
+            const int type;
+        private:
+            //disable assignment
+            void operator=(const TextureCL&);
+        };
+        // binds an oclMat to an OpenCL image texture and returns a TextureCL object
+        // note:
+        //   for faster clamping, there is no buffer padding for the constructed texture
+        Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
+
         // returns whether the current context supports image2d_t format or not
         bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
 
@@ -128,11 +155,17 @@ namespace cv
         enum DEVICE_INFO
         {
             WAVEFRONT_SIZE,             //in AMD speak
-            WARP_SIZE = WAVEFRONT_SIZE, //in nvidia speak
             IS_CPU_DEVICE               //check if the device is CPU
         };
-        //info should have been pre-allocated
-        void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info);
+        template<DEVICE_INFO _it, typename _ty>
+        _ty queryDeviceInfo(cl_kernel kernel = NULL);
+
+        template<>
+        int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
+        template<>
+        size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
+        template<>
+        bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
 
     }//namespace ocl
 
index e6e9576..3ef0634 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 
 #include "precomp.hpp"
 ///////////// Lut ////////////////////////
-TEST(lut)
+PERFTEST(lut)
 {
-    Mat src, lut, dst;
+    Mat src, lut, dst, ocl_dst;
     ocl::oclMat d_src, d_lut, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC3};
@@ -61,7 +62,6 @@ TEST(lut)
 
             gen(src, size, size, all_type[j], 0, 256);
             gen(lut, 1, 256, CV_8UC1, 0, 1);
-            gen(dst, size, size, all_type[j], 0, 256);
 
             LUT(src, lut, dst);
 
@@ -78,33 +78,32 @@ TEST(lut)
 
             GPU_ON;
             ocl::LUT(d_src, d_lut, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             d_lut.upload(lut);
             ocl::LUT(d_src, d_lut, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
 
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0);
         }
 
     }
 }
 
 ///////////// Exp ////////////////////////
-TEST(Exp)
+PERFTEST(Exp)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
     {
         SUBTEST << size << 'x' << size << "; CV_32FC1";
 
-        gen(src, size, size, CV_32FC1, 0, 256);
-        gen(dst, size, size, CV_32FC1, 0, 256);
+        gen(src, size, size, CV_32FC1, 5, 16);
 
         exp(src, dst);
 
@@ -119,21 +118,22 @@ TEST(Exp)
 
         GPU_ON;
         ocl::exp(d_src, d_dst);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
         d_src.upload(src);
         ocl::exp(d_src, d_dst);
-        d_dst.download(dst);
+        d_dst.download(ocl_dst);
         GPU_FULL_OFF;
+
+        TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 2);
     }
 }
 
 ///////////// LOG ////////////////////////
-TEST(Log)
+PERFTEST(Log)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
@@ -155,21 +155,22 @@ TEST(Log)
 
         GPU_ON;
         ocl::log(d_src, d_dst);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
         d_src.upload(src);
         ocl::log(d_src, d_dst);
-        d_dst.download(dst);
+        d_dst.download(ocl_dst);
         GPU_FULL_OFF;
+
+        TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
     }
 }
 
 ///////////// Add ////////////////////////
-TEST(Add)
+PERFTEST(Add)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
 
     int all_type[] = {CV_8UC1, CV_32FC1};
@@ -189,6 +190,7 @@ TEST(Add)
             CPU_ON;
             add(src1, src2, dst);
             CPU_OFF;
+
             d_src1.upload(src1);
             d_src2.upload(src2);
 
@@ -198,24 +200,25 @@ TEST(Add)
 
             GPU_ON;
             ocl::add(d_src1, d_src2, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::add(d_src1, d_src2, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
         }
 
     }
 }
 
 ///////////// Mul ////////////////////////
-TEST(Mul)
+PERFTEST(Mul)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
@@ -229,8 +232,6 @@ TEST(Mul)
 
             gen(src1, size, size, all_type[j], 0, 256);
             gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
-
 
             multiply(src1, src2, dst);
 
@@ -246,24 +247,25 @@ TEST(Mul)
 
             GPU_ON;
             ocl::multiply(d_src1, d_src2, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::multiply(d_src1, d_src2, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
         }
 
     }
 }
 
 ///////////// Div ////////////////////////
-TEST(Div)
+PERFTEST(Div)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
     int all_type[] = {CV_8UC1, CV_8UC4};
     std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
@@ -276,14 +278,13 @@ TEST(Div)
 
             gen(src1, size, size, all_type[j], 0, 256);
             gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
-
 
             divide(src1, src2, dst);
 
             CPU_ON;
             divide(src1, src2, dst);
             CPU_OFF;
+
             d_src1.upload(src1);
             d_src2.upload(src2);
 
@@ -293,24 +294,25 @@ TEST(Div)
 
             GPU_ON;
             ocl::divide(d_src1, d_src2, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::divide(d_src1, d_src2, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
         }
 
     }
 }
 
 ///////////// Absdiff ////////////////////////
-TEST(Absdiff)
+PERFTEST(Absdiff)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
@@ -326,12 +328,12 @@ TEST(Absdiff)
             gen(src2, size, size, all_type[j], 0, 256);
             gen(dst, size, size, all_type[j], 0, 256);
 
-
             absdiff(src1, src2, dst);
 
             CPU_ON;
             absdiff(src1, src2, dst);
             CPU_OFF;
+
             d_src1.upload(src1);
             d_src2.upload(src2);
 
@@ -341,24 +343,25 @@ TEST(Absdiff)
 
             GPU_ON;
             ocl::absdiff(d_src1, d_src2, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::absdiff(d_src1, d_src2, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
         }
 
     }
 }
 
 ///////////// CartToPolar ////////////////////////
-TEST(CartToPolar)
+PERFTEST(CartToPolar)
 {
-    Mat src1, src2, dst, dst1;
+    Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
     ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
 
     int all_type[] = {CV_32FC1};
@@ -381,6 +384,7 @@ TEST(CartToPolar)
             CPU_ON;
             cartToPolar(src1, src2, dst, dst1, 1);
             CPU_OFF;
+
             d_src1.upload(src1);
             d_src2.upload(src2);
 
@@ -390,25 +394,30 @@ TEST(CartToPolar)
 
             GPU_ON;
             ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
-            d_dst.download(dst);
-            d_dst1.download(dst1);
+            d_dst.download(ocl_dst);
+            d_dst1.download(ocl_dst1);
             GPU_FULL_OFF;
+
+            double diff1 = checkNorm(ocl_dst1, dst1);
+            double diff2 = checkNorm(ocl_dst, dst);
+            double max_diff = max(diff1, diff2);
+            TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
+
         }
 
     }
 }
 
 ///////////// PolarToCart ////////////////////////
-TEST(PolarToCart)
+PERFTEST(PolarToCart)
 {
-    Mat src1, src2, dst, dst1;
+    Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
     ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
 
     int all_type[] = {CV_32FC1};
@@ -440,25 +449,30 @@ TEST(PolarToCart)
 
             GPU_ON;
             ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
-            d_dst.download(dst);
-            d_dst1.download(dst1);
+            d_dst.download(ocl_dst);
+            d_dst1.download(ocl_dst1);
             GPU_FULL_OFF;
+
+            double diff1 = checkNorm(ocl_dst1, dst1);
+            double diff2 = checkNorm(ocl_dst, dst);
+            double max_diff = max(diff1, diff2);
+            TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
+
         }
 
     }
 }
 
 ///////////// Magnitude ////////////////////////
-TEST(magnitude)
+PERFTEST(magnitude)
 {
-    Mat x, y, mag;
+    Mat x, y, mag, ocl_mag;
     ocl::oclMat d_x, d_y, d_mag;
 
     int all_type[] = {CV_32FC1};
@@ -487,24 +501,25 @@ TEST(magnitude)
 
             GPU_ON;
             ocl::magnitude(d_x, d_y, d_mag);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_x.upload(x);
             d_y.upload(y);
             ocl::magnitude(d_x, d_y, d_mag);
-            d_mag.download(mag);
+            d_mag.download(ocl_mag);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_mag, mag, 1e-5);
         }
 
     }
 }
 
 ///////////// Transpose ////////////////////////
-TEST(Transpose)
+PERFTEST(Transpose)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
@@ -532,23 +547,24 @@ TEST(Transpose)
 
             GPU_ON;
             ocl::transpose(d_src, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::transpose(d_src, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
         }
 
     }
 }
 
 ///////////// Flip ////////////////////////
-TEST(Flip)
+PERFTEST(Flip)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
@@ -576,26 +592,28 @@ TEST(Flip)
 
             GPU_ON;
             ocl::flip(d_src, d_dst, 0);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::flip(d_src, d_dst, 0);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
         }
 
     }
 }
 
 ///////////// minMax ////////////////////////
-TEST(minMax)
+PERFTEST(minMax)
 {
     Mat src;
     ocl::oclMat d_src;
 
-    double min_val, max_val;
+    double min_val = 0.0, max_val = 0.0;
+    double min_val_ = 0.0, max_val_ = 0.0;
     Point min_loc, max_loc;
     int all_type[] = {CV_8UC1, CV_32FC1};
     std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
@@ -614,12 +632,16 @@ TEST(minMax)
             d_src.upload(src);
 
             WARMUP_ON;
-            ocl::minMax(d_src, &min_val, &max_val);
+            ocl::minMax(d_src, &min_val_, &max_val_);
             WARMUP_OFF;
 
+            if(EeceptDoubleEQ<double>(max_val_, max_val) && EeceptDoubleEQ<double>(min_val_, min_val))
+                TestSystem::instance().setAccurate(1, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
+            else
+                TestSystem::instance().setAccurate(0, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
+
             GPU_ON;
             ocl::minMax(d_src, &min_val, &max_val);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
@@ -633,13 +655,15 @@ TEST(minMax)
 }
 
 ///////////// minMaxLoc ////////////////////////
-TEST(minMaxLoc)
+PERFTEST(minMaxLoc)
 {
     Mat src;
     ocl::oclMat d_src;
 
-    double min_val, max_val;
+    double min_val = 0.0, max_val = 0.0;
+    double min_val_ = 0.0, max_val_ = 0.0;
     Point min_loc, max_loc;
+    Point min_loc_, max_loc_;
     int all_type[] = {CV_8UC1, CV_32FC1};
     std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
 
@@ -657,12 +681,71 @@ TEST(minMaxLoc)
             d_src.upload(src);
 
             WARMUP_ON;
-            ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
+            ocl::minMaxLoc(d_src, &min_val_, &max_val_, &min_loc_, &max_loc_);
             WARMUP_OFF;
 
+            double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.;
+            if(src.depth() == 0)
+            {
+                minlocVal = src.at<unsigned char>(min_loc);
+                minlocVal_ = src.at<unsigned char>(min_loc_);
+                maxlocVal = src.at<unsigned char>(max_loc);
+                maxlocVal_ = src.at<unsigned char>(max_loc_);
+            }
+            if(src.depth() == 1)
+            {
+                minlocVal = src.at<signed char>(min_loc);
+                minlocVal_ = src.at<signed char>(min_loc_);
+                maxlocVal = src.at<signed char>(max_loc);
+                maxlocVal_ = src.at<signed char>(max_loc_);
+            }
+            if(src.depth() == 2)
+            {
+                minlocVal = src.at<unsigned short>(min_loc);
+                minlocVal_ = src.at<unsigned short>(min_loc_);
+                maxlocVal = src.at<unsigned short>(max_loc);
+                maxlocVal_ = src.at<unsigned short>(max_loc_);
+            }
+            if(src.depth() == 3)
+            {
+                minlocVal = src.at<signed short>(min_loc);
+                minlocVal_ = src.at<signed short>(min_loc_);
+                maxlocVal = src.at<signed short>(max_loc);
+                maxlocVal_ = src.at<signed short>(max_loc_);
+            }
+            if(src.depth() == 4)
+            {
+                minlocVal = src.at<int>(min_loc);
+                minlocVal_ = src.at<int>(min_loc_);
+                maxlocVal = src.at<int>(max_loc);
+                maxlocVal_ = src.at<int>(max_loc_);
+            }
+            if(src.depth() == 5)
+            {
+                minlocVal = src.at<float>(min_loc);
+                minlocVal_ = src.at<float>(min_loc_);
+                maxlocVal = src.at<float>(max_loc);
+                maxlocVal_ = src.at<float>(max_loc_);
+            }
+            if(src.depth() == 6)
+            {
+                minlocVal = src.at<double>(min_loc);
+                minlocVal_ = src.at<double>(min_loc_);
+                maxlocVal = src.at<double>(max_loc);
+                maxlocVal_ = src.at<double>(max_loc_);
+            }
+            error0 = ::abs(minlocVal_ - minlocVal);
+            error1 = ::abs(maxlocVal_ - maxlocVal);
+            if( EeceptDoubleEQ<double>(maxlocVal_, maxlocVal)
+                &&EeceptDoubleEQ<double>(minlocVal_, minlocVal)
+                &&EeceptDoubleEQ<double>(max_val_, max_val)
+                &&EeceptDoubleEQ<double>(min_val_, min_val))
+                TestSystem::instance().setAccurate(1, 0.);
+            else
+                TestSystem::instance().setAccurate(0, max(error0, error1));
+
             GPU_ON;
             ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
@@ -675,7 +758,7 @@ TEST(minMaxLoc)
 }
 
 ///////////// Sum ////////////////////////
-TEST(Sum)
+PERFTEST(Sum)
 {
     Mat src;
     Scalar cpures, gpures;
@@ -690,7 +773,7 @@ TEST(Sum)
         {
             SUBTEST << size << 'x' << size << "; " << type_name[j] ;
 
-            gen(src, size, size, all_type[j], 0, 256);
+            gen(src, size, size, all_type[j], 0, 60);
 
             cpures = sum(src);
 
@@ -703,9 +786,16 @@ TEST(Sum)
             gpures = ocl::sum(d_src);
             WARMUP_OFF;
 
+            vector<double> diffs(4);
+            diffs[3] = fabs(cpures[3] - gpures[3]);
+            diffs[2] = fabs(cpures[2] - gpures[2]);
+            diffs[1] = fabs(cpures[1] - gpures[1]);
+            diffs[0] = fabs(cpures[0] - gpures[0]);
+            double max_diff = *max_element(diffs.begin(), diffs.end());
+            TestSystem::instance().setAccurate(max_diff<0.1?1:0, max_diff);
+
             GPU_ON;
             gpures = ocl::sum(d_src);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
@@ -718,7 +808,7 @@ TEST(Sum)
 }
 
 ///////////// countNonZero ////////////////////////
-TEST(countNonZero)
+PERFTEST(countNonZero)
 {
     Mat src;
     ocl::oclMat d_src;
@@ -736,18 +826,24 @@ TEST(countNonZero)
 
             countNonZero(src);
 
+            int cpures = 0, gpures = 0;
             CPU_ON;
-            countNonZero(src);
+            cpures = countNonZero(src);
             CPU_OFF;
             d_src.upload(src);
 
             WARMUP_ON;
-            ocl::countNonZero(d_src);
+            gpures = ocl::countNonZero(d_src);
             WARMUP_OFF;
 
+            int diff = abs(cpures - gpures);
+            if(diff == 0)
+                TestSystem::instance().setAccurate(1, 0);
+            else
+                TestSystem::instance().setAccurate(0, diff);
+
             GPU_ON;
             ocl::countNonZero(d_src);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
@@ -760,9 +856,9 @@ TEST(countNonZero)
 }
 
 ///////////// Phase ////////////////////////
-TEST(Phase)
+PERFTEST(Phase)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
 
     int all_type[] = {CV_32FC1};
@@ -778,12 +874,12 @@ TEST(Phase)
             gen(src2, size, size, all_type[j], 0, 256);
             gen(dst, size, size, all_type[j], 0, 256);
 
-
             phase(src1, src2, dst, 1);
 
             CPU_ON;
             phase(src1, src2, dst, 1);
             CPU_OFF;
+
             d_src1.upload(src1);
             d_src2.upload(src2);
 
@@ -793,24 +889,25 @@ TEST(Phase)
 
             GPU_ON;
             ocl::phase(d_src1, d_src2, d_dst, 1);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::phase(d_src1, d_src2, d_dst, 1);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-2);
         }
 
     }
 }
 
 ///////////// bitwise_and////////////////////////
-TEST(bitwise_and)
+PERFTEST(bitwise_and)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
 
     int all_type[] = {CV_8UC1, CV_32SC1};
@@ -826,7 +923,6 @@ TEST(bitwise_and)
             gen(src2, size, size, all_type[j], 0, 256);
             gen(dst, size, size, all_type[j], 0, 256);
 
-
             bitwise_and(src1, src2, dst);
 
             CPU_ON;
@@ -841,120 +937,25 @@ TEST(bitwise_and)
 
             GPU_ON;
             ocl::bitwise_and(d_src1, d_src2, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::bitwise_and(d_src1, d_src2, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
-        }
-
-    }
-}
-
-///////////// bitwise_or////////////////////////
-TEST(bitwise_or)
-{
-    Mat src1, src2, dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
-
-    int all_type[] = {CV_8UC1, CV_32SC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
-
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
-    {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
-
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
 
-
-            bitwise_or(src1, src2, dst);
-
-            CPU_ON;
-            bitwise_or(src1, src2, dst);
-            CPU_OFF;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::bitwise_or(d_src1, d_src2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::bitwise_or(d_src1, d_src2, d_dst);
-             ;
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::bitwise_or(d_src1, d_src2, d_dst);
-            d_dst.download(dst);
-            GPU_FULL_OFF;
-        }
-
-    }
-}
-
-///////////// bitwise_xor////////////////////////
-TEST(bitwise_xor)
-{
-    Mat src1, src2, dst;
-    ocl::oclMat d_src1, d_src2, d_dst;
-
-    int all_type[] = {CV_8UC1, CV_32SC1};
-    std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
-
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
-    {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j];
-
-            gen(src1, size, size, all_type[j], 0, 256);
-            gen(src2, size, size, all_type[j], 0, 256);
-            gen(dst, size, size, all_type[j], 0, 256);
-
-
-            bitwise_xor(src1, src2, dst);
-
-            CPU_ON;
-            bitwise_xor(src1, src2, dst);
-            CPU_OFF;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-
-            WARMUP_ON;
-            ocl::bitwise_xor(d_src1, d_src2, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::bitwise_xor(d_src1, d_src2, d_dst);
-             ;
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src1.upload(src1);
-            d_src2.upload(src2);
-            ocl::bitwise_xor(d_src1, d_src2, d_dst);
-            d_dst.download(dst);
-            GPU_FULL_OFF;
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
         }
 
     }
 }
 
 ///////////// bitwise_not////////////////////////
-TEST(bitwise_not)
+PERFTEST(bitwise_not)
 {
-    Mat src1, dst;
+    Mat src1, dst, ocl_dst;
     ocl::oclMat d_src1, d_dst;
 
     int all_type[] = {CV_8UC1, CV_32SC1};
@@ -969,7 +970,6 @@ TEST(bitwise_not)
             gen(src1, size, size, all_type[j], 0, 256);
             gen(dst, size, size, all_type[j], 0, 256);
 
-
             bitwise_not(src1, dst);
 
             CPU_ON;
@@ -983,23 +983,24 @@ TEST(bitwise_not)
 
             GPU_ON;
             ocl::bitwise_not(d_src1, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             ocl::bitwise_not(d_src1, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
         }
 
     }
 }
 
 ///////////// compare////////////////////////
-TEST(compare)
+PERFTEST(compare)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
 
     int CMP_EQ = 0;
@@ -1016,12 +1017,12 @@ TEST(compare)
             gen(src2, size, size, all_type[j], 0, 256);
             gen(dst, size, size, all_type[j], 0, 256);
 
-
             compare(src1, src2, dst, CMP_EQ);
 
             CPU_ON;
             compare(src1, src2, dst, CMP_EQ);
             CPU_OFF;
+
             d_src1.upload(src1);
             d_src2.upload(src2);
 
@@ -1031,24 +1032,25 @@ TEST(compare)
 
             GPU_ON;
             ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
         }
 
     }
 }
 
 ///////////// pow ////////////////////////
-TEST(pow)
+PERFTEST(pow)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int all_type[] = {CV_32FC1};
@@ -1060,8 +1062,7 @@ TEST(pow)
         {
             SUBTEST << size << 'x' << size << "; " << type_name[j] ;
 
-            gen(src, size, size, all_type[j], 0, 100);
-            gen(dst, size, size, all_type[j], 0, 100);
+            gen(src, size, size, all_type[j], 5, 16);
 
             pow(src, -2.0, dst);
 
@@ -1077,23 +1078,24 @@ TEST(pow)
 
             GPU_ON;
             ocl::pow(d_src, -2.0, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::pow(d_src, -2.0, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
         }
 
     }
 }
 
 ///////////// MagnitudeSqr////////////////////////
-TEST(MagnitudeSqr)
+PERFTEST(MagnitudeSqr)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
 
     int all_type[] = {CV_32FC1};
@@ -1109,31 +1111,17 @@ TEST(MagnitudeSqr)
             gen(src2, size, size, all_type[t], 0, 256);
             gen(dst, size, size, all_type[t], 0, 256);
 
-
-            for (int i = 0; i < src1.rows; ++i)
-
-                for (int j = 0; j < src1.cols; ++j)
-                {
-                    float val1 = src1.at<float>(i, j);
-                    float val2 = src2.at<float>(i, j);
-
-                    ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
-
-                }
-
             CPU_ON;
-
             for (int i = 0; i < src1.rows; ++i)
                 for (int j = 0; j < src1.cols; ++j)
                 {
                     float val1 = src1.at<float>(i, j);
                     float val2 = src2.at<float>(i, j);
-
                     ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
 
                 }
-
             CPU_OFF;
+
             d_src1.upload(src1);
             d_src2.upload(src2);
 
@@ -1143,24 +1131,25 @@ TEST(MagnitudeSqr)
 
             GPU_ON;
             ocl::magnitudeSqr(d_src1, d_src2, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::magnitudeSqr(d_src1, d_src2, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
         }
 
     }
 }
 
 ///////////// AddWeighted////////////////////////
-TEST(AddWeighted)
+PERFTEST(AddWeighted)
 {
-    Mat src1, src2, dst;
+    Mat src1, src2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_dst;
 
     double alpha = 2.0, beta = 1.0, gama = 3.0;
@@ -1192,15 +1181,16 @@ TEST(AddWeighted)
 
             GPU_ON;
             ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             d_src2.upload(src2);
             ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
         }
 
     }
index 0003470..8ebb648 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -68,9 +69,9 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we
         }
     }
 }
-TEST(blend)
+PERFTEST(blend)
 {
-    Mat src1, src2, weights1, weights2, dst;
+    Mat src1, src2, weights1, weights2, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
@@ -104,7 +105,6 @@ TEST(blend)
 
             GPU_ON;
             ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
@@ -113,8 +113,10 @@ TEST(blend)
             d_weights1.upload(weights1);
             d_weights2.upload(weights2);
             ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f);
         }
     }
 }
\ No newline at end of file
index 6562f91..406b46a 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"
 
 //////////////////// BruteForceMatch /////////////////
-TEST(BruteForceMatcher)
+PERFTEST(BruteForceMatcher)
 {
     Mat trainIdx_cpu;
     Mat distance_cpu;
@@ -66,6 +67,7 @@ TEST(BruteForceMatcher)
         gen(train, size, desc_len, CV_32F, 0, 1);
         // Output
         vector< vector<DMatch> > matches(2);
+        vector< vector<DMatch> > d_matches(2);
         // Init GPU matcher
         ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
 
@@ -88,15 +90,20 @@ TEST(BruteForceMatcher)
 
         GPU_ON;
         d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
         d_query.upload(query);
         d_train.upload(train);
-        d_matcher.match(d_query, d_train, matches[0]);
+        d_matcher.match(d_query, d_train, d_matches[0]);
         GPU_FULL_OFF;
 
+        int diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+        if(diff == 0)
+            TestSystem::instance().setAccurate(1, 0);
+        else
+            TestSystem::instance().setAccurate(0, diff);
+
         SUBTEST << size << "; knnMatch";
 
         matcher.knnMatch(query, train, matches, 2);
@@ -111,15 +118,20 @@ TEST(BruteForceMatcher)
 
         GPU_ON;
         d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
         d_query.upload(query);
         d_train.upload(train);
-        d_matcher.knnMatch(d_query, d_train, matches, 2);
+        d_matcher.knnMatch(d_query, d_train, d_matches, 2);
         GPU_FULL_OFF;
 
+        diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+        if(diff == 0)
+            TestSystem::instance().setAccurate(1, 0);
+        else
+            TestSystem::instance().setAccurate(0, diff);
+
         SUBTEST << size << "; radiusMatch";
 
         float max_distance = 2.0f;
@@ -138,13 +150,18 @@ TEST(BruteForceMatcher)
 
         GPU_ON;
         d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
         d_query.upload(query);
         d_train.upload(train);
-        d_matcher.radiusMatch(d_query, d_train, matches, max_distance);
+        d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance);
         GPU_FULL_OFF;
+
+        diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+        if(diff == 0)
+            TestSystem::instance().setAccurate(1, 0);
+        else
+            TestSystem::instance().setAccurate(0, diff);
     }
 }
\ No newline at end of file
index 428e036..cb23d7a 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"
 
 ///////////// Canny ////////////////////////
-TEST(Canny)
+PERFTEST(Canny)
 {
     Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);
 
@@ -56,7 +57,7 @@ TEST(Canny)
 
     SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";
 
-    Mat edges(img.size(), CV_8UC1);
+    Mat edges(img.size(), CV_8UC1), ocl_edges;
 
     CPU_ON;
     Canny(img, edges, 50.0, 100.0);
@@ -72,12 +73,13 @@ TEST(Canny)
 
     GPU_ON;
     ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
-     ;
     GPU_OFF;
 
     GPU_FULL_ON;
     d_img.upload(img);
     ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
-    d_edges.download(edges);
+    d_edges.download(ocl_edges);
     GPU_FULL_OFF;
+
+    TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2);
 }
\ No newline at end of file
index e32a183..daf1cfd 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 #include "precomp.hpp"
 
 ///////////// cvtColor////////////////////////
-TEST(cvtColor)
+PERFTEST(cvtColor)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int all_type[] = {CV_8UC4};
@@ -74,14 +75,15 @@ TEST(cvtColor)
 
             GPU_ON;
             ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExceptedMatSimilar(dst, ocl_dst, 1e-5);
         }
 
 
index d2e3b45..ff7ebcd 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 #include "precomp.hpp"
 
 ///////////// columnSum////////////////////////
-TEST(columnSum)
+PERFTEST(columnSum)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
@@ -58,31 +59,30 @@ TEST(columnSum)
 
         CPU_ON;
         dst.create(src.size(), src.type());
+        for (int j = 0; j < src.cols; j++)
+            dst.at<float>(0, j) = src.at<float>(0, j);
 
         for (int i = 1; i < src.rows; ++i)
-        {
             for (int j = 0; j < src.cols; ++j)
-            {
-                dst.at<float>(i, j) = src.at<float>(i, j) += src.at<float>(i - 1, j);
-            }
-        }
-
+                dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
         CPU_OFF;
 
         d_src.upload(src);
+
         WARMUP_ON;
         ocl::columnSum(d_src, d_dst);
         WARMUP_OFF;
 
         GPU_ON;
         ocl::columnSum(d_src, d_dst);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
         d_src.upload(src);
         ocl::columnSum(d_src, d_dst);
-        d_dst.download(dst);
+        d_dst.download(ocl_dst);
         GPU_FULL_OFF;
+
+        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1);
     }
 }
\ No newline at end of file
index 50be254..6e0be3f 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 #include "precomp.hpp"
 
 ///////////// dft ////////////////////////
-TEST(dft)
+PERFTEST(dft)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
-    int all_type[] = {CV_32FC1, CV_32FC2};
-    std::string type_name[] = {"CV_32FC1", "CV_32FC2"};
+    int all_type[] = {CV_32FC2};
+    std::string type_name[] = {"CV_32FC2"};
 
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
     {
@@ -75,14 +76,15 @@ TEST(dft)
 
             GPU_ON;
             ocl::dft(d_src, d_dst, Size(size, size));
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::dft(d_src, d_dst, Size(size, size));
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, src.size().area() * 1e-4);
         }
 
     }
index e9646c7..a05301b 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 #include "precomp.hpp"
 
 ///////////// Blur////////////////////////
-TEST(Blur)
+PERFTEST(Blur)
 {
-    Mat src1, dst;
+    Mat src1, dst, ocl_dst;
     ocl::oclMat d_src1, d_dst;
 
     Size ksize = Size(3, 3);
@@ -64,7 +65,6 @@ TEST(Blur)
             gen(src1, size, size, all_type[j], 0, 256);
             gen(dst, size, size, all_type[j], 0, 256);
 
-
             blur(src1, dst, ksize, Point(-1, -1), bordertype);
 
             CPU_ON;
@@ -79,22 +79,23 @@ TEST(Blur)
 
             GPU_ON;
             ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
         }
 
     }
 }
 ///////////// Laplacian////////////////////////
-TEST(Laplacian)
+PERFTEST(Laplacian)
 {
-    Mat src1, dst;
+    Mat src1, dst, ocl_dst;
     ocl::oclMat d_src1, d_dst;
 
     int ksize = 3;
@@ -110,7 +111,6 @@ TEST(Laplacian)
             gen(src1, size, size, all_type[j], 0, 256);
             gen(dst, size, size, all_type[j], 0, 256);
 
-
             Laplacian(src1, dst, -1, ksize, 1);
 
             CPU_ON;
@@ -125,23 +125,24 @@ TEST(Laplacian)
 
             GPU_ON;
             ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src1.upload(src1);
             ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
         }
 
     }
 }
 
 ///////////// Erode ////////////////////
-TEST(Erode)
+PERFTEST(Erode)
 {
-    Mat src, dst, ker;
+    Mat src, dst, ker, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
@@ -170,23 +171,24 @@ TEST(Erode)
 
             GPU_ON;
             ocl::erode(d_src, d_dst, ker);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::erode(d_src, d_dst, ker);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
         }
 
     }
 }
 
 ///////////// Sobel ////////////////////////
-TEST(Sobel)
+PERFTEST(Sobel)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int dx = 1;
@@ -216,22 +218,23 @@ TEST(Sobel)
 
             GPU_ON;
             ocl::Sobel(d_src, d_dst, -1, dx, dy);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::Sobel(d_src, d_dst, -1, dx, dy);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
         }
 
     }
 }
 ///////////// Scharr ////////////////////////
-TEST(Scharr)
+PERFTEST(Scharr)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int dx = 1;
@@ -261,23 +264,24 @@ TEST(Scharr)
 
             GPU_ON;
             ocl::Scharr(d_src, d_dst, -1, dx, dy);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::Scharr(d_src, d_dst, -1, dx, dy);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
         }
 
     }
 }
 
 ///////////// GaussianBlur ////////////////////////
-TEST(GaussianBlur)
+PERFTEST(GaussianBlur)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
     std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
 
@@ -287,7 +291,7 @@ TEST(GaussianBlur)
         {
             SUBTEST << size << 'x' << size << "; " << type_name[j] ;
 
-            gen(src, size, size, all_type[j], 0, 256);
+            gen(src, size, size, all_type[j], 5, 16);
 
             GaussianBlur(src, dst, Size(9, 9), 0);
 
@@ -305,21 +309,22 @@ TEST(GaussianBlur)
 
             GPU_ON;
             ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
         }
 
     }
 }
 
 ///////////// filter2D////////////////////////
-TEST(filter2D)
+PERFTEST(filter2D)
 {
     Mat src;
 
@@ -332,38 +337,38 @@ TEST(filter2D)
         {
             gen(src, size, size, all_type[j], 0, 256);
 
-            for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1)
-            {
-                SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ;
+            const int ksize = 3;
 
-                Mat kernel;
-                gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
+            SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ;
 
-                Mat dst;
-                cv::filter2D(src, dst, -1, kernel);
+            Mat kernel;
+            gen(kernel, ksize, ksize, CV_32SC1, -3.0, 3.0);
 
-                CPU_ON;
-                cv::filter2D(src, dst, -1, kernel);
-                CPU_OFF;
+            Mat dst, ocl_dst;
 
-                ocl::oclMat d_src(src);
-                ocl::oclMat d_dst;
+            cv::filter2D(src, dst, -1, kernel);
 
-                WARMUP_ON;
-                ocl::filter2D(d_src, d_dst, -1, kernel);
-                WARMUP_OFF;
+            CPU_ON;
+            cv::filter2D(src, dst, -1, kernel);
+            CPU_OFF;
 
-                GPU_ON;
-                ocl::filter2D(d_src, d_dst, -1, kernel);
-                 ;
-                GPU_OFF;
+            ocl::oclMat d_src(src), d_dst;
+
+            WARMUP_ON;
+            ocl::filter2D(d_src, d_dst, -1, kernel);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::filter2D(d_src, d_dst, -1, kernel);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::filter2D(d_src, d_dst, -1, kernel);
+            d_dst.download(ocl_dst);
+            GPU_FULL_OFF;
 
-                GPU_FULL_ON;
-                d_src.upload(src);
-                ocl::filter2D(d_src, d_dst, -1, kernel);
-                d_dst.download(dst);
-                GPU_FULL_OFF;
-            }
+            TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
 
         }
 
index 930ecb0..f197c5f 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 #include "precomp.hpp"
 
 ///////////// gemm ////////////////////////
-TEST(gemm)
+PERFTEST(gemm)
 {
-    Mat src1, src2, src3, dst;
+    Mat src1, src2, src3, dst, ocl_dst;
     ocl::oclMat d_src1, d_src2, d_src3, d_dst;
 
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
@@ -74,7 +75,6 @@ TEST(gemm)
 
         GPU_ON;
         ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
@@ -82,7 +82,9 @@ TEST(gemm)
         d_src2.upload(src2);
         d_src3.upload(src3);
         ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
-        d_dst.download(dst);
+        d_dst.download(ocl_dst);
         GPU_FULL_OFF;
+
+        TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4);
     }
 }
\ No newline at end of file
index 5a909ac..72f01dc 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -82,7 +83,7 @@ public:
 
 }
 }
-TEST(Haar)
+PERFTEST(Haar)
 {
     Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);
 
@@ -106,6 +107,8 @@ TEST(Haar)
                                     1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
     CPU_OFF;
 
+
+    vector<Rect> oclfaces;
     ocl::CascadeClassifier_GPU faceCascade;
 
     if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml")))
@@ -115,24 +118,26 @@ TEST(Haar)
 
     ocl::oclMat d_img(img);
 
-    faces.clear();
-
     WARMUP_ON;
-    faceCascade.detectMultiScale(d_img, faces,
+    faceCascade.detectMultiScale(d_img, oclfaces,
                                  1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
     WARMUP_OFF;
 
+    if(faces.size() == oclfaces.size())
+        TestSystem::instance().setAccurate(1, 0);
+    else
+        TestSystem::instance().setAccurate(0, abs((int)faces.size() - (int)oclfaces.size()));
+
     faces.clear();
 
     GPU_ON;
-    faceCascade.detectMultiScale(d_img, faces,
+    faceCascade.detectMultiScale(d_img, oclfaces,
                                  1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
-     ;
     GPU_OFF;
 
     GPU_FULL_ON;
     d_img.upload(img);
-    faceCascade.detectMultiScale(d_img, faces,
+    faceCascade.detectMultiScale(d_img, oclfaces,
                                  1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
     GPU_FULL_OFF;
 }
\ No newline at end of file
index b74077f..0509381 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 #include "precomp.hpp"
 
 ///////////// HOG////////////////////////
-TEST(HOG)
+bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)  // true when r1 and r2 differ by strictly less than `threshold` in each of x, y, width and height
+{
+    return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) &&  // x / y offsets of the two rectangles
+        (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold));  // width / height differences; all four tests must pass
+}
+
+PERFTEST(HOG)
 {
     Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE);
 
@@ -58,6 +65,7 @@ TEST(HOG)
     cv::HOGDescriptor hog;
     hog.setSVMDetector(hog.getDefaultPeopleDetector());
     std::vector<cv::Rect> found_locations;
+    std::vector<cv::Rect> d_found_locations;
 
     SUBTEST << 768 << 'x' << 576 << "; road.png";
 
@@ -73,12 +81,76 @@ TEST(HOG)
     d_src.upload(src);
 
     WARMUP_ON;
-    ocl_hog.detectMultiScale(d_src, found_locations);
+    ocl_hog.detectMultiScale(d_src, d_found_locations);
     WARMUP_OFF;
+    
+    // Ground-truth rectangular people window
+    cv::Rect win1_64x128(231, 190, 72, 144);
+    cv::Rect win2_64x128(621, 156, 97, 194);
+    cv::Rect win1_48x96(238, 198, 63, 126);
+    cv::Rect win2_48x96(619, 161, 92, 185);
+    cv::Rect win3_48x96(488, 136, 56, 112);
+
+    // Compare whether ground-truth windows are detected and compare the number of windows detected.
+    std::vector<int> d_comp(4);
+    std::vector<int> comp(4);
+    for(int i = 0; i < (int)d_comp.size(); i++)
+    {
+        d_comp[i] = 0;
+        comp[i] = 0;
+    }
+
+    int threshold = 10;
+    int val = 32;
+    d_comp[0] = (int)d_found_locations.size();
+    comp[0] = (int)found_locations.size();
+
+    cv::Size winSize = hog.winSize;
+
+    if (winSize == cv::Size(48, 96))
+    {
+        for(int i = 0; i < (int)d_found_locations.size(); i++)
+        {
+            if (match_rect(d_found_locations[i], win1_48x96, threshold))
+                d_comp[1] = val;
+            if (match_rect(d_found_locations[i], win2_48x96, threshold))
+                d_comp[2] = val;
+            if (match_rect(d_found_locations[i], win3_48x96, threshold))
+                d_comp[3] = val;
+        }
+        for(int i = 0; i < (int)found_locations.size(); i++)
+        {
+            if (match_rect(found_locations[i], win1_48x96, threshold))
+                comp[1] = val;
+            if (match_rect(found_locations[i], win2_48x96, threshold))
+                comp[2] = val;
+            if (match_rect(found_locations[i], win3_48x96, threshold))
+                comp[3] = val;
+        }
+    }
+    else if (winSize == cv::Size(64, 128))
+    {
+        for(int i = 0; i < (int)d_found_locations.size(); i++)
+        {
+            if (match_rect(d_found_locations[i], win1_64x128, threshold))
+                d_comp[1] = val;
+            if (match_rect(d_found_locations[i], win2_64x128, threshold))
+                d_comp[2] = val;
+        }
+        for(int i = 0; i < (int)found_locations.size(); i++)
+        {
+            if (match_rect(found_locations[i], win1_64x128, threshold))
+                comp[1] = val;
+            if (match_rect(found_locations[i], win2_64x128, threshold))
+                comp[2] = val;
+        }
+    }
+
+    cv::Mat gpu_rst(d_comp), cpu_rst(comp);
+    TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3);
 
     GPU_ON;
     ocl_hog.detectMultiScale(d_src, found_locations);
-     ;
     GPU_OFF;
 
     GPU_FULL_ON;
index 756f695..e87e821 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 #include "precomp.hpp"
 
 ///////////// equalizeHist ////////////////////////
-TEST(equalizeHist)
+PERFTEST(equalizeHist)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     int all_type[] = {CV_8UC1};
     std::string type_name[] = {"CV_8UC1"};
 
@@ -76,22 +77,23 @@ TEST(equalizeHist)
 
             GPU_ON;
             ocl::equalizeHist(d_src, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::equalizeHist(d_src, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.1);
         }
 
     }
 }
 /////////// CopyMakeBorder //////////////////////
-TEST(CopyMakeBorder)
+PERFTEST(CopyMakeBorder)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_dst;
 
     int bordertype = BORDER_CONSTANT;
@@ -121,22 +123,23 @@ TEST(CopyMakeBorder)
 
             GPU_ON;
             ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
         }
 
     }
 }
 ///////////// cornerMinEigenVal ////////////////////////
-TEST(cornerMinEigenVal)
+PERFTEST(cornerMinEigenVal)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_dst;
 
     int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4);
@@ -150,7 +153,6 @@ TEST(cornerMinEigenVal)
         {
             SUBTEST << size << 'x' << size << "; " << type_name[j] ;
 
-
             gen(src, size, size, all_type[j], 0, 256);
 
             cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
@@ -167,22 +169,23 @@ TEST(cornerMinEigenVal)
 
             GPU_ON;
             ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
         }
 
     }
 }
 ///////////// cornerHarris ////////////////////////
-TEST(cornerHarris)
+PERFTEST(cornerHarris)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int all_type[] = {CV_8UC1, CV_32FC1};
@@ -210,23 +213,24 @@ TEST(cornerHarris)
 
             GPU_ON;
             ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
         }
 
 
     }
 }
 ///////////// integral ////////////////////////
-TEST(integral)
+PERFTEST(integral)
 {
-    Mat src, sum;
+    Mat src, sum, ocl_sum;
     ocl::oclMat d_src, d_sum, d_buf;
 
     int all_type[] = {CV_8UC1};
@@ -254,28 +258,31 @@ TEST(integral)
 
             GPU_ON;
             ocl::integral(d_src, d_sum);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::integral(d_src, d_sum);
-            d_sum.download(sum);
+            d_sum.download(ocl_sum);
             GPU_FULL_OFF;
+
+            if(sum.type() == ocl_sum.type()) //we won't test accuracy when the cpu function overflows
+                TestSystem::instance().ExpectedMatNear(sum, ocl_sum, 0.0);
+
         }
 
     }
 }
 ///////////// WarpAffine ////////////////////////
-TEST(WarpAffine)
+PERFTEST(WarpAffine)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     static const double coeffs[2][3] =
     {
-        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
-        {sin(3.14 / 6), cos(3.14 / 6), -100.0}
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0}
     };
     Mat M(2, 3, CV_64F, (void *)coeffs);
     int interpolation = INTER_NEAREST;
@@ -308,32 +315,33 @@ TEST(WarpAffine)
 
             GPU_ON;
             ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
         }
 
     }
 }
 ///////////// WarpPerspective ////////////////////////
-TEST(WarpPerspective)
+PERFTEST(WarpPerspective)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     static const double coeffs[3][3] =
     {
-        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
-        {sin(3.14 / 6), cos(3.14 / 6), -100.0},
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
         {0.0, 0.0, 1.0}
     };
     Mat M(3, 3, CV_64F, (void *)coeffs);
-    int interpolation = INTER_NEAREST;
+    int interpolation = INTER_LINEAR;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
     std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
@@ -362,23 +370,24 @@ TEST(WarpPerspective)
 
             GPU_ON;
             ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
         }
 
     }
 }
 
 ///////////// resize ////////////////////////
-TEST(resize)
+PERFTEST(resize)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
 
@@ -407,14 +416,15 @@ TEST(resize)
 
             GPU_ON;
             ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
         }
 
     }
@@ -441,25 +451,25 @@ TEST(resize)
 
             GPU_ON;
             ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
         }
 
     }
 }
 ///////////// threshold////////////////////////
-TEST(threshold)
+PERFTEST(threshold)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
-
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
     {
         SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY";
@@ -480,15 +490,15 @@ TEST(threshold)
 
         GPU_ON;
         ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
         d_src.upload(src);
         ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
-        d_dst.download(dst);
+        d_dst.download(ocl_dst);
         GPU_FULL_OFF;
 
+        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
     }
 
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
@@ -511,57 +521,18 @@ TEST(threshold)
 
         GPU_ON;
         ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
-         ;
         GPU_OFF;
 
         GPU_FULL_ON;
         d_src.upload(src);
         ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
-        d_dst.download(dst);
+        d_dst.download(ocl_dst);
         GPU_FULL_OFF;
-    }
-}
-///////////// meanShiftFiltering////////////////////////
-TEST(meanShiftFiltering)
-{
-    int sp = 10, sr = 10;
-    Mat src, dst;
-
-    ocl::oclMat d_src, d_dst;
-
-    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
-    {
-        SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4";
-
-        gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256));
 
-        pyrMeanShiftFiltering(src, dst, sp, sr);
-
-        CPU_ON;
-        pyrMeanShiftFiltering(src, dst, sp, sr);
-        CPU_OFF;
-
-        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
-
-        d_src.upload(src);
-
-        WARMUP_ON;
-        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
-        WARMUP_OFF;
-
-        GPU_ON;
-        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
-         ;
-        GPU_OFF;
-
-        GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
-        d_dst.download(dst);
-        GPU_FULL_OFF;
+        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
     }
 }
-///////////// meanShiftProc////////////////////////
+///////////// meanShiftFiltering////////////////////////
 COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
 {
 
@@ -575,9 +546,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
     c1 = sptr[1];
     c2 = sptr[2];
     c3 = sptr[3];
-
     // iterate meanshift procedure
-    for (iter = 0; iter < maxIter; iter++)
+    for(iter = 0; iter < maxIter; iter++ )
     {
         int count = 0;
         int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
@@ -589,27 +559,11 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
         int maxy = y0 + sp;
 
         //deal with the image boundary
-        if (minx < 0)
-        {
-            minx = 0;
-        }
-
-        if (miny < 0)
-        {
-            miny = 0;
-        }
-
-        if (maxx >= size.width)
-        {
-            maxx = size.width - 1;
-        }
-
-        if (maxy >= size.height)
-        {
-            maxy = size.height - 1;
-        }
-
-        if (iter == 0)
+        if(minx < 0) minx = 0;
+        if(miny < 0) miny = 0;
+        if(maxx >= size.width) maxx = size.width - 1;
+        if(maxy >= size.height) maxy = size.height - 1;
+        if(iter == 0)
         {
             pstart = sptr;
         }
@@ -617,22 +571,19 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
         {
             pstart = pstart + revy * sstep + (revx << 2); //point to the new position
         }
-
         ptr = pstart;
         ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
 
-        for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
+        for(int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
         {
             int rowCount = 0;
             int x = minx;
 #if CV_ENABLE_UNROLLED
-
-            for (; x + 4 <= maxx; x += 4, ptr += 16)
+            for( ; x + 4 <= maxx; x += 4, ptr += 16)
             {
                 int t0, t1, t2;
                 t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
-
-                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                 {
                     s0 += t0;
                     s1 += t1;
@@ -640,10 +591,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
                     sx += x;
                     rowCount++;
                 }
-
                 t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
-
-                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                 {
                     s0 += t0;
                     s1 += t1;
@@ -651,10 +600,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
                     sx += x + 1;
                     rowCount++;
                 }
-
                 t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
-
-                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                 {
                     s0 += t0;
                     s1 += t1;
@@ -662,10 +609,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
                     sx += x + 2;
                     rowCount++;
                 }
-
                 t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
-
-                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                 {
                     s0 += t0;
                     s1 += t1;
@@ -674,14 +619,11 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
                     rowCount++;
                 }
             }
-
 #endif
-
-            for (; x <= maxx; x++, ptr += 4)
+            for(; x <= maxx; x++, ptr += 4)
             {
                 int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
-
-                if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
                 {
                     s0 += t0;
                     s1 += t1;
@@ -690,20 +632,14 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
                     rowCount++;
                 }
             }
-
-            if (rowCount == 0)
-            {
+            if(rowCount == 0)
                 continue;
-            }
-
             count += rowCount;
             sy += y * rowCount;
         }
 
-        if (count == 0)
-        {
+        if( count == 0 )
             break;
-        }
 
         int x1 = sx / count;
         int y1 = sy / count;
@@ -712,7 +648,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
         s2 = s2 / count;
 
         bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
-                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
+            tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
 
         //revise the pointer corresponding to the new (y0,x0)
         revx = x1 - x0;
@@ -724,10 +660,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
         c1 = s1;
         c2 = s2;
 
-        if (stopFlag)
-        {
+        if( stopFlag )
             break;
-        }
     } //for iter
 
     dptr[0] = (uchar)c0;
@@ -741,19 +675,101 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
     return coor;
 }
 
-void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
+static void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit)
 {
+    if( src_roi.empty() )
+        CV_Error( CV_StsBadArg, "The input image is empty" );
 
+    if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
+        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
+
+    dst_roi.create(src_roi.size(), src_roi.type());
+
+    CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
+    CV_Assert( !(dst_roi.step & 0x3) );
+
+    if( !(crit.type & cv::TermCriteria::MAX_ITER) )
+        crit.maxCount = 5;
+    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
+    float eps;
+    if( !(crit.type & cv::TermCriteria::EPS) )
+        eps = 1.f;
+    eps = (float)std::max(crit.epsilon, 0.0);
+
+    int tab[512];
+    for(int i = 0; i < 512; i++)
+        tab[i] = (i - 255) * (i - 255);
+    uchar *sptr = src_roi.data;
+    uchar *dptr = dst_roi.data;
+    int sstep = (int)src_roi.step;
+    int dstep = (int)dst_roi.step;
+    cv::Size size = src_roi.size();
+
+    for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
+        dptr += dstep - (size.width << 2))
+    {
+        for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4)
+        {
+            do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
+        }
+    }
+}
+
+PERFTEST(meanShiftFiltering)
+{
+    int sp = 5, sr = 6;
+    Mat src, dst, ocl_dst;
+
+    ocl::oclMat d_src, d_dst;
+
+    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    {
+        SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4";
+
+        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
+
+        cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1);
+
+        meanShiftFiltering_(src, dst, sp, sr, crit);
+
+        CPU_ON;
+        meanShiftFiltering_(src, dst, sp, sr, crit);
+        CPU_OFF;
+
+        d_src.upload(src);
+
+        WARMUP_ON;
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
+        GPU_OFF;
+
+        GPU_FULL_ON;
+        d_src.upload(src);
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
+        d_dst.download(ocl_dst);
+        GPU_FULL_OFF;
+
+        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
+    }
+}
+
+void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
+{
     if (src_roi.empty())
     {
         CV_Error(CV_StsBadArg, "The input image is empty");
     }
-
     if (src_roi.depth() != CV_8U || src_roi.channels() != 4)
     {
         CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
     }
 
+    dst_roi.create(src_roi.size(), src_roi.type());
+    dstCoor_roi.create(src_roi.size(), CV_16SC2);
+
     CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
               (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
     CV_Assert(!(dstCoor_roi.step & 0x3));
@@ -798,10 +814,11 @@ void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp,
     }
 
 }
-TEST(meanShiftProc)
+PERFTEST(meanShiftProc)
 {
-    Mat src, dst, dstCoor_roi;
-    ocl::oclMat d_src, d_dst, d_dstCoor_roi;
+    Mat src;
+    vector<Mat> dst(2), ocl_dst(2);
+    ocl::oclMat d_src, d_dst, d_dstCoor;
 
     TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1);
 
@@ -810,40 +827,39 @@ TEST(meanShiftProc)
         SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 ";
 
         gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
-        gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
-        gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256));
 
-        meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
+        meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
 
         CPU_ON;
-        meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
+        meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
         CPU_OFF;
 
         d_src.upload(src);
 
         WARMUP_ON;
-        ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
+        ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
         WARMUP_OFF;
 
         GPU_ON;
-        ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
-         ;
+        ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
         GPU_OFF;
 
         GPU_FULL_ON;
         d_src.upload(src);
-        ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
-        d_dst.download(dst);
-        d_dstCoor_roi.download(dstCoor_roi);
+        ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
+        d_dst.download(ocl_dst[0]);
+        d_dstCoor.download(ocl_dst[1]);
         GPU_FULL_OFF;
 
+        vector<double> eps(2, 0.);
+        TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);      
     }
 }
 
 ///////////// remap////////////////////////
-TEST(remap)
+PERFTEST(remap)
 {
-    Mat src, dst, xmap, ymap;
+    Mat src, dst, xmap, ymap, ocl_dst;
     ocl::oclMat d_src, d_dst, d_xmap, d_ymap;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
@@ -876,7 +892,6 @@ TEST(remap)
                 }
             }
 
-
             remap(src, dst, xmap, ymap, interpolation, borderMode);
 
             CPU_ON;
@@ -894,15 +909,63 @@ TEST(remap)
 
             GPU_ON;
             ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 2.0);
         }
 
     }
-}
\ No newline at end of file
+}
+///////////// CLAHE ////////////////////////
+PERFTEST(CLAHE)
+{
+    Mat src, dst, ocl_dst;
+    cv::ocl::oclMat d_src, d_dst;
+    int all_type[] = {CV_8UC1};
+    std::string type_name[] = {"CV_8UC1"};
+
+    double clipLimit = 40.0;
+
+    cv::Ptr<cv::CLAHE>      clahe   = cv::createCLAHE(clipLimit);
+    cv::Ptr<cv::ocl::CLAHE> d_clahe = cv::ocl::createCLAHE(clipLimit);
+
+    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            CPU_ON;
+            clahe->apply(src, dst);
+            CPU_OFF;
+
+            d_src.upload(src);
+
+            WARMUP_ON;
+            d_clahe->apply(d_src, d_dst);
+            WARMUP_OFF;
+
+            ocl_dst = d_dst;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
+
+            GPU_ON;
+            d_clahe->apply(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            d_clahe->apply(d_src, d_dst);
+            d_dst.download(dst);
+            GPU_FULL_OFF;
+        }
+    }
+}
index 2828efe..5da15aa 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //     ocl::oclMat d_src(src), d_templ(templ), d_dst;
 //     ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
 //}
-TEST(matchTemplate)
+PERFTEST(matchTemplate)
 {
     //InitMatchTemplate();
-
-    Mat src, templ, dst;
+    Mat src, templ, dst, ocl_dst;
     int templ_size = 5;
 
-
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
     {
         int all_type[] = {CV_32FC1, CV_32FC4};
@@ -81,9 +80,7 @@ TEST(matchTemplate)
                 matchTemplate(src, templ, dst, CV_TM_CCORR);
                 CPU_OFF;
 
-                ocl::oclMat d_src(src), d_templ, d_dst;
-
-                d_templ.upload(templ);
+                ocl::oclMat d_src(src), d_templ(templ), d_dst;
 
                 WARMUP_ON;
                 ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
@@ -91,15 +88,16 @@ TEST(matchTemplate)
 
                 GPU_ON;
                 ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
-                 ;
                 GPU_OFF;
 
                 GPU_FULL_ON;
                 d_src.upload(src);
                 d_templ.upload(templ);
                 ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
-                d_dst.download(dst);
+                d_dst.download(ocl_dst);
                 GPU_FULL_OFF;
+
+                TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
             }
         }
 
@@ -131,15 +129,16 @@ TEST(matchTemplate)
 
                 GPU_ON;
                 ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
-                 ;
                 GPU_OFF;
 
                 GPU_FULL_ON;
                 d_src.upload(src);
                 d_templ.upload(templ);
                 ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
-                d_dst.download(dst);
+                d_dst.download(ocl_dst);
                 GPU_FULL_OFF;
+
+                TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
             }
         }
     }
index 495b2b8..b724cdb 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 #include "precomp.hpp"
 
 ///////////// ConvertTo////////////////////////
-TEST(ConvertTo)
+PERFTEST(ConvertTo)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
@@ -78,22 +79,23 @@ TEST(ConvertTo)
 
             GPU_ON;
             d_src.convertTo(d_dst, CV_32FC1);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             d_src.convertTo(d_dst, CV_32FC1);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
         }
 
     }
 }
 ///////////// copyTo////////////////////////
-TEST(copyTo)
+PERFTEST(copyTo)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     ocl::oclMat d_src, d_dst;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
@@ -124,24 +126,25 @@ TEST(copyTo)
 
             GPU_ON;
             d_src.copyTo(d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             d_src.copyTo(d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
         }
 
     }
 }
 ///////////// setTo////////////////////////
-TEST(setTo)
+PERFTEST(setTo)
 {
-    Mat src, dst;
+    Mat src, ocl_src;
     Scalar val(1, 2, 3, 4);
-    ocl::oclMat d_src, d_dst;
+    ocl::oclMat d_src;
 
     int all_type[] = {CV_8UC1, CV_8UC4};
     std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
@@ -166,9 +169,11 @@ TEST(setTo)
             d_src.setTo(val);
             WARMUP_OFF;
 
-            GPU_ON;
+            d_src.download(ocl_src);
+            TestSystem::instance().ExpectedMatNear(src, ocl_src, 1.0);
+
+            GPU_ON;;
             d_src.setTo(val);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
index 8b7118a..1d986c8 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 #include "precomp.hpp"
 
 ///////////// norm////////////////////////
-TEST(norm)
+PERFTEST(norm)
 {
-    Mat src, buf;
-    ocl::oclMat d_src, d_buf;
-
+    Mat src1, src2, ocl_src1;
+    ocl::oclMat d_src1, d_src2;
 
     for (int size = Min_Size; size <= Max_Size; size *= Multiple)
     {
         SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";
 
-        gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
-        gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+        gen(src1, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+        gen(src2, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
 
-        norm(src, NORM_INF);
+        norm(src1, src2, NORM_INF);
 
         CPU_ON;
-        norm(src, NORM_INF);
+        norm(src1, src2, NORM_INF);
         CPU_OFF;
 
-        d_src.upload(src);
-        d_buf.upload(buf);
+        d_src1.upload(src1);
+        d_src2.upload(src2);
 
         WARMUP_ON;
-        ocl::norm(d_src, d_buf, NORM_INF);
+        ocl::norm(d_src1, d_src2, NORM_INF);
         WARMUP_OFF;
 
+        d_src1.download(ocl_src1);
+        TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);                        
+
         GPU_ON;
-        ocl::norm(d_src, d_buf, NORM_INF);
-         ;
+        ocl::norm(d_src1, d_src2, NORM_INF);
         GPU_OFF;
 
         GPU_FULL_ON;
-        d_src.upload(src);
-        ocl::norm(d_src, d_buf, NORM_INF);
+        d_src1.upload(src1);
+        d_src2.upload(src2);
+        ocl::norm(d_src1, d_src2, NORM_INF);
         GPU_FULL_OFF;
     }
 }
\ No newline at end of file
similarity index 61%
rename from modules/ocl/perf/perf_pyrlk.cpp
rename to modules/ocl/perf/perf_opticalflow.cpp
index f7fc22b..97283b2 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 #include "precomp.hpp"
 
 ///////////// PyrLKOpticalFlow ////////////////////////
-TEST(PyrLKOpticalFlow)
+PERFTEST(PyrLKOpticalFlow)
 {
-    std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"};
-    std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"};
+    std::string images1[] = {"rubberwhale1.png", "basketball1.png"};
+    std::string images2[] = {"rubberwhale2.png", "basketball2.png"};
 
     for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++)
     {
@@ -81,8 +82,8 @@ TEST(PyrLKOpticalFlow)
                 SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points";
             else
                 SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points";
-            Mat nextPts_cpu;
-            Mat status_cpu;
+            Mat ocl_nextPts;
+            Mat ocl_status;
 
             vector<Point2f> pts;
             goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0);
@@ -117,7 +118,6 @@ TEST(PyrLKOpticalFlow)
 
             GPU_ON;
             d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
@@ -127,17 +127,102 @@ TEST(PyrLKOpticalFlow)
             d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
 
             if (!d_nextPts.empty())
-            {
-                d_nextPts.download(nextPts_cpu);
-            }
+                d_nextPts.download(ocl_nextPts);
 
             if (!d_status.empty())
+                d_status.download(ocl_status);
+            GPU_FULL_OFF;
+
+            size_t mismatch = 0;
+            for (int i = 0; i < (int)nextPts.size(); ++i)
             {
-                d_status.download(status_cpu);
+                if(status[i] != ocl_status.at<unsigned char>(0, i)){
+                    mismatch++;
+                    continue;
+                }
+                if(status[i]){
+                    Point2f gpu_rst = ocl_nextPts.at<Point2f>(0, i);
+                    Point2f cpu_rst = nextPts[i];
+                    if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.)
+                        mismatch++;
+                }
             }
-
-            GPU_FULL_OFF;
+            double ratio = (double)mismatch / (double)nextPts.size();
+            if(ratio < .02)
+                TestSystem::instance().setAccurate(1, ratio);
+            else
+                TestSystem::instance().setAccurate(0, ratio);
         }
 
     }
 }
+
+
+// Benchmarks dense Dual TV-L1 optical flow on the rubberwhale frame pair:
+// cv::createOptFlow_DualTVL1() on the CPU against
+// cv::ocl::OpticalFlowDual_TVL1_OCL, then checks both flow components
+// against the CPU result for similarity.
+PERFTEST(tvl1flow)
+{
+    // CV_Assert stays active in release builds, whereas plain assert() is
+    // compiled out under NDEBUG and would let empty images reach the
+    // algorithms. abspath() prepends the TestSystem working directory.
+    cv::Mat frame0 = imread(abspath("rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
+    CV_Assert(!frame0.empty());
+
+    cv::Mat frame1 = imread(abspath("rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
+    CV_Assert(!frame1.empty());
+
+    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
+    cv::ocl::oclMat d_flowx(frame0.size(), CV_32FC1);
+    cv::ocl::oclMat d_flowy(frame1.size(), CV_32FC1);
+
+    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
+    cv::Mat flow;
+
+    SUBTEST << frame0.cols << 'x' << frame0.rows << "; rubberwhale1.png; "<<frame1.cols<<'x'<<frame1.rows<<"; rubberwhale2.png";
+
+    // Untimed first CPU run.
+    alg->calc(frame0, frame1, flow);
+
+    CPU_ON;
+    alg->calc(frame0, frame1, flow);
+    CPU_OFF;
+
+    // Split the CPU flow into its planes to get one reference per component.
+    cv::Mat gold[2];
+    cv::split(flow, gold);
+
+    // upload() sizes and types the device matrix from the host image, so no
+    // pre-allocation is needed (a CV_32FC1 pre-allocation would be replaced
+    // by the 8-bit grayscale upload anyway).
+    cv::ocl::oclMat d0, d1;
+    d0.upload(frame0);
+    d1.upload(frame1);
+
+    WARMUP_ON;
+    d_alg(d0, d1, d_flowx, d_flowy);
+    WARMUP_OFF;
+
+    GPU_ON;
+    d_alg(d0, d1, d_flowx, d_flowy);
+    d_alg.collectGarbage();
+    GPU_OFF;
+
+    cv::Mat flowx, flowy;
+
+    GPU_FULL_ON;
+    d0.upload(frame0);
+    d1.upload(frame1);
+    d_alg(d0, d1, d_flowx, d_flowy);
+    d_alg.collectGarbage();
+    d_flowx.download(flowx);
+    d_flowy.download(flowy);
+    GPU_FULL_OFF;
+
+    // Judge both components together. Two consecutive ExceptedMatSimilar()
+    // calls would each overwrite the recorded verdict, so a bad x component
+    // would be hidden by a good y component.
+    const double eps = 3e-3;
+    const double diff_x = checkSimilarity(flowx, gold[0]);
+    const double diff_y = checkSimilarity(flowy, gold[1]);
+    const bool similar = diff_x <= eps && diff_y <= eps;
+    TestSystem::instance().setAccurate(similar ? 1 : 0, diff_x > diff_y ? diff_x : diff_y);
+}
\ No newline at end of file
similarity index 70%
rename from modules/ocl/perf/perf_pyrdown.cpp
rename to modules/ocl/perf/perf_pyramid.cpp
index 1d1d2de..3b96251 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 #include "precomp.hpp"
 
 ///////////// pyrDown //////////////////////
-TEST(pyrDown)
+PERFTEST(pyrDown)
 {
-    Mat src, dst;
+    Mat src, dst, ocl_dst;
     int all_type[] = {CV_8UC1, CV_8UC4};
     std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
 
@@ -74,14 +75,58 @@ TEST(pyrDown)
 
             GPU_ON;
             ocl::pyrDown(d_src, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::pyrDown(d_src, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, dst.depth() == CV_32F ? 1e-4f : 1.0f);
+        }
+    }
+}
+
+///////////// pyrUp ////////////////////////
+// Benchmarks cv::pyrUp against ocl::pyrUp on square random images of side
+// 500, 1000 and 2000, for CV_8UC1 and CV_8UC4, and compares the results.
+PERFTEST(pyrUp)
+{
+    Mat src, dst, ocl_dst;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 500; size <= 2000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            // Untimed first CPU call; dst from the CPU runs is the reference
+            // for the comparison at the end of the subtest.
+            pyrUp(src, dst);
+
+            CPU_ON;
+            pyrUp(src, dst);
+            CPU_OFF;
+
+            ocl::oclMat d_src(src);
+            ocl::oclMat d_dst;
+
+            WARMUP_ON;
+            ocl::pyrUp(d_src, d_dst);
+            WARMUP_OFF;
+
+            // Kernel-only timing: the input is already on the device.
+            GPU_ON;
+            ocl::pyrUp(d_src, d_dst);
+            GPU_OFF;
+
+            // Full timing: upload, kernel and download together.
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::pyrUp(d_src, d_dst);
+            d_dst.download(ocl_dst);
+            GPU_FULL_OFF;
+
+            // all_type holds only 8-bit types, so the tolerance used here is 1.0.
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, (src.depth() == CV_32F ? 1e-4f : 1.0));
         }
     }
 }
\ No newline at end of file
diff --git a/modules/ocl/perf/perf_pyrup.cpp b/modules/ocl/perf/perf_pyrup.cpp
deleted file mode 100644 (file)
index d3b3003..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Fangfang Bai, fangfang@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-
-///////////// pyrUp ////////////////////////
-TEST(pyrUp)
-{
-    Mat src, dst;
-    int all_type[] = {CV_8UC1, CV_8UC4};
-    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
-
-    for (int size = 500; size <= 2000; size *= 2)
-    {
-        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
-        {
-            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
-
-            gen(src, size, size, all_type[j], 0, 256);
-
-            pyrUp(src, dst);
-
-            CPU_ON;
-            pyrUp(src, dst);
-            CPU_OFF;
-
-            ocl::oclMat d_src(src);
-            ocl::oclMat d_dst;
-
-            WARMUP_ON;
-            ocl::pyrUp(d_src, d_dst);
-            WARMUP_OFF;
-
-            GPU_ON;
-            ocl::pyrUp(d_src, d_dst);
-             ;
-            GPU_OFF;
-
-            GPU_FULL_ON;
-            d_src.upload(src);
-            ocl::pyrUp(d_src, d_dst);
-            d_dst.download(dst);
-            GPU_FULL_OFF;
-        }
-    }
-}
\ No newline at end of file
index 48ff1ff..0fafd14 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,9 +46,9 @@
 #include "precomp.hpp"
 
 ///////////// Merge////////////////////////
-TEST(Merge)
+PERFTEST(Merge)
 {
-    Mat dst;
+    Mat dst, ocl_dst;
     ocl::oclMat d_dst;
 
     int channels = 4;
@@ -86,26 +87,25 @@ TEST(Merge)
 
             GPU_ON;
             ocl::merge(d_src, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
-
             for (int i = 0; i < channels; ++i)
             {
-                d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i));
+                d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
             }
-
             ocl::merge(d_src, d_dst);
-            d_dst.download(dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
         }
 
     }
 }
 
 ///////////// Split////////////////////////
-TEST(Split)
+PERFTEST(Split)
 {
     //int channels = 4;
     int all_type[] = {CV_8UC1, CV_32FC1};
@@ -120,7 +120,7 @@ TEST(Split)
 
             Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4));
 
-            std::vector<cv::Mat> dst;
+            std::vector<cv::Mat> dst, ocl_dst(4);
 
             split(src, dst);
 
@@ -133,17 +133,21 @@ TEST(Split)
 
             WARMUP_ON;
             ocl::split(d_src, d_dst);
-            WARMUP_OFF;
+            WARMUP_OFF;         
 
             GPU_ON;
             ocl::split(d_src, d_dst);
-             ;
             GPU_OFF;
 
             GPU_FULL_ON;
             d_src.upload(src);
             ocl::split(d_src, d_dst);
+            for(size_t i = 0; i < dst.size(); i++)
+                d_dst[i].download(ocl_dst[i]);
             GPU_FULL_OFF;
+
+            vector<double> eps(4, 0.);
+            TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
         }
 
     }
index e35a071..71a13a1 100644 (file)
 //M*/
 
 #include "precomp.hpp"
+#if GTEST_OS_WINDOWS
+#define NOMINMAX
+# include <windows.h>
+#endif
 
 // This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files
 // All images needed in this test are in samples/gpu folder.
@@ -166,7 +170,7 @@ void TestSystem::finishCurrentSubtest()
         deviation = std::sqrt(sum / gpu_times_.size());
     }
 
-    printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
+    printMetrics(is_accurate_, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
     writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
 
     num_subtests_called_++;
@@ -184,10 +188,19 @@ double TestSystem::meanTime(const vector<int64> &samples)
+// Prints the two-line, left-aligned column heading of the console result
+// table: column names on the first line, "(ms)" / "Speedup" sub-labels for
+// the GPUTotal and Total columns on the second.
 void TestSystem::printHeading()
 {
     cout << endl;
-    cout << setiosflags(ios_base::left);
-    cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
-         << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP"
-         << "DESCRIPTION\n";
+    cout<< setiosflags(ios_base::left);
+
+// Disabled variant of the heading that starts with an extra "Accu." column.
+#if 0
+    cout<<TAB<<setw(7)<< "Accu." << setw(10) << "CPU (ms)" << setw(10) << "GPU, ms"
+        << setw(8) << "Speedup"<< setw(10)<<"GPUTotal" << setw(10) << "Total"
+        << "Description\n";
+    cout<<TAB<<setw(7)<<""<<setw(10)<<""<<setw(10)<<""<<setw(8)<<""<<setw(10)<<"(ms)"<<setw(10)<<"Speedup\n";
+#endif
+
+    cout<<TAB<< setw(10) << "CPU (ms)" << setw(10) << "GPU, ms"
+        << setw(8) << "Speedup"<< setw(10)<<"GPUTotal" << setw(10) << "Total"
+        << "Description\n";
+    // Second heading line: empty cells under the first three columns.
+    cout<<TAB<<setw(10)<<""<<setw(10)<<""<<setw(8)<<""<<setw(10)<<"(ms)"<<setw(10)<<"Speedup\n";
 
     cout << resetiosflags(ios_base::left);
 }
@@ -198,9 +211,14 @@ void TestSystem::writeHeading()
     {
         recordname_ += "_OCL.csv";
         record_ = fopen(recordname_.c_str(), "w");
+        if(record_ == NULL)
+        {
+            cout<<".csv file open failed.\n";
+            exit(0);
+        }
     }
 
-    fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
+    fprintf(record_, "NAME,DESCRIPTION,ACCURACY,DIFFERENCE,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
 
     fflush(record_);
 }
@@ -209,54 +227,82 @@ void TestSystem::printSummary()
 {
     cout << setiosflags(ios_base::fixed);
     cout << "\naverage GPU speedup: x"
-         << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
-         << endl;
+        << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
+        << endl;
     cout << "\nGPU exceeded: "
-         << setprecision(3) << speedup_faster_count_
-         << "\nGPU passed: "
-         << setprecision(3) << speedup_equal_count_
-         << "\nGPU failed: "
-         << setprecision(3) << speedup_slower_count_
-         << endl;
+        << setprecision(3) << speedup_faster_count_
+        << "\nGPU passed: "
+        << setprecision(3) << speedup_equal_count_
+        << "\nGPU failed: "
+        << setprecision(3) << speedup_slower_count_
+        << endl;
     cout << "\nGPU exceeded rate: "
-         << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << "\nGPU passed rate: "
-         << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << "\nGPU failed rate: "
-         << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << endl;
+        << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << "\nGPU passed rate: "
+        << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << "\nGPU failed rate: "
+        << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << endl;
     cout << "\naverage GPUTOTAL speedup: x"
-         << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_)
-         << endl;
+        << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_)
+        << endl;
     cout << "\nGPUTOTAL exceeded: "
-         << setprecision(3) << speedup_full_faster_count_
-         << "\nGPUTOTAL passed: "
-         << setprecision(3) << speedup_full_equal_count_
-         << "\nGPUTOTAL failed: "
-         << setprecision(3) << speedup_full_slower_count_
-         << endl;
+        << setprecision(3) << speedup_full_faster_count_
+        << "\nGPUTOTAL passed: "
+        << setprecision(3) << speedup_full_equal_count_
+        << "\nGPUTOTAL failed: "
+        << setprecision(3) << speedup_full_slower_count_
+        << endl;
     cout << "\nGPUTOTAL exceeded rate: "
-         << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << "\nGPUTOTAL passed rate: "
-         << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << "\nGPUTOTAL failed rate: "
-         << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << endl;
+        << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << "\nGPUTOTAL passed rate: "
+        << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << "\nGPUTOTAL failed rate: "
+        << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << endl;
     cout << resetiosflags(ios_base::fixed);
 }
 
 
-void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
+// Console text colors understood by GetColorAttribute() (Windows console)
+// and GetAnsiColorCode() (ANSI terminals).
+enum GTestColor {
+    COLOR_DEFAULT,
+    COLOR_RED,
+    COLOR_GREEN,
+    COLOR_YELLOW
+};
+#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
+// Returns the Win32 console character attribute for the given color, or 0
+// for COLOR_DEFAULT and any other value. File-local, like GetAnsiColorCode().
+static WORD GetColorAttribute(GTestColor color) {
+    switch (color) {
+    case COLOR_RED:    return FOREGROUND_RED;
+    case COLOR_GREEN:  return FOREGROUND_GREEN;
+    case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
+    default:           return 0;
+    }
+}
+#else
+// Returns the ANSI color digit for the given color as a string, or NULL for
+// COLOR_DEFAULT and any other value. A NULL result must not be passed to a
+// printf "%s" conversion.
+static const char* GetAnsiColorCode(GTestColor color) {
+    switch (color) {
+    case COLOR_RED:     return "1";
+    case COLOR_GREEN:   return "2";
+    case COLOR_YELLOW:  return "3";
+    default:            return NULL;
+    }
+}
+#endif
+
+static void printMetricsUti(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, std::stringstream& stream, std::stringstream& cur_subtest_description)
 {
-    cout << TAB << setiosflags(ios_base::left);
-    stringstream stream;
+    //cout <<TAB<< setw(7) << stream.str(); 
+    cout <<TAB; 
 
+    stream.str("");
     stream << cpu_time;
     cout << setw(10) << stream.str();
 
@@ -266,20 +312,85 @@ void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_
 
     stream.str("");
     stream << "x" << setprecision(3) << speedup;
-    cout << setw(14) << stream.str();
+    cout << setw(8) << stream.str();
 
     stream.str("");
     stream << gpu_full_time;
-    cout << setw(14) << stream.str();
+    cout << setw(10) << stream.str();
 
     stream.str("");
     stream << "x" << setprecision(3) << fullspeedup;
-    cout << setw(14) << stream.str();
+    cout << setw(10) << stream.str();
 
-    cout << cur_subtest_description_.str();
+    cout << cur_subtest_description.str();
     cout << resetiosflags(ios_base::left) << endl;
 }
 
+// Prints one result row for the current subtest on stdout.
+//
+// is_accurate: 1 (pass) and -1 (not checked) print the row in the current
+// console color; any other value prints it in red. The timing and speedup
+// values are forwarded unchanged to printMetricsUti().
+void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
+{
+    cout << setiosflags(ios_base::left);
+    stringstream stream;
+
+    std::stringstream &cur_subtest_description = getCurSubtestDescription();
+    const bool highlight = !(is_accurate == 1 || is_accurate == -1);
+
+#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
+    const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
+    // Gets the current text color. The call fails when stdout is not a
+    // console (e.g. redirected to a file) and then leaves buffer_info unset,
+    // so in that case the row is printed without touching any attributes.
+    CONSOLE_SCREEN_BUFFER_INFO buffer_info;
+    const bool have_console = GetConsoleScreenBufferInfo(stdout_handle, &buffer_info) != 0;
+
+    if(highlight && have_console)
+    {
+        const WORD old_color_attrs = buffer_info.wAttributes;
+        // We need to flush the stream buffers into the console before each
+        // SetConsoleTextAttribute call lest it affect the text that is already
+        // printed but has not yet reached the console.
+        fflush(stdout);
+        SetConsoleTextAttribute(stdout_handle,
+            GetColorAttribute(COLOR_RED) | FOREGROUND_INTENSITY);
+
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+        fflush(stdout);
+        // Restores the text color.
+        SetConsoleTextAttribute(stdout_handle, old_color_attrs);
+    }
+    else
+    {
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+    }
+#else
+    if(highlight)
+    {
+        printf("\033[0;3%sm", GetAnsiColorCode(COLOR_RED));
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+        printf("\033[m");  // Resets the terminal to default.
+    }
+    else
+    {
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+    }
+#endif
+}
+
+
 void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
 {
     if (!record_)
@@ -288,10 +399,27 @@ void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_
         record_ = fopen(recordname_.c_str(), "w");
     }
 
-    fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
-            cur_subtest_description_.str().c_str(),
-            cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
-            gpu_min, gpu_max, std_dev);
+    string _is_accurate_;
+
+    if(is_accurate_ == 1)
+        _is_accurate_ = "Pass";
+    else if(is_accurate_ == 0)
+        _is_accurate_ = "Fail";
+    else if(is_accurate_ == -1)
+        _is_accurate_ = " ";
+    else
+    {
+        std::cout<<"is_accurate errer: "<<is_accurate_<<"\n";
+        exit(-1);
+    }
+
+    fprintf(record_, "%s,%s,%s,%.2f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", 
+        itname_changed_ ? itname_.c_str() : "",
+        cur_subtest_description_.str().c_str(),
+        _is_accurate_.c_str(), 
+        accurate_diff_,
+        cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
+        gpu_min, gpu_max, std_dev);
 
     if (itname_changed_)
     {
@@ -310,31 +438,31 @@ void TestSystem::writeSummary()
     }
 
     fprintf(record_, "\nAverage GPU speedup: %.3f\n"
-            "exceeded: %d (%.3f%%)\n"
-            "passed: %d (%.3f%%)\n"
-            "failed: %d (%.3f%%)\n"
-            "\nAverage GPUTOTAL speedup: %.3f\n"
-            "exceeded: %d (%.3f%%)\n"
-            "passed: %d (%.3f%%)\n"
-            "failed: %d (%.3f%%)\n",
-            speedup_total_ / std::max(1, num_subtests_called_),
-            speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_full_total_ / std::max(1, num_subtests_called_),
-            speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
-           );
+        "exceeded: %d (%.3f%%)\n"
+        "passed: %d (%.3f%%)\n"
+        "failed: %d (%.3f%%)\n"
+        "\nAverage GPUTOTAL speedup: %.3f\n"
+        "exceeded: %d (%.3f%%)\n"
+        "passed: %d (%.3f%%)\n"
+        "failed: %d (%.3f%%)\n",
+        speedup_total_ / std::max(1, num_subtests_called_),
+        speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_full_total_ / std::max(1, num_subtests_called_),
+        speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+        );
     fflush(record_);
 }
 
+// Prints msg together with the current subtest description on stdout.
+// Messages equal to "CL_INVALID_BUFFER_SIZE" are dropped without output.
 void TestSystem::printError(const std::string &msg)
 {
-       if(msg != "CL_INVALID_BUFFER_SIZE")
-       {
-               cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
-       }
+    if(msg != "CL_INVALID_BUFFER_SIZE")
+    {
+        cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
+    }
 }
 
 void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
@@ -344,7 +472,6 @@ void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
     rng.fill(mat, RNG::UNIFORM, low, high);
 }
 
-
 string abspath(const string &relpath)
 {
     return TestSystem::instance().workingDir() + relpath;
@@ -352,11 +479,30 @@ string abspath(const string &relpath)
 
 
+// Error callback that forwards err_msg to TestSystem::printError() and
+// returns 0; every other argument is ignored.
+// NOTE(review): presumably registered as the OpenCV error handler; confirm
+// at the registration site, which is not part of this hunk.
 int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/,
-                             const char *err_msg, const char * /*file_name*/,
-                             int /*line*/, void * /*userdata*/)
+    const char *err_msg, const char * /*file_name*/,
+    int /*line*/, void * /*userdata*/)
 {
     TestSystem::instance().printError(err_msg);
     return 0;
 }
 
+// Returns the NORM_INF norm of m.
+double checkNorm(const Mat &m)
+{
+    const double inf_norm = norm(m, NORM_INF);
+    return inf_norm;
+}
+
+// Returns the NORM_INF norm of the difference between m1 and m2.
+double checkNorm(const Mat &m1, const Mat &m2)
+{
+    const double inf_norm_of_diff = norm(m1, m2, NORM_INF);
+    return inf_norm_of_diff;
+}
+
+// Returns |score - 1|, where score is the CV_TM_CCORR_NORMED response at
+// (0, 0) of matching m2 against m1. Only that single response is examined.
+double checkSimilarity(const Mat &m1, const Mat &m2)
+{
+    Mat response;
+    matchTemplate(m1, m2, response, CV_TM_CCORR_NORMED);
+    const float score = response.at<float>(0, 0);
+    return std::abs(score - 1.f);
+}
+
+
+
 
index c2cf123..97e3d7e 100644 (file)
 #include "opencv2/core/core.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/highgui/highgui.hpp"
+#include "opencv2/calib3d/calib3d.hpp"
 #include "opencv2/video/video.hpp"
 #include "opencv2/objdetect/objdetect.hpp"
 #include "opencv2/features2d/features2d.hpp"
 #include "opencv2/ocl/ocl.hpp"
+#include "opencv2/ts/ts.hpp"
+#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/ts/ts_gtest.h"
+
 
 #define Min_Size 1000
 #define Max_Size 4000
@@ -64,6 +69,8 @@ using namespace std;
 using namespace cv;
 
 void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high);
+void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n);
+
 string abspath(const string &relpath);
 int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *);
 typedef struct
@@ -76,6 +83,50 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep,
 void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
                     int sp, int sr, cv::TermCriteria crit);
 
+
+// Returns 1 when expected == actual and 0 otherwise. The two arguments may
+// have different types as long as operator== accepts them.
+template<class T1, class T2>
+int ExpectedEQ(T1 expected, T2 actual)
+{
+    return (expected == actual) ? 1 : 0;
+}
+
+// Returns 1 when expected and actual are almost equal according to gtest's
+// testing::internal::Double::AlmostEquals, and 0 otherwise.
+template<class T1>
+int EeceptDoubleEQ(T1 expected, T1 actual)
+{
+    const testing::internal::Double expected_value(expected);
+    const testing::internal::Double actual_value(actual);
+
+    return expected_value.AlmostEquals(actual_value) ? 1 : 0;
+}
+
+// Returns 1 when expected == actual and 0 otherwise; both arguments share
+// one type. Despite the name, nothing is asserted.
+template<class T>
+int AssertEQ(T expected, T actual)
+{
+    return (expected == actual) ? 1 : 0;
+}
+
+int ExceptDoubleNear(double val1, double val2, double abs_error);
+bool match_rect(cv::Rect r1, cv::Rect r2, int threshold);
+
+double checkNorm(const cv::Mat &m);
+double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
+double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
+
+int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps);
+int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps);
+
 class Runnable
 {
 public:
@@ -171,6 +222,16 @@ public:
         return cur_iter_idx_ >= cpu_num_iters_;
     }
 
+    // Returns the current iteration index (cur_iter_idx_).
+    int get_cur_iter_idx()
+    {
+        return cur_iter_idx_;
+    }
+
+    // Returns the configured number of CPU iterations (cpu_num_iters_).
+    int get_cpu_num_iters()
+    {
+        return cpu_num_iters_;
+    }
+
     bool warmupStop()
     {
         return cur_warmup_idx_++ >= gpu_warmup_iters_;
@@ -252,6 +313,53 @@ public:
         itname_changed_ = true;
     }
 
+    // Records the accuracy verdict of the current subtest and the measured
+    // difference behind it. writeMetrics() reports 1 as "Pass" and 0 as
+    // "Fail"; -1 is the "not checked" state the members are reset to.
+    void setAccurate(int accurate, double diff)
+    {
+        accurate_diff_ = diff;
+        is_accurate_ = accurate;
+    }
+
+    // Compares dst[i] with cpu_dst[i] for every i using checkNorm() and
+    // records the verdict: pass only if each difference is within eps[i].
+    // accurate_diff_ keeps the largest difference seen. All three vectors
+    // must have the same length.
+    void ExpectMatsNear(vector<Mat>& dst, vector<Mat>& cpu_dst, vector<double>& eps)
+    {
+        // CV_Assert is also active in release builds; plain assert() is
+        // compiled out under NDEBUG and the loop below could then index past
+        // the end of a shorter vector.
+        CV_Assert(dst.size() == cpu_dst.size());
+        CV_Assert(cpu_dst.size() == eps.size());
+        is_accurate_ = 1;
+        for(size_t i=0; i<dst.size(); i++)
+        {
+            double cur_diff = checkNorm(dst[i], cpu_dst[i]);
+            accurate_diff_ = max(accurate_diff_, cur_diff);
+            // Negated "<=" so that a NaN difference counts as a failure, the
+            // same way ExpectedMatNear() treats it.
+            if(!(cur_diff <= eps[i]))
+                is_accurate_ = 0;
+        }
+    }
+
+    // Records whether dst and cpu_dst agree within eps: stores their
+    // checkNorm() difference in accurate_diff_ and sets is_accurate_ to 1
+    // when it is <= eps, otherwise to 0. Both matrices are expected to have
+    // the same type and size (checked with assert only).
+    void ExpectedMatNear(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
+    {
+        assert(dst.type() == cpu_dst.type());
+        assert(dst.size() == cpu_dst.size());
+        const double max_abs_diff = checkNorm(dst, cpu_dst);
+        accurate_diff_ = max_abs_diff;
+        is_accurate_ = (max_abs_diff <= eps) ? 1 : 0;
+    }
+
+    // Records whether dst and cpu_dst are similar within eps: stores their
+    // checkSimilarity() value in accurate_diff_ and sets is_accurate_ to 1
+    // when it is <= eps, otherwise to 0. Both matrices must have the same
+    // type and size.
+    void ExceptedMatSimilar(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
+    {
+        // CV_Assert is also active in release builds. checkSimilarity() reads
+        // only the response at (0, 0), so with assert() compiled out a size
+        // mismatch could go unnoticed instead of being reported.
+        CV_Assert(dst.type() == cpu_dst.type());
+        CV_Assert(dst.size() == cpu_dst.size());
+        accurate_diff_ = checkSimilarity(cpu_dst, dst);
+        if(accurate_diff_ <= eps)
+            is_accurate_ = 1;
+        else
+            is_accurate_ = 0;
+    }
+
+    // Returns a mutable reference to the description stream of the current
+    // subtest (cur_subtest_description_).
+    std::stringstream &getCurSubtestDescription()
+    {
+        return cur_subtest_description_;
+    }
+
 private:
     TestSystem():
         cur_subtest_is_empty_(true), cpu_elapsed_(0),
@@ -261,7 +369,8 @@ private:
         speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false),
         num_iters_(10), cpu_num_iters_(2),
         gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
-        record_(0), recordname_("performance"), itname_changed_(true)
+        record_(0), recordname_("performance"), itname_changed_(true), 
+        is_accurate_(-1), accurate_diff_(0.)
     {
         cpu_times_.reserve(num_iters_);
         gpu_times_.reserve(num_iters_);
@@ -277,16 +386,19 @@ private:
         cur_subtest_description_.str("");
         cur_subtest_is_empty_ = true;
         cur_iter_idx_ = 0;
+        cur_warmup_idx_ = 0;
         cpu_times_.clear();
         gpu_times_.clear();
         gpu_full_times_.clear();
+        is_accurate_ = -1;
+        accurate_diff_ = 0.;
     }
 
     double meanTime(const std::vector<int64> &samples);
 
     void printHeading();
     void printSummary();
-    void printMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
+    void printMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
 
     void writeHeading();
     void writeSummary();
@@ -340,6 +452,9 @@ private:
     std::string recordname_;
     std::string itname_;
     bool itname_changed_;
+
+    int is_accurate_;
+    double accurate_diff_;
 };
 
 
@@ -353,7 +468,7 @@ struct name##_init: Runnable { \
        void name##_init::run()
 
 
-#define TEST(name) \
+#define PERFTEST(name) \
 struct name##_test: Runnable { \
        name##_test(): Runnable(#name) { \
        TestSystem::instance().addTest(this); \
@@ -375,7 +490,7 @@ struct name##_test: Runnable { \
        while (!TestSystem::instance().stop()) { \
        TestSystem::instance().gpuOn()
 #define GPU_OFF \
-    ocl::finish(); \
+       ocl::finish();\
        TestSystem::instance().gpuOff(); \
        } TestSystem::instance().gpuComplete()
 
@@ -389,5 +504,5 @@ struct name##_test: Runnable { \
 #define WARMUP_ON \
        while (!TestSystem::instance().warmupStop()) {
 #define WARMUP_OFF \
-        ocl::finish(); \
+       ocl::finish();\
        } TestSystem::instance().warmupComplete()
index d679a93..49a56ce 100644 (file)
@@ -22,6 +22,7 @@
 //    Jiang Liyuan, jlyuan001.good@163.com
 //    Rock Li, Rock.Li@amd.com
 //    Zailong Wu, bullet@yeah.net
+//    Peng Xiao, pengxiao@outlook.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -286,6 +287,7 @@ void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, doub
     else
         arithmetic_run<float>(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
 }
+
 void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
 {
 
@@ -411,11 +413,11 @@ static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelN
     args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
 
+    float f_scalar = (float)scalar;
     if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
         args.push_back( make_pair( sizeof(cl_double), (void *)&scalar ));
     else
     {
-        float f_scalar = (float)scalar;
         args.push_back( make_pair( sizeof(cl_float), (void *)&f_scalar));
     }
 
@@ -468,6 +470,11 @@ void cv::ocl::subtract(const Scalar &src2, const oclMat &src1, oclMat &dst, cons
     const char **kernelString = mask.data ? &arithm_add_scalar_mask : &arithm_add_scalar;
     arithmetic_scalar( src1, src2, dst, mask, kernelName, kernelString, -1);
 }
+// Scalar overload of multiply: forwards src, dst and the scalar to arithmetic_scalar_run,
+// selecting the "arithm_muls" kernel from the arithm_mul program source.
+void cv::ocl::multiply(double scalar, const oclMat &src, oclMat &dst)
+{
+    const string kernel_name("arithm_muls");
+    arithmetic_scalar_run(src, dst, kernel_name, &arithm_mul, scalar);
+}
 void cv::ocl::divide(double scalar, const oclMat &src,  oclMat &dst)
 {
     if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
@@ -775,46 +782,56 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl
     }
 }
 
-template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
+// Runs the min/max reduction kernel on src and folds its partial results on the host.
+//   minVal, maxVal - output pointers; either may be NULL, in which case that result is skipped
+//   mask           - when non-empty the "arithm_op_minMax_mask" kernel is launched instead of
+//                    "arithm_op_minMax"
+//   buf            - caller-supplied scratch oclMat; resized here to 1 x dbsize bytes (CV_8UC1)
+//                    and used as the kernel's output buffer
+template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
+                                             const oclMat &mask, oclMat &buf)
 {
     size_t groupnum = src.clCxt->computeUnits();
     CV_Assert(groupnum != 0);
     groupnum = groupnum * 2;
     int vlen = 8;
     int dbsize = groupnum * 2 * vlen * sizeof(T) ;
-    Context *clCxt = src.clCxt;
-    cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
-    *minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
+
+    ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
+
+    cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
+
     if (mask.empty())
     {
-        arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax");
+        arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax");
     }
     else
     {
-        arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask");
+        arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask");
     }
-    T *p = new T[groupnum * vlen * 2];
-    memset(p, 0, dbsize);
-    openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
-    if(minVal != NULL){
+
+    // Convert the scratch buffer to a host Mat and view it as T values: the first
+    // vlen * groupnum entries are scanned for the minimum, the following vlen * groupnum
+    // entries for the maximum.
+    Mat matbuf = Mat(buf);
+    T *p = matbuf.ptr<T>();
+    if(minVal != NULL)
+    {
+        *minVal = std::numeric_limits<double>::max();
         for(int i = 0; i < vlen * (int)groupnum; i++)
         {
             *minVal = *minVal < p[i] ? *minVal : p[i];
         }
     }
-    if(maxVal != NULL){
+    if(maxVal != NULL)
+    {
+        *maxVal = -std::numeric_limits<double>::max();
         for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
         {
             *maxVal = *maxVal > p[i] ? *maxVal : p[i];
         }
     }
-    delete[] p;
-    openCLFree(dstBuffer);
 }
 
-typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
+typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
+// Convenience overload: creates a temporary scratch oclMat and forwards to minMax_buf.
+// Callers that invoke this repeatedly can call minMax_buf directly with their own buffer.
 void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
 {
+    oclMat buf;
+    minMax_buf(src, minVal, maxVal, mask, buf);
+}
+void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
+{
     CV_Assert(src.oclchannels() == 1);
     if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
     {
@@ -833,7 +850,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc
     };
     minMaxFunc func;
     func = functab[src.depth()];
-    func(src, minVal, maxVal, mask);
+    func(src, minVal, maxVal, mask, buf);
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -1680,10 +1697,11 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker
     args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
 
+    T scalar;
     if(_scalar != NULL)
     {
         double scalar1 = *((double *)_scalar);
-        scalar = (T)scalar1;
+        scalar = (T)scalar1;
         args.push_back( make_pair( sizeof(T), (void *)&scalar ));
     }
 
@@ -2300,9 +2318,9 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string
     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+    float pf = p;
     if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
     {
-        float pf = p;
         args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
     }
     else
index c12fa73..74da6dd 100644 (file)
@@ -245,11 +245,12 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, const oclM
 {
     const oclMat zeroMask;
     const oclMat &tempMask = mask.data ? mask : zeroMask;
+    bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
     if (query.cols <= 64)
     {
         matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
     }
-    else if (query.cols <= 128)
+    else if (query.cols <= 128 && !is_cpu)
     {
         matchUnrolledCached<16, 128>(query, train, tempMask, trainIdx,  distance, distType);
     }
@@ -264,11 +265,12 @@ static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, co
 {
     const oclMat zeroMask;
     const oclMat &tempMask = mask.data ? mask : zeroMask;
+    bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
     if (query.cols <= 64)
     {
         matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
     }
-    else if (query.cols <= 128)
+    else if (query.cols <= 128 && !is_cpu)
     {
         matchUnrolledCached<16, 128>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
     }
@@ -284,11 +286,12 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, float maxD
 {
     const oclMat zeroMask;
     const oclMat &tempMask = mask.data ? mask : zeroMask;
+    bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
     if (query.cols <= 64)
     {
         matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
     }
-    else if (query.cols <= 128)
+    else if (query.cols <= 128 && !is_cpu)
     {
         matchUnrolledCached<16, 128>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
     }
@@ -466,11 +469,12 @@ static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, con
 static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
                       const oclMat &trainIdx, const oclMat &distance, int distType)
 {
+    bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
     if (query.cols <= 64)
     {
         knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
     }
-    else if (query.cols <= 128)
+    else if (query.cols <= 128 && !is_cpu)
     {
         knn_matchUnrolledCached<16, 128>(query, train, mask, trainIdx, distance, distType);
     }
index cc7e60e..82bb01b 100644 (file)
@@ -87,7 +87,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
             filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
         }
     }
-    ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
+    ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
 
     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
@@ -141,13 +141,16 @@ namespace
+    // Runs the post-magnitude Canny stages in order: calcMap, local hysteresis,
+    // global hysteresis, then edge extraction into dst.
+    // buf.edgeBuf is split by rows into two halves: the top half (magBuf) is passed to
+    // calcMap_gpu as the magnitude input and the bottom half (mapBuf) is the edge map
+    // used by all later stages.
     void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
     {
         using namespace ::cv::ocl::canny;
-        calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
+        oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+        oclMat mapBuf = buf.edgeBuf(Rect(0, buf.edgeBuf.rows / 2, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
 
-        edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
+        calcMap_gpu(buf.dx, buf.dy, magBuf, mapBuf, dst.rows, dst.cols, low_thresh, high_thresh);
 
-        edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
+        edgesHysteresisLocal_gpu(mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
 
-        getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
+        edgesHysteresisGlobal_gpu(mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
+
+        getEdges_gpu(mapBuf, dst, dst.rows, dst.cols);
     }
 }
 
@@ -172,18 +175,20 @@ void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_th
     buf.create(src.size(), apperture_size);
     buf.edgeBuf.setTo(Scalar::all(0));
 
+    oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+
     if (apperture_size == 3)
     {
         calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
 
-        calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+        calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
     }
     else
     {
         buf.filterDX->apply(src, buf.dx);
         buf.filterDY->apply(src, buf.dy);
 
-        calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+        calcMagnitude_gpu(buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
     }
     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
 }
@@ -209,7 +214,10 @@ void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &d
     buf.dy = dy;
     buf.create(dx.size(), -1);
     buf.edgeBuf.setTo(Scalar::all(0));
-    calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
+
+    oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+
+    calcMagnitude_gpu(buf.dx, buf.dy, magBuf, dx.rows, dx.cols, L2gradient);
 
     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
 }
@@ -234,7 +242,7 @@ void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_b
 
     size_t globalThreads[3] = {cols, rows, 1};
     size_t localThreads[3]  = {16, 16, 1};
-    openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
 }
 
 void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
@@ -264,12 +272,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat
     size_t globalThreads[3] = {cols, rows, 1};
     size_t localThreads[3]  = {16, 16, 1};
 
-    char build_options [15] = "";
-    if(L2Grad)
-    {
-        strcat(build_options, "-D L2GRAD");
-    }
-    openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+    const char * build_options = L2Grad ? "-D L2GRAD":"";
+    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
 }
 void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
 {
@@ -292,12 +296,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, i
     size_t globalThreads[3] = {cols, rows, 1};
     size_t localThreads[3]  = {16, 16, 1};
 
-    char build_options [15] = "";
-    if(L2Grad)
-    {
-        strcat(build_options, "-D L2GRAD");
-    }
-    openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+    const char * build_options = L2Grad ? "-D L2GRAD":"";
+    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
 }
 
 void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
@@ -328,7 +328,7 @@ void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int ro
     string kernelName = "calcMap";
     size_t localThreads[3]  = {16, 16, 1};
 
-    openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
 }
 
 void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
@@ -348,7 +348,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in
     size_t globalThreads[3] = {cols, rows, 1};
     size_t localThreads[3]  = {16, 16, 1};
 
-    openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
 }
 
 void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
@@ -378,7 +378,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
         args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
         args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
 
-        openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
+        openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
         openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
         std::swap(st1, st2);
     }
@@ -403,5 +403,5 @@ void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
     size_t globalThreads[3] = {cols, rows, 1};
     size_t localThreads[3]  = {16, 16, 1};
 
-    openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
 }
index cc07209..f35a26e 100644 (file)
@@ -356,8 +356,7 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
     char compile_option[128];
     sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s", 
         anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], 
-        rectKernel?"-D RECTKERNEL":"",
-        s);
+        s, rectKernel?"-D RECTKERNEL":"");
     vector< pair<size_t, const void *> > args;
     args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
     args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
@@ -646,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
     args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
     args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
 
-    openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth);
+    const int buffer_size = 100;
+    char opt_buffer [buffer_size] = "";
+    sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y);
+
+    openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer);
 }
 Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
         Point anchor, int borderType)
@@ -657,7 +660,7 @@ Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const
 
     oclMat gpu_krnl;
     int nDivisor;
-    normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
+    normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false);
     normalizeAnchor(anchor, ksize);
 
     return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
@@ -1173,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
     args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy));
     args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
 
-    openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH);
+    openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option);
 }
 
 Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
diff --git a/modules/ocl/src/gfft.cpp b/modules/ocl/src/gfft.cpp
new file mode 100644 (file)
index 0000000..7fd5e3a
--- /dev/null
@@ -0,0 +1,352 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include <iomanip>
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::ocl;
+
+static bool use_cpu_sorter = true;
+
+namespace cv
+{
+    namespace ocl
+    {
+        ///////////////////////////OpenCL kernel strings///////////////////////////
+        extern const char *imgproc_gfft;
+    }
+}
+
+namespace
+{
+// Tag used to pick a Sorter<> specialization for ordering the detected corners.
+enum SortMethod
+{
+    CPU_STL,    // std::sort on the host
+    BITONIC,    // "sortCorners_bitonicSort" kernel
+    SELECTION   // "sortCorners_selectionSortLocal" followed by "sortCorners_selectionSortFinal"
+};
+
+const int GROUP_SIZE = 256;
+
+// Primary template: declares nothing. The explicit specializations provide the
+// EigType typedef and the static sortCorners_caller() entry point.
+template<SortMethod method>
+struct Sorter
+{
+    //typedef EigType;
+};
+
+//TODO(pengx): optimize the GPU sorters' performance so that this CPU sorter can be removed.
+// Host-side sorter: converts the corner list to a cv::Mat, orders its first `count`
+// entries with std::sort and writes the result back into the oclMat.
+template<>
+struct Sorter<CPU_STL>
+{
+    typedef oclMat EigType;
+    static cv::Mutex cs;    // held for the whole sort because the comparator reads mat_eig
+    static Mat mat_eig;     // host copy of the eigenvalue image used by clfloat2Gt
+
+    // Comparator for std::sort: non-zero when the eigenvalue at pt1 is greater than the
+    // one at pt2. s[0] is used as the column and s[1] as the row, both rounded.
+    static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
+    {
+        const float eig1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
+        const float eig2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
+        return eig1 > eig2;
+    }
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        cv::AutoLock guard(cs);
+        // Temporary solution: sort on the host with the STL.
+        Mat host_corners = corners;
+        mat_eig = eig_tex;
+        MatIterator_<cl_float2> first = host_corners.begin<cl_float2>();
+        std::sort(first, first + count, clfloat2Gt);
+        corners = host_corners;
+    }
+};
+cv::Mutex Sorter<CPU_STL>::cs;
+cv::Mat   Sorter<CPU_STL>::mat_eig;
+
+// Sorter backed by the "sortCorners_bitonicSort" kernel of the imgproc_gfft program.
+template<>
+struct Sorter<BITONIC>
+{
+    typedef TextureCL EigType;
+
+    // Launches the kernel once per (stage, passOfStage) pair, with stage in [0, numStages)
+    // and passOfStage in [0, stage]; numStages is floor(log2(count)).
+    // Each launch uses count / 2 global work items in groups of GROUP_SIZE.
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        Context * cxt = Context::getContext();
+        size_t globalThreads[3] = {count / 2, 1, 1};
+        size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
+
+        // 2^numStages should be equal to count or the output is invalid
+        int numStages = 0;
+        for(int i = count; i > 1; i >>= 1)
+        {
+            ++numStages;
+        }
+        const int argc = 5;
+        std::vector< std::pair<size_t, const void *> > args(argc);
+        std::string kernelname = "sortCorners_bitonicSort";
+        args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
+        args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
+        args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
+        // args[3] and args[4] store the addresses of the loop variables, so every launch
+        // picks up the current stage / pass values.
+        for(int stage = 0; stage < numStages; ++stage)
+        {
+            args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
+            for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
+            {
+                args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
+                openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+            }
+        }
+    }
+};
+
+// Sorter backed by two kernels of the imgproc_gfft program, launched back to back:
+// "sortCorners_selectionSortLocal" and then "sortCorners_selectionSortFinal".
+template<>
+struct Sorter<SELECTION>
+{
+    typedef TextureCL EigType;
+
+    // Both launches use `count` global work items in groups of GROUP_SIZE and share the
+    // first three arguments (eigenvalue texture, corner buffer, count).
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        Context * cxt = Context::getContext();
+        
+        size_t globalThreads[3] = {count, 1, 1};
+        size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
+
+        std::vector< std::pair<size_t, const void *> > args;
+        //local
+        std::string kernelname = "sortCorners_selectionSortLocal";
+        int lds_size = GROUP_SIZE * sizeof(cl_float2);
+        args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
+        args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
+        args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
+        // Size-only argument with a NULL pointer (lds_size bytes) - presumably the kernel's
+        // local-memory scratch buffer; confirm against the kernel signature.
+        args.push_back( std::make_pair( lds_size,       (void*)NULL) );
+
+        openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+
+        //final
+        kernelname = "sortCorners_selectionSortFinal";
+        // The second kernel takes the same arguments minus the size-only one.
+        args.pop_back();
+        openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+    }
+};
+
+// Launches the "findCorners" kernel of the imgproc_gfft program over an eig.cols x eig.rows
+// grid (16x16 work groups) and returns the value of the device-side counter, clamped to
+// max_count.
+//   eig       - eigenvalue texture passed as the kernel's first argument
+//   threshold - passed to the kernel unchanged
+//   mask      - optional mask; when non-empty the kernel is built with "-D WITH_MASK"
+//   corners   - output buffer passed to the kernel
+//   max_count - passed to the kernel and used as the upper bound of the return value
+int findCorners_caller(
+    const TextureCL& eig,
+    const float threshold,
+    const oclMat& mask,
+    oclMat& corners,
+    const int max_count)
+{
+    Context * cxt = Context::getContext();
+
+    std::vector< std::pair<size_t, const void*> > args;
+    std::string kernelname = "findCorners";
+
+    // Row stride of the mask expressed in elements rather than bytes.
+    const int mask_strip = mask.step / mask.elemSize1();
+
+    // Single-int device counter, zeroed before the launch and read back afterwards.
+    oclMat g_counter(1, 1, CV_32SC1);
+    g_counter.setTo(0);
+
+    args.push_back(make_pair( sizeof(cl_mem),   (void*)&eig  ));
+    args.push_back(make_pair( sizeof(cl_mem),   (void*)&mask.data ));
+    args.push_back(make_pair( sizeof(cl_mem),   (void*)&corners.data ));
+    args.push_back(make_pair( sizeof(cl_int),   (void*)&mask_strip));
+    args.push_back(make_pair( sizeof(cl_float), (void*)&threshold ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&max_count ));
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
+
+    size_t globalThreads[3] = {eig.cols, eig.rows, 1};
+    size_t localThreads[3]  = {16, 16, 1};
+
+    const char * opt = mask.empty() ? "" : "-D WITH_MASK";
+    openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt);
+    // The counter can exceed the buffer capacity, hence the clamp.
+    return std::min(Mat(g_counter).at<int>(0), max_count);
+}
+}//unnamed namespace
+
+// Detects corners in `image` and stores them in `corners` as a 1-row CV_32FC2 oclMat
+// (released when nothing is found).
+// Steps: compute the per-pixel response into eig_, take its maximum, collect candidates
+// above maxVal * qualityLevel with the findCorners kernel, sort them, then either copy the
+// first maxCorners (minDistance < 1) or filter them on the host by minimum distance.
+//   mask - optional; must be empty or CV_8UC1 with the same size as image.
+void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
+{
+    CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
+    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
+
+    CV_DbgAssert(support_image2d());
+
+    ensureSizeIsEnough(image.size(), CV_32F, eig_);
+
+    // NOTE(review): both branches call cornerMinEigenVal_dxdy; the Harris branch differs only
+    // by passing harrisK as a seventh argument. Confirm that this parameter really is the
+    // Harris k (and not, e.g., a border type) so that the Harris response is computed.
+    if (useHarrisDetector)
+        cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
+    else
+        cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
+
+    // Only the maximum is needed (minVal pointer is 0); minMaxbuf_ is the reusable scratch buffer.
+    double maxVal = 0;
+    minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_);
+
+    // Candidate buffer capacity: 5% of the pixel count, but at least 1000 points.
+    ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
+
+    Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
+    int total = findCorners_caller(
+        *eig_tex,
+        static_cast<float>(maxVal * qualityLevel),
+        mask,
+        tmpCorners_,
+        tmpCorners_.cols);
+
+    if (total == 0)
+    {
+        corners.release();
+        return;
+    }
+    // Sort the first `total` candidates; the file-level use_cpu_sorter flag selects the
+    // host sorter, otherwise a device sorter is chosen from the value of total.
+    if(use_cpu_sorter)
+    {
+        Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
+    }
+    else
+    {
+        //if total is power of 2
+        if(((total - 1) & (total)) == 0)
+        {
+            Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
+        }
+        else
+        {
+            Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
+        }
+    }
+    
+    if (minDistance < 1)
+    {
+        // No distance filtering: keep the leading min(maxCorners, total) points
+        // (all of them when maxCorners is 0).
+        Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
+        tmpCorners_(roi_range).copyTo(corners);
+    }
+    else
+    {
+        // downloadPoints resizes tmp to tmpCorners_.cols; only the first `total` entries are used.
+        vector<Point2f> tmp(total);
+        downloadPoints(tmpCorners_, tmp);
+
+        vector<Point2f> tmp2;
+        tmp2.reserve(total);
+
+        // Accepted points are bucketed in a grid of cell_size x cell_size cells so that each
+        // candidate is only compared against points in its own and the 8 neighbouring cells.
+        const int cell_size = cvRound(minDistance);
+        const int grid_width = (image.cols + cell_size - 1) / cell_size;
+        const int grid_height = (image.rows + cell_size - 1) / cell_size;
+
+        std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
+
+        for (int i = 0; i < total; ++i)
+        {
+            Point2f p = tmp[i];
+
+            bool good = true;
+
+            int x_cell = static_cast<int>(p.x / cell_size);
+            int y_cell = static_cast<int>(p.y / cell_size);
+
+            int x1 = x_cell - 1;
+            int y1 = y_cell - 1;
+            int x2 = x_cell + 1;
+            int y2 = y_cell + 1;
+
+            // boundary check
+            x1 = std::max(0, x1);
+            y1 = std::max(0, y1);
+            x2 = std::min(grid_width - 1, x2);
+            y2 = std::min(grid_height - 1, y2);
+
+            for (int yy = y1; yy <= y2; yy++)
+            {
+                for (int xx = x1; xx <= x2; xx++)
+                {
+                    vector<Point2f>& m = grid[yy * grid_width + xx];
+
+                    if (!m.empty())
+                    {
+                        for(size_t j = 0; j < m.size(); j++)
+                        {
+                            float dx = p.x - m[j].x;
+                            float dy = p.y - m[j].y;
+
+                            // Reject the candidate as soon as one accepted point is closer than minDistance.
+                            if (dx * dx + dy * dy < minDistance * minDistance)
+                            {
+                                good = false;
+                                goto break_out;
+                            }
+                        }
+                    }
+                }
+            }
+
+            break_out:
+
+            if(good)
+            {
+                grid[y_cell * grid_width + x_cell].push_back(p);
+
+                tmp2.push_back(p);
+
+                // Stop once maxCorners points were accepted (0 means no limit).
+                if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
+                    break;
+            }
+        }
+
+        // The first candidate is always accepted (the grid starts empty), so tmp2 is non-empty here.
+        corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
+    }
+}
+// Copies the points.cols elements at the start of the device buffer of `points` into
+// points_v, which is resized to points.cols. The read is blocking (CL_TRUE), so points_v
+// is fully populated on return.
+//   points   - expected to be CV_32FC2 (checked in debug builds only)
+//   points_v - destination; left empty when points has no columns
+void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector<Point2f> &points_v)
+{
+    CV_DbgAssert(points.type() == CV_32FC2);
+    points_v.resize(points.cols);
+
+    // Nothing to transfer: taking &points_v[0] of an empty vector is undefined behaviour
+    // and a zero-byte clEnqueueReadBuffer is rejected by some OpenCL versions.
+    if (points_v.empty())
+        return;
+
+    openCLSafeCall(clEnqueueReadBuffer(
+        *reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
+        reinterpret_cast<cl_mem>(points.data),
+        CL_TRUE,
+        0,
+        points.cols * sizeof(Point2f),
+        &points_v[0],
+        0,
+        NULL,
+        NULL));
+}
+
+
index 5afe542..565270c 100644 (file)
@@ -137,47 +137,22 @@ struct CvHidHaarClassifierCascade
 };
 typedef struct
 {
-    //int rows;
-    //int ystep;
     int width_height;
-    //int height;
     int grpnumperline_totalgrp;
-    //int totalgrp;
     int imgoff;
     float factor;
 } detect_piramid_info;
-
-#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__
+#ifdef WIN32
 #define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
-typedef _ALIGNED_ON(128) struct  GpuHidHaarFeature
-{
-    _ALIGNED_ON(32) struct
-    {
-        _ALIGNED_ON(4)  int    p0 ;
-        _ALIGNED_ON(4)  int    p1 ;
-        _ALIGNED_ON(4)  int    p2 ;
-        _ALIGNED_ON(4)  int    p3 ;
-        _ALIGNED_ON(4)  float weight  ;
-    }
-    /*_ALIGNED_ON(32)*/ rect[CV_HAAR_FEATURE_MAX] ;
-}
-GpuHidHaarFeature;
-
 
 typedef _ALIGNED_ON(128) struct  GpuHidHaarTreeNode
 {
     _ALIGNED_ON(64) int p[CV_HAAR_FEATURE_MAX][4];
-    //_ALIGNED_ON(16) int p1[CV_HAAR_FEATURE_MAX] ;
-    //_ALIGNED_ON(16) int p2[CV_HAAR_FEATURE_MAX] ;
-    //_ALIGNED_ON(16) int p3[CV_HAAR_FEATURE_MAX] ;
-    /*_ALIGNED_ON(16)*/
     float weight[CV_HAAR_FEATURE_MAX] ;
-    /*_ALIGNED_ON(4)*/
     float threshold ;
-    _ALIGNED_ON(8) float alpha[2] ;
+    _ALIGNED_ON(16) float alpha[3] ;
     _ALIGNED_ON(4) int left ;
     _ALIGNED_ON(4) int right ;
-    // GpuHidHaarFeature feature __attribute__((aligned (128)));
 }
 GpuHidHaarTreeNode;
 
@@ -185,7 +160,6 @@ GpuHidHaarTreeNode;
 typedef  _ALIGNED_ON(32) struct  GpuHidHaarClassifier
 {
     _ALIGNED_ON(4) int count;
-    //CvHaarFeature* orig_feature;
     _ALIGNED_ON(8) GpuHidHaarTreeNode *node ;
     _ALIGNED_ON(8) float *alpha ;
 }
@@ -220,32 +194,16 @@ typedef _ALIGNED_ON(64) struct  GpuHidHaarClassifierCascade
     _ALIGNED_ON(4) int p2 ;
     _ALIGNED_ON(4) int p3 ;
     _ALIGNED_ON(4) float inv_window_area ;
-    // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
 } GpuHidHaarClassifierCascade;
 #else
 #define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
 
-typedef struct _ALIGNED_ON(128) GpuHidHaarFeature
-{
-    struct _ALIGNED_ON(32)
-{
-    int    p0 _ALIGNED_ON(4);
-    int    p1 _ALIGNED_ON(4);
-    int    p2 _ALIGNED_ON(4);
-    int    p3 _ALIGNED_ON(4);
-    float weight  _ALIGNED_ON(4);
-}
-rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32);
-}
-GpuHidHaarFeature;
-
-
 typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode
 {
     int p[CV_HAAR_FEATURE_MAX][4] _ALIGNED_ON(64);
     float weight[CV_HAAR_FEATURE_MAX];// _ALIGNED_ON(16);
     float threshold;// _ALIGNED_ON(4);
-    float alpha[2] _ALIGNED_ON(8);
+    float alpha[3] _ALIGNED_ON(16);
     int left _ALIGNED_ON(4);
     int right _ALIGNED_ON(4);
 }
@@ -288,7 +246,6 @@ typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
     int p2 _ALIGNED_ON(4);
     int p3 _ALIGNED_ON(4);
     float inv_window_area _ALIGNED_ON(4);
-    // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
 } GpuHidHaarClassifierCascade;
 #endif
 
@@ -296,36 +253,6 @@ const int icv_object_win_border = 1;
 const float icv_stage_threshold_bias = 0.0001f;
 double globaltime = 0;
 
-
-// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count )
-// {
-//     CvHaarClassifierCascade *cascade = 0;
-
-//     int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier);
-
-//     if( stage_count <= 0 )
-//         CV_Error( CV_StsOutOfRange, "Number of stages should be positive" );
-
-//     cascade = (CvHaarClassifierCascade *)cvAlloc( block_size );
-//     memset( cascade, 0, block_size );
-
-//     cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1);
-//     cascade->flags = CV_HAAR_MAGIC_VAL;
-//     cascade->count = stage_count;
-
-//     return cascade;
-// }
-
-//static int globalcounter = 0;
-
-// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade )
-// {
-//     if( _cascade && *_cascade )
-//     {
-//         cvFree( _cascade );
-//     }
-// }
-
 /* create more efficient internal representation of haar classifier cascade */
 static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier)
 {
@@ -441,24 +368,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
         hid_stage_classifier->two_rects = 1;
         haar_classifier_ptr += stage_classifier->count;
 
-        /*
-        hid_stage_classifier->parent = (stage_classifier->parent == -1)
-        ? NULL : stage_classifier_ptr + stage_classifier->parent;
-        hid_stage_classifier->next = (stage_classifier->next == -1)
-        ? NULL : stage_classifier_ptr + stage_classifier->next;
-        hid_stage_classifier->child = (stage_classifier->child == -1)
-        ? NULL : stage_classifier_ptr + stage_classifier->child;
-
-        out->is_tree |= hid_stage_classifier->next != NULL;
-        */
-
         for( j = 0; j < stage_classifier->count; j++ )
         {
             CvHaarClassifier *classifier         = stage_classifier->classifier + j;
             GpuHidHaarClassifier *hid_classifier = hid_stage_classifier->classifier + j;
             int node_count = classifier->count;
 
-            //   float* alpha_ptr = (float*)(haar_node_ptr + node_count);
             float *alpha_ptr = &haar_node_ptr->alpha[0];
 
             hid_classifier->count = node_count;
@@ -485,16 +400,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
                     node->p[2][3] = 0;
                     node->weight[2] = 0;
                 }
-                //   memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) );
                 else
                     hid_stage_classifier->two_rects = 0;
-            }
-
-            memcpy( alpha_ptr, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
-            haar_node_ptr = haar_node_ptr + 1;
-            // (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*));
-            //   (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1);
 
+                memcpy( node->alpha, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
+                haar_node_ptr = haar_node_ptr + 1;
+            }
             out->is_stump_based &= node_count == 1;
         }
     }
@@ -507,25 +418,19 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
 
 
 #define sum_elem_ptr(sum,row,col)  \
-       ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
+    ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
 
 #define sqsum_elem_ptr(sqsum,row,col)  \
-       ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
+    ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
 
 #define calc_sum(rect,offset) \
-       ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
+    ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
 
 
 static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade,
-                                      /*   const CvArr* _sum,
-                                      const CvArr* _sqsum,
-                                      const CvArr* _tilted_sum,*/
                                       double scale,
                                       int step)
 {
-    //   CvMat sum_stub, *sum = (CvMat*)_sum;
-    //   CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
-    //   CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
     GpuHidHaarClassifierCascade *cascade;
     int coi0 = 0, coi1 = 0;
     int i;
@@ -541,61 +446,25 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
     if( scale <= 0 )
         CV_Error( CV_StsOutOfRange, "Scale must be positive" );
 
-    //   sum = cvGetMat( sum, &sum_stub, &coi0 );
-    //   sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
-
     if( coi0 || coi1 )
         CV_Error( CV_BadCOI, "COI is not supported" );
 
-    //   if( !CV_ARE_SIZES_EQ( sum, sqsum ))
-    //       CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
-
-    //   if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
-    //       CV_MAT_TYPE(sum->type) != CV_32SC1 )
-    //       CV_Error( CV_StsUnsupportedFormat,
-    //       "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
-
     if( !_cascade->hid_cascade )
         gpuCreateHidHaarClassifierCascade(_cascade, &datasize, &total);
 
     cascade = (GpuHidHaarClassifierCascade *) _cascade->hid_cascade;
     stage_classifier = (GpuHidHaarStageClassifier *) (cascade + 1);
 
-    if( cascade->has_tilted_features )
-    {
-        //    tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
-
-        //    if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
-        //        CV_Error( CV_StsUnsupportedFormat,
-        //        "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
-
-        //    if( sum->step != tilted->step )
-        //        CV_Error( CV_StsUnmatchedSizes,
-        //        "Sum and tilted_sum must have the same stride (step, widthStep)" );
-
-        //    if( !CV_ARE_SIZES_EQ( sum, tilted ))
-        //        CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
-        //  cascade->tilted = *tilted;
-    }
-
     _cascade->scale = scale;
     _cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale );
     _cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale );
 
-    //cascade->sum = *sum;
-    //cascade->sqsum = *sqsum;
-
     equRect.x = equRect.y = cvRound(scale);
     equRect.width = cvRound((_cascade->orig_window_size.width - 2) * scale);
     equRect.height = cvRound((_cascade->orig_window_size.height - 2) * scale);
     weight_scale = 1. / (equRect.width * equRect.height);
     cascade->inv_window_area = weight_scale;
 
-    // cascade->pq0 = equRect.y * step + equRect.x;
-    // cascade->pq1 = equRect.y * step + equRect.x + equRect.width ;
-    // cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x;
-    // cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ;
-
     cascade->pq0 = equRect.x;
     cascade->pq1 = equRect.y;
     cascade->pq2 = equRect.x + equRect.width;
@@ -618,10 +487,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
             {
                 CvHaarFeature *feature =
                     &_cascade->stage_classifier[i].classifier[j].haar_feature[l];
-                /*  GpuHidHaarClassifier* classifier =
-                cascade->stage_classifier[i].classifier + j; */
-                //GpuHidHaarFeature* hidfeature =
-                //    &cascade->stage_classifier[i].classifier[j].node[l].feature;
                 GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l];
                 double sum0 = 0, area0 = 0;
                 CvRect r[3];
@@ -636,8 +501,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
                 /* align blocks */
                 for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
                 {
-                    //if( !hidfeature->rect[k].p0 )
-                    //    break;
                     if(!hidnode->p[k][0])
                         break;
                     r[k] = feature->rect[k].r;
@@ -717,15 +580,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
 
                     if( !feature->tilted )
                     {
-                        /*     hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x;
-                        hidfeature->rect[k].p1 = tr.y * sum->cols + tr.x + tr.width;
-                        hidfeature->rect[k].p2 = (tr.y + tr.height) * sum->cols + tr.x;
-                        hidfeature->rect[k].p3 = (tr.y + tr.height) * sum->cols + tr.x + tr.width;
-                        */
-                        /*hidnode->p0[k] = tr.y * step + tr.x;
-                        hidnode->p1[k] = tr.y * step + tr.x + tr.width;
-                        hidnode->p2[k] = (tr.y + tr.height) * step + tr.x;
-                        hidnode->p3[k] = (tr.y + tr.height) * step + tr.x + tr.width;*/
                         hidnode->p[k][0] = tr.x;
                         hidnode->p[k][1] = tr.y;
                         hidnode->p[k][2] = tr.x + tr.width;
@@ -733,37 +587,24 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
                     }
                     else
                     {
-                        /*    hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width;
-                        hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * tilted->cols + tr.x + tr.width - tr.height;
-                        hidfeature->rect[k].p0 = tr.y * tilted->cols + tr.x;
-                        hidfeature->rect[k].p1 = (tr.y + tr.height) * tilted->cols + tr.x - tr.height;
-                        */
-
                         hidnode->p[k][2] = (tr.y + tr.width) * step + tr.x + tr.width;
                         hidnode->p[k][3] = (tr.y + tr.width + tr.height) * step + tr.x + tr.width - tr.height;
                         hidnode->p[k][0] = tr.y * step + tr.x;
                         hidnode->p[k][1] = (tr.y + tr.height) * step + tr.x - tr.height;
                     }
-
-                    //hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
                     hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
                     if( k == 0 )
                         area0 = tr.width * tr.height;
                     else
-                        //sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
                         sum0 += hidnode->weight[k] * tr.width * tr.height;
                 }
-
-                // hidfeature->rect[0].weight = (float)(-sum0/area0);
                 hidnode->weight[0] = (float)(-sum0 / area0);
             } /* l */
         } /* j */
     }
 }
 
-static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
-                             /*double scale=0.0,*/
-                             /*int step*/)
+static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade)
 {
     GpuHidHaarClassifierCascade *cascade;
     int i;
@@ -817,11 +658,7 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
                     if(!hidnode->p[k][0])
                         break;
                     r[k] = feature->rect[k].r;
-                    //                                         base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
-                    //                                         base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
-                    //                                         base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
-                    //                                         base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
-                }
+               }
 
                 nr = k;
                 for( k = 0; k < nr; k++ )
@@ -839,7 +676,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
                     hidnode->p[k][3] = tr.height;
                     hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
                 }
-                //hidnode->weight[0]=(float)(-sum0/area0);
             } /* l */
         } /* j */
     }
@@ -852,7 +688,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
 
     const double GROUP_EPS = 0.2;
     CvSeq *result_seq = 0;
-    cv::Ptr<CvMemStorage> temp_storage;
 
     cv::ConcurrentRectVector allCandidates;
     std::vector<cv::Rect> rectList;
@@ -910,6 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
     if( gimg.cols < minSize.width || gimg.rows < minSize.height )
         CV_Error(CV_StsError, "Image too small");
 
+    cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
     if( (flags & CV_HAAR_SCALE_IMAGE) )
     {
         CvSize winSize0 = cascade->orig_window_size;
@@ -952,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
 
         size_t blocksize = 8;
         size_t localThreads[3] = { blocksize, blocksize , 1 };
-        size_t globalThreads[3] = { grp_per_CU * gsum.clCxt->computeUnits() *localThreads[0],
+        size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0],
                                     localThreads[1], 1
                                   };
         int outputsz = 256 * globalThreads[0] / localThreads[0];
@@ -997,7 +833,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
 
         stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
-        cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
         openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
 
         nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
@@ -1044,7 +879,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
         args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
 
-        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
+        const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+
+        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
 
         openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
 
@@ -1059,6 +896,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
         openCLSafeCall(clReleaseMemObject(nodebuffer));
         openCLSafeCall(clReleaseMemObject(candidatebuffer));
+
     }
     else
     {
@@ -1118,7 +956,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
                        sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
         nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
                                         nodenum * sizeof(GpuHidHaarTreeNode));
-        cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
         openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0,
                                             nodenum * sizeof(GpuHidHaarTreeNode),
                                             node, 0, NULL, NULL));
@@ -1160,7 +997,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
             args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
 
             size_t globalThreads2[3] = {nodenum, 1, 1};
-
             openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
         }
 
@@ -1195,8 +1031,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
         args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
         args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
-
-        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
+        const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
 
         candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
 
@@ -1284,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     int blocksize = 8;
     int grp_per_CU = 12;
     size_t localThreads[3] = { blocksize, blocksize, 1 };
-    size_t globalThreads[3] = { grp_per_CU * Context::getContext()->computeUnits() * localThreads[0],
+    size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0],
         localThreads[1],
         1 };
     int outputsz = 256 * globalThreads[0] / localThreads[0];
@@ -1300,8 +1136,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     CvHaarClassifierCascade      *cascade = oldCascade;
     GpuHidHaarClassifierCascade  *gcascade;
     GpuHidHaarStageClassifier    *stage;
-    GpuHidHaarClassifier         *classifier;
-    GpuHidHaarTreeNode           *node;
 
     if( CV_MAT_DEPTH(gimg.type()) != CV_8U )
         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
@@ -1314,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     }
 
     int *candidate;
-
+    cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
     if( (flags & CV_HAAR_SCALE_IMAGE) )
     {
         int indexy = 0;
@@ -1340,19 +1174,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
 
         gcascade   = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
         stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
-        classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
-        node       = (GpuHidHaarTreeNode *)(classifier->node);
-
-        gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
-
-        cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
-                                            sizeof(GpuHidHaarStageClassifier) * gcascade->count,
-                                            stage, 0, NULL, NULL));
-
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
-                                            m_nodenum * sizeof(GpuHidHaarTreeNode),
-                                            node, 0, NULL, NULL));
 
         int startstage = 0;
         int endstage = gcascade->count;
@@ -1389,17 +1210,23 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
         args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
         args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
 
-        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
+        const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+
+        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
 
         candidate = (int *)malloc(4 * sizeof(int) * outputsz);
         memset(candidate, 0, 4 * sizeof(int) * outputsz);
+
         openCLReadBuffer( gsum.clCxt, ((OclBuffers *)buffers)->candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
 
         for(int i = 0; i < outputsz; i++)
+        {
             if(candidate[4 * i + 2] != 0)
+            {
                 allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
                 candidate[4 * i + 2], candidate[4 * i + 3]));
-
+            }
+        }
         free((void *)candidate);
         candidate = NULL;
     }
@@ -1407,56 +1234,14 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     {
         cv::ocl::integral(gimg, gsum, gsqsum);
 
-        gpuSetHaarClassifierCascade(cascade);
-
         gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
-        stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
-        classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
-        node       = (GpuHidHaarTreeNode *)(classifier->node);
-
-        cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
-                                            m_nodenum * sizeof(GpuHidHaarTreeNode),
-                                            node, 0, NULL, NULL));
-
-        cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount);
-        float *correction = (float *)malloc(sizeof(float) * m_loopcount);
-        int startstage = 0;
-        int endstage = gcascade->count;
-        double factor;
-        for(int i = 0; i < m_loopcount; i++)
-        {
-            factor = scalev[i];
-            int equRect_x = (int)(factor * gcascade->p0 + 0.5);
-            int equRect_y = (int)(factor * gcascade->p1 + 0.5);
-            int equRect_w = (int)(factor * gcascade->p3 + 0.5);
-            int equRect_h = (int)(factor * gcascade->p2 + 0.5);
-            p[i].s[0] = equRect_x;
-            p[i].s[1] = equRect_y;
-            p[i].s[2] = equRect_x + equRect_w;
-            p[i].s[3] = equRect_y + equRect_h;
-            correction[i] = 1. / (equRect_w * equRect_h);
-            int startnodenum = m_nodenum * i;
-            float factor2 = (float)factor;
-
-            vector<pair<size_t, const void *> > args1;
-            args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
-            args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
-            args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
-            args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
-            args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
-
-            size_t globalThreads2[3] = {m_nodenum, 1, 1};
-
-            openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
-        }
 
         int step = gsum.step / 4;
         int startnode = 0;
         int splitstage = 3;
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
-        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
+
+        int startstage = 0;
+        int endstage = gcascade->count;
 
         vector<pair<size_t, const void *> > args;
         args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
@@ -1477,7 +1262,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
         args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer ));
         args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum ));
 
-        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
+        const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
 
         candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL);
 
@@ -1487,12 +1273,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
                 allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
                 candidate[4 * i + 2], candidate[4 * i + 3]));
         }
-
-        free(p);
-        free(correction);
         clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0);
     }
-
     rectList.resize(allCandidates.size());
     if(!allCandidates.empty())
         std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
@@ -1510,6 +1292,10 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
     const int outputsz, const size_t localThreads[],
     CvSize minSize, CvSize maxSize)
 {
+    if(initialized)
+    {
+        return; // we only allow one time initialization
+    }
     CvHaarClassifierCascade      *cascade = oldCascade;
 
     if( !CV_IS_HAAR_CLASSIFIER(cascade) )
@@ -1525,7 +1311,9 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
     int totalclassifier=0;
 
     if( !cascade->hid_cascade )
+    {
         gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
+    }
 
     if( maxSize.height == 0 || maxSize.width == 0 )
     {
@@ -1547,6 +1335,78 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
     m_minSize = minSize;
     m_maxSize = maxSize;
 
+    // initialize nodes
+    GpuHidHaarClassifierCascade  *gcascade;
+    GpuHidHaarStageClassifier    *stage;
+    GpuHidHaarClassifier         *classifier;
+    GpuHidHaarTreeNode           *node;
+    cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
+    if( (flags & CV_HAAR_SCALE_IMAGE) )
+    {
+        gcascade   = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
+        stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
+        classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
+        node       = (GpuHidHaarTreeNode *)(classifier->node);
+
+        gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
+
+        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
+            sizeof(GpuHidHaarStageClassifier) * gcascade->count,
+            stage, 0, NULL, NULL));
+
+        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
+                                            m_nodenum * sizeof(GpuHidHaarTreeNode),
+                                            node, 0, NULL, NULL));
+    }
+    else
+    {
+        gpuSetHaarClassifierCascade(cascade);
+
+        gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
+        stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
+        classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
+        node       = (GpuHidHaarTreeNode *)(classifier->node);
+
+        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
+            m_nodenum * sizeof(GpuHidHaarTreeNode),
+            node, 0, NULL, NULL));
+
+        cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount);
+        float *correction = (float *)malloc(sizeof(float) * m_loopcount);
+        double factor;
+        for(int i = 0; i < m_loopcount; i++)
+        {
+            factor = scalev[i];
+            int equRect_x = (int)(factor * gcascade->p0 + 0.5);
+            int equRect_y = (int)(factor * gcascade->p1 + 0.5);
+            int equRect_w = (int)(factor * gcascade->p3 + 0.5);
+            int equRect_h = (int)(factor * gcascade->p2 + 0.5);
+            p[i].s[0] = equRect_x;
+            p[i].s[1] = equRect_y;
+            p[i].s[2] = equRect_x + equRect_w;
+            p[i].s[3] = equRect_y + equRect_h;
+            correction[i] = 1. / (equRect_w * equRect_h);
+            int startnodenum = m_nodenum * i;
+            float factor2 = (float)factor;
+
+            vector<pair<size_t, const void *> > args1;
+            args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
+            args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
+            args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
+            args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
+            args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
+
+            size_t globalThreads2[3] = {m_nodenum, 1, 1};
+
+            openCLExecuteKernel(Context::getContext(), &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
+        }
+        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
+
+        free(p);
+        free(correction);
+    }
     initialized = true;
 }
 
@@ -1645,6 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
     CvSize sz;
     CvSize winSize0 = oldCascade->orig_window_size;
     detect_piramid_info *scaleinfo;
+    cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
     if (flags & CV_HAAR_SCALE_IMAGE)
     {
         for(factor = 1.f;; factor *= scaleFactor)
@@ -1746,7 +1607,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
         ((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
     }
 
-    openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
+    openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
         sizeof(detect_piramid_info)*loopcount,
         scaleinfo, 0, NULL, NULL));
     free(scaleinfo);
@@ -1758,7 +1619,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
                                                  const std::vector<cv::Rect> &rectList,
                                                  const std::vector<int> &rweights)
 {
-    CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), cvCreateMemStorage(0) );
+    MemStorage tempStorage(cvCreateMemStorage(0));
+    CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), tempStorage );
 
     if( findBiggestObject && rectList.size() )
     {
@@ -1794,167 +1656,30 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
 
 void cv::ocl::OclCascadeClassifierBuf::release()
 {
-    openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
-    openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
-    openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
-    openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
-
-    if( (m_flags & CV_HAAR_SCALE_IMAGE) )
-    {
-        cvFree(&oldCascade->hid_cascade);
-    }
-    else
+    if(initialized)
     {
-        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
-        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
-        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
-    }
+        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
+        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
+        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
+        openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
+
+        if( (m_flags & CV_HAAR_SCALE_IMAGE) )
+        {
+            cvFree(&oldCascade->hid_cascade);
+        }
+        else
+        {
+            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
+            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
+            openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
+        }
 
-    free(buffers);
-    buffers = NULL;
+        free(buffers);
+        buffers = NULL;
+        initialized = false;
+    }
 }
 
 #ifndef _MAX_PATH
 #define _MAX_PATH 1024
 #endif
-
-
-/****************************************************************************************\
-*                                  Persistence functions                                 *
-\****************************************************************************************/
-
-/* field names */
-
-#define ICV_HAAR_SIZE_NAME            "size"
-#define ICV_HAAR_STAGES_NAME          "stages"
-#define ICV_HAAR_TREES_NAME             "trees"
-#define ICV_HAAR_FEATURE_NAME             "feature"
-#define ICV_HAAR_RECTS_NAME                 "rects"
-#define ICV_HAAR_TILTED_NAME                "tilted"
-#define ICV_HAAR_THRESHOLD_NAME           "threshold"
-#define ICV_HAAR_LEFT_NODE_NAME           "left_node"
-#define ICV_HAAR_LEFT_VAL_NAME            "left_val"
-#define ICV_HAAR_RIGHT_NODE_NAME          "right_node"
-#define ICV_HAAR_RIGHT_VAL_NAME           "right_val"
-#define ICV_HAAR_STAGE_THRESHOLD_NAME   "stage_threshold"
-#define ICV_HAAR_PARENT_NAME            "parent"
-#define ICV_HAAR_NEXT_NAME              "next"
-
-static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */)
-{
-    return 1;
-}
-
-namespace cv
-{
-namespace ocl
-{
-
-struct gpuHaarDetectObjects_ScaleImage_Invoker
-{
-    gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade,
-            int _stripSize, double _factor,
-            const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1,
-            Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec )
-    {
-        cascade = _cascade;
-        stripSize = _stripSize;
-        factor = _factor;
-        sum1 = _sum1;
-        sqsum1 = _sqsum1;
-        norm1 = _norm1;
-        mask1 = _mask1;
-        equRect = _equRect;
-        vec = &_vec;
-    }
-
-    void operator()( const BlockedRange &range ) const
-    {
-        Size winSize0 = cascade->orig_window_size;
-        Size winSize(cvRound(winSize0.width * factor), cvRound(winSize0.height * factor));
-        int y1 = range.begin() * stripSize, y2 = min(range.end() * stripSize, sum1.rows - 1 - winSize0.height);
-        Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1);
-        int x, y, ystep = factor > 2 ? 1 : 2;
-
-        for( y = y1; y < y2; y += ystep )
-            for( x = 0; x < ssz.width; x += ystep )
-            {
-                if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 )
-                    vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
-                                        winSize.width, winSize.height));
-            }
-    }
-
-    const CvHaarClassifierCascade *cascade;
-    int stripSize;
-    double factor;
-    Mat sum1, sqsum1, *norm1, *mask1;
-    Rect equRect;
-    ConcurrentRectVector *vec;
-};
-
-
-struct gpuHaarDetectObjects_ScaleCascade_Invoker
-{
-    gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade,
-            Size _winsize, const Range &_xrange, double _ystep,
-            size_t _sumstep, const int **_p, const int **_pq,
-            ConcurrentRectVector &_vec )
-    {
-        cascade = _cascade;
-        winsize = _winsize;
-        xrange = _xrange;
-        ystep = _ystep;
-        sumstep = _sumstep;
-        p = _p;
-        pq = _pq;
-        vec = &_vec;
-    }
-
-    void operator()( const BlockedRange &range ) const
-    {
-        int iy, startY = range.begin(), endY = range.end();
-        const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3];
-        const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3];
-        bool doCannyPruning = p0 != 0;
-        int sstep = (int)(sumstep / sizeof(p0[0]));
-
-        for( iy = startY; iy < endY; iy++ )
-        {
-            int ix, y = cvRound(iy * ystep), ixstep = 1;
-            for( ix = xrange.start; ix < xrange.end; ix += ixstep )
-            {
-                int x = cvRound(ix * ystep); // it should really be ystep, not ixstep
-
-                if( doCannyPruning )
-                {
-                    int offset = y * sstep + x;
-                    int s = p0[offset] - p1[offset] - p2[offset] + p3[offset];
-                    int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset];
-                    if( s < 100 || sq < 20 )
-                    {
-                        ixstep = 2;
-                        continue;
-                    }
-                }
-
-                int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */);
-                if( result > 0 )
-                    vec->push_back(Rect(x, y, winsize.width, winsize.height));
-                ixstep = result != 0 ? 1 : 2;
-            }
-        }
-    }
-
-    const CvHaarClassifierCascade *cascade;
-    double ystep;
-    size_t sumstep;
-    Size winsize;
-    Range xrange;
-    const int **p;
-    const int **pq;
-    ConcurrentRectVector *vec;
-};
-
-}
-}
index 7a13324..a351458 100644 (file)
@@ -1578,8 +1578,9 @@ static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string
                                     size_t globalThreads[3], size_t localThreads[3], 
                                     vector< pair<size_t, const void *> > &args)
 {
-    size_t wave_size = 0;
-    queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
+    cl_kernel kernel = openCLGetKernelFromSource(clCxt, source, kernelName);
+    size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
+    openCLSafeCall(clReleaseKernel(kernel));
     if (wave_size <= 16)
     {
         char build_options[64];
index ee1e92a..3dbd68d 100644 (file)
@@ -25,6 +25,7 @@
 //    Xu Pang, pangxu010@163.com
 //    Wu Zailong, bullet@yeah.net
 //    Wenju He, wenju@multicorewareinc.com
+//    Sen Liu, swjtuls1987@126.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -80,6 +81,7 @@ namespace cv
         extern const char *imgproc_calcHarris;
         extern const char *imgproc_calcMinEigenVal;
         extern const char *imgproc_convolve;
+        extern const char *imgproc_clahe;
         ////////////////////////////////////OpenCL call wrappers////////////////////////////
 
         template <typename T> struct index_and_sizeof;
@@ -269,7 +271,7 @@ namespace cv
             size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
             size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
 
-
+            float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
             vector< pair<size_t, const void *> > args;
             if(map1.channels() == 2)
             {
@@ -289,9 +291,8 @@ namespace cv
                 args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
                 args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
                 args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
-                float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
-
-               if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
+                
+                if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
                 {
                     args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
                 }
@@ -325,7 +326,6 @@ namespace cv
                 }
                 else
                 {
-                    float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
                     args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
                 }
             }
@@ -1207,30 +1207,41 @@ namespace cv
         void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
                           double k, int borderType)
         {
+            oclMat dx, dy;
+            cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType);
+        }
+
+        void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
+                          double k, int borderType)
+        {
             if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
             {
                 CV_Error(CV_GpuNotSupported, "select device don't support double");
             }
             CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
-            oclMat Dx, Dy;
             CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
-            extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
+            extractCovData(src, dx, dy, blockSize, ksize, borderType);
             dst.create(src.size(), CV_32F);
-            corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), Dx, Dy, dst, borderType);
+            corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
         }
 
         void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
         {
+            oclMat dx, dy;
+            cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType);
+        }
+        
+        void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
+        {
             if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
             {
                 CV_Error(CV_GpuNotSupported, "select device don't support double");
             }
             CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
-            oclMat Dx, Dy;
             CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
-            extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
+            extractCovData(src, dx, dy, blockSize, ksize, borderType);
             dst.create(src.size(), CV_32F);
-            corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType);
+            corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
         }
         /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
         static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
@@ -1502,6 +1513,189 @@ namespace cv
             openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1);
             LUT(mat_src, lut, mat_dst);
         }
+
+        ////////////////////////////////////////////////////////////////////////
+        // CLAHE
+        namespace clahe
+        {
+            // NOTE: despite the name, this does not return a quotient. It returns
+            // `total` rounded up to the nearest multiple of `grain`, which is what
+            // the kernel launchers in this namespace need for a global work size.
+            inline int divUp(int total, int grain)
+            {
+                const int groups = (total + grain - 1) / grain;
+                return groups * grain;
+            }
+
+            // Launches the "calcLut" kernel of the imgproc_clahe program with one
+            // 32x8 work-group per tile (tilesX x tilesY groups in total).
+            //   src       - input matrix; its data pointer and step are kernel args
+            //   dst       - output matrix; its data pointer and step are kernel args
+            //   tilesX/Y  - tile grid; tilesX is also a kernel arg, tilesY only
+            //               sizes the launch
+            //   tileSize  - width/height of one tile, forwarded as a cl_int2
+            //   clipLimit - forwarded to the kernel unchanged
+            //   lutScale  - forwarded to the kernel unchanged
+            // The program is built with " -D CPU" on CPU devices and with
+            // " -D WAVE_SIZE=<n>" otherwise.
+            static void calcLut(const oclMat &src, oclMat &dst,
+                const int tilesX, const int tilesY, const cv::Size tileSize,
+                const int clipLimit, const float lutScale)
+            {
+                cl_int2 tile_size;
+                tile_size.s[0] = tileSize.width;
+                tile_size.s[1] = tileSize.height;
+
+                // The vector stores raw pointers to the values below, so every
+                // argument must stay alive until the kernel has been enqueued.
+                std::vector<pair<size_t , const void *> > args;
+                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
+                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
+                args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit ));
+                args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale ));
+
+                String kernelName = "calcLut";
+                size_t localThreads[3]  = { 32, 8, 1 };
+                size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
+                bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+                if (is_cpu)
+                {
+                    openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU");
+                }
+                else
+                {
+                    // A kernel object is created only to ask the device for its
+                    // wave size; it is released again before the real launch.
+                    cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName);
+                    int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+                    openCLSafeCall(clReleaseKernel(kernel));
+
+                    // " -D WAVE_SIZE=" is 14 characters, so 32 bytes holds any int
+                    // value plus the terminator (the former 20-byte buffer did not).
+                    // The buffer is a plain local rather than a function-level
+                    // static so that calls do not share one writable array.
+                    // NOTE(review): assumes openCLExecuteKernel does not keep the
+                    // build_options pointer after it returns - confirm.
+                    char opt[32] = {0};
+                    sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
+                    openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt);
+                }
+            }
+
+            // Launches the "transform" kernel of the imgproc_clahe program over a
+            // global range of src.cols x src.rows, each rounded up to a whole
+            // number of 32x8 work-groups.
+            //   src, dst, lut - data pointers and steps are passed as kernel args
+            //   tilesX/Y      - tile grid, forwarded to the kernel
+            //   tileSize      - width/height of one tile, forwarded as a cl_int2
+            static void transform(const oclMat &src, oclMat &dst, const oclMat &lut,
+                const int tilesX, const int tilesY, const cv::Size tileSize)
+            {
+                // Launch geometry.
+                size_t localThreads[3]  = { 32, 8, 1 };
+                size_t globalThreads[3] = { divUp(src.cols, localThreads[0]), divUp(src.rows, localThreads[1]), 1 };
+
+                cl_int2 tileDims;
+                tileDims.s[0] = tileSize.width;
+                tileDims.s[1] = tileSize.height;
+
+                // Kernel arguments, in the order the kernel declares them. Only
+                // pointers are stored, so the referenced objects must outlive the call.
+                std::vector<pair<size_t , const void *> > kernelArgs;
+                // buffers
+                kernelArgs.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
+                kernelArgs.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
+                kernelArgs.push_back( std::make_pair( sizeof(cl_mem), (void *)&lut.data ));
+                // row strides
+                kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
+                kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
+                kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.step ));
+                // image extent
+                kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols ));
+                kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows ));
+                // tiling
+                kernelArgs.push_back( std::make_pair( sizeof(cl_int2), (void *)&tileDims ));
+                kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
+                kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY ));
+
+                String kernelName("transform");
+                openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, kernelArgs, -1, -1);
+            }
+        }
+
+        namespace
+        {
+            // OpenCL implementation of the cv::ocl::CLAHE interface. Holds the
+            // algorithm parameters plus two oclMat scratch buffers that are
+            // reused between apply() calls.
+            class CLAHE_Impl : public cv::ocl::CLAHE
+            {
+            public:
+                // Defaults: clip limit 40.0 on an 8x8 tile grid.
+                CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
+
+                // NOTE(review): declared here; no definition is visible in this
+                // part of the file - confirm one exists.
+                cv::AlgorithmInfo* info() const;
+
+                // Equalizes src into dst; src must be CV_8UC1.
+                void apply(const oclMat &src, oclMat &dst);
+
+                void setClipLimit(double clipLimit);
+                double getClipLimit() const;
+
+                // tileGridSize.width / .height map to tilesX_ / tilesY_.
+                void setTilesGridSize(cv::Size tileGridSize);
+                cv::Size getTilesGridSize() const;
+
+                // Releases the scratch buffers below.
+                void collectGarbage();
+
+            private:
+                double clipLimit_;  // clip limit as given by the caller; apply() rescales it per tile
+                int tilesX_;        // number of tiles across
+                int tilesY_;        // number of tiles down
+
+                oclMat srcExt_;     // padded copy of the input, used when it does not divide evenly into tiles
+                oclMat lut_;        // tilesX_ * tilesY_ rows of 256 CV_8UC1 entries, filled by clahe::calcLut
+            };
+
+            // Stores the clip limit and tile grid; the two oclMat scratch members
+            // are default-constructed.
+            CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY)
+                : clipLimit_(clipLimit)
+                , tilesX_(tilesX)
+                , tilesY_(tilesY)
+            {
+            }
+
+            // Applies CLAHE to `src` and writes the result to `dst`.
+            //   src - input image; must be CV_8UC1
+            //   dst - (re)created with the size and type of src
+            // Raises through CV_Assert when src has the wrong type or when the
+            // tile grid is not strictly positive.
+            void CLAHE_Impl::apply(const oclMat &src, oclMat &dst)
+            {
+                CV_Assert( src.type() == CV_8UC1 );
+                // Both grid dimensions are used as divisors below. They can be set
+                // to zero or a negative value through setTilesGridSize/createCLAHE,
+                // so reject that before any '%' or '/' is evaluated.
+                CV_Assert( tilesX_ > 0 && tilesY_ > 0 );
+
+                dst.create( src.size(), src.type() );
+
+                const int histSize = 256;
+
+                // One 256-entry lookup table per tile.
+                ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_8UC1, lut_);
+
+                cv::Size tileSize;
+                oclMat srcForLut;
+
+                if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
+                {
+                    tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
+                    srcForLut = src;
+                }
+                else
+                {
+                    // Pad on the bottom/right so the extended image splits evenly.
+                    // A dimension that already divides evenly still receives a full
+                    // tilesX_/tilesY_ of padding in this branch (N - 0 == N); the
+                    // result remains divisible.
+                    cv::ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar());
+
+                    tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
+                    srcForLut = srcExt_;
+                }
+
+                const int tileSizeTotal = tileSize.area();
+                const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
+
+                // Convert the clip limit into a per-tile count, never below 1.
+                // A non-positive clipLimit_ is forwarded as 0.
+                int clipLimit = 0;
+                if (clipLimit_ > 0.0)
+                {
+                    clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
+                    clipLimit = std::max(clipLimit, 1);
+                }
+
+                // The LUTs are computed from the (possibly padded) image, but the
+                // transform runs over the original, unpadded src.
+                clahe::calcLut(srcForLut, lut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale);
+                //finish();
+                clahe::transform(src, dst, lut_, tilesX_, tilesY_, tileSize);
+            }
+
+            // Replaces the stored clip limit; takes effect on the next apply().
+            void CLAHE_Impl::setClipLimit(double clipLimit)
+            {
+                this->clipLimit_ = clipLimit;
+            }
+
+            // Returns the clip limit exactly as it was last set.
+            double CLAHE_Impl::getClipLimit() const
+            {
+                return this->clipLimit_;
+            }
+
+            // Stores the tile grid: width -> tiles across, height -> tiles down.
+            // The values are stored as given, without validation.
+            void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
+            {
+                this->tilesX_ = tileGridSize.width;
+                this->tilesY_ = tileGridSize.height;
+            }
+
+            // Returns the tile grid as (tiles across, tiles down).
+            cv::Size CLAHE_Impl::getTilesGridSize() const
+            {
+                const cv::Size grid(tilesX_, tilesY_);
+                return grid;
+            }
+
+            // Releases both scratch matrices (padded source first, then the LUT).
+            void CLAHE_Impl::collectGarbage()
+            {
+                this->srcExt_.release();
+                this->lut_.release();
+            }
+        }
+
+        // Factory for the OpenCL CLAHE implementation: tileGridSize.width and
+        // .height become the number of tiles across and down.
+        cv::Ptr<cv::ocl::CLAHE> createCLAHE(double clipLimit, cv::Size tileGridSize)
+        {
+            cv::Ptr<cv::ocl::CLAHE> clahe(new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height));
+            return clahe;
+        }
+
         //////////////////////////////////bilateralFilter////////////////////////////////////////////////////
         static void
         oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d,
index 799c49c..6a2e8ad 100644 (file)
@@ -333,6 +333,10 @@ namespace cv
                     oclinfo.push_back(ocltmpinfo);
                 }
             }
+            if(devcienums > 0)
+            {
+                setDevice(oclinfo[0]);
+            }
             return devcienums;
         }
 
@@ -363,64 +367,43 @@ namespace cv
             clFinish(Context::getContext()->impl->clCmdQueue);
         }
 
-        void queryDeviceInfo(DEVICE_INFO info_type, void* info)
+        //template specializations of queryDeviceInfo
+        template<>
+        bool queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel)
         {
-            static Info::Impl* impl = Context::getContext()->impl;
-            switch(info_type)
-            {
-            case WAVEFRONT_SIZE:
-                {
-                    bool is_cpu = false;
-                    queryDeviceInfo(IS_CPU_DEVICE, &is_cpu);
-                    if(is_cpu)
-                    {
-                        *(int*)info = 1;
-                        return;
-                    }
-#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
-                    try
-                    {
-                        openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], 
-                            CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0));
-                    }
-                    catch(const cv::Exception&)
-#elif defined (CL_DEVICE_WARP_SIZE_NV)
-                    const int EXT_LEN = 4096 + 1 ;
-                    char extends_set[EXT_LEN];
-                    size_t extends_size;
-                    openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size));
-                    extends_set[EXT_LEN - 1] = 0;
-                    if(std::string(extends_set).find("cl_nv_device_attribute_query") != std::string::npos)
-                    {
-                        openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], 
-                            CL_DEVICE_WARP_SIZE_NV, sizeof(size_t), info, 0));
-                    }
-                    else
-#endif
-                    {
-                        // if no way left for us to query the warp size, we can get it from kernel group info
-                        static const char * _kernel_string = "__kernel void test_func() {}";
-                        cl_kernel kernel;
-                        kernel = openCLGetKernelFromSource(Context::getContext(), &_kernel_string, "test_func");
-                        openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum],
-                            CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), info, NULL));
-                    }
+            Info::Impl* impl = Context::getContext()->impl;
+            cl_device_type devicetype;
+            openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
+                CL_DEVICE_TYPE, sizeof(cl_device_type),
+                &devicetype, NULL));
+            return (devicetype == CVCL_DEVICE_TYPE_CPU);
+        }
 
-                }
-                break;
-            case IS_CPU_DEVICE:
-                {
-                    cl_device_type devicetype;
-                    openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], 
-                                    CL_DEVICE_TYPE, sizeof(cl_device_type), 
-                                    &devicetype, NULL));
-                    *(bool*)info = (devicetype == CVCL_DEVICE_TYPE_CPU);
-                }
-                break;
-            default:
-                CV_Error(-1, "Invalid device info type");
-                break;
+        // Shared implementation behind the WAVEFRONT_SIZE specializations of
+        // queryDeviceInfo. Returns, converted to _ty:
+        //  - 1 when the current device is a CPU (the kernel is not consulted);
+        //  - otherwise CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE as reported
+        //    by clGetKernelWorkGroupInfo for `kernel` on devices[devnum].
+        // `kernel` must be non-NULL on non-CPU devices (CV_Assert).
+        template<typename _ty>
+        static _ty queryWavesize(cl_kernel kernel)
+        {
+            size_t info = 0;
+            Info::Impl* impl = Context::getContext()->impl;
+            bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+            if(is_cpu)
+            {
+                return 1;
             }
+            CV_Assert(kernel != NULL);
+            openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum],
+                CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &info, NULL));
+            return static_cast<_ty>(info);
+        }
+
+        // WAVEFRONT_SIZE query returning size_t; forwards to queryWavesize.
+        template<>
+        size_t queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel)
+        {
+            return queryWavesize<size_t>(kernel);
+        }
+        // WAVEFRONT_SIZE query returning int; forwards to queryWavesize.
+        template<>
+        int queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel)
+        {
+            return queryWavesize<int>(kernel);
         }
 
         void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size)
@@ -934,6 +917,14 @@ namespace cv
         int Context::val = 0;
         static Mutex cs;
         static volatile int context_tear_down = 0;
+
+        // Reports whether the global OpenCL context is set up: Context::val is
+        // non-zero and the context's impl holds both a command queue and a
+        // cl_context. The checks run in this order and stop at the first failure,
+        // so clCxt is only dereferenced once val is non-zero.
+        bool initialized()
+        {
+            if(*((volatile int*)&Context::val) == 0)
+                return false;
+            if(Context::clCxt->impl->clCmdQueue == NULL)
+                return false;
+            return Context::clCxt->impl->oclcontext != NULL;
+        }
+
         Context* Context::getContext()
         {
             if(*((volatile int*)&val) != 1)
@@ -947,8 +938,6 @@ namespace cv
                         clCxt.reset(new Context);
                     std::vector<Info> oclinfo;
                     CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0);
-                    oclinfo[0].impl->setDevice(0, 0, 0);
-                    clCxt.get()->impl = oclinfo[0].impl->copy();
 
                     *((volatile int*)&val) = 1;
                 }
@@ -1073,7 +1062,7 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD  fdwReason, LPVOID )
         Context* cv_ctx = Context::getContext();
         if(cv_ctx)
         {
-            cl_context ctx = (cl_context)&(cv_ctx->impl->oclcontext);
+            cl_context ctx = cv_ctx->impl->oclcontext;
             if(ctx)
                 openCLSafeCall(clReleaseContext(ctx));
         }
index 3bcb870..75314fb 100644 (file)
 //
 //M*/
 
-#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
 #include "precomp.hpp"
 
+// Helpers for silencing one GCC warning around a region of code:
+//     GCC_DIAG_OFF(deprecated-declarations)  ...  GCC_DIAG_ON(deprecated-declarations)
+// The argument is the warning name without the leading "-W".
+//  - GCC >= 4.6:   OFF = "diagnostic push" + "ignored", ON = "diagnostic pop".
+//  - GCC 4.2..4.5: OFF = "ignored", ON switches the warning back to "warning".
+//  - older GCC:    both macros expand to nothing.
+// The macros are not defined at all for non-GCC compilers, so every use must
+// itself sit inside #ifdef __GNUC__.
+#ifdef __GNUC__
+#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402
+#define GCC_DIAG_STR(s) #s
+#define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y)
+# define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x)
+# define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x)
+# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
+#  define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \
+GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
+#  define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop)
+# else
+#  define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
+#  define GCC_DIAG_ON(x)  GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x))
+# endif
+#else
+# define GCC_DIAG_OFF(x)
+# define GCC_DIAG_ON(x)
+#endif
+#endif /* __GNUC__ */
+
 using namespace std;
 
 namespace cv
@@ -121,6 +140,9 @@ namespace cv
                                   build_options, finish_mode);
         }
 
+#ifdef __GNUC__
+        GCC_DIAG_OFF(deprecated-declarations)
+#endif
         cl_mem bindTexture(const oclMat &mat)
         {
             cl_mem texture;
@@ -156,7 +178,7 @@ namespace cv
                 format.image_channel_order     = CL_RGBA;
                 break;
             default:
-                CV_Error(-1, "Image forma is not supported");
+                CV_Error(-1, "Image format is not supported");
                 break;
             }
 #ifdef CL_VERSION_1_2
@@ -180,10 +202,6 @@ namespace cv
             else
 #endif
             {
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
                 texture = clCreateImage2D(
                     (cl_context)mat.clCxt->oclContext(),
                     CL_MEM_READ_WRITE,
@@ -193,9 +211,6 @@ namespace cv
                     0,
                     NULL,
                     &err);
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
             }
             size_t origin[] = { 0, 0, 0 };
             size_t region[] = { mat.cols, mat.rows, 1 };
@@ -225,6 +240,14 @@ namespace cv
             openCLSafeCall(err);
             return texture;
         }
+#ifdef __GNUC__
+        GCC_DIAG_ON(deprecated-declarations)
+#endif
+
+        // Creates a texture for `mat` via bindTexture and hands it to a
+        // Ptr<TextureCL>, together with the matrix's rows, cols and type.
+        Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
+        {
+            cl_mem texture = bindTexture(mat);
+            Ptr<TextureCL> wrapped(new TextureCL(texture, mat.rows, mat.cols, mat.type()));
+            return wrapped;
+        }
         void releaseTexture(cl_mem& texture)
         {
             openCLFree(texture);
index 7d4b0a7..070ced4 100644 (file)
@@ -127,7 +127,7 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs
 #ifdef dst_align
 #undef dst_align
 #endif
-#define dst_align ((dst_offset >> 1) & 3)
+#define dst_align ((dst_offset / 2) & 3)
         int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
         int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
 
@@ -165,7 +165,7 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse
 #ifdef dst_align
 #undef dst_align
 #endif
-#define dst_align ((dst_offset >> 1) & 3)
+#define dst_align ((dst_offset / 2) & 3)
         int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
         int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
 
@@ -335,7 +335,7 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step,
 #ifdef dst_align
 #undef dst_align
 #endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
         int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
         int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
         int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -375,7 +375,7 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i
 #ifdef dst_align
 #undef dst_align
 #endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
         int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
         int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
         int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
@@ -507,7 +507,7 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i
 #ifdef dst_align
 #undef dst_align
 #endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
         int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
         int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
         int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
index fdf6592..3dbd376 100644 (file)
@@ -126,7 +126,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global   ushort *src1, int src1_st
 #ifdef dst_align
 #undef dst_align
 #endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
         int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
         int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
 
@@ -164,7 +164,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global   short *src1, int src1_ste
 #ifdef dst_align
 #undef dst_align
 #endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
         int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
         int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
 
@@ -288,7 +288,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global   uchar *src1, int src1_ste
 #ifdef dst_align
 #undef dst_align
 #endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
         int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
         int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
 
index e1cc9f6..40988f5 100644 (file)
@@ -277,9 +277,15 @@ __kernel void arithm_mul_D6 (__global double *src1, int src1_step, int src1_offs
 }
 #endif
 
+#ifdef DOUBLE_SUPPORT
+#define SCALAR_TYPE double
+#else
+#define SCALAR_TYPE float
+#endif
+
 __kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offset,
                               __global float *dst,  int dst_step,  int dst_offset,
-                              int rows, int cols, int dst_step1, float scalar)
+                              int rows, int cols, int dst_step1, SCALAR_TYPE scalar)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
index 96a2f51..8535eb1 100644 (file)
@@ -82,9 +82,9 @@
 //////////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////Macro for define elements number per thread/////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-#define ANCHOR                  3
-#define ANX                     1
-#define ANY                     1
+//#define ANCHOR                  3
+//#define ANX                     1
+//#define ANY                     1
 
 #define ROWS_PER_GROUP          4
 #define ROWS_PER_GROUP_BITS     2
@@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x
 
         for(int i = 0; i < ANCHOR; i++)
         {
-#pragma unroll 3
+#pragma unroll
             for(int j = 0; j < ANCHOR; j++)
             {
                 if(dst_rows_index < dst_rows_end)
@@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x
 
         for(int i = 0; i < ANCHOR; i++)
         {
-#pragma unroll 3
+#pragma unroll
             for(int j = 0; j < ANCHOR; j++)
             {
                 if(dst_rows_index < dst_rows_end)
@@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_
 
         for(int i = 0; i < ANCHOR; i++)
         {
-#pragma unroll 3
+#pragma unroll
             for(int j = 0; j < ANCHOR; j++)
             {
                 if(dst_rows_index < dst_rows_end)
index 4964000..e659a59 100644 (file)
@@ -120,7 +120,7 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
     int gidy = get_global_id(1);
     int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
 
-    if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3)==0)
+    if(gidx+3<cols && gidy<rows && ((dst_offset_in_pixel&3)==0))
     {
         *(__global uchar4*)&dst[out_addr] = res;
     }
index e0ab860..4873298 100644 (file)
@@ -10,6 +10,7 @@
 //    Wang Weiyan, wangweiyanster@gmail.com
 //    Jia Haipeng, jiahaipeng95@gmail.com
 //    Nathan, liujun@multicorewareinc.com
+//    Peng Xiao, pengxiao@outlook.com
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 typedef int   sumtype;
 typedef float sqsumtype;
 
-typedef struct  __attribute__((aligned (128)))  GpuHidHaarFeature
-{
-    struct __attribute__((aligned (32)))
-{
-    int p0 __attribute__((aligned (4)));
-    int p1 __attribute__((aligned (4)));
-    int p2 __attribute__((aligned (4)));
-    int p3 __attribute__((aligned (4)));
-    float weight __attribute__((aligned (4)));
-}
-rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32)));
-}
-GpuHidHaarFeature;
-
+#ifndef STUMP_BASED 
+#define STUMP_BASED 1
+#endif
 
 typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode
 {
     int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned (64)));
-    float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
-    float threshold /*__attribute__((aligned (4)))*/;
-    float alpha[2] __attribute__((aligned (8)));
+    float weight[CV_HAAR_FEATURE_MAX];
+    float threshold;
+    float alpha[3] __attribute__((aligned (16)));
     int left __attribute__((aligned (4)));
     int right __attribute__((aligned (4)));
 }
@@ -111,7 +101,6 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
     float inv_window_area __attribute__((aligned (4)));
 } GpuHidHaarClassifierCascade;
 
-
 __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(
     global GpuHidHaarStageClassifier * stagecascadeptr,
     global int4 * info,
@@ -234,7 +223,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                 float stage_sum = 0.f;
                 int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
                 float stagethreshold = as_float(stageinfo.y);
-                for(int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++ )
+                for(int nodeloop = 0; nodeloop < stageinfo.x; )
                 {
                     __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter);
 
@@ -242,7 +231,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                     int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
                     int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
                     float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
-                    float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
+                    float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
+
                     float nodethreshold  = w.w * variance_norm_factor;
 
                     info1.x +=lcl_off;
@@ -261,8 +251,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                     classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
                                     lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
 
-                    stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
+                    bool passThres = classsum >= nodethreshold;
+#if STUMP_BASED
+                    stage_sum += passThres ? alpha3.y : alpha3.x;
                     nodecounter++;
+                    nodeloop++;
+#else
+                    bool isRootNode = (nodecounter & 1) == 0;
+                    if(isRootNode)
+                    {
+                        if( (passThres && currentnodeptr->right) ||
+                            (!passThres && currentnodeptr->left))
+                        {
+                            nodecounter ++;
+                        }
+                        else
+                        {
+                            stage_sum += alpha3.x;
+                            nodecounter += 2;
+                            nodeloop ++;
+                        }
+                    }
+                    else
+                    {
+                        stage_sum += passThres ? alpha3.z : alpha3.y;
+                        nodecounter ++;
+                        nodeloop ++;
+                    }
+#endif
                 }
 
                 result = (stage_sum >= stagethreshold);
@@ -301,18 +317,20 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
 
                     if(lcl_compute_win_id < queuecount)
                     {
-
                         int tempnodecounter = lcl_compute_id;
                         float part_sum = 0.f;
-                        for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
+                        const int stump_factor = STUMP_BASED ? 1 : 2;
+                        int root_offset = 0;
+                        for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;)
                         {
-                            __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
+                            __global GpuHidHaarTreeNode* currentnodeptr = 
+                                nodeptr + (nodecounter + tempnodecounter) * stump_factor + root_offset;
 
                             int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
                             int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
                             int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
                             float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
-                            float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
+                            float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
                             float nodethreshold  = w.w * variance_norm_factor;
 
                             info1.x +=queue_pixel;
@@ -332,8 +350,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                             classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
                                             lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
 
-                            part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
-                            tempnodecounter +=lcl_compute_win;
+                            bool passThres = classsum >= nodethreshold;
+#if STUMP_BASED
+                            part_sum += passThres ? alpha3.y : alpha3.x;
+                            tempnodecounter += lcl_compute_win;
+                            lcl_loop++;
+#else
+                            if(root_offset == 0)
+                            {
+                                if( (passThres && currentnodeptr->right) ||
+                                    (!passThres && currentnodeptr->left))
+                                {
+                                    root_offset = 1;
+                                }
+                                else
+                                {
+                                    part_sum += alpha3.x;
+                                    tempnodecounter += lcl_compute_win;
+                                    lcl_loop++;
+                                }
+                            }
+                            else
+                            {
+                                part_sum += passThres ? alpha3.z : alpha3.y;
+                                tempnodecounter += lcl_compute_win;
+                                lcl_loop++;
+                                root_offset = 0;
+                            }
+#endif
                         }//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
                         partialsum[lcl_id]=part_sum;
                     }
@@ -379,157 +423,3 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
 }
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-/*
-if(stagecascade->two_rects)
-{
-    #pragma unroll
-    for( n = 0; n < stagecascade->count; n++ )
-    {
-        t1 = *(node + counter);
-        t = t1.threshold * variance_norm_factor;
-        classsum = calc_sum1(t1,p_offset,0) * t1.weight[0];
-
-        classsum  += calc_sum1(t1, p_offset,1) * t1.weight[1];
-        stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0];
-
-        counter++;
-    }
-}
-else
-{
-    #pragma unroll
-    for( n = 0; n < stagecascade->count; n++ )
-    {
-        t = node[counter].threshold*variance_norm_factor;
-        classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0];
-        classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1];
-
-        if( node[counter].p0[2] )
-            classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2];
-
-        stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify
-
-        counter++;
-    }
-}
-*/
-/*
-__kernel void gpuRunHaarClassifierCascade_ScaleWindow(
-                          constant GpuHidHaarClassifierCascade * _cascade,
-                          global GpuHidHaarStageClassifier * stagecascadeptr,
-                          //global GpuHidHaarClassifier * classifierptr,
-                          global GpuHidHaarTreeNode * nodeptr,
-                          global int * sum,
-                          global float * sqsum,
-                          global int * _candidate,
-                          int pixel_step,
-                          int cols,
-                          int rows,
-                          int start_stage,
-                          int end_stage,
-                          //int counts,
-                          int nodenum,
-                          int ystep,
-                          int detect_width,
-                          //int detect_height,
-                          int loopcount,
-                          int outputstep)
-                          //float scalefactor)
-{
-unsigned int x1 = get_global_id(0);
-unsigned int y1 = get_global_id(1);
-int p_offset;
-int m, n;
-int result;
-int counter;
-float mean, variance_norm_factor;
-for(int i=0;i<loopcount;i++)
-{
-constant GpuHidHaarClassifierCascade * cascade = _cascade + i;
-global int * candidate = _candidate + i*outputstep;
-int window_width = cascade->p1 - cascade->p0;
-int window_height = window_width;
-result = 1;
-counter = 0;
-unsigned int x = mul24(x1,ystep);
-unsigned int y = mul24(y1,ystep);
-if((x < cols - window_width - 1) && (y < rows - window_height -1))
-{
-global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage;
-//global GpuHidHaarClassifier      *classifier   = classifierptr;
-global GpuHidHaarTreeNode        *node         = nodeptr + nodenum*i;
-
-p_offset = mad24(y, pixel_step, x);// modify
-
-mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) -
-    *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3))
-    *cascade->inv_window_area;
-
-variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) -
-                    *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset);
-variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean;
-variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify
-
-// if( cascade->is_stump_based )
-//{
-for( m = start_stage; m < end_stage; m++ )
-{
-float stage_sum = 0.f;
-float t,  classsum;
-GpuHidHaarTreeNode t1;
-
-//#pragma unroll
-for( n = 0; n < stagecascade->count; n++ )
-{
-     t1 = *(node + counter);
-     t  = t1.threshold * variance_norm_factor;
-     classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1];
-
-     if((t1.p0[2]) && (!stagecascade->two_rects))
-         classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2];
-
-     stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify
-     counter++;
-}
-
-if (stage_sum < stagecascade->threshold)
-{
-    result = 0;
-    break;
-}
-
-stagecascade++;
-
-}
-if(result)
-{
-    candidate[4 * (y1 * detect_width + x1)]     = x;
-    candidate[4 * (y1 * detect_width + x1) + 1] = y;
-    candidate[4 * (y1 * detect_width + x1)+2]     = window_width;
-    candidate[4 * (y1 * detect_width + x1) + 3] = window_height;
-}
-//}
-}
-}
-}
-*/
-
-
-
-
index 44877f3..8507972 100644 (file)
@@ -17,7 +17,7 @@
 // @Authors
 //    Wu Xinglong, wxl370@126.com
 //    Sen Liu, swjtuls1987@126.com
-//
+//    Peng Xiao, pengxiao@outlook.com
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 #define CV_HAAR_FEATURE_MAX           3
 typedef int   sumtype;
 typedef float sqsumtype;
-typedef struct  __attribute__((aligned(128)))  GpuHidHaarFeature
-{
-    struct __attribute__((aligned(32)))
-{
-    int p0 __attribute__((aligned(4)));
-    int p1 __attribute__((aligned(4)));
-    int p2 __attribute__((aligned(4)));
-    int p3 __attribute__((aligned(4)));
-    float weight __attribute__((aligned(4)));
-}
-rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32)));
-}
-GpuHidHaarFeature;
+
 typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode
 {
     int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned(64)));
     float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
     float threshold /*__attribute__((aligned (4)))*/;
-    float alpha[2] __attribute__((aligned(8)));
+    float alpha[3] __attribute__((aligned(16)));
     int left __attribute__((aligned(4)));
     int right __attribute__((aligned(4)));
 }
@@ -174,45 +162,83 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
                 const int p_offset = mad24(y, step, x);
                 cascadeinfo.x += p_offset;
                 cascadeinfo.z += p_offset;
-                mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
-                        sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
+                mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
+                - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
+                        sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
+                + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
                        * correction_t;
-                variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
-                                       sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
+                variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
+                - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
+                                       sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
+                + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
                 variance_norm_factor = variance_norm_factor * correction_t - mean * mean;
                 variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f;
                 bool result = true;
                 nodecounter = startnode + nodecount * scalei;
-
                 for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
                 {
                     float stage_sum = 0.f;
                     int   stagecount = stagecascadeptr[stageloop].count;
-                    for (int nodeloop = 0; nodeloop < stagecount; nodeloop++)
+                    for (int nodeloop = 0; nodeloop < stagecount;)
                     {
                         __global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
                         int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
                         int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
                         int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
                         float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
-                        float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0]));
+                        float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0]));
                         float nodethreshold  = w.w * variance_norm_factor;
+
                         info1.x += p_offset;
                         info1.z += p_offset;
                         info2.x += p_offset;
                         info2.z += p_offset;
-                        float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
-                                          sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
-                        classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
-                                     sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
                         info3.x += p_offset;
                         info3.z += p_offset;
-                        classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
-                                     sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
-                        stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
+                        float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)]
+                        - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
+                                          sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)]
+                        + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
+                        classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)]
+                        - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
+                                     sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)]
+                        + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
+                        classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)]
+                        - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
+                                     sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
+                        + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
+                        
+                        bool passThres = classsum >= nodethreshold;
+
+#if STUMP_BASED
+                        stage_sum += passThres ? alpha3.y : alpha3.x;
                         nodecounter++;
+                        nodeloop++;
+#else
+                        bool isRootNode = (nodecounter & 1) == 0;
+                        if(isRootNode)
+                        {
+                            if( (passThres && currentnodeptr->right) ||
+                                (!passThres && currentnodeptr->left))
+                            {
+                                nodecounter ++;
+                            }
+                            else
+                            {
+                                stage_sum += alpha3.x;
+                                nodecounter += 2;
+                                nodeloop ++;
+                            }
+                        }
+                        else
+                        {
+                            stage_sum += (passThres ? alpha3.z : alpha3.y);
+                            nodecounter ++;
+                            nodeloop ++;
+                        }
+#endif
                     }
-                    result = (bool)(stage_sum >= stagecascadeptr[stageloop].threshold);
+                    result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold);
                 }
 
                 barrier(CLK_LOCAL_MEM_FENCE);
@@ -222,7 +248,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
                     int queueindex = atomic_inc(lclcount);
                     lcloutindex[queueindex] = (y << 16) | x;
                 }
-
                 barrier(CLK_LOCAL_MEM_FENCE);
                 int queuecount = lclcount[0];
 
@@ -277,5 +302,6 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
     newnode[counter].threshold = t1.threshold;
     newnode[counter].alpha[0] = t1.alpha[0];
     newnode[counter].alpha[1] = t1.alpha[1];
+    newnode[counter].alpha[2] = t1.alpha[2];
 }
 
index 15742d6..1911a72 100644 (file)
@@ -130,28 +130,29 @@ __kernel void calcHarris(__global const float *Dx,__global const float *Dy, __gl
         data[2][i] = dy_data[i] * dy_data[i];
     }
 #else
-   for(int i=0; i < ksY+1; i++)
-   {
+    int clamped_col = min(dst_cols, col);
+    for(int i=0; i < ksY+1; i++)
+    {
         int dx_selected_row;
         int dx_selected_col;
         dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows);
         dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row);
-        dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols);
-        dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col);
+        dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols);
+        dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col);
         dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
 
         int dy_selected_row;
         int dy_selected_col;
         dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows);
         dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row);
-        dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols);
-        dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col);
+        dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols);
+        dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col);
         dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
 
         data[0][i] = dx_data[i] * dx_data[i];
         data[1][i] = dx_data[i] * dy_data[i];
         data[2][i] = dy_data[i] * dy_data[i];
-   }
+    }
 #endif
     float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
     for(int i=1; i < ksY; i++)
index 662fbb0..462ec77 100644 (file)
@@ -130,28 +130,30 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
         data[2][i] = dy_data[i] * dy_data[i];
     }
 #else
-   for(int i=0; i < ksY+1; i++)
-   {
+    int clamped_col = min(dst_cols, col);
+
+    for(int i=0; i < ksY+1; i++)
+    {
         int dx_selected_row;
         int dx_selected_col;
         dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows);
         dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row);
-        dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols);
-        dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col);
+        dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols);
+        dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col);
         dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
 
         int dy_selected_row;
         int dy_selected_col;
         dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows);
         dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row);
-        dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols);
-        dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col);
+        dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols);
+        dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col);
         dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
 
         data[0][i] = dx_data[i] * dx_data[i];
         data[1][i] = dx_data[i] * dy_data[i];
         data[2][i] = dy_data[i] * dy_data[i];
-   }
+    }
 #endif
     float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
     for(int i=1; i < ksY; i++)
index ceaaed1..5402759 100644 (file)
@@ -297,6 +297,9 @@ calcMap
     map_step   /= sizeof(*map);
     map_offset /= sizeof(*map);
 
+    mag += mag_offset;
+    map += map_offset;
+
     __local float smem[18][18];
 
     int gidx = get_global_id(0);
@@ -389,7 +392,7 @@ edgesHysteresisLocal
 (
     __global int * map,
     __global ushort2 * st,
-    volatile __global unsigned int * counter,
+    __global unsigned int * counter,
     int rows,
     int cols,
     int map_step,
@@ -399,6 +402,8 @@ edgesHysteresisLocal
     map_step   /= sizeof(*map);
     map_offset /= sizeof(*map);
 
+    map += map_offset;
+
     __local int smem[18][18];
 
     int gidx = get_global_id(0);
@@ -416,12 +421,12 @@ edgesHysteresisLocal
     if(ly < 14)
     {
         smem[ly][lx] =
-            map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset];
+            map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
     }
     if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
     {
         smem[ly + 14][lx] =
-            map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset];
+            map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
     }
 
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -482,14 +487,17 @@ edgesHysteresisLocal
 __constant int c_dx[8] = {-1,  0,  1, -1, 1, -1, 0, 1};
 __constant int c_dy[8] = {-1, -1, -1,  0, 0,  1, 1, 1};
 
+
 #define stack_size 512
 __kernel
-void edgesHysteresisGlobal
+void
+__attribute__((reqd_work_group_size(128,1,1)))
+edgesHysteresisGlobal
 (
     __global int * map,
     __global ushort2 * st1,
     __global ushort2 * st2,
-    volatile __global int * counter,
+    __global int * counter,
     int rows,
     int cols,
     int count,
@@ -501,6 +509,8 @@ void edgesHysteresisGlobal
     map_step   /= sizeof(*map);
     map_offset /= sizeof(*map);
 
+    map += map_offset;
+
     int gidx = get_global_id(0);
     int gidy = get_global_id(1);
 
@@ -510,7 +520,7 @@ void edgesHysteresisGlobal
     int grp_idx = get_group_id(0);
     int grp_idy = get_group_id(1);
 
-    volatile __local unsigned int s_counter;
+    __local unsigned int s_counter;
     __local unsigned int s_ind;
 
     __local ushort2 s_st[stack_size];
@@ -564,9 +574,9 @@ void edgesHysteresisGlobal
                     pos.x += c_dx[lidx & 7];
                     pos.y += c_dy[lidx & 7];
 
-                    if (map[pos.x + map_offset + pos.y * map_step] == 1)
+                    if (map[pos.x + pos.y * map_step] == 1)
                     {
-                        map[pos.x + map_offset + pos.y * map_step] = 2;
+                        map[pos.x + pos.y * map_step] = 2;
 
                         ind = atomic_inc(&s_counter);
 
@@ -621,6 +631,6 @@ void getEdges
 
     if(gidy < rows && gidx < cols)
     {
-        dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1));
+        dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
     }
 }
diff --git a/modules/ocl/src/opencl/imgproc_clahe.cl b/modules/ocl/src/opencl/imgproc_clahe.cl
new file mode 100644 (file)
index 0000000..0d010f7
--- /dev/null
@@ -0,0 +1,275 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Sen Liu, swjtuls1987@126.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef WAVE_SIZE
+#define WAVE_SIZE 1
+#endif
+
+int calc_lut(__local int* smem, int val, int tid) // returns the inclusive prefix sum of the first 256 slots at position tid
+{
+    smem[tid] = val; // each work-item publishes its own value in slot tid
+    barrier(CLK_LOCAL_MEM_FENCE); // every slot must be written before the scan reads it
+
+    if (tid == 0) // a single work-item performs the whole scan serially
+    {
+        for (int i = 1; i < 256; ++i) // fixed length: exactly slots 0..255 are scanned
+        {
+            smem[i] += smem[i - 1]; // in-place running total
+        }
+    }
+    barrier(CLK_LOCAL_MEM_FENCE); // make the finished scan visible to all work-items
+
+    return smem[tid]; // NOTE(review): assumes tid < 256 and that all 256 slots were written - confirm launch size
+}
+
+#ifdef CPU
+void reduce(volatile __local int* smem, int val, int tid) // CPU variant: tree reduction over slots 0..255, total is stored in smem[256]
+{
+    smem[tid] = val; // publish this work-item's contribution
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 128) // fold slots 128..255 onto slots 0..127
+    { 
+        smem[tid] = val += smem[tid + 128];
+    } 
+    barrier(CLK_LOCAL_MEM_FENCE); // every step below is followed by a barrier, so no lockstep execution is relied on
+
+    if (tid < 64)
+    { 
+        smem[tid] = val += smem[tid + 64];
+    } 
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 32)
+    {
+        smem[tid] += smem[tid + 32];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 16)
+    {
+        smem[tid] += smem[tid + 16];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 8)
+    {
+        smem[tid] += smem[tid + 8];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 4)
+    {
+        smem[tid] += smem[tid + 4];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 2)
+    {
+        smem[tid] += smem[tid + 2];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 1)
+    {
+        smem[256] = smem[tid] + smem[tid + 1]; // final total goes to slot 256 (not slot 0), so smem needs at least 257 ints
+    }
+    barrier(CLK_LOCAL_MEM_FENCE); // the total in smem[256] is visible to all work-items on return
+}
+#else
+void reduce(__local volatile int* smem, int val, int tid) // non-CPU variant: tree reduction over slots 0..255, total is accumulated into smem[0]
+{
+    smem[tid] = val; // publish this work-item's contribution
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 128) // fold slots 128..255 onto slots 0..127
+    { 
+        smem[tid] = val += smem[tid + 128];
+    } 
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 64)
+    { 
+        smem[tid] = val += smem[tid + 64];
+    } 
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 32) // from here on, barriers are only compiled in when WAVE_SIZE is too small (see the #if blocks)
+    {
+        smem[tid] += smem[tid + 32];
+#if WAVE_SIZE < 32
+    } barrier(CLK_LOCAL_MEM_FENCE); // closes the tid < 32 branch and synchronizes before the 16-wide step
+    if (tid < 16) {
+#endif
+        smem[tid] += smem[tid + 16];
+#if WAVE_SIZE < 16
+    } barrier(CLK_LOCAL_MEM_FENCE); // closes the previous branch and synchronizes before the 8-wide step
+    if (tid < 8) {
+#endif
+        smem[tid] += smem[tid + 8]; // NOTE(review): the last four steps never get a barrier, whatever WAVE_SIZE is
+        smem[tid] += smem[tid + 4]; // presumably relies on these work-items executing in lockstep (smem is volatile) - confirm
+        smem[tid] += smem[tid + 2];
+        smem[tid] += smem[tid + 1]; // for tid == 0 this leaves the total in smem[0]
+    }
+} // no trailing barrier here: the caller must synchronize before reading smem[0]
+#endif
+
+__kernel void calcLut(__global __const uchar * src, __global uchar * lut,
+                      const int srcStep, const int dstStep,
+                      const int2 tileSize, const int tilesX,
+                      const int clipLimit, const float lutScale)
+{
+    __local int smem[512];
+
+    const int tx = get_group_id(0);
+    const int ty = get_group_id(1);
+    const unsigned int tid = get_local_id(1) * get_local_size(0)
+                             + get_local_id(0);
+
+    smem[tid] = 0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))
+    {
+        __global const uchar* srcPtr = src + mad24( ty * tileSize.y + i,
+                                                    srcStep, tx * tileSize.x );
+        for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))
+        {
+            const int data = srcPtr[j];
+            atomic_inc(&smem[data]);
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    int tHistVal = smem[tid];
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (clipLimit > 0)
+    {
+        // clip histogram bar
+
+        int clipped = 0;
+        if (tHistVal > clipLimit)
+        {
+            clipped = tHistVal - clipLimit;
+            tHistVal = clipLimit;
+        }
+
+        // find number of overall clipped samples
+
+        reduce(smem, clipped, tid);
+        barrier(CLK_LOCAL_MEM_FENCE);
+#ifdef CPU
+        clipped = smem[256];
+#else
+        clipped = smem[0];
+#endif
+
+        // broadcast evaluated value
+
+        __local int totalClipped;
+
+        if (tid == 0)
+            totalClipped = clipped;
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // redistribute clipped samples evenly
+
+        int redistBatch = totalClipped / 256;
+        tHistVal += redistBatch;
+
+        int residual = totalClipped - redistBatch * 256;
+        if (tid < residual)
+            ++tHistVal;
+    }
+
+    const int lutVal = calc_lut(smem, tHistVal, tid);
+    uint ires = (uint)convert_int_rte(lutScale * lutVal);
+    lut[(ty * tilesX + tx) * dstStep + tid] =
+        convert_uchar(clamp(ires, (uint)0, (uint)255));
+}
+
+__kernel void transform(__global __const uchar * src, // maps every pixel through the LUTs of up to four neighbouring tiles, blended bilinearly
+                        __global uchar * dst,
+                        __global uchar * lut, // one row of lutStep elements per tile, indexed by ty * tilesX + tx
+                        const int srcStep, const int dstStep, const int lutStep,
+                        const int cols, const int rows,
+                        const int2 tileSize,
+                        const int tilesX, const int tilesY)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+
+    if (x >= cols || y >= rows) // work-items outside the image do nothing
+        return;
+
+    const float tyf = (convert_float(y) / tileSize.y) - 0.5f; // row position measured in tiles, shifted by half a tile
+    int ty1 = convert_int_rtn(tyf); // round toward negative infinity: tile row above
+    int ty2 = ty1 + 1; // tile row below
+    const float ya = tyf - ty1; // vertical blend weight, taken before the indices are clamped
+    ty1 = max(ty1, 0); // clamp both tile rows into [0, tilesY - 1]
+    ty2 = min(ty2, tilesY - 1);
+
+    const float txf = (convert_float(x) / tileSize.x) - 0.5f; // same computation along x
+    int tx1 = convert_int_rtn(txf);
+    int tx2 = tx1 + 1;
+    const float xa = txf - tx1; // horizontal blend weight
+    tx1 = max(tx1, 0);
+    tx2 = min(tx2, tilesX - 1);
+
+    const int srcVal = src[mad24(y, srcStep, x)]; // source intensity, used as the index into each LUT
+
+    float res = 0;
+
+    res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (1.0f - ya)); // tile (tx1, ty1)
+    res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (1.0f - ya)); // tile (tx2, ty1)
+    res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (ya)); // tile (tx1, ty2)
+    res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (ya)); // tile (tx2, ty2)
+
+    uint ires = (uint)convert_int_rte(res); // round to nearest even, then clamp into the uchar range
+    dst[mad24(y, dstStep, x)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
+}
diff --git a/modules/ocl/src/opencl/imgproc_gfft.cl b/modules/ocl/src/opencl/imgproc_gfft.cl
new file mode 100644 (file)
index 0000000..5fa27ff
--- /dev/null
@@ -0,0 +1,276 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef WITH_MASK
+#define WITH_MASK 0
+#endif
+
+__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+inline float ELEM_INT2(image2d_t _eig, int _x, int _y) // reads the image at integer coordinates (_x, _y)
+{
+    return read_imagef(_eig, sampler, (int2)(_x, _y)).x; // uses the file-scope sampler; only the first channel is returned
+}
+
+inline float ELEM_FLT2(image2d_t _eig, float2 pt) // reads the image at the floating-point coordinates pt
+{
+    return read_imagef(_eig, sampler, pt).x; // uses the file-scope sampler; only the first channel is returned
+}
+
+__kernel
+    void findCorners // appends every interior pixel that exceeds threshold and is a 3x3 maximum of eig to corners
+    (
+        image2d_t eig, // values that are thresholded and compared
+        __global const char * mask, // only read when WITH_MASK is non-zero
+        __global float2 * corners, // output list of (x, y) positions
+        const int mask_strip,// in pixels
+        const float threshold,
+        const int rows,
+        const int cols,
+        const int max_count, // number of entries that may be written to corners
+        __global int * g_counter // running count of detections, incremented atomically
+    )
+{
+    const int j = get_global_id(0); // column
+    const int i = get_global_id(1); // row
+
+    if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 // the one-pixel image border is skipped
+#if WITH_MASK
+        && mask[i * mask_strip + j] != 0
+#endif
+        )
+    {
+        const float val = ELEM_INT2(eig, j, i);
+
+        if (val > threshold)
+        {
+            float maxVal = val; // becomes the maximum over the 3x3 neighbourhood
+
+            maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal);
+            maxVal = fmax(ELEM_INT2(eig, j    , i - 1), maxVal);
+            maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
+
+            maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal);
+            maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal);
+
+            maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal);
+            maxVal = fmax(ELEM_INT2(eig, j    , i + 1), maxVal);
+            maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal);
+
+            if (val == maxVal) // no neighbour is larger; equal neighbours also pass this test
+            {
+                const int ind = atomic_inc(g_counter); // the counter keeps growing even once ind >= max_count
+
+                if (ind < max_count) // detections beyond max_count are dropped
+                    corners[ind] = (float2)(j, i);
+            }
+        }
+    }
+}
+
+//bitonic sort
+__kernel
+    void sortCorners_bitonicSort // one compare-and-swap pass over corners, ordered by the eig value at each point
+    (
+        image2d_t eig, // image sampled at each corner to obtain its sort key
+        __global float2 * corners, // points to sort, modified in place
+        const int count, // number of points
+        const int stage,
+        const int passOfStage
+    )
+{
+    const int threadId = get_global_id(0);
+    if(threadId >= count / 2) // each work-item handles one pair, so only count / 2 of them are active
+    {
+        return;
+    }
+
+    const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 = descending, 1 = ascending
+
+    const int pairDistance = 1 << (stage - passOfStage); // distance between the two elements of a pair
+    const int blockWidth   = 2 * pairDistance;
+
+    const int leftId = min( (threadId % pairDistance) 
+                   + (threadId / pairDistance) * blockWidth, count ); // NOTE(review): clamps to count, not count - 1
+
+    const int rightId = min( leftId + pairDistance, count ); // NOTE(review): index count is one past a count-element buffer - confirm the caller prevents this
+
+    const float2 leftPt  = corners[leftId];
+    const float2 rightPt = corners[rightId];
+
+    const float leftVal  = ELEM_FLT2(eig, leftPt);
+    const float rightVal = ELEM_FLT2(eig, rightPt);
+
+    const bool compareResult = leftVal > rightVal;
+
+    float2 greater = compareResult ? leftPt:rightPt;
+    float2 lesser  = compareResult ? rightPt:leftPt;
+    
+    corners[leftId]  = sortOrder ? lesser : greater; // descending order puts the greater point on the left
+    corners[rightId] = sortOrder ? greater : lesser;
+}
+
+//selection sort for gfft
+//kernel is ported from Bolt library:
+//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
+//  The local pass first sorts the elements of each workgroup by computing the rank of every element;
+//  each work-item performs O(n) comparisons, where n is the number of elements in its workgroup
+__kernel
+    void sortCorners_selectionSortLocal // sorts each workgroup-sized chunk of corners in descending order of its eig value
+    (
+        image2d_t eig, // image sampled at each corner to obtain its sort key
+        __global float2 * corners, // points to sort, modified in place
+        const int count, // total number of points
+        __local float2 * scratch // staging buffer, must hold at least get_local_size(0) elements
+    )
+{
+    int          i  = get_local_id(0); // index in workgroup
+    int numOfGroups = get_num_groups(0); // number of workgroups
+    int groupID     = get_group_id(0);
+    int         wg  = get_local_size(0); // workgroup size = block size
+    int n; // number of elements to be processed for this work group
+
+    int offset   = groupID * wg;
+    int same     = 0; // number of chunk elements whose value equals val1 (includes this element itself)
+    corners      += offset; // from here on corners points at this workgroup's chunk
+    n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg; // the last group only gets the remainder
+    float2 pt1, pt2;
+
+    pt1 = corners[min(i, max(n - 1, 0))]; // idle work-items (i >= n) re-read the last valid element instead of reading past the chunk
+    scratch[i] = pt1; // written by every work-item, although only slots 0..n-1 are read below
+    barrier(CLK_LOCAL_MEM_FENCE); // must precede the early return so that all work-items reach it
+
+    if(i >= n)
+    {
+        return;
+    }
+
+    float val1 = ELEM_FLT2(eig, pt1);
+    float val2;
+
+    int pos = 0; // number of chunk elements strictly greater than val1, i.e. the rank of this element
+    for (int j=0;j<n;++j)
+    {
+        pt2  = scratch[j];
+        val2 = ELEM_FLT2(eig, pt2);
+        if(val2 > val1)
+            pos++;//calculate the rank of this element in this work group
+        else
+        {
+            if(val1 > val2)
+                continue;
+            else
+            {
+                // val1 and val2 are same
+                same++;
+            }
+        }
+    }
+    for (int j=0; j< same; j++) // NOTE(review): tied work-items all write the same slots, so distinct points with equal values can overwrite each other - confirm acceptable
+        corners[pos + j] = pt1;
+}
+__kernel
+    void sortCorners_selectionSortFinal // ranks every point against the whole array and writes it to its global position
+    (
+        image2d_t eig, // image sampled at each corner to obtain its sort key
+        __global float2 * corners, // points to sort, read and written in place
+        const int count // total number of points
+    )
+{
+    const int          i  = get_local_id(0); // index in workgroup
+    const int numOfGroups = get_num_groups(0); // number of workgroups
+    const int groupID     = get_group_id(0);
+    const int         wg  = get_local_size(0); // workgroup size = block size
+    int pos = 0, same = 0; // counts of elements seen that are greater than / equal to this one
+    const int offset = get_group_id(0) * wg;
+    const int remainder = count - wg*(numOfGroups-1); // number of elements in the last chunk
+
+    if((offset + i ) >= count) // work-items past the end of the array do nothing
+        return;
+    float2 pt1, pt2;
+    pt1 = corners[groupID*wg + i];
+
+    float val1 = ELEM_FLT2(eig, pt1);
+    float val2;
+
+    for(int j=0; j<numOfGroups-1; j++ ) // all full-size chunks
+    {
+        for(int k=0; k<wg; k++)
+        {
+            pt2  = corners[j*wg + k]; // NOTE(review): other work-items may already be writing corners (see the final loop); no synchronization is visible here - confirm this in-place pass is safe
+            val2 = ELEM_FLT2(eig, pt2); 
+            if(val1 > val2) // presumably relies on each chunk already being sorted in descending order - verify against the caller
+                break;
+            else
+            {
+                // Increment pos only if the value is strictly greater; equal values are counted in same.
+                if( val2 > val1 )
+                    pos++;
+                else 
+                    same++;
+            }
+        }
+    }
+
+    for(int k=0; k<remainder; k++) // the last chunk, which may be shorter than wg
+    {
+        pt2  = corners[(numOfGroups-1)*wg + k];
+        val2 = ELEM_FLT2(eig, pt2); 
+        if(val1 > val2)
+            break;
+        else
+        {
+            // Don't increment pos if the value is the same.
+            // Two elements are the same if neither val2 > val1 nor val1 > val2 holds.
+            if(val2 > val1)
+                pos++;
+            else 
+                same++;
+        }
+    }  
+    for (int j=0; j< same; j++)      
+        corners[pos + j] = pt1; // fills every slot of the tie range starting at this element's rank
+}
+
index 8ad501f..9162abb 100644 (file)
@@ -143,7 +143,7 @@ __kernel void threshold_C1_D5(__global const float * restrict src, __global floa
         int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3);
         float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart);
         int4 con = dpos >= 0 && dpos < dst_cols;
-        ddata = convert_float4(con) != 0 ? ddata : dVal;
+        ddata = convert_float4(con) != (float4)(0) ? ddata : dVal;
         if(dstart < dst_cols)
         {
             *(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata;
index 0b7f0c9..4afa7b7 100644 (file)
@@ -18,6 +18,7 @@
 //    Zhang Chunpeng   chunpeng@multicorewareinc.com
 //    Dachuan Zhao, dachuan@multicorewareinc.com
 //    Yao Wang, yao@multicorewareinc.com
+//    Peng Xiao, pengxiao@outlook.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -47,7 +48,7 @@
 
 //#pragma OPENCL EXTENSION cl_amd_printf : enable
 
-uchar get_valid_uchar(uchar data)
+uchar get_valid_uchar(float data)
 {
     return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0);
 }
@@ -142,7 +143,7 @@ __kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
     sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][tidx];
 
     if ((x < dstCols) && (y < dstRows))
-        dst[x + y * dstStep] = (float)(4.0f * sum);
+        dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum);
 
 }
 
@@ -244,7 +245,7 @@ __kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
     sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
 
     if ((x < dstCols) && (y < dstRows))
-        dst[x + y * dstStep] = (float)(4.0f * sum);
+        dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum);
 
 }
 
@@ -351,31 +352,6 @@ __kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
 ///////////////////////////////////////////////////////////////////////
 //////////////////////////  CV_8UC4  //////////////////////////////////
 ///////////////////////////////////////////////////////////////////////
-float4 covert_uchar4_to_float4(uchar4 data)
-{
-    float4 f4Data = {0,0,0,0};
-
-    f4Data.x = (float)data.x;
-    f4Data.y = (float)data.y;
-    f4Data.z = (float)data.z;
-    f4Data.w = (float)data.w;
-
-    return f4Data;
-}
-
-
-uchar4 convert_float4_to_uchar4(float4 data)
-{
-    uchar4 u4Data;
-
-    u4Data.x = get_valid_uchar(data.x);
-    u4Data.y = get_valid_uchar(data.y);
-    u4Data.z = get_valid_uchar(data.z);
-    u4Data.w = get_valid_uchar(data.w);
-
-    return u4Data;
-}
-
 __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
                           int srcRows,int dstRows,int srcCols,int dstCols,
                           int srcOffset,int dstOffset,int srcStep,int dstStep)
@@ -406,15 +382,15 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
         srcy = abs(srcy);
         srcy = min(srcRows -1 ,srcy);
 
-        s_srcPatch[tidy][tidx] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]);
+        s_srcPatch[tidy][tidx] = convert_float4(src[srcx + srcy * srcStep]);
     }
 
     barrier(CLK_LOCAL_MEM_FENCE);
 
     float4 sum = (float4)(0,0,0,0);
 
-    const int evenFlag = (int)((tidx & 1) == 0);
-    const int oddFlag = (int)((tidx & 1) != 0);
+    const float4 evenFlag = (float4)((tidx & 1) == 0);
+    const float4 oddFlag = (float4)((tidx & 1) != 0);
     const bool  eveny = ((tidy & 1) == 0);
 
     float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
@@ -476,38 +452,13 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
 
     if ((x < dstCols) && (y < dstRows))
     {
-        dst[x + y * dstStep] = convert_float4_to_uchar4(4.0f * sum);
+        dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum);
     }
 }
+
 ///////////////////////////////////////////////////////////////////////
 //////////////////////////  CV_16UC4 //////////////////////////////////
 ///////////////////////////////////////////////////////////////////////
-float4 covert_ushort4_to_float4(ushort4 data)
-{
-    float4 f4Data = {0,0,0,0};
-
-    f4Data.x = (float)data.x;
-    f4Data.y = (float)data.y;
-    f4Data.z = (float)data.z;
-    f4Data.w = (float)data.w;
-
-    return f4Data;
-}
-
-
-ushort4 convert_float4_to_ushort4(float4 data)
-{
-    ushort4 u4Data;
-
-    u4Data.x = (float)data.x;
-    u4Data.y = (float)data.y;
-    u4Data.z = (float)data.z;
-    u4Data.w = (float)data.w;
-
-    return u4Data;
-}
-
-
 __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
                           int srcRows,int dstRows,int srcCols,int dstCols,
                           int srcOffset,int dstOffset,int srcStep,int dstStep)
@@ -535,15 +486,15 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
         srcy = abs(srcy);
         srcy = min(srcRows -1 ,srcy);
 
-        s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]);
+        s_srcPatch[get_local_id(1)][get_local_id(0)] = convert_float4(src[srcx + srcy * srcStep]);
     }
 
     barrier(CLK_LOCAL_MEM_FENCE);
 
     float4 sum = (float4)(0,0,0,0);
 
-    const int evenFlag = (int)((get_local_id(0) & 1) == 0);
-    const int oddFlag = (int)((get_local_id(0) & 1) != 0);
+    const float4 evenFlag = (float4)((get_local_id(0) & 1) == 0);
+    const float4 oddFlag = (float4)((get_local_id(0) & 1) != 0);
     const bool  eveny = ((get_local_id(1) & 1) == 0);
     const int tidx = get_local_id(0);
 
@@ -570,11 +521,11 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
 
         if (eveny)
         {
-            sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
-            sum = sum + ( oddFlag * co2  ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
+            sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
+            sum = sum + (oddFlag * co2  ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
             sum = sum + (evenFlag * co1 ) * s_srcPatch[0][1 + ((tidx    ) >> 1)];
-            sum = sum + ( oddFlag * co2  ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
-            sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
+            sum = sum + (oddFlag * co2  ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
+            sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
         }
 
         s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
@@ -610,7 +561,7 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
 
     if ((x < dstCols) && (y < dstRows))
     {
-        dst[x + y * dstStep] = convert_float4_to_ushort4(4.0f * sum);
+        dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum);
     }
 }
 
@@ -654,8 +605,8 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
 
     float4 sum = (float4)(0,0,0,0);
 
-    const int evenFlag = (int)((tidx & 1) == 0);
-    const int oddFlag = (int)((tidx & 1) != 0);
+    const float4 evenFlag = (float4)((tidx & 1) == 0);
+    const float4 oddFlag = (float4)((tidx & 1) != 0);
     const bool  eveny = ((tidy & 1) == 0);
 
     float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
@@ -681,11 +632,11 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
 
         if (eveny)
         {
-            sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
-            sum = sum + ( oddFlag * co2  ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
+            sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
+            sum = sum + (oddFlag * co2  ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
             sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx    ) >> 1)];
-            sum = sum + ( oddFlag * co2  ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
-            sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
+            sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
+            sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
         }
 
         s_dstPatch[tidy][tidx] = sum;
@@ -719,4 +670,4 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
     {
         dst[x + y * dstStep] = 4.0f * sum;
     }
-}
\ No newline at end of file
+}
index 1043b84..40a1993 100644 (file)
 
 //#pragma OPENCL EXTENSION cl_amd_printf : enable
 
-__kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (y < rows && x < cols * cn)
-    {
-        const uchar src_val0 = (src + (y > 0 ? y-1 : rows > 1 ? 1 : 0) * srcStep)[x];
-        const uchar src_val1 = (src + y * srcStep)[x];
-        const uchar src_val2 = (src + (y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0) * srcStep)[x];
-
-        ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
-        ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
-    }
-}
-
-__kernel void calcSharrDeriv_vertical_C4_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    if (y < rows && x < cols * cn)
-    {
-        const uchar src_val0 = (src + (y > 0 ? y - 1 : 1) * srcStep)[x];
-        const uchar src_val1 = (src + y * srcStep)[x];
-        const uchar src_val2 = (src + (y < rows - 1 ? y + 1 : rows - 2) * srcStep)[x];
-
-        ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
-        ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
-    }
-}
-
-__kernel void calcSharrDeriv_horizontal_C1_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    const int colsn = cols * cn;
-
-    if (y < rows && x < colsn)
-    {
-        __global const short* dx_buf_row = dx_buf + y * dx_bufStep;
-        __global const short* dy_buf_row = dy_buf + y * dy_bufStep;
-
-        const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn;
-        const int xl = x - cn >= 0 ? x - cn : cn + x;
-
-        ((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl];
-        ((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
-    }
-}
-
-__kernel void calcSharrDeriv_horizontal_C4_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    const int colsn = cols * cn;
-
-    if (y < rows && x < colsn)
-    {
-        __global const short* dx_buf_row = dx_buf + y * dx_bufStep;
-        __global const short* dy_buf_row = dy_buf + y * dy_bufStep;
-
-        const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn;
-        const int xl = x - cn >= 0 ? x - cn : cn + x;
-
-        ((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl];
-        ((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
-    }
-}
-
-#define W_BITS 14
-#define W_BITS1 14
-
-#define  CV_DESCALE(x, n)     (((x) + (1 << ((n)-1))) >> (n))
-
-int linearFilter_uchar(__global const uchar* src, int srcStep, int cn, float2 pt, int x, int y)
-{
-    int2 ipt;
-    ipt.x = convert_int_sat_rtn(pt.x);
-    ipt.y = convert_int_sat_rtn(pt.y);
-
-    float a = pt.x - ipt.x;
-    float b = pt.y - ipt.y;
-
-    int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS));
-    int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS));
-    int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS));
-    int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
-    __global const uchar* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn;
-    __global const uchar* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn;
-
-    return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1 - 5);
-}
-
-int linearFilter_short(__global const short* src, int srcStep, int cn, float2 pt, int x, int y)
-{
-    int2 ipt;
-    ipt.x = convert_int_sat_rtn(pt.x);
-    ipt.y = convert_int_sat_rtn(pt.y);
-
-    float a = pt.x - ipt.x;
-    float b = pt.y - ipt.y;
-
-    int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS));
-    int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS));
-    int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS));
-    int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
-    __global const short* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn;
-    __global const short* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn;
-
-    return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1);
-}
-
-float linearFilter_float(__global const float* src, int srcStep, int cn, float2 pt, float x, float y)
-{
-    int2 ipt;
-    ipt.x = convert_int_sat_rtn(pt.x);
-    ipt.y = convert_int_sat_rtn(pt.y);
-
-    float a = pt.x - ipt.x;
-    float b = pt.y - ipt.y;
-
-    float iw00 = ((1.0f - a) * (1.0f - b) * (1 << W_BITS));
-    float iw01 = (a * (1.0f - b) * (1 << W_BITS));
-    float iw10 = ((1.0f - a) * b * (1 << W_BITS));
-    float iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
-    __global const float* src_row = src + (int)(ipt.y + y) * srcStep / 4 + ipt.x * cn;
-    __global const float* src_row1 = src + (int)(ipt.y + y + 1) * srcStep / 4 + ipt.x * cn;
-
-    return src_row[(int)x] * iw00 + src_row[(int)x + cn] * iw01 + src_row1[(int)x] * iw10 + src_row1[(int)x + cn] * iw11, W_BITS1 - 5;
-}
-
 #define        BUFFER  64
-
+#ifndef WAVE_SIZE
+#define WAVE_SIZE 1
+#endif
 #ifdef CPU
 void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local float* smem2,  __local float* smem3, int tid)
 {
@@ -193,71 +58,51 @@ void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local
     smem3[tid] = val3;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-#if    BUFFER > 128
-    if (tid < 128)
-    {
-        smem1[tid] = val1 += smem1[tid + 128];
-        smem2[tid] = val2 += smem2[tid + 128];
-        smem3[tid] = val3 += smem3[tid + 128];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if    BUFFER > 64
-    if (tid < 64)
-    {
-        smem1[tid] = val1 += smem1[tid + 64];
-        smem2[tid] = val2 += smem2[tid + 64];
-        smem3[tid] = val3 += smem3[tid + 64];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
     if (tid < 32)
     {
-        smem1[tid] = val1 += smem1[tid + 32];
-        smem2[tid] = val2 += smem2[tid + 32];
-        smem3[tid] = val3 += smem3[tid + 32];
+        smem1[tid] += smem1[tid + 32];
+        smem2[tid] += smem2[tid + 32];
+        smem3[tid] += smem3[tid + 32];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 16)
     {
-        smem1[tid] = val1 += smem1[tid + 16];
-        smem2[tid] = val2 += smem2[tid + 16];
-        smem3[tid] = val3 += smem3[tid + 16];
+        smem1[tid] += smem1[tid + 16];
+        smem2[tid] += smem2[tid + 16];
+        smem3[tid] += smem3[tid + 16];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 8)
     {
-        smem1[tid] = val1 += smem1[tid + 8];
-        smem2[tid] = val2 += smem2[tid + 8];
-        smem3[tid] = val3 += smem3[tid + 8];
+        smem1[tid] += smem1[tid + 8];
+        smem2[tid] += smem2[tid + 8];
+        smem3[tid] += smem3[tid + 8];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 4)
     {
-        smem1[tid] = val1 += smem1[tid + 4];
-        smem2[tid] = val2 += smem2[tid + 4];
-        smem3[tid] = val3 += smem3[tid + 4];
+        smem1[tid] += smem1[tid + 4];
+        smem2[tid] += smem2[tid + 4];
+        smem3[tid] += smem3[tid + 4];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 2)
     {
-        smem1[tid] = val1 += smem1[tid + 2];
-        smem2[tid] = val2 += smem2[tid + 2];
-        smem3[tid] = val3 += smem3[tid + 2];
+        smem1[tid] += smem1[tid + 2];
+        smem2[tid] += smem2[tid + 2];
+        smem3[tid] += smem3[tid + 2];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 1)
     {
-        smem1[BUFFER] = val1 += smem1[tid + 1];
-        smem2[BUFFER] = val2 += smem2[tid + 1];
-        smem3[BUFFER] = val3 += smem3[tid + 1];
+        smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
+        smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
+        smem3[BUFFER] = smem3[tid] + smem3[tid + 1];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 }
@@ -268,63 +113,45 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
     smem2[tid] = val2;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-#if    BUFFER > 128
-    if (tid < 128)
-    {
-        smem1[tid] = (val1 += smem1[tid + 128]);
-        smem2[tid] = (val2 += smem2[tid + 128]);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if    BUFFER > 64
-    if (tid < 64)
-    {
-        smem1[tid] = (val1 += smem1[tid + 64]);
-        smem2[tid] = (val2 += smem2[tid + 64]);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
     if (tid < 32)
     {
-        smem1[tid] = (val1 += smem1[tid + 32]);
-        smem2[tid] = (val2 += smem2[tid + 32]);
+        smem1[tid] += smem1[tid + 32];
+        smem2[tid] += smem2[tid + 32];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 16)
     {
-        smem1[tid] = (val1 += smem1[tid + 16]);
-        smem2[tid] = (val2 += smem2[tid + 16]);
+        smem1[tid] += smem1[tid + 16];
+        smem2[tid] += smem2[tid + 16];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 8)
     {
-        smem1[tid] = (val1 += smem1[tid + 8]);
-        smem2[tid] = (val2 += smem2[tid + 8]);
+        smem1[tid] += smem1[tid + 8];
+        smem2[tid] += smem2[tid + 8];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 4)
     {
-        smem1[tid] = (val1 += smem1[tid + 4]);
-        smem2[tid] = (val2 += smem2[tid + 4]);
+        smem1[tid] += smem1[tid + 4];
+        smem2[tid] += smem2[tid + 4];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 2)
     {
-        smem1[tid] = (val1 += smem1[tid + 2]);
-        smem2[tid] = (val2 += smem2[tid + 2]);
+        smem1[tid] += smem1[tid + 2];
+        smem2[tid] += smem2[tid + 2];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 1)
     {
-        smem1[BUFFER] = (val1 += smem1[tid + 1]);
-        smem2[BUFFER] = (val2 += smem2[tid + 1]);
+        smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
+        smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 }
@@ -334,205 +161,146 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
     smem1[tid] = val1;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-#if    BUFFER > 128
-    if (tid < 128)
-    {
-        smem1[tid] = (val1 += smem1[tid + 128]);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if    BUFFER > 64
-    if (tid < 64)
-    {
-        smem1[tid] = (val1 += smem1[tid + 64]);
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
     if (tid < 32)
     {
-        smem1[tid] = (val1 += smem1[tid + 32]);
+        smem1[tid] += smem1[tid + 32];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 16)
     {
-        smem1[tid] = (val1 += smem1[tid + 16]);
+        smem1[tid] += smem1[tid + 16];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 8)
     {
-        smem1[tid] = (val1 += smem1[tid + 8]);
+        smem1[tid] += smem1[tid + 8];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 4)
     {
-        smem1[tid] = (val1 += smem1[tid + 4]);
+        smem1[tid] += smem1[tid + 4];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 2)
     {
-        smem1[tid] = (val1 += smem1[tid + 2]);
+        smem1[tid] += smem1[tid + 2];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if (tid < 1)
     {
-        smem1[BUFFER] = (val1 += smem1[tid + 1]);
+        smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 }
 #else
-void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
+void reduce3(float val1, float val2, float val3, 
+__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
     smem3[tid] = val3;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-#if    BUFFER > 128
-    if (tid < 128)
+    if (tid < 32)
     {
-        smem1[tid] = val1 += smem1[tid + 128];
-        smem2[tid] = val2 += smem2[tid + 128];
-        smem3[tid] = val3 += smem3[tid + 128];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
+        smem1[tid] += smem1[tid + 32];
+        smem2[tid] += smem2[tid + 32];
+        smem3[tid] += smem3[tid + 32];
+#if WAVE_SIZE < 32
+       } barrier(CLK_LOCAL_MEM_FENCE);
+       if (tid < 16) {
 #endif
-
-#if    BUFFER > 64
-    if (tid < 64)
-    {
-        smem1[tid] = val1 += smem1[tid + 64];
-        smem2[tid] = val2 += smem2[tid + 64];
-        smem3[tid] = val3 += smem3[tid + 64];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
+        smem1[tid] += smem1[tid + 16];
+        smem2[tid] += smem2[tid + 16];
+        smem3[tid] += smem3[tid + 16];
+#if WAVE_SIZE <16
+       } barrier(CLK_LOCAL_MEM_FENCE);
+       if (tid < 8) {
 #endif
+        smem1[tid] += smem1[tid + 8];
+        smem2[tid] += smem2[tid + 8];
+        smem3[tid] += smem3[tid + 8];
 
-    if (tid < 32)
-    {
-        volatile __local float* vmem1 = smem1;
-        volatile __local float* vmem2 = smem2;
-        volatile __local float* vmem3 = smem3;
-
-        vmem1[tid] = val1 += vmem1[tid + 32];
-        vmem2[tid] = val2 += vmem2[tid + 32];
-        vmem3[tid] = val3 += vmem3[tid + 32];
-
-        vmem1[tid] = val1 += vmem1[tid + 16];
-        vmem2[tid] = val2 += vmem2[tid + 16];
-        vmem3[tid] = val3 += vmem3[tid + 16];
-
-        vmem1[tid] = val1 += vmem1[tid + 8];
-        vmem2[tid] = val2 += vmem2[tid + 8];
-        vmem3[tid] = val3 += vmem3[tid + 8];
+        smem1[tid] += smem1[tid + 4];
+        smem2[tid] += smem2[tid + 4];
+        smem3[tid] += smem3[tid + 4];
 
-        vmem1[tid] = val1 += vmem1[tid + 4];
-        vmem2[tid] = val2 += vmem2[tid + 4];
-        vmem3[tid] = val3 += vmem3[tid + 4];
+        smem1[tid] += smem1[tid + 2];
+        smem2[tid] += smem2[tid + 2];
+        smem3[tid] += smem3[tid + 2];
 
-        vmem1[tid] = val1 += vmem1[tid + 2];
-        vmem2[tid] = val2 += vmem2[tid + 2];
-        vmem3[tid] = val3 += vmem3[tid + 2];
-
-        vmem1[tid] = val1 += vmem1[tid + 1];
-        vmem2[tid] = val2 += vmem2[tid + 1];
-        vmem3[tid] = val3 += vmem3[tid + 1];
+        smem1[tid] += smem1[tid + 1];
+        smem2[tid] += smem2[tid + 1];
+        smem3[tid] += smem3[tid + 1];
     }
 }
 
-void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
+void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-#if    BUFFER > 128
-    if (tid < 128)
+    if (tid < 32)
     {
-        smem1[tid] = val1 += smem1[tid + 128];
-        smem2[tid] = val2 += smem2[tid + 128];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
+        smem1[tid] += smem1[tid + 32];
+        smem2[tid] += smem2[tid + 32];
+#if WAVE_SIZE < 32
+       } barrier(CLK_LOCAL_MEM_FENCE);
+       if (tid < 16) {
 #endif
-
-#if    BUFFER > 64
-    if (tid < 64)
-    {
-        smem1[tid] = val1 += smem1[tid + 64];
-        smem2[tid] = val2 += smem2[tid + 64];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
+        smem1[tid] += smem1[tid + 16];
+        smem2[tid] += smem2[tid + 16];
+#if WAVE_SIZE <16
+       } barrier(CLK_LOCAL_MEM_FENCE);
+       if (tid < 8) {
 #endif
+        smem1[tid] += smem1[tid + 8];
+        smem2[tid] += smem2[tid + 8];
 
-    if (tid < 32)
-    {
-        volatile __local float* vmem1 = smem1;
-        volatile __local float* vmem2 = smem2;
-
-        vmem1[tid] = val1 += vmem1[tid + 32];
-        vmem2[tid] = val2 += vmem2[tid + 32];
-
-        vmem1[tid] = val1 += vmem1[tid + 16];
-        vmem2[tid] = val2 += vmem2[tid + 16];
+        smem1[tid] += smem1[tid + 4];
+        smem2[tid] += smem2[tid + 4];
 
-        vmem1[tid] = val1 += vmem1[tid + 8];
-        vmem2[tid] = val2 += vmem2[tid + 8];
+        smem1[tid] += smem1[tid + 2];
+        smem2[tid] += smem2[tid + 2];
 
-        vmem1[tid] = val1 += vmem1[tid + 4];
-        vmem2[tid] = val2 += vmem2[tid + 4];
-
-        vmem1[tid] = val1 += vmem1[tid + 2];
-        vmem2[tid] = val2 += vmem2[tid + 2];
-
-        vmem1[tid] = val1 += vmem1[tid + 1];
-        vmem2[tid] = val2 += vmem2[tid + 1];
+        smem1[tid] += smem1[tid + 1];
+        smem2[tid] += smem2[tid + 1];
     }
 }
 
-void reduce1(float val1, __local float* smem1, int tid)
+void reduce1(float val1, __local volatile float* smem1, int tid)
 {
     smem1[tid] = val1;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-#if    BUFFER > 128
-    if (tid < 128)
+    if (tid < 32)
     {
-        smem1[tid] = val1 += smem1[tid + 128];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
+        smem1[tid] += smem1[tid + 32];
+#if WAVE_SIZE < 32
+       } barrier(CLK_LOCAL_MEM_FENCE);
+       if (tid < 16) {
 #endif
-
-#if    BUFFER > 64
-    if (tid < 64)
-    {
-        smem1[tid] = val1 += smem1[tid + 64];
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
+        smem1[tid] += smem1[tid + 16];
+#if WAVE_SIZE <16
+       } barrier(CLK_LOCAL_MEM_FENCE);
+       if (tid < 8) {
 #endif
-
-    if (tid < 32)
-    {
-        volatile __local float* vmem1 = smem1;
-
-        vmem1[tid] = val1 += vmem1[tid + 32];
-        vmem1[tid] = val1 += vmem1[tid + 16];
-        vmem1[tid] = val1 += vmem1[tid + 8];
-        vmem1[tid] = val1 += vmem1[tid + 4];
-        vmem1[tid] = val1 += vmem1[tid + 2];
-        vmem1[tid] = val1 += vmem1[tid + 1];
+        smem1[tid] += smem1[tid + 8];
+        smem1[tid] += smem1[tid + 4];
+        smem1[tid] += smem1[tid + 2];
+        smem1[tid] += smem1[tid + 1];
     }
 }
 #endif
 
 #define SCALE (1.0f / (1 << 20))
 #define        THRESHOLD       0.01f
-#define        DIMENSION       21
 
 // Image read mode
 __constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl
new file mode 100644 (file)
index 0000000..e0ff730
--- /dev/null
@@ -0,0 +1,407 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jin Ma jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// Centered-difference image gradient.
+//
+// For every pixel (x, y) with x < src_col and y < src_row this writes
+//   dx(y, x) = 0.5 * (src(y, x + 1) - src(y, x - 1))
+//   dy(y, x) = 0.5 * (src(y + 1, x) - src(y - 1, x))
+// Neighbour coordinates are clamped to the image, so on the border the
+// result degrades to a one-sided difference (still scaled by 0.5).
+//
+// src_step and dx_step are used as row strides counted in float elements;
+// dy is addressed with dx_step as well.
+__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
+    __global float* dx, __global float* dy, int dx_step)
+{
+    const int col = get_global_id(0);
+    const int row = get_global_id(1);
+
+    // Work-items that fall outside the image have nothing to do.
+    if (col >= src_col || row >= src_row)
+        return;
+
+    // Clamped horizontal neighbours.
+    const int right = min(col + 1, src_col - 1);
+    const int left  = max(col - 1, 0);
+    dx[row * dx_step + col] = 0.5f * (src[row * src_step + right] - src[row * src_step + left]);
+
+    // Clamped vertical neighbours.
+    const int below = min(row + 1, src_row - 1);
+    const int above = max(row - 1, 0);
+    dy[row * dx_step + col] = 0.5f * (src[below * src_step + col] - src[above * src_step + col]);
+}
+
+// Piecewise-cubic interpolation weight for a sample at signed distance x_.
+//
+// Returns
+//   |x| <= 1     : x^2 * (1.5 |x| - 2.5) + 1
+//   1 < |x| < 2  : |x| * (|x| * (-0.5 |x| + 2.5) - 4) + 2
+//   otherwise    : 0   (this branch is also taken when x_ is NaN)
+float bicubicCoeff(float x_)
+{
+    const float dist = fabs(x_);
+
+    // Outside the support of the kernel; written as a negated '<' so that a
+    // NaN input still yields 0, exactly like the fall-through else branch.
+    if (!(dist < 2.0f))
+        return 0.0f;
+
+    if (dist <= 1.0f)
+        return dist * dist * (1.5f * dist - 2.5f) + 1.0f;
+
+    return dist * (dist * (-0.5f * dist + 2.5f) - 4.0f) + 2.0f;
+}
+
+// Backward-warps I1 and its derivatives by the flow (u1, u2), reading the
+// inputs through image objects.
+//
+// For every pixel (x, y) with x < I0_col and y < I0_row:
+//   1. read the displacement (u1Val, u2Val); u1/u2 are addressed with their
+//      own step plus an (offset_x, offset_y) shift,
+//   2. resample tex_I1, tex_I1x and tex_I1y at (x + u1Val, y + u2Val) as a
+//      bicubicCoeff-weighted sum over the integer positions within distance
+//      2 of the target, normalised by the sum of the weights,
+//   3. store the three resampled values in I1w, I1wx, I1wy,
+//   4. store I1wx^2 + I1wy^2 in grad,
+//   5. store I1w - I1wx * u1 - I1wy * u2 - I0 in rho.
+//
+// I0_step, u1_step, u2_step and I1w_step are used as row strides counted in
+// float elements. I1w_step is shared by I1w, I1wx, I1wy, grad and rho.
+__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
+       image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,  
+    __global const float* u1, int u1_step, 
+    __global const float* u2,
+    __global float* I1w,
+       __global float* I1wx, /*int I1wx_step,*/
+       __global float* I1wy, /*int I1wy_step,*/
+       __global float* grad, /*int grad_step,*/
+       __global float* rho,
+       int I1w_step,
+       int u2_step,
+       int u1_offset_x,
+       int u1_offset_y,
+       int u2_offset_x,
+       int u2_offset_y)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+
+    if(x < I0_col&&y < I0_row)
+    {
+        //const float u1Val = u1(y, x);
+        const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        //const float u2Val = u2(y, x);
+        const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+
+        // Sub-pixel position in I1 that this pixel maps to.
+        const float wx = x + u1Val;
+        const float wy = y + u2Val;
+
+        // Integer sample positions whose distance to (wx, wy) is at most 2
+        // along each axis; bicubicCoeff is zero at distance 2 and beyond.
+        const int xmin = ceil(wx - 2.0f);
+        const int xmax = floor(wx + 2.0f);
+
+        const int ymin = ceil(wy - 2.0f);
+        const int ymax = floor(wy + 2.0f);
+
+        float sum  = 0.0f;
+        float sumx = 0.0f;
+        float sumy = 0.0f;
+        float wsum = 0.0f;
+        // Unnormalised integer coordinates, clamp-to-edge addressing, nearest
+        // filtering: the interpolation is done by the explicit weights below.
+        sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+        for (int cy = ymin; cy <= ymax; ++cy)
+        {
+            for (int cx = xmin; cx <= xmax; ++cx)
+            {
+                // Separable weight: product of the 1-D weights in x and y.
+                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+
+                //sum  += w * tex2D(tex_I1 , cx, cy);
+                               int2 cood = (int2)(cx, cy);
+                sum += w * read_imagef(tex_I1, sampleri, cood).x;
+                //sumx += w * tex2D(tex_I1x, cx, cy);
+                sumx += w * read_imagef(tex_I1x, sampleri, cood).x;
+                //sumy += w * tex2D(tex_I1y, cx, cy);
+                sumy += w * read_imagef(tex_I1y, sampleri, cood).x;
+
+                wsum += w;
+            }
+        }
+
+        // Normalise by the accumulated weight.
+        // NOTE(review): wsum is not checked against zero before the division.
+        const float coeff = 1.0f / wsum;
+
+        const float I1wVal  = sum  * coeff;
+        const float I1wxVal = sumx * coeff;
+        const float I1wyVal = sumy * coeff;
+
+        I1w[y * I1w_step + x]  = I1wVal;
+        I1wx[y * I1w_step + x] = I1wxVal;
+        I1wy[y * I1w_step + x] = I1wyVal;
+
+        const float Ix2 = I1wxVal * I1wxVal;
+        const float Iy2 = I1wyVal * I1wyVal;
+
+        // store the |Grad(I1)|^2
+        grad[y * I1w_step + x] = Ix2 + Iy2;
+
+        // compute the constant part of the rho function
+        const float I0Val = I0[y * I0_step + x];
+        rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
+    }
+
+}
+
+// Reads image(y, x) from a plain buffer with both coordinates clamped to the
+// valid range (replicate-border addressing).
+//
+//   image         - row-major float buffer
+//   x, y          - requested column / row; may lie outside the image
+//   rows, cols    - image height / width used for clamping (note the order)
+//   elemCntPerRow - row stride counted in float elements
+//
+// Returns image[clamp(y, 0, rows - 1) * elemCntPerRow + clamp(x, 0, cols - 1)].
+float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
+{
+    // Only the clamped (x, y) sample is needed; the clamped (x + 1, y + 1)
+    // neighbours that used to be computed here were never read.
+    const int i0 = clamp(x, 0, cols - 1);
+    const int j0 = clamp(y, 0, rows - 1);
+
+    return image[j0 * elemCntPerRow + i0];
+}
+
+// Backward-warps I1 and its derivatives by the flow (u1, u2), reading the
+// inputs from plain global buffers (variant for devices without image
+// support).
+//
+// For every pixel (x, y) with x < I0_col and y < I0_row:
+//   1. read the displacement (u1Val, u2Val),
+//   2. resample tex_I1, tex_I1x and tex_I1y at (x + u1Val, y + u2Val) as a
+//      bicubicCoeff-weighted sum over the integer positions within distance
+//      2 of the target, normalised by the sum of the weights; reads outside
+//      the image are clamped to the border by readImage,
+//   3. store the three resampled values in I1w, I1wx, I1wy,
+//   4. store I1wx^2 + I1wy^2 in grad,
+//   5. store I1w - I1wx * u1 - I1wy * u2 - I0 in rho.
+//
+// All *_step arguments are used as row strides counted in float elements.
+// I1w_step is shared by I1w, I1wx, I1wy, grad and rho; tex_I1y is addressed
+// with I1x_step because the kernel has no separate stride for it.
+__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,
+    __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
+    __global const float* u1, int u1_step,
+    __global const float* u2,
+    __global float* I1w,
+    __global float* I1wx, /*int I1wx_step,*/
+    __global float* I1wy, /*int I1wy_step,*/
+    __global float* grad, /*int grad_step,*/
+    __global float* rho,
+    int I1w_step,
+    int u2_step,
+    int I1_step,
+    int I1x_step)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+
+    if(x < I0_col&&y < I0_row)
+    {
+        const float u1Val = u1[y * u1_step + x];
+        const float u2Val = u2[y * u2_step + x];
+
+        // Sub-pixel position in I1 that this pixel maps to.
+        const float wx = x + u1Val;
+        const float wy = y + u2Val;
+
+        // Integer sample positions whose distance to (wx, wy) is at most 2
+        // along each axis; bicubicCoeff is zero at distance 2 and beyond.
+        const int xmin = ceil(wx - 2.0f);
+        const int xmax = floor(wx + 2.0f);
+
+        const int ymin = ceil(wy - 2.0f);
+        const int ymax = floor(wy + 2.0f);
+
+        float sum  = 0.0f;
+        float sumx = 0.0f;
+        float sumy = 0.0f;
+        float wsum = 0.0f;
+
+        for (int cy = ymin; cy <= ymax; ++cy)
+        {
+            for (int cx = xmin; cx <= xmax; ++cx)
+            {
+                // Separable weight: product of the 1-D weights in x and y.
+                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+
+                // readImage takes (rows, cols) in that order. The previous
+                // code passed (I0_col, I0_row), which clamped x against the
+                // row count and y against the column count and therefore
+                // sampled the wrong pixels on non-square images.
+                sum  += w * readImage(tex_I1,  cx, cy, I0_row, I0_col, I1_step);
+                sumx += w * readImage(tex_I1x, cx, cy, I0_row, I0_col, I1x_step);
+                sumy += w * readImage(tex_I1y, cx, cy, I0_row, I0_col, I1x_step);
+                wsum += w;
+            }
+        }
+
+        // Normalise by the accumulated weight.
+        // NOTE(review): wsum is not checked against zero before the division.
+        const float coeff = 1.0f / wsum;
+
+        const float I1wVal  = sum  * coeff;
+        const float I1wxVal = sumx * coeff;
+        const float I1wyVal = sumy * coeff;
+
+        I1w[y * I1w_step + x]  = I1wVal;
+        I1wx[y * I1w_step + x] = I1wxVal;
+        I1wy[y * I1w_step + x] = I1wyVal;
+
+        const float Ix2 = I1wxVal * I1wxVal;
+        const float Iy2 = I1wyVal * I1wyVal;
+
+        // store the |Grad(I1)|^2
+        grad[y * I1w_step + x] = Ix2 + Iy2;
+
+        // compute the constant part of the rho function
+        const float I0Val = I0[y * I0_step + x];
+        rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
+    }
+
+}
+
+
+// Updates the dual variables (p11, p12) and (p21, p22) from the current flow.
+//
+// For every pixel (x, y) with x < u1_col and y < u1_row:
+//   - forward differences of u1 and u2 are taken in x and y, with the
+//     "next" neighbour clamped to the last column / row (so the difference
+//     is 0 on the right and bottom borders),
+//   - g1 = hypot(u1x, u1y), g2 = hypot(u2x, u2y),
+//   - each p is replaced by (p + taut * difference) / (1 + taut * g).
+//
+// u1/u2 are addressed with their own step plus an (offset_x, offset_y)
+// shift; all four p buffers share p11_step. Steps are used as row strides
+// counted in float elements. The clamping of u2's neighbours uses u1_col and
+// u1_row as well.
+__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, 
+    __global const float* u2, 
+    __global float* p11, int p11_step, 
+    __global float* p12,
+    __global float* p21,
+    __global float* p22, 
+    const float taut,
+       int u2_step,
+       int u1_offset_x,
+       int u1_offset_y,
+       int u2_offset_x,
+       int u2_offset_y)
+{
+
+    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    //const int y = blockIdx.y * blockDim.y + threadIdx.y;
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+
+    if(x < u1_col && y < u1_row)
+    {
+        // Forward difference of u1 in x (neighbour clamped to the last column).
+               int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
+        const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        
+        // Forward difference of u1 in y (neighbour clamped to the last row).
+               int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
+        const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+
+        // Same two differences for u2; src_x2/src_y2 equal src_x1/src_y1.
+               int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
+        const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+
+               int src_y2 = (y + 1) <  (u1_row - 1) ? (y + 1) : (u1_row - 1);
+        const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+
+        // Gradient magnitudes of u1 and u2.
+        const float g1 = hypot(u1x, u1y);
+        const float g2 = hypot(u2x, u2y);
+
+        // Denominators of the update; at least 1 for non-negative taut.
+        const float ng1 = 1.0f + taut * g1;
+        const float ng2 = 1.0f + taut * g2;
+
+        p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1;
+        p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1;
+        p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2;
+        p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2;
+    }
+
+}
+
+// Backward-difference divergence of the vector field (v1, v2) at (y, x):
+//   (v1(y, x) - v1(y, x - 1)) + (v2(y, x) - v2(y - 1, x))
+// On the first column the x-difference is replaced by v1 itself, and on the
+// first row the y-difference is replaced by v2 itself.
+//
+// Note the argument order (y, x). v1_step / v2_step are used as row strides
+// counted in float elements. When x <= 0 column 0 is read, and when y <= 0
+// row 0 is read, whatever the actual value of x or y is.
+float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
+{
+    // Interior pixel: both backward neighbours exist.
+    if (x > 0 && y > 0)
+    {
+        const float dv1dx = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
+        const float dv2dy = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
+        return dv1dx + dv2dy;
+    }
+
+    // First column (x <= 0) below the first row: no left neighbour.
+    if (y > 0)
+        return v1[y * v1_step] + v2[y * v2_step] - v2[(y - 1) * v2_step];
+
+    // First row (y <= 0) right of the first column: no upper neighbour.
+    if (x > 0)
+        return v1[x] - v1[x - 1] + v2[x];
+
+    // Top-left corner: neither neighbour exists.
+    return v1[0] + v2[0];
+}
+
+// Updates the flow (u1, u2) in place and records the squared change per pixel.
+//
+// For every pixel (x, y) with x < I1wx_col and y < I1wx_row:
+//   1. rho = rho_c + I1wx * u1 + I1wy * u2,
+//   2. a step (d1, d2) is chosen by comparing rho against +/- l_t * grad
+//      (see the three branches below) and added to the old flow to give
+//      (v1, v2),
+//   3. the new flow is v + theta * divergence of the dual variables
+//      ((p11, p12) for u1, (p21, p22) for u2),
+//   4. error = (u1_old - u1_new)^2 + (u2_old - u2_new)^2.
+//
+// I1wx_step is used as the row stride (in float elements) of I1wx, I1wy,
+// grad, rho_c, p11, p12, p21, p22 and error. u1/u2 use their own step plus
+// an (offset_x, offset_y) shift.
+__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,
+    __global const float* I1wy, /*int I1wy_step,*/
+    __global const float* grad, /*int grad_step,*/ 
+    __global const float* rho_c, /*int rho_c_step,*/
+    __global const float* p11, /*int p11_step,*/
+    __global const float* p12, /*int p12_step,*/
+    __global const float* p21, /*int p21_step,*/
+    __global const float* p22, /*int p22_step,*/
+    __global float* u1, int u1_step, 
+    __global float* u2, 
+    __global float* error, const float l_t, const float theta, int u2_step,
+       int u1_offset_x,
+       int u1_offset_y,
+       int u2_offset_x,
+       int u2_offset_y)
+{
+
+    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    //const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+
+    if(x < I1wx_col && y < I1wx_row)
+    {
+        const float I1wxVal = I1wx[y * I1wx_step + x];
+        const float I1wyVal = I1wy[y * I1wx_step + x];
+        const float gradVal = grad[y * I1wx_step + x];
+        const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+
+        const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
+
+        // estimate the values of the variable (v1, v2) (thresholding operator TH)
+
+        float d1 = 0.0f;
+        float d2 = 0.0f;
+
+        if (rho < -l_t * gradVal)
+        {
+            // rho below the lower threshold: full step along +gradient.
+            d1 = l_t * I1wxVal;
+            d2 = l_t * I1wyVal;
+        }
+        else if (rho > l_t * gradVal)
+        {
+            // rho above the upper threshold: full step along -gradient.
+            d1 = -l_t * I1wxVal;
+            d2 = -l_t * I1wyVal;
+        }
+        else if (gradVal > 1.192092896e-07f)
+        {
+            // In between: step scaled by -rho / grad. The guard constant has
+            // the value of single-precision FLT_EPSILON and keeps the
+            // division away from a (near-)zero gradient; otherwise d stays 0.
+            const float fi = -rho / gradVal;
+            d1 = fi * I1wxVal;
+            d2 = fi * I1wyVal;
+        }
+
+        const float v1 = u1OldVal + d1;
+        const float v2 = u2OldVal + d2;
+
+        // compute the divergence of the dual variable (p1, p2)
+
+        const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
+        const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
+
+        // estimate the values of the optical flow (u1, u2)
+
+        const float u1NewVal = v1 + theta * div_p1;
+        const float u2NewVal = v2 + theta * div_p2;
+
+        u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
+        u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
+
+        // Squared magnitude of the flow change at this pixel.
+        const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+        const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+        error[y * I1wx_step + x] = n1 + n2;
+    }
+
+}
index b2a3e41..4f93eac 100644 (file)
@@ -78,6 +78,7 @@
 
 #if defined (HAVE_OPENCL)
 
+#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
 #include "opencv2/ocl/private/util.hpp"
 #include "safe_call.hpp"
 
index 4a6ce1c..8e94204 100644 (file)
@@ -15,8 +15,8 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
-//             Dachuan Zhao, dachuan@multicorewareinc.com
-//             Yao Wang, bitwangyaoyao@gmail.com
+//      Dachuan Zhao, dachuan@multicorewareinc.com
+//      Yao Wang, bitwangyaoyao@gmail.com
 //      Nathan, liujun@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -56,31 +56,16 @@ namespace cv
 {
 namespace ocl
 {
-///////////////////////////OpenCL kernel strings///////////////////////////
 extern const char *pyrlk;
 extern const char *pyrlk_no_image;
-extern const char *arithm_mul;
 }
 }
-
 struct dim3
 {
     unsigned int x, y, z;
 };
 
-struct float2
-{
-    float x, y;
-};
-
-struct int2
-{
-    int x, y;
-};
-
-namespace
-{
-void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
+static void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
 {
     winSize.width *= cn;
 
@@ -100,45 +85,6 @@ void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDe
 
     block.z = patch.z = 1;
 }
-}
-
-static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar)
-{
-    if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
-    {
-        CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
-        return;
-    }
-
-    CV_Assert(src1.cols == dst.cols &&
-              src1.rows == dst.rows);
-
-    CV_Assert(src1.type() == dst.type());
-    CV_Assert(src1.depth() != CV_8S);
-
-    Context  *clCxt = src1.clCxt;
-
-    size_t localThreads[3]  = { 16, 16, 1 };
-    size_t globalThreads[3] = { src1.cols,
-                                src1.rows,
-                                1
-                              };
-
-    int dst_step1 = dst.cols * dst.elemSize();
-    vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
-    args.push_back( make_pair( sizeof(float), (float *)&scalar ));
-
-    openCLExecuteKernel(clCxt, &arithm_mul, "arithm_muls", globalThreads, localThreads, args, -1, src1.depth());
-}
 
 static void lkSparse_run(oclMat &I, oclMat &J,
                          const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
@@ -151,15 +97,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
     size_t localThreads[3]  = { 8, isImageSupported ? 8 : 32, 1 };
     size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1};
     int cn = I.oclchannels();
-    char calcErr;
-    if (level == 0)
-    {
-        calcErr = 1;
-    }
-    else
-    {
-        calcErr = 0;
-    }
+    char calcErr = level==0?1:0;
 
     vector<pair<size_t , const void *> > args;
 
@@ -187,8 +125,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
     args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
     args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
 
-    bool is_cpu;
-    queryDeviceInfo(IS_CPU_DEVICE, &is_cpu);
+    bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
     if (is_cpu)
     {
         openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
@@ -199,7 +136,17 @@ static void lkSparse_run(oclMat &I, oclMat &J,
     {
         if(isImageSupported)
         {
-            openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
+            stringstream idxStr;
+            idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth();
+            cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
+            int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+            openCLSafeCall(clReleaseKernel(kernel));
+
+            static char opt[32] = {0};
+            sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
+
+            openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, 
+                                args, I.oclchannels(), I.depth(), opt);
             releaseTexture(ITex);
             releaseTexture(JTex);
         }
@@ -242,8 +189,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
 
     oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
     oclMat temp2 = nextPts.reshape(1);
-    multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
-    //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
+    multiply(1.0f/(1<<maxLevel)/2.0f, temp1, temp2);
 
     ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
     status.setTo(Scalar::all(1));
@@ -258,7 +204,6 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
         ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
 
     // build the image pyramids.
-
     prevPyr_.resize(maxLevel + 1);
     nextPyr_.resize(maxLevel + 1);
 
@@ -275,7 +220,6 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
     }
 
     // dI/dx ~ Ix, dI/dy ~ Iy
-
     for (int level = maxLevel; level >= 0; level--)
     {
         lkSparse_run(prevPyr_[level], nextPyr_[level],
index 441495f..ba36cab 100644 (file)
@@ -47,7 +47,7 @@
 #define __OPENCV_OPENCL_SAFE_CALL_HPP__
 
 #if defined __APPLE__
-#include <OpenCL/OpenCL.h>
+#include <OpenCL/opencl.h>
 #else
 #include <CL/cl.h>
 #endif
diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp
new file mode 100644 (file)
index 0000000..a322f62
--- /dev/null
@@ -0,0 +1,479 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//             Jin Ma, jin@multicorewareinc.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+namespace cv
+{
+    namespace ocl
+    {
+        ///////////////////////////OpenCL kernel strings///////////////////////////
+        extern const char* tvl1flow;
+    }
+}
+
+cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL()
+{   // set the default algorithm parameters; callers may overwrite the members before calling operator()
+    tau            = 0.25;   // procOneScale uses tau / theta as 'taut'
+    lambda         = 0.15;   // procOneScale uses lambda * theta as 'l_t'
+    theta          = 0.3;
+    nscales        = 5;      // requested pyramid levels; operator() lowers it when a level gets smaller than 16 pixels
+    warps          = 5;      // warpings per pyramid level
+    epsilon        = 0.01;   // stopping threshold, squared and scaled by the pixel count in procOneScale
+    iterations     = 300;    // upper bound for the inner iterations of each warping
+    useInitialFlow = false;  // when false, operator() (re)creates flowx / flowy itself
+}
+
+void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy)
+{
+    CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
+    CV_Assert( I0.size() == I1.size() );
+    CV_Assert( I0.type() == I1.type() );
+    CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
+    CV_Assert( nscales > 0 );
+
+    // allocate memory for the pyramid structure
+    I0s.resize(nscales);
+    I1s.resize(nscales);
+    u1s.resize(nscales);
+    u2s.resize(nscales);
+    //I0s_step == I1s_step
+    I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
+    I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
+
+
+    if (!useInitialFlow)
+    {
+        flowx.create(I0.size(), CV_32FC1);
+        flowy.create(I0.size(), CV_32FC1);
+    }
+    //u1s_step != u2s_step
+    u1s[0] = flowx;
+    u2s[0] = flowy;
+
+    I1x_buf.create(I0.size(), CV_32FC1);
+    I1y_buf.create(I0.size(), CV_32FC1);
+
+    I1w_buf.create(I0.size(), CV_32FC1);
+    I1wx_buf.create(I0.size(), CV_32FC1);
+    I1wy_buf.create(I0.size(), CV_32FC1);
+
+    grad_buf.create(I0.size(), CV_32FC1);
+    rho_c_buf.create(I0.size(), CV_32FC1);
+
+    p11_buf.create(I0.size(), CV_32FC1);
+    p12_buf.create(I0.size(), CV_32FC1);
+    p21_buf.create(I0.size(), CV_32FC1);
+    p22_buf.create(I0.size(), CV_32FC1);
+
+    diff_buf.create(I0.size(), CV_32FC1);
+
+    // create the scales
+    for (int s = 1; s < nscales; ++s)
+    {
+        ocl::pyrDown(I0s[s - 1], I0s[s]);
+        ocl::pyrDown(I1s[s - 1], I1s[s]);
+
+        if (I0s[s].cols < 16 || I0s[s].rows < 16)
+        {
+            nscales = s;
+            break;
+        }
+
+        if (useInitialFlow)
+        {
+            ocl::pyrDown(u1s[s - 1], u1s[s]);
+            ocl::pyrDown(u2s[s - 1], u2s[s]);
+
+            //ocl::multiply(u1s[s], Scalar::all(0.5), u1s[s]);
+            multiply(0.5, u1s[s], u1s[s]);
+            //ocl::multiply(u2s[s], Scalar::all(0.5), u2s[s]);
+            multiply(0.5, u1s[s], u2s[s]);
+        }
+    }
+
+    // pyramidal structure for computing the optical flow
+    for (int s = nscales - 1; s >= 0; --s)
+    {
+        // compute the optical flow at the current scale
+        procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]);
+
+        // if this was the last scale, finish now
+        if (s == 0)
+            break;
+
+        // otherwise, upsample the optical flow
+
+        // zoom the optical flow for the next finer scale
+        ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
+        ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
+
+        // scale the optical flow with the appropriate zoom factor
+        multiply(2, u1s[s - 1], u1s[s - 1]);
+        multiply(2, u2s[s - 1], u2s[s - 1]);
+
+    }
+
+}
+
+namespace ocl_tvl1flow
+{   // forward declarations of the kernel-launch helpers that are defined at the end of this file
+    void centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy);
+
+    void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, 
+        oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, 
+        oclMat &grad, oclMat &rho);
+
+    void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, 
+        oclMat &rho_c, oclMat &p11, oclMat &p12, 
+        oclMat &p21, oclMat &p22, oclMat &u1, 
+        oclMat &u2, oclMat &error, float l_t, float theta);
+
+    void estimateDualVariables(oclMat &u1, oclMat &u2, 
+        oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut);
+}
+
+void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2)
+{
+    using namespace ocl_tvl1flow;
+    // Refines the flow (u1, u2) for a single pyramid level; both are updated through the helper calls below.
+    const double scaledEpsilon = epsilon * epsilon * I0.size().area(); // stopping threshold scaled by this level's pixel count
+
+    CV_DbgAssert( I1.size() == I0.size() );
+    CV_DbgAssert( I1.type() == I0.type() );
+    CV_DbgAssert( u1.empty() || u1.size() == I0.size() );
+    CV_DbgAssert( u2.size() == u1.size() );
+    // no estimate for this level yet: start from a zero flow
+    if (u1.empty())
+    {
+        u1.create(I0.size(), CV_32FC1);
+        u1.setTo(Scalar::all(0));
+
+        u2.create(I0.size(), CV_32FC1);
+        u2.setTo(Scalar::all(0));
+    }
+    // the *_buf members are created by operator() at the finest-level size; work on top-left ROIs of this level's size
+    oclMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
+    oclMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
+
+    centeredGradient(I1, I1x, I1y);
+
+    oclMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
+    oclMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
+    oclMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
+
+    oclMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
+    oclMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
+
+    oclMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
+    oclMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
+    oclMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
+    oclMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
+    p11.setTo(Scalar::all(0)); // p11..p22 are reset to zero on every call
+    p12.setTo(Scalar::all(0));
+    p21.setTo(Scalar::all(0));
+    p22.setTo(Scalar::all(0));
+
+    oclMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
+
+    const float l_t = static_cast<float>(lambda * theta);
+    const float taut = static_cast<float>(tau / theta);
+
+    for (int warpings = 0; warpings < warps; ++warpings)
+    {
+        warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
+        // alternate estimateU / estimateDualVariables until sum(diff) <= scaledEpsilon or 'iterations' passes were run
+        double error = numeric_limits<double>::max();
+        for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
+        {
+            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, 
+                u1, u2, diff, l_t, static_cast<float>(theta));
+
+            error = ocl::sum(diff)[0]; // diff is the buffer handed to estimateU as its 'error' argument
+
+            estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
+
+        }
+    }
+
+}
+
+void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage()
+{   // drop the image / flow pyramids and release every scratch buffer held by this object
+    I0s.clear();
+    I1s.clear();
+    u1s.clear();
+    u2s.clear();
+
+    I1x_buf.release();
+    I1y_buf.release();
+
+    I1w_buf.release();
+    I1wx_buf.release();
+    I1wy_buf.release();
+
+    grad_buf.release();
+    rho_c_buf.release();
+
+    p11_buf.release();
+    p12_buf.release();
+    p21_buf.release();
+    p22_buf.release();
+
+    diff_buf.release();
+    norm_buf.release();
+}
+
+void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy)
+{   // launches "centeredGradientKernel" over a src.cols x src.rows range with src, dx and dy as buffer arguments
+    Context  *clCxt = src.clCxt;
+    size_t localThreads[3] = {32, 8, 1};
+    size_t globalThreads[3] = {src.cols, src.rows, 1};
+    // steps are handed to the kernel divided by the element size (element units if step is in bytes - TODO confirm)
+    int srcElementSize = src.elemSize();
+    int src_step = src.step/srcElementSize;
+
+    int dElememntSize = dx.elemSize();
+    int dx_step = dx.step/dElememntSize;
+    // NOTE(review): only dx_step is passed; dy is presumably expected to share dx's step - confirm against the kernel
+    string kernelName = "centeredGradientKernel";
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&src.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&src.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&src.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&src_step));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&dx.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&dy.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&dx_step));
+    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1);
+
+}
+
+void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut)
+{   // launches "estimateDualVariablesKernel" over a u1.cols x u1.rows range
+    Context *clCxt = u1.clCxt;
+
+    size_t localThread[] = {32, 8, 1};
+    size_t globalThread[] = 
+    {
+        u1.cols, 
+        u1.rows,
+        1
+    };
+    // steps are handed to the kernel divided by the element size
+    int u1_element_size = u1.elemSize();
+    int u1_step = u1.step/u1_element_size;
+
+    int u2_element_size = u2.elemSize();
+    int u2_step = u2.step/u2_element_size;
+
+    int p11_element_size = p11.elemSize();
+    int p11_step = p11.step/p11_element_size;
+    // split each matrix offset into a row index (offset / step) and a column index in elements
+    int u1_offset_y = u1.offset/u1.step;
+    int u1_offset_x = u1.offset%u1.step;
+    u1_offset_x = u1_offset_x/u1.elemSize();
+
+    int u2_offset_y = u2.offset/u2.step;
+    int u2_offset_x = u2.offset%u2.step;
+    u2_offset_x = u2_offset_x/u2.elemSize();
+    // NOTE(review): p12, p21 and p22 get no step of their own; presumably they share p11_step - confirm against the kernel
+    string kernelName = "estimateDualVariablesKernel";
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&p11_step));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data));
+    args.push_back( make_pair( sizeof(cl_float), (void*)&taut));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+
+    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+}
+
+void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, 
+    oclMat &rho_c, oclMat &p11, oclMat &p12, 
+    oclMat &p21, oclMat &p22, oclMat &u1, 
+    oclMat &u2, oclMat &error, float l_t, float theta)
+{   // launches "estimateUKernel" over an I1wx.cols x I1wx.rows range
+    Context* clCxt = I1wx.clCxt;
+
+    size_t localThread[] = {32, 8, 1};
+    size_t globalThread[] = 
+    {
+        I1wx.cols, 
+        I1wx.rows,
+        1
+    };
+    // steps are handed to the kernel divided by the element size
+    int I1wx_element_size = I1wx.elemSize();
+    int I1wx_step = I1wx.step/I1wx_element_size;
+
+    int u1_element_size = u1.elemSize();
+    int u1_step = u1.step/u1_element_size;
+
+    int u2_element_size = u2.elemSize();
+    int u2_step = u2.step/u2_element_size;
+    // split each matrix offset into a row index (offset / step) and a column index in elements
+    int u1_offset_y = u1.offset/u1.step;
+    int u1_offset_x = u1.offset%u1.step;
+    u1_offset_x = u1_offset_x/u1.elemSize();
+
+    int u2_offset_y = u2.offset/u2.step;
+    int u2_offset_x = u2.offset%u2.step;
+    u2_offset_x = u2_offset_x/u2.elemSize();
+    // NOTE(review): only I1wx_step, u1_step and u2_step are passed; the other buffers presumably share I1wx_step - confirm
+    string kernelName = "estimateUKernel";
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx_step));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&rho_c.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&error.data));
+    args.push_back( make_pair( sizeof(cl_float), (void*)&l_t));
+    args.push_back( make_pair( sizeof(cl_float), (void*)&theta));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+
+    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+}
+
+void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho)
+{   // launches "warpBackwardKernel" over an I0.cols x I0.rows range; I1, I1x and I1y are passed as textures
+    Context* clCxt = I0.clCxt;
+    const bool isImgSupported = support_image2d(clCxt);
+    
+    CV_Assert(isImgSupported); // the texture arguments below make image2d support mandatory
+
+    int u1ElementSize = u1.elemSize();
+    int u1Step = u1.step/u1ElementSize;
+
+    int u2ElementSize = u2.elemSize();
+    int u2Step = u2.step/u2ElementSize;
+
+    int I0ElementSize = I0.elemSize();
+    int I0Step = I0.step/I0ElementSize;
+
+    int I1w_element_size = I1w.elemSize();
+    int I1w_step = I1w.step/I1w_element_size;
+    // split each matrix offset into a row index (offset / step) and a column index in elements
+    int u1_offset_y = u1.offset/u1.step;
+    int u1_offset_x = u1.offset%u1.step;
+    u1_offset_x = u1_offset_x/u1.elemSize();
+
+    int u2_offset_y = u2.offset/u2.step;
+    int u2_offset_x = u2.offset%u2.step;
+    u2_offset_x = u2_offset_x/u2.elemSize();
+
+    size_t localThread[] = {32, 8, 1};
+    size_t globalThread[] = 
+    {
+        I0.cols, 
+        I0.rows,
+        1
+    };
+    // bind I1, I1x and I1y as textures for the kernel; they are released again right after the launch
+    cl_mem I1_tex;
+    cl_mem I1x_tex;
+    cl_mem I1y_tex;
+    I1_tex = bindTexture(I1);
+    I1x_tex = bindTexture(I1x);
+    I1y_tex = bindTexture(I1y);
+
+    string kernelName = "warpBackwardKernel";
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I0.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&I0Step));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&I0.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&I0.rows));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1_tex));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1x_tex));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1y_tex));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1Step));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1w.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void*)&rho.data));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&I1w_step));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2Step));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+
+    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+
+    releaseTexture(I1_tex);
+    releaseTexture(I1x_tex);
+    releaseTexture(I1y_tex);
+}
\ No newline at end of file
index cac6b66..10032e8 100644 (file)
@@ -45,7 +45,6 @@
 
 #include "precomp.hpp"
 #ifdef HAVE_OPENCL
-#define SHOW_RESULT 0
 
 ////////////////////////////////////////////////////////
 // Canny
@@ -59,13 +58,10 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient)
     bool useL2gradient;
 
     cv::Mat edges_gold;
-    //std::vector<cv::ocl::Info> oclinfo;
     virtual void SetUp()
     {
         apperture_size = GET_PARAM(0);
         useL2gradient = GET_PARAM(1);
-        //int devnums = getDevice(oclinfo);
-        //CV_Assert(devnums > 0);
     }
 };
 
@@ -77,32 +73,18 @@ TEST_P(Canny, Accuracy)
     double low_thresh = 50.0;
     double high_thresh = 100.0;
 
-    cv::resize(img, img, cv::Size(512, 384));
     cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
 
     cv::ocl::oclMat edges;
     cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
 
-    char filename [100];
-    sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
-
     cv::Mat edges_gold;
     cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
 
-#if SHOW_RESULT
-    cv::Mat edges_x2, ocl_edges(edges);
-    edges_x2.create(edges.rows, edges.cols * 2, edges.type());
-    edges_x2.setTo(0);
-    cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
-    cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
-    cv::namedWindow("Canny result (left: cpu, right: ocl)");
-    cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
-    cv::waitKey();
-#endif //OUTPUT_RESULT
     EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
+INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
                             testing::Values(AppertureSize(3), AppertureSize(5)),
                             testing::Values(L2gradient(false), L2gradient(true))));
 #endif
\ No newline at end of file
index a5d90ff..5548456 100644 (file)
@@ -74,7 +74,7 @@ TEST_P(Gemm, Accuracy)
     cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
     cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
 
-    EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
+    EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4);
 }
 
 INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
index 96f7211..52ddbb7 100644 (file)
@@ -55,6 +55,12 @@ using namespace testing;
 using namespace std;
 using namespace cv;
 extern string workdir;
+
+namespace
+{
+IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
+CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
+CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
 struct getRect
 {
     Rect operator ()(const CvAvgComp &e) const
@@ -62,23 +68,24 @@ struct getRect
         return e.rect;
     }
 };
+}
 
-PARAM_TEST_CASE(Haar, double, int)
+PARAM_TEST_CASE(Haar, double, int, CascadeName)
 {
     cv::ocl::OclCascadeClassifier cascade, nestedCascade;
-    cv::ocl::OclCascadeClassifierBuf cascadebuf;
     cv::CascadeClassifier cpucascade, cpunestedCascade;
 
     double scale;
     int flags;
+    std::string cascadeName;
 
     virtual void SetUp()
     {
         scale = GET_PARAM(0);
         flags = GET_PARAM(1);
-        string cascadeName = workdir + "../../data/haarcascades/haarcascade_frontalface_alt.xml";
+        cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(2));
 
-        if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) || (!cascadebuf.load( cascadeName )))
+        if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) )
         {
             cout << "ERROR: Could not load classifier cascade" << endl;
             return;
@@ -115,7 +122,7 @@ TEST_P(Haar, FaceDetect)
     Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
     oclfaces.resize(vecAvgComp.size());
     std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
-
+    
     cpucascade.detectMultiScale( smallImg, faces,  1.1, 3,
                                  flags,
                                  Size(30, 30), Size(0, 0) );
@@ -136,7 +143,6 @@ TEST_P(Haar, FaceDetectUseBuf)
     vector<Rect> faces, oclfaces;
 
     Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
-    MemStorage storage(cvCreateMemStorage(0));
     cvtColor( img, gray, CV_BGR2GRAY );
     resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
     equalizeHist( smallImg, smallImg );
@@ -144,19 +150,31 @@ TEST_P(Haar, FaceDetectUseBuf)
     cv::ocl::oclMat image;
     image.upload(smallImg);
 
+    cv::ocl::OclCascadeClassifierBuf cascadebuf;
+    if( !cascadebuf.load( cascadeName ) )
+    {
+        cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl;
+        return;
+    }
     cascadebuf.detectMultiScale( image, oclfaces,  1.1, 3,
                                  flags,
                                  Size(30, 30), Size(0, 0) );
-    cascadebuf.release();
 
     cpucascade.detectMultiScale( smallImg, faces,  1.1, 3,
                                  flags,
                                  Size(30, 30), Size(0, 0) );
     EXPECT_EQ(faces.size(), oclfaces.size());
+
+    // intentionally run ocl facedetect again and check if it still works after the first run
+    cascadebuf.detectMultiScale( image, oclfaces,  1.1, 3,
+        flags,
+        Size(30, 30));
+    cascadebuf.release();
+    EXPECT_EQ(faces.size(), oclfaces.size());
 }
 
 INSTANTIATE_TEST_CASE_P(FaceDetect, Haar,
     Combine(Values(1.0),
-            Values(CV_HAAR_SCALE_IMAGE, 0)));
+            Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
 
 #endif // HAVE_OPENCL
index 664f8a3..b9f4740 100644 (file)
@@ -23,6 +23,7 @@
 //    Rock Li, Rock.Li@amd.com
 //    Wu Zailong, bullet@yeah.net
 //    Xu Pang, pangxu010@163.com
+//    Sen Liu, swjtuls1987@126.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -1393,6 +1394,46 @@ TEST_P(calcHist, Mat)
         EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0);
     }
 }
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// CLAHE
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(ClipLimit, double)
+}
+
+PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)
+{   // fixture parameterised by image size and clip limit
+    cv::Size size;
+    double clipLimit;
+
+    cv::Mat src;
+    cv::Mat dst_gold;
+
+    cv::ocl::oclMat g_src;
+    cv::ocl::oclMat g_dst;
+
+    virtual void SetUp()
+    {
+        size = GET_PARAM(0);
+        clipLimit = GET_PARAM(1);
+        // random CV_8UC1 input of the requested size, uploaded to the device once per test
+        cv::RNG &rng = TS::ptr()->get_rng();
+        src = randomMat(rng, size, CV_8UC1, 0, 256, false);
+        g_src.upload(src);
+    }
+};
+
+TEST_P(CLAHE, Accuracy)
+{   // the OpenCL CLAHE result is compared with cv::CLAHE using EXPECT_MAT_NEAR and a tolerance of 1.0
+    cv::Ptr<cv::ocl::CLAHE> clahe = cv::ocl::createCLAHE(clipLimit);
+    clahe->apply(g_src, g_dst);
+    cv::Mat dst(g_dst);
+    // reference result from the CPU implementation with the same clip limit
+    cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit);
+    clahe_gold->apply(src, dst_gold);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
+}
 
 ///////////////////////////Convolve//////////////////////////////////
 PARAM_TEST_CASE(ConvolveTestBase, MatType, bool)
@@ -1643,6 +1684,10 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
                             ONE_TYPE(CV_32SC1) //no use
                         ));
 
+INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine(
+                        Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)),
+                        Values(0.0, 40.0)));
+
 //INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
 //                            Values(CV_32FC1, CV_32FC1),
 //                            Values(false))); // Values(false) is the reserved parameter
similarity index 54%
rename from modules/ocl/test/test_pyrlk.cpp
rename to modules/ocl/test/test_optflow.cpp
index 064cb30..0121be8 100644 (file)
@@ -1,4 +1,4 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
@@ -7,12 +7,16 @@
 //  copy or use the software.
 //
 //
-//                        Intel License Agreement
+//                           License Agreement
 //                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
+// @Authors
+//
+//
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
@@ -21,9 +25,9 @@
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
+//     and/or other materials provided with the distribution.
 //
-//   * The name of Intel Corporation may not be used to endorse or promote products
+//   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
@@ -52,6 +56,124 @@ using namespace std;
 
 extern string workdir;
 
+
+//////////////////////////////////////////////////////
+// GoodFeaturesToTrack
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(MinDistance, double)
+}
+PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance)
+{   // fixture parameterised by the minDistance value given to the detectors
+    double minDistance;
+
+    virtual void SetUp()
+    {
+        minDistance = GET_PARAM(0);
+    }
+};
+
+TEST_P(GoodFeaturesToTrack, Accuracy)
+{   // the OpenCL detector must return as many corners as cv::goodFeaturesToTrack, with at most 1% differing positions
+    cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame.empty());
+
+    int maxCorners = 1000;
+    double qualityLevel = 0.01;
+
+    cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+
+    cv::ocl::oclMat d_pts;
+    detector(oclMat(frame), d_pts);
+
+    ASSERT_FALSE(d_pts.empty());
+
+    std::vector<cv::Point2f> pts(d_pts.cols);
+    
+    detector.downloadPoints(d_pts, pts);
+
+    std::vector<cv::Point2f> pts_gold;
+    cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance);
+
+    ASSERT_EQ(pts_gold.size(), pts.size());
+    // points are compared index by index after conversion to integer coordinates
+    size_t mistmatch = 0;
+    for (size_t i = 0; i < pts.size(); ++i)
+    {
+        cv::Point2i a = pts_gold[i];
+        cv::Point2i b = pts[i];
+
+        bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; // on integers, |d| < 1 means equal
+
+        if (!eq)
+            ++mistmatch;
+    }
+
+    double bad_ratio = static_cast<double>(mistmatch) / pts.size();
+
+    ASSERT_LE(bad_ratio, 0.01);
+}
+
+TEST_P(GoodFeaturesToTrack, EmptyCorners)
+{   // on an all-zero image the detector must leave the pre-allocated 'corners' matrix empty
+    int maxCorners = 1000;
+    double qualityLevel = 0.01;
+
+    cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+
+    cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
+    cv::ocl::oclMat corners(1, maxCorners, CV_32FC2);
+
+    detector(src, corners);
+
+    ASSERT_TRUE(corners.empty());
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack, 
+    testing::Values(MinDistance(0.0), MinDistance(3.0)));
+
+//////////////////////////////////////////////////////////////////////////
+PARAM_TEST_CASE(TVL1, bool)
+{   // fixture parameter: the flag the test forwards as the last argument of randomMat for flowx / flowy
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        useRoi = GET_PARAM(0);
+    }
+
+};
+
+TEST_P(TVL1, Accuracy)
+{   // compares OpticalFlowDual_TVL1_OCL with cv::createOptFlow_DualTVL1 on the rubberwhale frame pair
+    cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty());
+
+    cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty());
+    // output matrices are pre-created via randomMat with a [0, 0] value range, optionally as ROIs (useRoi)
+    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
+    cv::RNG &rng = TS::ptr()->get_rng();
+    cv::Mat flowx = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi);
+    cv::Mat flowy = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi);
+    cv::ocl::oclMat d_flowx(flowx), d_flowy(flowy);
+    d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+    // CPU reference: a two-channel flow, split into its x and y planes
+    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
+    cv::Mat flow;
+    alg->calc(frame0, frame1, flow);
+    cv::Mat gold[2];
+    cv::split(flow, gold);
+
+    EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3);
+    EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3);
+}
+INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false));
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// PyrLKOpticalFlow
+
 PARAM_TEST_CASE(Sparse, bool, bool)
 {
     bool useGray;
@@ -60,7 +182,7 @@ PARAM_TEST_CASE(Sparse, bool, bool)
     virtual void SetUp()
     {
         UseSmart = GET_PARAM(0);
-        useGray = GET_PARAM(0);
+        useGray = GET_PARAM(1);
     }
 };
 
@@ -147,9 +269,9 @@ TEST_P(Sparse, Mat)
 
 }
 
-INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine(
-                            Values(false, true),
-                            Values(false)));
+INSTANTIATE_TEST_CASE_P(OCL_Video, Sparse, Combine(
+    Values(false, true),
+    Values(false, true)));
 
 #endif // HAVE_OPENCL
 
index 02d7a6f..191926c 100644 (file)
@@ -59,17 +59,17 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
 
     switch (src.type()) {
         case CV_8U:
-            parallel_for(cv::BlockedRange(0, src.rows),
+            parallel_for_(cv::Range(0, src.rows),
                 FastNlMeansDenoisingInvoker<uchar>(
                     src, dst, templateWindowSize, searchWindowSize, h));
             break;
         case CV_8UC2:
-            parallel_for(cv::BlockedRange(0, src.rows),
+            parallel_for_(cv::Range(0, src.rows),
                 FastNlMeansDenoisingInvoker<cv::Vec2b>(
                     src, dst, templateWindowSize, searchWindowSize, h));
             break;
         case CV_8UC3:
-            parallel_for(cv::BlockedRange(0, src.rows),
+            parallel_for_(cv::Range(0, src.rows),
                 FastNlMeansDenoisingInvoker<cv::Vec3b>(
                     src, dst, templateWindowSize, searchWindowSize, h));
             break;
@@ -159,19 +159,19 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
 
     switch (srcImgs[0].type()) {
         case CV_8U:
-            parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+            parallel_for_(cv::Range(0, srcImgs[0].rows),
                 FastNlMeansMultiDenoisingInvoker<uchar>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
                     dst, templateWindowSize, searchWindowSize, h));
             break;
         case CV_8UC2:
-            parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+            parallel_for_(cv::Range(0, srcImgs[0].rows),
                 FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
                     dst, templateWindowSize, searchWindowSize, h));
             break;
         case CV_8UC3:
-            parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+            parallel_for_(cv::Range(0, srcImgs[0].rows),
                 FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
                     dst, templateWindowSize, searchWindowSize, h));
index c4f1382..8824f17 100644 (file)
@@ -55,12 +55,12 @@ using namespace std;
 using namespace cv;
 
 template <typename T>
-struct FastNlMeansDenoisingInvoker {
+struct FastNlMeansDenoisingInvoker : ParallelLoopBody {
     public:
         FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
             int template_window_size, int search_window_size, const float h);
 
-        void operator() (const BlockedRange& range) const;
+        void operator() (const Range& range) const;
 
     private:
         void operator= (const FastNlMeansDenoisingInvoker&);
@@ -156,9 +156,9 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
 }
 
 template <class T>
-void FastNlMeansDenoisingInvoker<T>::operator() (const BlockedRange& range) const {
-    int row_from = range.begin();
-    int row_to = range.end() - 1;
+void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
+    int row_from = range.start;
+    int row_to = range.end - 1;
 
     Array2d<int> dist_sums(search_window_size_, search_window_size_);
 
index 2ae5054..8b32ede 100644 (file)
@@ -55,13 +55,13 @@ using namespace std;
 using namespace cv;
 
 template <typename T>
-struct FastNlMeansMultiDenoisingInvoker {
+struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody {
     public:
         FastNlMeansMultiDenoisingInvoker(
             const std::vector<Mat>& srcImgs, int imgToDenoiseIndex, int temporalWindowSize,
             Mat& dst, int template_window_size, int search_window_size, const float h);
 
-        void operator() (const BlockedRange& range) const;
+        void operator() (const Range& range) const;
 
     private:
         void operator= (const FastNlMeansMultiDenoisingInvoker&);
@@ -175,9 +175,9 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
 }
 
 template <class T>
-void FastNlMeansMultiDenoisingInvoker<T>::operator() (const BlockedRange& range) const {
-    int row_from = range.begin();
-    int row_to = range.end() - 1;
+void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const {
+    int row_from = range.start;
+    int row_to = range.end - 1;
 
     Array3d<int> dist_sums(temporal_window_size_, search_window_size_, search_window_size_);
 
index 9bab58c..d918cff 100644 (file)
@@ -66,21 +66,17 @@ struct DistIdxPair
 };
 
 
-struct MatchPairsBody
+struct MatchPairsBody : ParallelLoopBody
 {
-    MatchPairsBody(const MatchPairsBody& other)
-            : matcher(other.matcher), features(other.features),
-              pairwise_matches(other.pairwise_matches), near_pairs(other.near_pairs) {}
-
     MatchPairsBody(FeaturesMatcher &_matcher, const vector<ImageFeatures> &_features,
                    vector<MatchesInfo> &_pairwise_matches, vector<pair<int,int> > &_near_pairs)
             : matcher(_matcher), features(_features),
               pairwise_matches(_pairwise_matches), near_pairs(_near_pairs) {}
 
-    void operator ()(const BlockedRange &r) const
+    void operator ()(const Range &r) const
     {
         const int num_images = static_cast<int>(features.size());
-        for (int i = r.begin(); i < r.end(); ++i)
+        for (int i = r.start; i < r.end; ++i)
         {
             int from = near_pairs[i].first;
             int to = near_pairs[i].second;
@@ -526,9 +522,9 @@ void FeaturesMatcher::operator ()(const vector<ImageFeatures> &features, vector<
     MatchPairsBody body(*this, features, pairwise_matches, near_pairs);
 
     if (is_thread_safe_)
-        parallel_for(BlockedRange(0, static_cast<int>(near_pairs.size())), body);
+        parallel_for_(Range(0, static_cast<int>(near_pairs.size())), body);
     else
-        body(BlockedRange(0, static_cast<int>(near_pairs.size())));
+        body(Range(0, static_cast<int>(near_pairs.size())));
     LOGLN_CHAT("");
 }
 
index ab27a46..c873bc7 100644 (file)
@@ -69,13 +69,13 @@ struct CalcRotation
         K_from(0,0) = cameras[edge.from].focal;
         K_from(1,1) = cameras[edge.from].focal * cameras[edge.from].aspect;
         K_from(0,2) = cameras[edge.from].ppx;
-        K_from(0,2) = cameras[edge.from].ppy;
+        K_from(1,2) = cameras[edge.from].ppy;
 
         Mat_<double> K_to = Mat::eye(3, 3, CV_64F);
         K_to(0,0) = cameras[edge.to].focal;
         K_to(1,1) = cameras[edge.to].focal * cameras[edge.to].aspect;
         K_to(0,2) = cameras[edge.to].ppx;
-        K_to(0,2) = cameras[edge.to].ppy;
+        K_to(1,2) = cameras[edge.to].ppy;
 
         Mat R = K_from.inv() * pairwise_matches[pair_idx].H.inv() * K_to;
         cameras[edge.to].R = cameras[edge.from].R * R;
index 4351713..a64127f 100755 (executable)
@@ -288,6 +288,16 @@ class TestSuite(object):
             if self.adb:
                 # construct name for aapt tool
                 self.aapt = [os.path.join(os.path.dirname(self.adb[0]), ("aapt","aapt.exe")[hostos == 'nt'])]
+                if not os.path.isfile(self.aapt[0]):
+                    # it's moved in SDK r22
+                    sdk_dir = os.path.dirname( os.path.dirname(self.adb[0]) )
+                    aapt_fn = ("aapt", "aapt.exe")[hostos == 'nt']
+                    for r, ds, fs in os.walk( os.path.join(sdk_dir, 'build-tools') ):
+                        if aapt_fn in fs:
+                            self.aapt = [ os.path.join(r, aapt_fn) ]
+                            break
+                    else:
+                        self.error = "Can't find '%s' tool!" % aapt_fn
 
         # fix has_perf_tests param
         self.has_perf_tests = self.has_perf_tests == "ON"
index e532af2..6bbb960 100644 (file)
@@ -248,7 +248,7 @@ detectShadowGMM(const float* data, int nchannels, int nmodes,
 //IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004
 //http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf
 
-struct MOG2Invoker
+struct MOG2Invoker : ParallelLoopBody
 {
     MOG2Invoker(const Mat& _src, Mat& _dst,
                 GMM* _gmm, float* _mean,
@@ -280,9 +280,9 @@ struct MOG2Invoker
         cvtfunc = src->depth() != CV_32F ? getConvertFunc(src->depth(), CV_32F) : 0;
     }
 
-    void operator()(const BlockedRange& range) const
+    void operator()(const Range& range) const
     {
-        int y0 = range.begin(), y1 = range.end();
+        int y0 = range.start, y1 = range.end;
         int ncols = src->cols, nchannels = src->channels();
         AutoBuffer<float> buf(src->cols*nchannels);
         float alpha1 = 1.f - alphaT;
@@ -562,15 +562,15 @@ void BackgroundSubtractorMOG2::operator()(InputArray _image, OutputArray _fgmask
     learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./min( 2*nframes, history );
     CV_Assert(learningRate >= 0);
 
-    parallel_for(BlockedRange(0, image.rows),
-                 MOG2Invoker(image, fgmask,
-                             (GMM*)bgmodel.data,
-                             (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols),
-                             bgmodelUsedModes.data, nmixtures, (float)learningRate,
-                             (float)varThreshold,
-                             backgroundRatio, varThresholdGen,
-                             fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
-                             bShadowDetection, nShadowDetection));
+    parallel_for_(Range(0, image.rows),
+                  MOG2Invoker(image, fgmask,
+                              (GMM*)bgmodel.data,
+                              (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols),
+                              bgmodelUsedModes.data, nmixtures, (float)learningRate,
+                              (float)varThreshold,
+                              backgroundRatio, varThresholdGen,
+                              fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
+                              bShadowDetection, nShadowDetection));
 }
 
 void BackgroundSubtractorMOG2::getBackgroundImage(OutputArray backgroundImage) const
index 9e47eb8..291cb86 100644 (file)
@@ -156,7 +156,7 @@ cv::detail::LKTrackerInvoker::LKTrackerInvoker(
     minEigThreshold = _minEigThreshold;
 }
 
-void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const
+void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
 {
     Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
     const Mat& I = *prevImg;
@@ -170,7 +170,7 @@ void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const
     Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
     Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
 
-    for( int ptidx = range.begin(); ptidx < range.end(); ptidx++ )
+    for( int ptidx = range.start; ptidx < range.end; ptidx++ )
     {
         Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
         Point2f nextPt;
@@ -733,11 +733,11 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
         typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
 #endif
 
-        parallel_for(BlockedRange(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
-                                                                nextPyr[level * lvlStep2], prevPts, nextPts,
-                                                                status, err,
-                                                                winSize, criteria, level, maxLevel,
-                                                                flags, (float)minEigThreshold));
+        parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
+                                                          nextPyr[level * lvlStep2], prevPts, nextPts,
+                                                          status, err,
+                                                          winSize, criteria, level, maxLevel,
+                                                          flags, (float)minEigThreshold));
     }
 }
 
index 390e46b..4aff37e 100644 (file)
@@ -7,7 +7,7 @@ namespace detail
 
     typedef short deriv_type;
 
-    struct LKTrackerInvoker
+    struct LKTrackerInvoker : ParallelLoopBody
     {
         LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
                           const Point2f* _prevPts, Point2f* _nextPts,
@@ -15,7 +15,7 @@ namespace detail
                           Size _winSize, TermCriteria _criteria,
                           int _level, int _maxLevel, int _flags, float _minEigThreshold );
 
-        void operator()(const BlockedRange& range) const;
+        void operator()(const Range& range) const;
 
         const Mat* prevImg;
         const Mat* nextImg;
index 0f3cec1..7ec860f 100644 (file)
@@ -60,7 +60,15 @@ CV_INIT_ALGORITHM(BackgroundSubtractorMOG2, "BackgroundSubtractor.MOG2",
     obj.info()->addParam(obj, "history", obj.history);
     obj.info()->addParam(obj, "nmixtures", obj.nmixtures);
     obj.info()->addParam(obj, "varThreshold", obj.varThreshold);
-    obj.info()->addParam(obj, "detectShadows", obj.bShadowDetection));
+    obj.info()->addParam(obj, "detectShadows", obj.bShadowDetection);
+    obj.info()->addParam(obj, "backgroundRatio", obj.backgroundRatio);
+    obj.info()->addParam(obj, "varThresholdGen", obj.varThresholdGen);
+    obj.info()->addParam(obj, "fVarInit", obj.fVarInit);
+    obj.info()->addParam(obj, "fVarMin", obj.fVarMin);
+    obj.info()->addParam(obj, "fVarMax", obj.fVarMax);
+    obj.info()->addParam(obj, "fCT", obj.fCT);
+    obj.info()->addParam(obj, "nShadowDetection", obj.nShadowDetection);
+    obj.info()->addParam(obj, "fTau", obj.fTau));
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 
index 484b598..de93d5c 100644 (file)
@@ -205,6 +205,9 @@ Mat estimateGlobalMotionRobust(
                             estimateGlobMotionLeastSquaresAffine };
 
     const int npoints = static_cast<int>(points0.size());
+    if (npoints < params.size)
+        return Mat::eye(3, 3, CV_32F);
+
     const int niters = static_cast<int>(ceil(log(1 - params.prob) /
                                              log(1 - pow(1 - params.eps, params.size))));
 
@@ -300,6 +303,8 @@ PyrLkRobustMotionEstimator::PyrLkRobustMotionEstimator()
 Mat PyrLkRobustMotionEstimator::estimate(const Mat &frame0, const Mat &frame1)
 {
     detector_->detect(frame0, keypointsPrev_);
+    if (keypointsPrev_.empty())
+        return Mat::eye(3, 3, CV_32F);
 
     pointsPrev_.resize(keypointsPrev_.size());
     for (size_t i = 0; i < keypointsPrev_.size(); ++i)
diff --git a/platforms/android/android.toolchain.cmake b/platforms/android/android.toolchain.cmake
new file mode 100644 (file)
index 0000000..d7f09c7
--- /dev/null
@@ -0,0 +1,1744 @@
+# Copyright (c) 2010-2011, Ethan Rublee
+# Copyright (c) 2011-2013, Andrey Kamaev
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1.  Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+# 2.  Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# 3.  The name of the copyright holders may be used to endorse or promote
+#     products derived from this software without specific prior written
+#     permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# ------------------------------------------------------------------------------
+#  Android CMake toolchain file, for use with the Android NDK r5-r8
+#  Requires cmake 2.6.3 or newer (2.8.5 or newer is recommended).
+#  See home page: https://github.com/taka-no-me/android-cmake
+#
+#  The file is maintained by the OpenCV project. The latest version can be found at
+#  http://code.opencv.org/projects/opencv/repository/revisions/master/changes/android/android.toolchain.cmake
+#
+#  Usage Linux:
+#   $ export ANDROID_NDK=/absolute/path/to/the/android-ndk
+#   $ mkdir build && cd build
+#   $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake ..
+#   $ make -j8
+#
+#  Usage Linux (using standalone toolchain):
+#   $ export ANDROID_STANDALONE_TOOLCHAIN=/absolute/path/to/android-toolchain
+#   $ mkdir build && cd build
+#   $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake ..
+#   $ make -j8
+#
+#  Usage Windows:
+#     You need native port of make to build your project.
+#     Android NDK r7 (or newer) already has make.exe on board.
+#     For older NDK you have to install it separately.
+#     For example, this one: http://gnuwin32.sourceforge.net/packages/make.htm
+#
+#   $ SET ANDROID_NDK=C:\absolute\path\to\the\android-ndk
+#   $ mkdir build && cd build
+#   $ cmake.exe -G"MinGW Makefiles"
+#       -DCMAKE_TOOLCHAIN_FILE=path\to\the\android.toolchain.cmake
+#       -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%\prebuilt\windows\bin\make.exe" ..
+#   $ cmake.exe --build .
+#
+#
+#  Options (can be set as cmake parameters: -D<option_name>=<value>):
+#    ANDROID_NDK=/opt/android-ndk - path to the NDK root.
+#      Can be set as environment variable. Can be set only at first cmake run.
+#
+#    ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain - path to the
+#      standalone toolchain. This option is not used if full NDK is found
+#      (ignored if ANDROID_NDK is set).
+#      Can be set as environment variable. Can be set only at first cmake run.
+#
+#    ANDROID_ABI=armeabi-v7a - specifies the target Application Binary
+#      Interface (ABI). This option nearly matches to the APP_ABI variable
+#      used by ndk-build tool from Android NDK.
+#
+#      Possible targets are:
+#        "armeabi" - matches to the NDK ABI with the same name.
+#           See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+#        "armeabi-v7a" - matches to the NDK ABI with the same name.
+#           See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+#        "armeabi-v7a with NEON" - same as armeabi-v7a, but
+#            sets NEON as floating-point unit
+#        "armeabi-v7a with VFPV3" - same as armeabi-v7a, but
+#            sets VFPV3 as floating-point unit (has 32 registers instead of 16).
+#        "armeabi-v6 with VFP" - tuned for ARMv6 processors having VFP.
+#        "x86" - matches to the NDK ABI with the same name.
+#            See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+#        "mips" - matches to the NDK ABI with the same name
+#            (It is not tested on real devices by the authors of this toolchain)
+#            See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+#
+#    ANDROID_NATIVE_API_LEVEL=android-8 - level of Android API to compile for.
+#      Option is read-only when standalone toolchain is used.
+#
+#    ANDROID_TOOLCHAIN_NAME=arm-linux-androideabi-4.6 - the name of compiler
+#      toolchain to be used. The list of possible values depends on the NDK
+#      version. For NDK r8c the possible values are:
+#
+#        * arm-linux-androideabi-4.4.3
+#        * arm-linux-androideabi-4.6
+#        * arm-linux-androideabi-clang3.1
+#        * mipsel-linux-android-4.4.3
+#        * mipsel-linux-android-4.6
+#        * mipsel-linux-android-clang3.1
+#        * x86-4.4.3
+#        * x86-4.6
+#        * x86-clang3.1
+#
+#    ANDROID_FORCE_ARM_BUILD=OFF - set ON to generate 32-bit ARM instructions
+#      instead of Thumb. Is not available for "x86" (inapplicable) and
+#      "armeabi-v6 with VFP" (is forced to be ON) ABIs.
+#
+#    ANDROID_NO_UNDEFINED=ON - set ON to show all undefined symbols as linker
+#      errors even if they are not used.
+#
+#    ANDROID_SO_UNDEFINED=OFF - set ON to allow undefined symbols in shared
+#      libraries. Automatically turned on for NDK r5x and r6x due to GLESv2
+#      problems.
+#
+#    LIBRARY_OUTPUT_PATH_ROOT=${CMAKE_SOURCE_DIR} - where to output binary
+#      files. See additional details below.
+#
+#    ANDROID_SET_OBSOLETE_VARIABLES=ON - if set, then toolchain defines some
+#      obsolete variables which were used by previous versions of this file for
+#      backward compatibility.
+#
+#    ANDROID_STL=gnustl_static - specify the runtime to use.
+#
+#      Possible values are:
+#        none           -> Do not configure the runtime.
+#        system         -> Use the default minimal system C++ runtime library.
+#                          Implies -fno-rtti -fno-exceptions.
+#                          Is not available for standalone toolchain.
+#        system_re      -> Use the default minimal system C++ runtime library.
+#                          Implies -frtti -fexceptions.
+#                          Is not available for standalone toolchain.
+#        gabi++_static  -> Use the GAbi++ runtime as a static library.
+#                          Implies -frtti -fno-exceptions.
+#                          Available for NDK r7 and newer.
+#                          Is not available for standalone toolchain.
+#        gabi++_shared  -> Use the GAbi++ runtime as a shared library.
+#                          Implies -frtti -fno-exceptions.
+#                          Available for NDK r7 and newer.
+#                          Is not available for standalone toolchain.
+#        stlport_static -> Use the STLport runtime as a static library.
+#                          Implies -fno-rtti -fno-exceptions for NDK before r7.
+#                          Implies -frtti -fno-exceptions for NDK r7 and newer.
+#                          Is not available for standalone toolchain.
+#        stlport_shared -> Use the STLport runtime as a shared library.
+#                          Implies -fno-rtti -fno-exceptions for NDK before r7.
+#                          Implies -frtti -fno-exceptions for NDK r7 and newer.
+#                          Is not available for standalone toolchain.
+#        gnustl_static  -> Use the GNU STL as a static library.
+#                          Implies -frtti -fexceptions.
+#        gnustl_shared  -> Use the GNU STL as a shared library.
+#                          Implies -frtti -fno-exceptions.
+#                          Available for NDK r7b and newer.
+#                          Silently degrades to gnustl_static if not available.
+#
+#    ANDROID_STL_FORCE_FEATURES=ON - turn rtti and exceptions support based on
+#      chosen runtime. If disabled, then the user is responsible for setting
+#      these options.
+#
+#  What?:
+#    android-cmake toolchain searches for NDK/toolchain in the following order:
+#      ANDROID_NDK - cmake parameter
+#      ANDROID_NDK - environment variable
+#      ANDROID_STANDALONE_TOOLCHAIN - cmake parameter
+#      ANDROID_STANDALONE_TOOLCHAIN - environment variable
+#      ANDROID_NDK - default locations
+#      ANDROID_STANDALONE_TOOLCHAIN - default locations
+#
+#    Make sure to do the following in your scripts:
+#      SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${my_cxx_flags}" )
+#      SET( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${my_cxx_flags}" )
+#    The flags will be prepopulated with critical flags, so don't lose them.
+#    Also be aware that toolchain also sets configuration-specific compiler
+#    flags and linker flags.
+#
+#    ANDROID and BUILD_ANDROID will be set to true, you may test any of these
+#    variables to make necessary Android-specific configuration changes.
+#
+#    Also ARMEABI or ARMEABI_V7A or X86 or MIPS will be set true, mutually
+#    exclusive. NEON option will be set true if VFP is set to NEON.
+#
+#    LIBRARY_OUTPUT_PATH_ROOT should be set in cache to determine where Android
+#    libraries will be installed.
+#    Default is ${CMAKE_SOURCE_DIR}, and the android libs will always be
+#    under the ${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}
+#    (depending on the target ABI). This is convenient for Android packaging.
+#
+#  Change Log:
+#   - initial version December 2010
+#   - April 2011
+#     [+] added possibility to build with NDK (without standalone toolchain)
+#     [+] support cross-compilation on Windows (native, no cygwin support)
+#     [+] added compiler option to force "char" type to be signed
+#     [+] added toolchain option to compile to 32-bit ARM instructions
+#     [+] added toolchain option to disable SWIG search
+#     [+] added platform "armeabi-v7a with VFPV3"
+#     [~] ARM_TARGETS renamed to ARM_TARGET
+#     [+] EXECUTABLE_OUTPUT_PATH is set by toolchain (required on Windows)
+#     [~] Fixed bug with ANDROID_API_LEVEL variable
+#     [~] turn off SWIG search if it is not found first time
+#   - May 2011
+#     [~] ANDROID_LEVEL is renamed to ANDROID_API_LEVEL
+#     [+] ANDROID_API_LEVEL is detected by toolchain if not specified
+#     [~] added guard to prevent changing of output directories on the first
+#         cmake pass
+#     [~] toolchain exits with error if ARM_TARGET is not recognized
+#   - June 2011
+#     [~] default NDK path is updated for version r5c
+#     [+] variable CMAKE_SYSTEM_PROCESSOR is set based on ARM_TARGET
+#     [~] toolchain install directory is added to linker paths
+#     [-] removed SWIG-related stuff from toolchain
+#     [+] added macro find_host_package, find_host_program to search
+#         packages/programs on the host system
+#     [~] fixed path to STL library
+#   - July 2011
+#     [~] fixed options caching
+#     [~] search for all supported NDK versions
+#     [~] allowed spaces in NDK path
+#   - September 2011
+#     [~] updated for NDK r6b
+#   - November 2011
+#     [*] rewritten for NDK r7
+#     [+] x86 toolchain support (experimental)
+#     [+] added "armeabi-v6 with VFP" ABI for ARMv6 processors.
+#     [~] improved compiler and linker flags management
+#     [+] support different build flags for Release and Debug configurations
+#     [~] by default compiler flags the same as used by ndk-build (but only
+#         where reasonable)
+#     [~] ANDROID_NDK_TOOLCHAIN_ROOT is split into ANDROID_STANDALONE_TOOLCHAIN
+#         and ANDROID_TOOLCHAIN_ROOT
+#     [~] ARM_TARGET is renamed to ANDROID_ABI
+#     [~] ARMEABI_NDK_NAME is renamed to ANDROID_NDK_ABI_NAME
+#     [~] ANDROID_API_LEVEL is renamed to ANDROID_NATIVE_API_LEVEL
+#   - January 2012
+#     [+] added stlport_static support (experimental)
+#     [+] added special check for cygwin
+#     [+] filtered out hidden files (starting with .) while globbing inside NDK
+#     [+] automatically applied GLESv2 linkage fix for NDK revisions 5-6
+#     [+] added ANDROID_GET_ABI_RAWNAME to get NDK ABI names by CMake flags
+#   - February 2012
+#     [+] updated for NDK r7b
+#     [~] fixed cmake try_compile() command
+#     [~] Fix for missing install_name_tool on OS X
+#   - March 2012
+#     [~] fixed incorrect C compiler flags
+#     [~] fixed CMAKE_SYSTEM_PROCESSOR change on ANDROID_ABI change
+#     [+] improved toolchain loading speed
+#     [+] added assembler language support (.S)
+#     [+] allowed preset search paths and extra search suffixes
+#   - April 2012
+#     [+] updated for NDK r7c
+#     [~] fixed most of problems with compiler/linker flags and caching
+#     [+] added option ANDROID_FUNCTION_LEVEL_LINKING
+#   - May 2012
+#     [+] updated for NDK r8
+#     [+] added mips architecture support
+#   - August 2012
+#     [+] updated for NDK r8b
+#     [~] all intermediate files generated by toolchain are moved to CMakeFiles
+#     [~] libstdc++ and libsupc are removed from explicit link libraries
+#     [+] added CCache support (via NDK_CCACHE environment or cmake variable)
+#     [+] added gold linker support for NDK r8b
+#     [~] fixed mips linker flags for NDK r8b
+#   - September 2012
+#     [+] added NDK release name detection (see ANDROID_NDK_RELEASE)
+#     [+] added support for all C++ runtimes from NDK
+#         (system, gabi++, stlport, gnustl)
+#     [+] improved warnings on known issues of NDKs
+#     [~] use gold linker as default if available (NDK r8b)
+#     [~] globally turned off rpath
+#     [~] compiler options are aligned with NDK r8b
+#   - October 2012
+#     [~] fixed C++ linking: explicitly link with math library (OpenCV #2426)
+#   - November 2012
+#     [+] updated for NDK r8c
+#     [+] added support for clang compiler
+#   - December 2012
+#     [+] suppress warning about unused CMAKE_TOOLCHAIN_FILE variable
+#     [+] adjust API level to closest compatible as NDK does
+#     [~] fixed ccache full path search
+#     [+] updated for NDK r8d
+#     [~] compiler options are aligned with NDK r8d
+#   - March 2013
+#     [+] updated for NDK r8e (x86 version)
+#     [+] support x86_64 version of NDK
+#   - April 2013
+#     [+] support non-release NDK layouts (from Linaro git and Android git)
+#     [~] automatically detect if explicit link to crtbegin_*.o is needed
+# ------------------------------------------------------------------------------
+
+cmake_minimum_required( VERSION 2.6.3 )
+
+if( DEFINED CMAKE_CROSSCOMPILING )
+ # subsequent toolchain loading is not really needed
+ return()
+endif()
+
+if( CMAKE_TOOLCHAIN_FILE )
+ # touch toolchain variable only to suppress "unused variable" warning
+endif()
+
+get_property( _CMAKE_IN_TRY_COMPILE GLOBAL PROPERTY IN_TRY_COMPILE )
+if( _CMAKE_IN_TRY_COMPILE )
+ include( "${CMAKE_CURRENT_SOURCE_DIR}/../android.toolchain.config.cmake" OPTIONAL )
+endif()
+
+# this one is important
+set( CMAKE_SYSTEM_NAME Linux )
+# this one not so much
+set( CMAKE_SYSTEM_VERSION 1 )
+
+# rpath makes little sense for Android
+set( CMAKE_SKIP_RPATH TRUE CACHE BOOL "If set, runtime paths are not added when using shared libraries." )
+
+# Directory-name suffixes combined with every NDK search path when probing for an NDK.
+# Entries from ANDROID_EXTRA_NDK_VERSIONS come first; the final "" presumably denotes
+# the unsuffixed directory name.
+set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" )
+# Default NDK locations; only computed when the caller has not defined ANDROID_NDK_SEARCH_PATHS.
+if(NOT DEFINED ANDROID_NDK_SEARCH_PATHS)
+ if( CMAKE_HOST_WIN32 )
+  # %PROGRAMFILES%/android-ndk and %SystemDrive%/NVPACK/android-ndk
+  file( TO_CMAKE_PATH "$ENV{PROGRAMFILES}" ANDROID_NDK_SEARCH_PATHS )
+  set( ANDROID_NDK_SEARCH_PATHS "${ANDROID_NDK_SEARCH_PATHS}/android-ndk" "$ENV{SystemDrive}/NVPACK/android-ndk" )
+ else()
+  # /opt/android-ndk and $HOME/NVPACK/android-ndk
+  file( TO_CMAKE_PATH "$ENV{HOME}" ANDROID_NDK_SEARCH_PATHS )
+  set( ANDROID_NDK_SEARCH_PATHS /opt/android-ndk "${ANDROID_NDK_SEARCH_PATHS}/NVPACK/android-ndk" )
+ endif()
+endif()
+# Default standalone toolchain location, overridable by the caller.
+if(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH)
+ set( ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH /opt/android-toolchain )
+endif()
+
+# ABIs offered for each toolchain architecture; the variable suffix is the
+# architecture name derived from the compiler machine name (arm, x86, mipsel).
+set( ANDROID_SUPPORTED_ABIS_arm "armeabi-v7a;armeabi;armeabi-v7a with NEON;armeabi-v7a with VFPV3;armeabi-v6 with VFP" )
+set( ANDROID_SUPPORTED_ABIS_x86 "x86" )
+set( ANDROID_SUPPORTED_ABIS_mipsel "mips" )
+
+# Default native API level, with per-architecture overrides keyed by ANDROID_ARCH_NAME.
+set( ANDROID_DEFAULT_NDK_API_LEVEL 8 )
+set( ANDROID_DEFAULT_NDK_API_LEVEL_x86 9 )
+set( ANDROID_DEFAULT_NDK_API_LEVEL_mips 9 )
+
+
+# __LIST_FILTER( <listvar> <regex> )
+# Drops from the list variable named <listvar> every element that matches <regex>.
+# Being a macro, it edits the list directly in the caller's scope.
+macro( __LIST_FILTER listvar regex )
+ if( ${listvar} )
+  # iterate over a snapshot of the elements; removals act on the variable itself
+  foreach( __item ${${listvar}} )
+   if( __item MATCHES "${regex}" )
+    list( REMOVE_ITEM ${listvar} "${__item}" )
+   endif()
+  endforeach()
+ endif()
+endmacro()
+
+# __INIT_VARIABLE( <var_name> [PATH] [<candidate>...] [VALUES <value>...] )
+# Gives <var_name> an initial value if it is currently empty, taking the first
+# usable candidate from the argument list:
+#   PATH            - treat values as filesystem paths: candidates must exist,
+#                     and the result is normalized with file( TO_CMAKE_PATH ).
+#   <name>          - use the value of CMake variable <name> if it is defined.
+#   ENV_<name>      - use the value of environment variable <name>.
+#   OBSOLETE_<...>  - same as above, but emit a deprecation warning when used
+#                     (suppressed inside try_compile).
+#   VALUES          - arguments after this keyword that are not defined
+#                     variables are used as literal values.
+macro( __INIT_VARIABLE var_name )
+ # detect the PATH keyword anywhere in the argument list
+ set( __test_path 0 )
+ foreach( __var ${ARGN} )
+  if( __var STREQUAL "PATH" )
+   set( __test_path 1 )
+   break()
+  endif()
+ endforeach()
+ # a cached path that no longer exists is dropped from the cache
+ if( __test_path AND NOT EXISTS "${${var_name}}" )
+  unset( ${var_name} CACHE )
+ endif()
+ if( "${${var_name}}" STREQUAL "" )
+  set( __values 0 )
+  foreach( __var ${ARGN} )
+   if( __var STREQUAL "VALUES" )
+    # everything after this keyword may be a literal value
+    set( __values 1 )
+   elseif( NOT __var STREQUAL "PATH" )
+    # strip the OBSOLETE_ marker first, so OBSOLETE_ENV_<name> is also recognized
+    set( __obsolete 0 )
+    if( __var MATCHES "^OBSOLETE_.*$" )
+     string( REPLACE "OBSOLETE_" "" __var "${__var}" )
+     set( __obsolete 1 )
+    endif()
+    # resolve the candidate: environment variable, CMake variable, or literal
+    if( __var MATCHES "^ENV_.*$" )
+     string( REPLACE "ENV_" "" __var "${__var}" )
+     set( __value "$ENV{${__var}}" )
+    elseif( DEFINED ${__var} )
+     set( __value "${${__var}}" )
+    else()
+     if( __values )
+      set( __value "${__var}" )
+     else()
+      set( __value "" )
+     endif()
+    endif()
+    # the first non-empty candidate (which must also exist in PATH mode) wins
+    if( NOT "${__value}" STREQUAL "" )
+     if( __test_path )
+      if( EXISTS "${__value}" )
+       file( TO_CMAKE_PATH "${__value}" ${var_name} )
+       if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE )
+        message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." )
+       endif()
+       break()
+      endif()
+     else()
+      set( ${var_name} "${__value}" )
+       if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE )
+        message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." )
+       endif()
+      break()
+     endif()
+    endif()
+   endif()
+  endforeach()
+  # macros share the caller's scope, so remove the temporaries explicitly
+  unset( __value )
+  unset( __values )
+  unset( __obsolete )
+ elseif( __test_path )
+  # value already set: only normalize the path separators
+  file( TO_CMAKE_PATH "${${var_name}}" ${var_name} )
+ endif()
+ unset( __test_path )
+endmacro()
+
+# __DETECT_NATIVE_API_LEVEL( <_var> <_path> )
+# Scans the header file <_path> for a "#define __ANDROID_API__ <number>" line and
+# stores <number> in <_var>. Reports SEND_ERROR when no such line is found.
+macro( __DETECT_NATIVE_API_LEVEL _var _path )
+ set( __ndkApiLevelRegex "^[\t ]*#define[\t ]+__ANDROID_API__[\t ]+([0-9]+)[\t ]*$" )
+ # keep only the lines of the header that match the define
+ file( STRINGS ${_path} __apiFileContent REGEX "${__ndkApiLevelRegex}" )
+ if( NOT __apiFileContent )
+  message( SEND_ERROR "Could not get Android native API level. Probably you have specified invalid level value, or your copy of NDK/toolchain is broken." )
+ endif()
+ # reduce the matched line to the captured number
+ string( REGEX REPLACE "${__ndkApiLevelRegex}" "\\1" ${_var} "${__apiFileContent}" )
+ unset( __ndkApiLevelRegex )
+ unset( __apiFileContent )
+endmacro()
+
+# __DETECT_TOOLCHAIN_MACHINE_NAME( <_var> <_root> )
+# Derives the compiler machine name from the single "<machine>-gcc" executable
+# found in <_root>/bin and stores it in <_var>. <_var> is set to "" when <_root>
+# does not exist, or when the number of matching executables is not exactly one
+# (outside of try_compile, where a warning is also printed).
+macro( __DETECT_TOOLCHAIN_MACHINE_NAME _var _root )
+ if( EXISTS "${_root}" )
+  file( GLOB __gccExePath RELATIVE "${_root}/bin/" "${_root}/bin/*-gcc${TOOL_OS_SUFFIX}" )
+  # ignore entries whose name starts with a dot
+  __LIST_FILTER( __gccExePath "^[.].*" )
+  list( LENGTH __gccExePath __gccExePathsCount )
+  if( NOT __gccExePathsCount EQUAL 1  AND NOT _CMAKE_IN_TRY_COMPILE )
+   message( WARNING "Could not determine machine name for compiler from ${_root}" )
+   set( ${_var} "" )
+  else()
+   # strip directory/extension, then the "-gcc" part, leaving the machine name
+   get_filename_component( __gccExeName "${__gccExePath}" NAME_WE )
+   string( REPLACE "-gcc" "" ${_var} "${__gccExeName}" )
+  endif()
+  unset( __gccExePath )
+  unset( __gccExePathsCount )
+  unset( __gccExeName )
+ else()
+  set( ${_var} "" )
+ endif()
+endmacro()
+
+
+# fight against cygwin
+# ANDROID_FORBID_SYGWIN (sic - the misspelled name is the public cache variable)
+# controls both the hard failure under Cygwin and the PATH clean-up below.
+set( ANDROID_FORBID_SYGWIN TRUE CACHE BOOL "Prevent cmake from working under cygwin and using cygwin tools")
+mark_as_advanced( ANDROID_FORBID_SYGWIN )
+if( ANDROID_FORBID_SYGWIN )
+ if( CYGWIN )
+  message( FATAL_ERROR "Android NDK and android-cmake toolchain are not welcome Cygwin. It is unlikely that this cmake toolchain will work under cygwin. But if you want to try then you can set cmake variable ANDROID_FORBID_SYGWIN to FALSE and rerun cmake." )
+ endif()
+
+ if( CMAKE_HOST_WIN32 )
+  # remove cygwin from PATH
+  # (the PATH value is treated as a ;-separated list and every entry matching "cygwin" is dropped)
+  set( __new_path "$ENV{PATH}")
+  __LIST_FILTER( __new_path "cygwin" )
+  set(ENV{PATH} "${__new_path}")
+  unset(__new_path)
+ endif()
+endif()
+
+
+# detect current host platform
+# Default to the 64-bit prebuilt tools on a 64-bit host unless the user already decided.
+if( NOT DEFINED ANDROID_NDK_HOST_X64 )
+ if( CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64|AMD64" )
+  set( ANDROID_NDK_HOST_X64 1 CACHE BOOL "Try to use 64-bit compiler toolchain" )
+  mark_as_advanced( ANDROID_NDK_HOST_X64 )
+ endif()
+endif()
+
+# ANDROID_NDK_HOST_SYSTEM_NAME  - name of the 64-bit prebuilt directory
+# ANDROID_NDK_HOST_SYSTEM_NAME2 - name of the 32-bit prebuilt directory
+# TOOL_OS_SUFFIX                - executable suffix on the host
+set( TOOL_OS_SUFFIX "" )
+if( CMAKE_HOST_APPLE )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "darwin-x86" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "darwin-x86_64" )
+elseif( CMAKE_HOST_WIN32 )
+ set( TOOL_OS_SUFFIX ".exe" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "windows" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "windows-x86_64" )
+elseif( CMAKE_HOST_UNIX )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "linux-x86" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "linux-x86_64" )
+else()
+ message( FATAL_ERROR "Cross-compilation on your platform is not supported by this cmake toolchain" )
+endif()
+
+# without the 64-bit preference, the primary name falls back to the 32-bit one
+if( NOT ANDROID_NDK_HOST_X64 )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+endif()
+
+# see if we have path to Android NDK
+# Resolution order: explicit ANDROID_NDK (variable or environment), explicit
+# standalone toolchain, NDK in a default location, standalone toolchain in its
+# default location.
+__INIT_VARIABLE( ANDROID_NDK PATH ENV_ANDROID_NDK )
+if( NOT ANDROID_NDK )
+ # see if we have path to Android standalone toolchain
+ __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ENV_ANDROID_STANDALONE_TOOLCHAIN OBSOLETE_ANDROID_NDK_TOOLCHAIN_ROOT OBSOLETE_ENV_ANDROID_NDK_TOOLCHAIN_ROOT )
+
+ if( NOT ANDROID_STANDALONE_TOOLCHAIN )
+  # try to find Android NDK in one of the default locations
+  set( __ndkSearchPaths )
+  foreach( __ndkSearchPath ${ANDROID_NDK_SEARCH_PATHS} )
+   foreach( suffix ${ANDROID_SUPPORTED_NDK_VERSIONS} )
+    list( APPEND __ndkSearchPaths "${__ndkSearchPath}${suffix}" )
+   endforeach()
+   # The unquoted list expansion above drops the empty "" suffix, so the bare
+   # search path would never be probed; add it explicitly as the last candidate.
+   list( APPEND __ndkSearchPaths "${__ndkSearchPath}" )
+  endforeach()
+  __INIT_VARIABLE( ANDROID_NDK PATH VALUES ${__ndkSearchPaths} )
+  unset( __ndkSearchPaths )
+
+  if( ANDROID_NDK )
+   message( STATUS "Using default path for Android NDK: ${ANDROID_NDK}" )
+   message( STATUS "  If you prefer to use a different location, please define a cmake or environment variable: ANDROID_NDK" )
+  else()
+   # try to find Android standalone toolchain in one of the default locations
+   __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH )
+
+   if( ANDROID_STANDALONE_TOOLCHAIN )
+    message( STATUS "Using default path for standalone toolchain ${ANDROID_STANDALONE_TOOLCHAIN}" )
+    message( STATUS "  If you prefer to use a different location, please define the variable: ANDROID_STANDALONE_TOOLCHAIN" )
+   endif( ANDROID_STANDALONE_TOOLCHAIN )
+  endif( ANDROID_NDK )
+ endif( NOT ANDROID_STANDALONE_TOOLCHAIN )
+endif( NOT ANDROID_NDK )
+
+# remember found paths
+# Exactly one of BUILD_WITH_ANDROID_NDK / BUILD_WITH_STANDALONE_TOOLCHAIN is set
+# below; if neither an NDK nor a standalone toolchain was found, configuration stops.
+if( ANDROID_NDK )
+ get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE )
+ set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE )
+ set( BUILD_WITH_ANDROID_NDK True )
+ if( EXISTS "${ANDROID_NDK}/RELEASE.TXT" )
+  # first line of RELEASE.TXT containing something like "r8e"; the short form keeps only that token
+  file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? )
+  string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" )
+ else()
+  # no RELEASE.TXT: use placeholder release identifiers
+  set( ANDROID_NDK_RELEASE "r1x" )
+  set( ANDROID_NDK_RELEASE_FULL "unreleased" )
+ endif()
+elseif( ANDROID_STANDALONE_TOOLCHAIN )
+ get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE )
+ # try to detect change
+ # (CMAKE_AR from a previous run must start with the current toolchain path)
+ if( CMAKE_AR )
+  string( LENGTH "${ANDROID_STANDALONE_TOOLCHAIN}" __length )
+  string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidStandaloneToolchainPreviousPath )
+  if( NOT __androidStandaloneToolchainPreviousPath STREQUAL ANDROID_STANDALONE_TOOLCHAIN )
+   message( FATAL_ERROR "It is not possible to change path to the Android standalone toolchain on subsequent run." )
+  endif()
+  unset( __androidStandaloneToolchainPreviousPath )
+  unset( __length )
+ endif()
+ set( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" CACHE INTERNAL "Path of the Android standalone toolchain" FORCE )
+ set( BUILD_WITH_STANDALONE_TOOLCHAIN True )
+else()
+ # only the first NDK search path is shown in the hint below
+ list(GET ANDROID_NDK_SEARCH_PATHS 0 ANDROID_NDK_SEARCH_PATH)
+ message( FATAL_ERROR "Could not find neither Android NDK nor Android standalone toolchain.
+    You should either set an environment variable:
+      export ANDROID_NDK=~/my-android-ndk
+    or
+      export ANDROID_STANDALONE_TOOLCHAIN=~/my-android-toolchain
+    or put the toolchain or NDK in the default path:
+      sudo ln -s ~/my-android-ndk ${ANDROID_NDK_SEARCH_PATH}
+      sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" )
+endif()
+
+# android NDK layout
+# Determines where the compiler toolchains live relative to ANDROID_NDK:
+#   ANDROID_NDK_TOOLCHAINS_PATH     - directory holding the toolchains
+#   ANDROID_NDK_TOOLCHAINS_SUBPATH  - per-toolchain subdirectory for the preferred host name
+#   ANDROID_NDK_TOOLCHAINS_SUBPATH2 - per-toolchain subdirectory for the 32-bit host name
+if( BUILD_WITH_ANDROID_NDK )
+ if( NOT DEFINED ANDROID_NDK_LAYOUT )
+  # try to automatically detect the layout
+  if( EXISTS "${ANDROID_NDK}/RELEASE.TXT")
+   set( ANDROID_NDK_LAYOUT "RELEASE" )
+  elseif( EXISTS "${ANDROID_NDK}/../../linux-x86/toolchain/" )
+   set( ANDROID_NDK_LAYOUT "LINARO" )
+  elseif( EXISTS "${ANDROID_NDK}/../../gcc/" )
+   set( ANDROID_NDK_LAYOUT "ANDROID" )
+  endif()
+ endif()
+ set( ANDROID_NDK_LAYOUT "${ANDROID_NDK_LAYOUT}" CACHE STRING "The inner layout of NDK" )
+ mark_as_advanced( ANDROID_NDK_LAYOUT )
+ if( ANDROID_NDK_LAYOUT STREQUAL "LINARO" )
+  set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
+  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../${ANDROID_NDK_HOST_SYSTEM_NAME}/toolchain" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
+ elseif( ANDROID_NDK_LAYOUT STREQUAL "ANDROID" )
+  set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
+  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../gcc/${ANDROID_NDK_HOST_SYSTEM_NAME}/arm" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
+ else() # ANDROID_NDK_LAYOUT STREQUAL "RELEASE"
+  # also taken when the layout could not be detected and is empty
+  set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/toolchains" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH  "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+  set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME2}" )
+ endif()
+ # collapse the "../.." components so the prefix comparison below works on a canonical path
+ get_filename_component( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK_TOOLCHAINS_PATH}" ABSOLUTE )
+
+ # try to detect change of NDK
+ # (CMAKE_AR from a previous run must start with the current toolchains path)
+ if( CMAKE_AR )
+  string( LENGTH "${ANDROID_NDK_TOOLCHAINS_PATH}" __length )
+  string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
+  if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK_TOOLCHAINS_PATH )
+   message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first.
+   " )
+  endif()
+  unset( __androidNdkPreviousPath )
+  unset( __length )
+ endif()
+endif()
+
+
+# get all the details about standalone toolchain
+# Fills the parallel lists __availableToolchains / __availableToolchainMachines /
+# __availableToolchainArchs / __availableToolchainCompilerVersions with one entry
+# ("standalone"), plus a second entry ("standalone-clang") when bin/clang exists.
+if( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ # a standalone toolchain supports exactly the API level recorded in its sysroot header
+ __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" )
+ set( ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ set( __availableToolchains "standalone" )
+ __DETECT_TOOLCHAIN_MACHINE_NAME( __availableToolchainMachines "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ if( NOT __availableToolchainMachines )
+  message( FATAL_ERROR "Could not determine machine name of your toolchain. Probably your Android standalone toolchain is broken." )
+ endif()
+ # map the compiler machine name to the architecture key used by ANDROID_SUPPORTED_ABIS_<arch>
+ if( __availableToolchainMachines MATCHES i686 )
+  set( __availableToolchainArchs "x86" )
+ elseif( __availableToolchainMachines MATCHES arm )
+  set( __availableToolchainArchs "arm" )
+ elseif( __availableToolchainMachines MATCHES mipsel )
+  set( __availableToolchainArchs "mipsel" )
+ endif()
+ # ask gcc for its version and keep only the numeric x.y[.z] part
+ execute_process( COMMAND "${ANDROID_STANDALONE_TOOLCHAIN}/bin/${__availableToolchainMachines}-gcc${TOOL_OS_SUFFIX}" -dumpversion
+                  OUTPUT_VARIABLE __availableToolchainCompilerVersions OUTPUT_STRIP_TRAILING_WHITESPACE )
+ string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?" __availableToolchainCompilerVersions "${__availableToolchainCompilerVersions}" )
+ if( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/bin/clang${TOOL_OS_SUFFIX}" )
+  # the clang entry reuses the machine, arch and gcc version of the gcc entry
+  list( APPEND __availableToolchains "standalone-clang" )
+  list( APPEND __availableToolchainMachines ${__availableToolchainMachines} )
+  list( APPEND __availableToolchainArchs ${__availableToolchainArchs} )
+  list( APPEND __availableToolchainCompilerVersions ${__availableToolchainCompilerVersions} )
+ endif()
+endif()
+
+# __GLOB_NDK_TOOLCHAINS( <resultVar> <candidateListVar> <subpath> )
+# For every toolchain directory name in the list named by <candidateListVar>,
+# looks for a gcc executable under
+#   ${ANDROID_NDK_TOOLCHAINS_PATH}/<toolchain><subpath>/bin
+# and, when a machine name can be determined, appends the toolchain to the list
+# named by <resultVar> and its machine/arch/version to the parallel lists
+# __availableToolchainMachines, __availableToolchainArchs and
+# __availableToolchainCompilerVersions.
+macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath )
+ foreach( __toolchain ${${__availableToolchainsLst}} )
+  # a "-clang3.x" toolchain without its own prebuilt directory is probed through the matching gcc 4.6 toolchain
+  if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" )
+   string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" )
+  else()
+   set( __gcc_toolchain "${__toolchain}" )
+  endif()
+  __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK_TOOLCHAINS_PATH}/${__gcc_toolchain}${__toolchain_subpath}" )
+  if( __machine )
+   # the compiler version is the numeric tail of the gcc toolchain directory name
+   string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9x]+)?$" __version "${__gcc_toolchain}" )
+   # NOTE(review): __arch is not reset per iteration; a machine name matching none
+   # of the patterns below would reuse the previous value - confirm this cannot happen.
+   if( __machine MATCHES i686 )
+    set( __arch "x86" )
+   elseif( __machine MATCHES arm )
+    set( __arch "arm" )
+   elseif( __machine MATCHES mipsel )
+    set( __arch "mipsel" )
+   endif()
+   list( APPEND __availableToolchainMachines "${__machine}" )
+   list( APPEND __availableToolchainArchs "${__arch}" )
+   list( APPEND __availableToolchainCompilerVersions "${__version}" )
+   list( APPEND ${__availableToolchainsVar} "${__toolchain}" )
+  endif()
+  unset( __gcc_toolchain )
+ endforeach()
+endmacro()
+
+# get all the details about NDK
+# Collects the supported native API levels from ${ANDROID_NDK}/platforms and fills
+# the parallel __availableToolchain* lists from the toolchain directories.
+if( BUILD_WITH_ANDROID_NDK )
+ # platform directories are named android-<level>; keep only the level numbers
+ file( GLOB ANDROID_SUPPORTED_NATIVE_API_LEVELS RELATIVE "${ANDROID_NDK}/platforms" "${ANDROID_NDK}/platforms/android-*" )
+ string( REPLACE "android-" "" ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_SUPPORTED_NATIVE_API_LEVELS}" )
+ set( __availableToolchains "" )
+ set( __availableToolchainMachines "" )
+ set( __availableToolchainArchs "" )
+ set( __availableToolchainCompilerVersions "" )
+ if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_TOOLCHAIN_NAME}/" )
+  # do not go through all toolchains if we know the name
+  set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" )
+  __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+  # nothing under the preferred host directory: retry with the 32-bit host directory
+  if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
+   __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
+   if( __availableToolchains )
+    set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
+   endif()
+  endif()
+ endif()
+ if( NOT __availableToolchains )
+  # the requested toolchain was not given or not usable: scan every toolchain directory
+  file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK_TOOLCHAINS_PATH}" "${ANDROID_NDK_TOOLCHAINS_PATH}/*" )
+  # Sort the globbed candidates (list(SORT) needs a non-empty list). The guard must
+  # test the candidate list: __availableToolchains is always empty in this branch.
+  if( __availableToolchainsLst )
+   list(SORT __availableToolchainsLst) # we need clang to go after gcc
+  endif()
+  __LIST_FILTER( __availableToolchainsLst "^[.]" )
+  __LIST_FILTER( __availableToolchainsLst "llvm" )
+  __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+  # nothing under the preferred host directory: retry with the 32-bit host directory
+  if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
+   __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
+   if( __availableToolchains )
+    set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
+   endif()
+  endif()
+ endif()
+ if( NOT __availableToolchains )
+  message( FATAL_ERROR "Could not find any working toolchain in the NDK. Probably your Android NDK is broken." )
+ endif()
+endif()
+
+# build list of available ABIs
+# (the union of ANDROID_SUPPORTED_ABIS_<arch> over every architecture that has a toolchain)
+set( ANDROID_SUPPORTED_ABIS "" )
+set( __uniqToolchainArchNames ${__availableToolchainArchs} )
+list( REMOVE_DUPLICATES __uniqToolchainArchNames )
+list( SORT __uniqToolchainArchNames )
+foreach( __arch ${__uniqToolchainArchNames} )
+ list( APPEND ANDROID_SUPPORTED_ABIS ${ANDROID_SUPPORTED_ABIS_${__arch}} )
+endforeach()
+unset( __uniqToolchainArchNames )
+if( NOT ANDROID_SUPPORTED_ABIS )
+ message( FATAL_ERROR "No one of known Android ABIs is supported by this cmake toolchain." )
+endif()
+
+# choose target ABI
+# (falls back to the obsolete ARM_TARGET/ARM_TARGETS variables, then to the first supported ABI)
+__INIT_VARIABLE( ANDROID_ABI OBSOLETE_ARM_TARGET OBSOLETE_ARM_TARGETS VALUES ${ANDROID_SUPPORTED_ABIS} )
+# verify that target ABI is supported
+list( FIND ANDROID_SUPPORTED_ABIS "${ANDROID_ABI}" __androidAbiIdx )
+if( __androidAbiIdx EQUAL -1 )
+ # Turn a;b;c into a", "b", "c so the message below can wrap it in quotes.
+ # No comma may follow the replacement string: it would be parsed as a separate
+ # argument and become the output variable name.
+ string( REPLACE ";" "\", \"" PRINTABLE_ANDROID_SUPPORTED_ABIS  "${ANDROID_SUPPORTED_ABIS}" )
+ message( FATAL_ERROR "Specified ANDROID_ABI = \"${ANDROID_ABI}\" is not supported by this cmake toolchain or your NDK/toolchain.
+   Supported values are: \"${PRINTABLE_ANDROID_SUPPORTED_ABIS}\"
+   " )
+endif()
+unset( __androidAbiIdx )
+
+# set target ABI options
+# Each branch defines, for the selected ANDROID_ABI:
+#   ANDROID_ARCH_NAME / ANDROID_ARCH_FULLNAME - architecture names
+#   ANDROID_NDK_ABI_NAME                      - ABI name
+#   ANDROID_LLVM_TRIPLE, CMAKE_SYSTEM_PROCESSOR
+#   plus the boolean feature flags (X86, MIPS, ARMEABI, ARMEABI_V6, ARMEABI_V7A, VFPV3, NEON)
+if( ANDROID_ABI STREQUAL "x86" )
+ set( ANDROID_ARCH_NAME "x86" )
+ set( ANDROID_ARCH_FULLNAME "x86" )
+ set( ANDROID_NDK_ABI_NAME "x86" )
+ set( ANDROID_LLVM_TRIPLE "i686-none-linux-android" )
+ set( CMAKE_SYSTEM_PROCESSOR "i686" )
+ set( X86 true )
+elseif( ANDROID_ABI STREQUAL "mips" )
+ set( ANDROID_ARCH_NAME "mips" )
+ set( ANDROID_ARCH_FULLNAME "mipsel" )
+ set( ANDROID_NDK_ABI_NAME "mips" )
+ set( ANDROID_LLVM_TRIPLE "mipsel-none-linux-android" )
+ set( CMAKE_SYSTEM_PROCESSOR "mips" )
+ set( MIPS true )
+elseif( ANDROID_ABI STREQUAL "armeabi" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi" )
+ set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv5te" )
+ set( ARMEABI true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v6 with VFP" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi" )
+ set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv6" )
+ set( ARMEABI_V6 true )
+ # need always fallback to older platform
+ set( ARMEABI true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a")
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+ set( ARMEABI_V7A true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a with VFPV3" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+ set( ARMEABI_V7A true )
+ set( VFPV3 true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a with NEON" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+ set( ARMEABI_V7A true )
+ set( VFPV3 true )
+ set( NEON true )
+else()
+ message( SEND_ERROR "Unknown ANDROID_ABI=\"${ANDROID_ABI}\" is specified." )
+endif()
+
+# If CMakeSystem.cmake already exists in the build tree (i.e. this is not the
+# first run), append an override so it picks up the current CMAKE_SYSTEM_PROCESSOR.
+if( CMAKE_BINARY_DIR AND EXISTS "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" )
+ # really dirty hack
+ # it is not possible to change CMAKE_SYSTEM_PROCESSOR after the first run...
+ file( APPEND "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" "SET(CMAKE_SYSTEM_PROCESSOR \"${CMAKE_SYSTEM_PROCESSOR}\")\n" )
+endif()
+
+# ANDROID_FORCE_ARM_BUILD is only offered for ARM targets other than armeabi-v6;
+# for every other target the cache entry is removed.
+if( ANDROID_ARCH_NAME STREQUAL "arm" AND NOT ARMEABI_V6 )
+ __INIT_VARIABLE( ANDROID_FORCE_ARM_BUILD OBSOLETE_FORCE_ARM VALUES OFF )
+ set( ANDROID_FORCE_ARM_BUILD ${ANDROID_FORCE_ARM_BUILD} CACHE BOOL "Use 32-bit ARM instructions instead of Thumb-1" FORCE )
+ mark_as_advanced( ANDROID_FORCE_ARM_BUILD )
+else()
+ unset( ANDROID_FORCE_ARM_BUILD CACHE )
+endif()
+
+# choose toolchain
+# Either validates the user-selected ANDROID_TOOLCHAIN_NAME against the available
+# toolchains, or picks the available toolchain for the target architecture with
+# the highest compiler version. __toolchainIdx indexes the parallel
+# __availableToolchain* lists.
+if( ANDROID_TOOLCHAIN_NAME )
+ list( FIND __availableToolchains "${ANDROID_TOOLCHAIN_NAME}" __toolchainIdx )
+ if( __toolchainIdx EQUAL -1 )
+  # build a bulleted list of the valid names for the error message
+  list( SORT __availableToolchains )
+  string( REPLACE ";" "\n  * " toolchains_list "${__availableToolchains}" )
+  set( toolchains_list "  * ${toolchains_list}")
+  message( FATAL_ERROR "Specified toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is missing in your NDK or broken. Please verify that your NDK is working or select another compiler toolchain.
+To configure the toolchain set CMake variable ANDROID_TOOLCHAIN_NAME to one of the following values:\n${toolchains_list}\n" )
+ endif()
+ list( GET __availableToolchainArchs ${__toolchainIdx} __toolchainArch )
+ if( NOT __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
+  message( SEND_ERROR "Selected toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is not able to compile binaries for the \"${ANDROID_ARCH_NAME}\" platform." )
+ endif()
+else()
+ set( __toolchainIdx -1 )
+ set( __applicableToolchains "" )
+ set( __toolchainMaxVersion "0.0.0" )
+ # foreach( RANGE n ) iterates 0..n inclusive, hence count-1
+ list( LENGTH __availableToolchains __availableToolchainsCount )
+ math( EXPR __availableToolchainsCount "${__availableToolchainsCount}-1" )
+ foreach( __idx RANGE ${__availableToolchainsCount} )
+  list( GET __availableToolchainArchs ${__idx} __toolchainArch )
+  if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
+   list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion )
+   # an "x" version component (e.g. 4.4.x) is compared as 99
+   string( REPLACE "x" "99" __toolchainVersion "${__toolchainVersion}")
+   if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion )
+    set( __toolchainMaxVersion "${__toolchainVersion}" )
+    set( __toolchainIdx ${__idx} )
+   endif()
+  endif()
+ endforeach()
+ unset( __availableToolchainsCount )
+ unset( __toolchainMaxVersion )
+ unset( __toolchainVersion )
+endif()
+unset( __toolchainArch )
+if( __toolchainIdx EQUAL -1 )
+ message( FATAL_ERROR "No one of available compiler toolchains is able to compile for ${ANDROID_ARCH_NAME} platform." )
+endif()
+# publish the selected toolchain's name, machine name and compiler version
+list( GET __availableToolchains ${__toolchainIdx} ANDROID_TOOLCHAIN_NAME )
+list( GET __availableToolchainMachines ${__toolchainIdx} ANDROID_TOOLCHAIN_MACHINE_NAME )
+list( GET __availableToolchainCompilerVersions ${__toolchainIdx} ANDROID_COMPILER_VERSION )
+
+unset( __toolchainIdx )
+unset( __availableToolchains )
+unset( __availableToolchainMachines )
+unset( __availableToolchainArchs )
+unset( __availableToolchainCompilerVersions )
+
+# choose native API level
+# Candidates, in order: environment ANDROID_NATIVE_API_LEVEL, ANDROID_API_LEVEL
+# (variable, then environment), the standalone toolchain's level, the
+# per-architecture default, the global default.
+__INIT_VARIABLE( ANDROID_NATIVE_API_LEVEL ENV_ANDROID_NATIVE_API_LEVEL ANDROID_API_LEVEL ENV_ANDROID_API_LEVEL ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME} ANDROID_DEFAULT_NDK_API_LEVEL )
+# keep only the number (accepts values such as "android-9")
+string( REGEX MATCH "[0-9]+" ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" )
+# adjust API level
+# (pick the highest supported level that does not exceed the requested one and
+#  is not below the per-architecture default, when such a default exists)
+set( __real_api_level ${ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME}} )
+foreach( __level ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ if( NOT __level GREATER ANDROID_NATIVE_API_LEVEL AND NOT __level LESS __real_api_level )
+  set( __real_api_level ${__level} )
+ endif()
+endforeach()
+if( __real_api_level AND NOT ANDROID_NATIVE_API_LEVEL EQUAL __real_api_level )
+ message( STATUS "Adjusting Android API level 'android-${ANDROID_NATIVE_API_LEVEL}' to 'android-${__real_api_level}'")
+ set( ANDROID_NATIVE_API_LEVEL ${__real_api_level} )
+endif()
+unset(__real_api_level)
+# validate
+list( FIND ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_NATIVE_API_LEVEL}" __levelIdx )
+if( __levelIdx EQUAL -1 )
+ message( SEND_ERROR "Specified Android native API level 'android-${ANDROID_NATIVE_API_LEVEL}' is not supported by your NDK/toolchain." )
+else()
+ if( BUILD_WITH_ANDROID_NDK )
+  # cross-check the directory name against the level declared in the platform's own header
+  __DETECT_NATIVE_API_LEVEL( __realApiLevel "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}/usr/include/android/api-level.h" )
+  if( NOT __realApiLevel EQUAL ANDROID_NATIVE_API_LEVEL )
+   message( SEND_ERROR "Specified Android API level (${ANDROID_NATIVE_API_LEVEL}) does not match to the level found (${__realApiLevel}). Probably your copy of NDK is broken." )
+  endif()
+  unset( __realApiLevel )
+ endif()
+ set( ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" CACHE STRING "Android API level for native code" FORCE )
+ # for CMake newer than 2.8, attach the supported levels as the STRINGS property of the cache entry
+ if( CMAKE_VERSION VERSION_GREATER "2.8" )
+  list( SORT ANDROID_SUPPORTED_NATIVE_API_LEVELS )
+  set_property( CACHE ANDROID_NATIVE_API_LEVEL PROPERTY STRINGS ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ endif()
+endif()
+unset( __levelIdx )
+
+
+# remember target ABI
+# (stored in the cache with FORCE, so the validated value replaces any earlier entry)
+set( ANDROID_ABI "${ANDROID_ABI}" CACHE STRING "The target ABI for Android. If arm, then armeabi-v7a is recommended for hardware floating point." FORCE )
+# for CMake newer than 2.8, attach the ABIs of the selected architecture as the STRINGS property
+if( CMAKE_VERSION VERSION_GREATER "2.8" )
+ list( SORT ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME} )
+ set_property( CACHE ANDROID_ABI PROPERTY STRINGS ${ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME}} )
+endif()
+
+
+# runtime choice (STL, rtti, exceptions)
+# ANDROID_STL defaults to gnustl_static; the legacy ANDROID_USE_STLPORT switch
+# selects stlport_static when it is true, and always triggers a deprecation warning.
+if( NOT ANDROID_STL )
+ # honor legacy ANDROID_USE_STLPORT
+ if( DEFINED ANDROID_USE_STLPORT )
+  if( ANDROID_USE_STLPORT )
+   set( ANDROID_STL stlport_static )
+  endif()
+  message( WARNING "You are using an obsolete variable ANDROID_USE_STLPORT to select the STL variant. Use -DANDROID_STL=stlport_static instead." )
+ endif()
+ if( NOT ANDROID_STL )
+  set( ANDROID_STL gnustl_static )
+ endif()
+endif()
+set( ANDROID_STL "${ANDROID_STL}" CACHE STRING "C++ runtime" )
+set( ANDROID_STL_FORCE_FEATURES ON CACHE BOOL "automatically configure rtti and exceptions support based on C++ runtime" )
+mark_as_advanced( ANDROID_STL ANDROID_STL_FORCE_FEATURES )
+
+# Validate ANDROID_STL: the NDK accepts the full set of runtimes, a standalone
+# toolchain only "none" and the two GNU STL variants.
+if( BUILD_WITH_ANDROID_NDK )
+ if( NOT "${ANDROID_STL}" MATCHES "^(none|system|system_re|gabi\\+\\+_static|gabi\\+\\+_shared|stlport_static|stlport_shared|gnustl_static|gnustl_shared)$")
+  message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\".
+The possible values are:
+  none           -> Do not configure the runtime.
+  system         -> Use the default minimal system C++ runtime library.
+  system_re      -> Same as system but with rtti and exceptions.
+  gabi++_static  -> Use the GAbi++ runtime as a static library.
+  gabi++_shared  -> Use the GAbi++ runtime as a shared library.
+  stlport_static -> Use the STLport runtime as a static library.
+  stlport_shared -> Use the STLport runtime as a shared library.
+  gnustl_static  -> (default) Use the GNU STL as a static library.
+  gnustl_shared  -> Use the GNU STL as a shared library.
+" )
+ endif()
+elseif( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ if( NOT "${ANDROID_STL}" MATCHES "^(none|gnustl_static|gnustl_shared)$")
+  message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\".
+The possible values are:
+  none           -> Do not configure the runtime.
+  gnustl_static  -> (default) Use the GNU STL as a static library.
+  gnustl_shared  -> Use the GNU STL as a shared library.
+" )
+ endif()
+endif()
+
+# Clear the runtime-related variables before they are (re)computed.
+unset( ANDROID_RTTI )
+unset( ANDROID_EXCEPTIONS )
+unset( ANDROID_STL_INCLUDE_DIRS )
+unset( __libstl )
+unset( __libsupcxx )
+
+# Known-bad combination: NDK r7b + armeabi-v7a without VFPV3 + GNU STL (warned once, not inside try_compile).
+if( NOT _CMAKE_IN_TRY_COMPILE AND ANDROID_NDK_RELEASE STREQUAL "r7b" AND ARMEABI_V7A AND NOT VFPV3 AND ANDROID_STL MATCHES "gnustl" )
+ message( WARNING  "The GNU STL armeabi-v7a binaries from NDK r7b can crash non-NEON devices. The files provided with NDK r7b were not configured properly, resulting in crashes on Tegra2-based devices and others when trying to use certain floating-point functions (e.g., cosf, sinf, expf).
+You are strongly recommended to switch to another NDK release.
+" )
+endif()
+
+# Known-bad combination: NDK r6 + x86 + GNU STL; the warning text carries the header patch to apply.
+if( NOT _CMAKE_IN_TRY_COMPILE AND X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" )
+  message( WARNING  "The x86 system header file from NDK r6 has incorrect definition for ptrdiff_t. You are recommended to upgrade to a newer NDK release or manually patch the header:
+See https://android.googlesource.com/platform/development.git f907f4f9d4e56ccc8093df6fee54454b8bcab6c2
+  diff --git a/ndk/platforms/android-9/arch-x86/include/machine/_types.h b/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+  index 5e28c64..65892a1 100644
+  --- a/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+  +++ b/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+  @@ -51,7 +51,11 @@ typedef long int       ssize_t;
+   #endif
+   #ifndef _PTRDIFF_T
+   #define _PTRDIFF_T
+  -typedef long           ptrdiff_t;
+  +#  ifdef __ANDROID__
+  +     typedef int            ptrdiff_t;
+  +#  else
+  +     typedef long           ptrdiff_t;
+  +#  endif
+   #endif
+" )
+endif()
+
+
+# setup paths and STL for standalone toolchain
+# Outputs: ANDROID_TOOLCHAIN_ROOT, ANDROID_CLANG_TOOLCHAIN_ROOT, ANDROID_SYSROOT and,
+# unless ANDROID_STL is "none", ANDROID_STL_INCLUDE_DIRS, __libstl and __libsupcxx.
+if( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ set( ANDROID_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot" )
+
+ if( NOT ANDROID_STL STREQUAL "none" )
+  # generic C++ headers first, then the most specific target-dependent header directory that exists
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/include/c++/${ANDROID_COMPILER_VERSION}" )
+  if( ARMEABI_V7A AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}/bits" )
+   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}" )
+  elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb/bits" )
+   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb" )
+  else()
+   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" )
+  endif()
+  # always search static GNU STL to get the location of libsupc++.a
+  if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" )
+   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb" )
+  elseif( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" )
+   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}" )
+  elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libstdc++.a" )
+   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb" )
+  elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libstdc++.a" )
+   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib" )
+  endif()
+  # turn the directory found above into the two library file paths
+  if( __libstl )
+   set( __libsupcxx "${__libstl}/libsupc++.a" )
+   set( __libstl    "${__libstl}/libstdc++.a" )
+  endif()
+  # the file checked here is libsupc++.a, so that is the name the message reports
+  if( NOT EXISTS "${__libsupcxx}" )
+   message( FATAL_ERROR "The required libsupc++.a is missing in your standalone toolchain.
+ Usually it happens because of bug in make-standalone-toolchain.sh script from NDK r7, r7b and r7c.
+ You need to either upgrade to newer NDK or manually copy
+     $ANDROID_NDK/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a
+ to
+     ${__libsupcxx}
+   " )
+  endif()
+  # for the shared runtime, __libstl points at libgnustl_shared.so instead of libstdc++.a
+  if( ANDROID_STL STREQUAL "gnustl_shared" )
+   if( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" )
+    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" )
+   elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" )
+    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" )
+   elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" )
+    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" )
+   endif()
+  endif()
+ endif()
+endif()
+
+# clang
+# Decide whether Clang is the compiler, determine its version and the name of
+# the GCC toolchain used alongside it.
+if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" )
+ set( ANDROID_COMPILER_IS_CLANG 1 )
+ # ask the clang binary for its version and keep only the first <digits>.<digits> match
+ execute_process( COMMAND "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/clang${TOOL_OS_SUFFIX}" --version OUTPUT_VARIABLE ANDROID_CLANG_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE )
+ string( REGEX MATCH "[0-9]+[.][0-9]+" ANDROID_CLANG_VERSION "${ANDROID_CLANG_VERSION}")
+# NOTE(review): the pattern below also accepts a name ending in "-clang3." (digit optional),
+# for which the REGEX MATCH on the next line yields an empty version - confirm this is intended
+elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" )
+ # the version comes from the toolchain name suffix
+ string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}")
+ # the companion GCC toolchain name is the same name with "-clang<version>" replaced by "-4.6"
+ string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
+ if( NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}/bin/clang${TOOL_OS_SUFFIX}" )
+  message( FATAL_ERROR "Could not find the Clang compiler driver" )
+ endif()
+ set( ANDROID_COMPILER_IS_CLANG 1 )
+ set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+else()
+ # not a clang toolchain: the GCC toolchain name is the toolchain name itself
+ set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
+ unset( ANDROID_COMPILER_IS_CLANG CACHE )
+endif()
+
+# Driver name: "clang" followed by the version with dots removed (e.g. 3.1 -> clang31);
+# fall back to plain "clang" when no such binary exists.
+string( REPLACE "." "" _clang_name "clang${ANDROID_CLANG_VERSION}" )
+if( NOT EXISTS "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" )
+ set( _clang_name "clang" )
+endif()
+
+
+# setup paths and STL for NDK
+# For each supported ANDROID_STL value this selects the RTTI/exceptions
+# defaults, the STL include directories and the STL library (__libstl), then
+# locates libsupc++.a (__libsupcxx) for the runtimes that need it.
+if( BUILD_WITH_ANDROID_NDK )
+ set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+ set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" )
+
+ if( ANDROID_STL STREQUAL "none" )
+  # do nothing
+ elseif( ANDROID_STL STREQUAL "system" )
+  set( ANDROID_RTTI             OFF )
+  set( ANDROID_EXCEPTIONS       OFF )
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" )
+ elseif( ANDROID_STL STREQUAL "system_re" )
+  # system runtime headers with RTTI and exceptions enabled
+  set( ANDROID_RTTI             ON )
+  set( ANDROID_EXCEPTIONS       ON )
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" )
+ elseif( ANDROID_STL MATCHES "gabi" )
+  if( ANDROID_NDK_RELEASE STRLESS "r7" )
+   message( FATAL_ERROR "gabi++ is not available in your NDK. You have to upgrade to NDK r7 or newer to use gabi++.")
+  endif()
+  set( ANDROID_RTTI             ON )
+  set( ANDROID_EXCEPTIONS       OFF )
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/gabi++/include" )
+  set( __libstl                 "${ANDROID_NDK}/sources/cxx-stl/gabi++/libs/${ANDROID_NDK_ABI_NAME}/libgabi++_static.a" )
+ elseif( ANDROID_STL MATCHES "stlport" )
+  # exceptions are enabled from NDK r8d on, RTTI from NDK r7 on
+  if( NOT ANDROID_NDK_RELEASE STRLESS "r8d" )
+   set( ANDROID_EXCEPTIONS       ON )
+  else()
+   set( ANDROID_EXCEPTIONS       OFF )
+  endif()
+  if( ANDROID_NDK_RELEASE STRLESS "r7" )
+   set( ANDROID_RTTI            OFF )
+  else()
+   set( ANDROID_RTTI            ON )
+  endif()
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/stlport/stlport" )
+  set( __libstl                 "${ANDROID_NDK}/sources/cxx-stl/stlport/libs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" )
+ elseif( ANDROID_STL MATCHES "gnustl" )
+  set( ANDROID_EXCEPTIONS       ON )
+  set( ANDROID_RTTI             ON )
+  # __libstl first holds the gnu-libstdc++ root directory (per-compiler-version
+  # when such a directory exists) and is turned into the library path below
+  if( EXISTS "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" )
+   if( ARMEABI_V7A AND ANDROID_COMPILER_VERSION VERSION_EQUAL "4.7" AND ANDROID_NDK_RELEASE STREQUAL "r8d" )
+    # gnustl binary for 4.7 compiler is buggy :(
+    # TODO: look for right fix
+    set( __libstl                "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.6" )
+   else()
+    set( __libstl                "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" )
+   endif()
+  else()
+   set( __libstl                "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++" )
+  endif()
+  set( ANDROID_STL_INCLUDE_DIRS "${__libstl}/include" "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/include" )
+  if( EXISTS "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" )
+   set( __libstl                "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" )
+  else()
+   set( __libstl                "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
+  endif()
+ else()
+  message( FATAL_ERROR "Unknown runtime: ${ANDROID_STL}" )
+ endif()
+ # find libsupc++.a - rtti & exceptions
+ # The candidate locations are tried from the newest NDK layout to the oldest.
+ if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" )
+  set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r8b or newer
+  if( NOT EXISTS "${__libsupcxx}" )
+   set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r7-r8
+  endif()
+  if( NOT EXISTS "${__libsupcxx}" ) # before r7
+   if( ARMEABI_V7A )
+    if( ANDROID_FORCE_ARM_BUILD )
+     set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" )
+    else()
+     set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" )
+    endif()
+   elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD )
+    set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" )
+   else()
+    set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" )
+   endif()
+  endif()
+  if( NOT EXISTS "${__libsupcxx}")
+   # SEND_ERROR is a real message mode: it reports an error and blocks generation.
+   # The former bare word ERROR was treated as message text and only printed a notice.
+   message( SEND_ERROR "Could not find libsupc++.a for a chosen platform. Either your NDK is not supported or is broken.")
+  endif()
+ endif()
+endif()
+
+
+# case of shared STL linkage
+# When ANDROID_STL names a shared runtime, switch __libstl from the
+# "_static.a" file name to the "_shared.so" one and copy that library into
+# LIBRARY_OUTPUT_PATH.
+if( ANDROID_STL MATCHES "shared" AND DEFINED __libstl )
+ string( REPLACE "_static.a" "_shared.so" __libstl "${__libstl}" )
+ # the copy is skipped when _CMAKE_IN_TRY_COMPILE is set or when __libstl is not a .so file
+ if( NOT _CMAKE_IN_TRY_COMPILE AND __libstl MATCHES "[.]so$" )
+  get_filename_component( __libstlname "${__libstl}" NAME )
+  execute_process( COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__libstl}" "${LIBRARY_OUTPUT_PATH}/${__libstlname}" RESULT_VARIABLE __fileCopyProcess )
+  # report an error if the copy command failed or the destination file is still missing
+  if( NOT __fileCopyProcess EQUAL 0 OR NOT EXISTS "${LIBRARY_OUTPUT_PATH}/${__libstlname}")
+   message( SEND_ERROR "Failed copying of ${__libstl} to the ${LIBRARY_OUTPUT_PATH}/${__libstlname}" )
+  endif()
+  unset( __fileCopyProcess )
+  unset( __libstlname )
+ endif()
+endif()
+
+
+# ccache support
+# _ndk_ccache receives the requested ccache program from the NDK_CCACHE
+# variable or the NDK_CCACHE environment variable (via __INIT_VARIABLE).
+__INIT_VARIABLE( _ndk_ccache NDK_CCACHE ENV_NDK_CCACHE )
+if( _ndk_ccache )
+ # Drop a stale cache entry so find_program() searches again. The path has to
+ # be dereferenced: EXISTS does not expand a bare variable name, so the former
+ # "EXISTS NDK_CCACHE" tested a file literally named NDK_CCACHE and always
+ # discarded the cached value.
+ if( DEFINED NDK_CCACHE AND NOT EXISTS "${NDK_CCACHE}" )
+  unset( NDK_CCACHE CACHE )
+ endif()
+ find_program( NDK_CCACHE "${_ndk_ccache}" DOC "The path to ccache binary")
+else()
+ # ccache was not requested: forget any previously cached location
+ unset( NDK_CCACHE CACHE )
+endif()
+unset( _ndk_ccache )
+
+
+# setup the cross-compiler
+# Runs only while CMAKE_C_COMPILER is not set yet; all tool paths are stored in the cache.
+if( NOT CMAKE_C_COMPILER )
+ # With ccache available (and a sysroot path free of spaces, semicolons and quotes)
+ # ccache becomes the compiler and the real compiler is passed as its first argument.
+ if( NDK_CCACHE AND NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
+  set( CMAKE_C_COMPILER   "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" )
+  set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" )
+  if( ANDROID_COMPILER_IS_CLANG )
+   set( CMAKE_C_COMPILER_ARG1   "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}"   CACHE PATH "C compiler")
+   set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+  else()
+   set( CMAKE_C_COMPILER_ARG1   "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "C compiler")
+   set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+  endif()
+ else()
+  # no ccache: invoke clang or gcc directly
+  if( ANDROID_COMPILER_IS_CLANG )
+   set( CMAKE_C_COMPILER   "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}"   CACHE PATH "C compiler")
+   set( CMAKE_CXX_COMPILER "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+  else()
+   set( CMAKE_C_COMPILER   "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}"    CACHE PATH "C compiler" )
+   set( CMAKE_CXX_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}"    CACHE PATH "C++ compiler" )
+  endif()
+ endif()
+ # assembler and binutils always come from the GCC toolchain, also when compiling with clang
+ set( CMAKE_ASM_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}"     CACHE PATH "assembler" )
+ set( CMAKE_STRIP        "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-strip${TOOL_OS_SUFFIX}"   CACHE PATH "strip" )
+ set( CMAKE_AR           "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ar${TOOL_OS_SUFFIX}"      CACHE PATH "archive" )
+ set( CMAKE_LINKER       "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ld${TOOL_OS_SUFFIX}"      CACHE PATH "linker" )
+ set( CMAKE_NM           "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-nm${TOOL_OS_SUFFIX}"      CACHE PATH "nm" )
+ set( CMAKE_OBJCOPY      "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objcopy${TOOL_OS_SUFFIX}" CACHE PATH "objcopy" )
+ set( CMAKE_OBJDUMP      "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objdump${TOOL_OS_SUFFIX}" CACHE PATH "objdump" )
+ set( CMAKE_RANLIB       "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ranlib${TOOL_OS_SUFFIX}"  CACHE PATH "ranlib" )
+endif()
+
+# tool name prefix: the toolchain machine name followed by a dash
+set( _CMAKE_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_MACHINE_NAME}-" )
+# CMake older than 2.8.5: pass -c as the first argument of the assembler command
+if( CMAKE_VERSION VERSION_LESS 2.8.5 )
+ set( CMAKE_ASM_COMPILER_ARG1 "-c" )
+endif()
+# when APPLE is set, install_name_tool must be available; configuration stops otherwise
+if( APPLE )
+ find_program( CMAKE_INSTALL_NAME_TOOL NAMES install_name_tool )
+ if( NOT CMAKE_INSTALL_NAME_TOOL )
+  message( FATAL_ERROR "Could not find install_name_tool, please check your installation." )
+ endif()
+ mark_as_advanced( CMAKE_INSTALL_NAME_TOOL )
+endif()
+
+# Force set compilers because standard identification works badly for us
+# Both compilers are first declared as GNU; the compiler ID is then overridden
+# with Clang when ANDROID_COMPILER_IS_CLANG is set.
+include( CMakeForceCompiler )
+CMAKE_FORCE_C_COMPILER( "${CMAKE_C_COMPILER}" GNU )
+if( ANDROID_COMPILER_IS_CLANG )
+ set( CMAKE_C_COMPILER_ID Clang)
+endif()
+# values CMake would normally detect by itself: Linux platform, 4-byte pointers, ELF ABI
+set( CMAKE_C_PLATFORM_ID Linux )
+set( CMAKE_C_SIZEOF_DATA_PTR 4 )
+set( CMAKE_C_HAS_ISYSROOT 1 )
+set( CMAKE_C_COMPILER_ABI ELF )
+CMAKE_FORCE_CXX_COMPILER( "${CMAKE_CXX_COMPILER}" GNU )
+if( ANDROID_COMPILER_IS_CLANG )
+ set( CMAKE_CXX_COMPILER_ID Clang)
+endif()
+set( CMAKE_CXX_PLATFORM_ID Linux )
+set( CMAKE_CXX_SIZEOF_DATA_PTR 4 )
+set( CMAKE_CXX_HAS_ISYSROOT 1 )
+set( CMAKE_CXX_COMPILER_ABI ELF )
+# file extensions treated as C++ sources
+set( CMAKE_CXX_SOURCE_FILE_EXTENSIONS cc cp cxx cpp CPP c++ C )
+# force ASM compiler (required for CMake < 2.8.5)
+set( CMAKE_ASM_COMPILER_ID_RUN TRUE )
+set( CMAKE_ASM_COMPILER_ID GNU )
+set( CMAKE_ASM_COMPILER_WORKS TRUE )
+set( CMAKE_ASM_COMPILER_FORCED TRUE )
+set( CMAKE_COMPILER_IS_GNUASM 1)
+# file extensions treated as assembler sources
+set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm )
+
+# flags and definitions
+# remove before adding so that -DANDROID is not listed twice
+remove_definitions( -DANDROID )
+add_definitions( -DANDROID )
+
+# ANDROID_CXX_FLAGS starts with the --sysroot option. A sysroot path containing
+# a space, semicolon or double quote needs quoting or conversion.
+if( ANDROID_SYSROOT MATCHES "[ ;\"]" )
+ if( CMAKE_HOST_WIN32 )
+  # try to convert path to 8.3 form
+  # a one-line batch file echoing its first argument with the %~s1 modifier is run through ComSpec
+  file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "@echo %~s1" )
+  execute_process( COMMAND "$ENV{ComSpec}" /c "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "${ANDROID_SYSROOT}"
+                   OUTPUT_VARIABLE __path OUTPUT_STRIP_TRAILING_WHITESPACE
+                   RESULT_VARIABLE __result ERROR_QUIET )
+  if( __result EQUAL 0 )
+   # conversion succeeded: ANDROID_SYSROOT itself is replaced by the converted path
+   file( TO_CMAKE_PATH "${__path}" ANDROID_SYSROOT )
+   set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
+  else()
+   # conversion failed: keep the original path and wrap it in double quotes
+   set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
+  endif()
+ else()
+  # non-Windows hosts: wrap the whole option in single quotes
+  set( ANDROID_CXX_FLAGS "'--sysroot=${ANDROID_SYSROOT}'" )
+ endif()
+ if( NOT _CMAKE_IN_TRY_COMPILE )
+  # quotes can break try_compile and compiler identification
+  message(WARNING "Path to your Android NDK (or toolchain) has non-alphanumeric symbols.\nThe build might be broken.\n")
+ endif()
+else()
+ set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
+endif()
+
+# NDK flags
+# Per-architecture compiler flags: ANDROID_CXX_FLAGS holds the common flags,
+# ANDROID_CXX_FLAGS_RELEASE / ANDROID_CXX_FLAGS_DEBUG the per-configuration ones.
+# Flags that are added only when the compiler is not clang sit behind the
+# ANDROID_COMPILER_IS_CLANG checks.
+if( ARMEABI OR ARMEABI_V7A )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -funwind-tables" )
+ if( NOT ANDROID_FORCE_ARM_BUILD AND NOT ARMEABI_V6 )
+  # thumb mode for release builds, arm mode for debug builds
+  set( ANDROID_CXX_FLAGS_RELEASE "-mthumb -fomit-frame-pointer -fno-strict-aliasing" )
+  set( ANDROID_CXX_FLAGS_DEBUG   "-marm -fno-omit-frame-pointer -fno-strict-aliasing" )
+  if( NOT ANDROID_COMPILER_IS_CLANG )
+   set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -finline-limit=64" )
+  endif()
+ else()
+  # always compile ARMEABI_V6 in arm mode; otherwise there is no difference from ARMEABI
+  set( ANDROID_CXX_FLAGS_RELEASE "-marm -fomit-frame-pointer -fstrict-aliasing" )
+  set( ANDROID_CXX_FLAGS_DEBUG   "-marm -fno-omit-frame-pointer -fno-strict-aliasing" )
+  if( NOT ANDROID_COMPILER_IS_CLANG )
+   set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" )
+  endif()
+ endif()
+elseif( X86 )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funwind-tables" )
+ if( NOT ANDROID_COMPILER_IS_CLANG )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" )
+ else()
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fPIC" )
+ endif()
+ set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer -fstrict-aliasing" )
+ set( ANDROID_CXX_FLAGS_DEBUG   "-fno-omit-frame-pointer -fno-strict-aliasing" )
+elseif( MIPS )
+ set( ANDROID_CXX_FLAGS         "${ANDROID_CXX_FLAGS} -fpic -fno-strict-aliasing -finline-functions -ffunction-sections -funwind-tables -fmessage-length=0" )
+ set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer" )
+ set( ANDROID_CXX_FLAGS_DEBUG   "-fno-omit-frame-pointer" )
+ if( NOT ANDROID_COMPILER_IS_CLANG )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fno-inline-functions-called-once -fgcse-after-reload -frerun-cse-after-loop -frename-registers" )
+  set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} -funswitch-loops -finline-limit=300" )
+ endif()
+else()
+ # any other architecture: no per-configuration flags
+ # (this was an argument-less elseif(), which never evaluates to true)
+ set( ANDROID_CXX_FLAGS_RELEASE "" )
+ set( ANDROID_CXX_FLAGS_DEBUG   "" )
+endif()
+
+set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fsigned-char" ) # good/necessary when porting desktop libraries
+
+# -Wno-psabi is prepended for every non-x86 build that does not use clang
+if( NOT X86 AND NOT ANDROID_COMPILER_IS_CLANG )
+ set( ANDROID_CXX_FLAGS "-Wno-psabi ${ANDROID_CXX_FLAGS}" )
+endif()
+
+# compiler version 4.6 or newer
+if( NOT ANDROID_COMPILER_VERSION VERSION_LESS "4.6" )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -no-canonical-prefixes" ) # see https://android-review.googlesource.com/#/c/47564/
+endif()
+
+# ABI-specific flags
+# armeabi-v7a picks its FPU from the NEON / VFPV3 switches and defaults to vfpv3-d16
+if( ARMEABI_V7A )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv7-a -mfloat-abi=softfp" )
+ if( NEON )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=neon" )
+ elseif( VFPV3 )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=vfpv3" )
+ else()
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=vfpv3-d16" )
+ endif()
+elseif( ARMEABI_V6 )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv6 -mfloat-abi=softfp -mfpu=vfp" ) # vfp == vfpv2
+elseif( ARMEABI )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" )
+endif()
+
+# Link rule templates. For gnustl with an existing STL or libsupc++ archive the
+# C++ targets are linked through <CMAKE_C_COMPILER>; in every other case
+# through <CMAKE_CXX_COMPILER>.
+if( ANDROID_STL MATCHES "gnustl" AND (EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}") )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE  "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_LINK_EXECUTABLE       "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+else()
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE  "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_LINK_EXECUTABLE       "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+endif()
+
+# STL
+# The STL and libsupc++ files are appended, quoted, to the end of each link rule.
+if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" )
+ if( EXISTS "${__libstl}" )
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" )
+  set( CMAKE_CXX_LINK_EXECUTABLE       "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libstl}\"" )
+ endif()
+ if( EXISTS "${__libsupcxx}" )
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" )
+  set( CMAKE_CXX_LINK_EXECUTABLE       "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
+  # C objects:
+  # the C link rules are redefined and libsupc++ is appended to them as well
+  set( CMAKE_C_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+  set( CMAKE_C_CREATE_SHARED_MODULE  "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+  set( CMAKE_C_LINK_EXECUTABLE       "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+  set( CMAKE_C_CREATE_SHARED_LIBRARY "${CMAKE_C_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" )
+  set( CMAKE_C_CREATE_SHARED_MODULE  "${CMAKE_C_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" )
+  set( CMAKE_C_LINK_EXECUTABLE       "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
+ endif()
+ if( ANDROID_STL MATCHES "gnustl" )
+  # gnustl builds also link the math library: ANDROID_LIBM_PATH when that path exists, -lm otherwise
+  if( NOT EXISTS "${ANDROID_LIBM_PATH}" )
+   set( ANDROID_LIBM_PATH -lm )
+  endif()
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} ${ANDROID_LIBM_PATH}" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} ${ANDROID_LIBM_PATH}" )
+  set( CMAKE_CXX_LINK_EXECUTABLE       "${CMAKE_CXX_LINK_EXECUTABLE} ${ANDROID_LIBM_PATH}" )
+ endif()
+endif()
+
+# variables controlling optional build flags
+# Each option receives its default through __INIT_VARIABLE and is then
+# published as an advanced BOOL cache entry with a help string.
+if (ANDROID_NDK_RELEASE STRLESS "r7")
+ # libGLESv2.so in NDK's prior to r7 refers to missing external symbols.
+ # So this flag option is required for all projects using OpenGL from native.
+ __INIT_VARIABLE( ANDROID_SO_UNDEFINED                      VALUES ON )
+else()
+ __INIT_VARIABLE( ANDROID_SO_UNDEFINED                      VALUES OFF )
+endif()
+__INIT_VARIABLE( ANDROID_NO_UNDEFINED OBSOLETE_NO_UNDEFINED VALUES ON )
+__INIT_VARIABLE( ANDROID_FUNCTION_LEVEL_LINKING             VALUES ON )
+__INIT_VARIABLE( ANDROID_GOLD_LINKER                        VALUES ON )
+__INIT_VARIABLE( ANDROID_NOEXECSTACK                        VALUES ON )
+__INIT_VARIABLE( ANDROID_RELRO                              VALUES ON )
+
+# The help strings of ANDROID_FUNCTION_LEVEL_LINKING and ANDROID_NOEXECSTACK
+# describe the flags those options actually add; they used to repeat the
+# ANDROID_SO_UNDEFINED text.
+set( ANDROID_NO_UNDEFINED           ${ANDROID_NO_UNDEFINED}           CACHE BOOL "Show all undefined symbols as linker errors" )
+set( ANDROID_SO_UNDEFINED           ${ANDROID_SO_UNDEFINED}           CACHE BOOL "Allows or disallows undefined symbols in shared libraries" )
+set( ANDROID_FUNCTION_LEVEL_LINKING ${ANDROID_FUNCTION_LEVEL_LINKING} CACHE BOOL "Put each function and data item in its own section and remove unused sections at link time" )
+set( ANDROID_GOLD_LINKER            ${ANDROID_GOLD_LINKER}            CACHE BOOL "Enables gold linker (only available for NDK r8b for ARM and x86 architectures on linux-86 and darwin-x86 hosts)" )
+set( ANDROID_NOEXECSTACK            ${ANDROID_NOEXECSTACK}            CACHE BOOL "Marks the stack as non-executable" )
+set( ANDROID_RELRO                  ${ANDROID_RELRO}                  CACHE BOOL "Enables RELRO - a memory corruption mitigation technique" )
+mark_as_advanced( ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_FUNCTION_LEVEL_LINKING ANDROID_GOLD_LINKER ANDROID_NOEXECSTACK ANDROID_RELRO )
+
+# linker flags
+# ANDROID_LINKER_FLAGS is assembled from the option variables; several options
+# also extend ANDROID_CXX_FLAGS.
+set( ANDROID_LINKER_FLAGS "" )
+
+if( ARMEABI_V7A )
+ # this is *required* to use the following linker flags that routes around
+ # a CPU bug in some Cortex-A8 implementations:
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--fix-cortex-a8" )
+endif()
+
+if( ANDROID_NO_UNDEFINED )
+ if( MIPS )
+  # there is some sysroot-related problem in mips linker...
+  # on MIPS the flag is added together with an rpath-link to the sysroot libraries,
+  # and only when the sysroot path has no spaces, semicolons or quotes
+  if( NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
+   set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined -Wl,-rpath-link,${ANDROID_SYSROOT}/usr/lib" )
+  endif()
+ else()
+  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" )
+ endif()
+endif()
+
+if( ANDROID_SO_UNDEFINED )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-allow-shlib-undefined" )
+endif()
+
+# function level linking: separate sections at compile time, unused sections dropped at link time
+if( ANDROID_FUNCTION_LEVEL_LINKING )
+ set( ANDROID_CXX_FLAGS    "${ANDROID_CXX_FLAGS} -fdata-sections -ffunction-sections" )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--gc-sections" )
+endif()
+
+# linker selection applies to compiler version 4.6 only
+if( ANDROID_COMPILER_VERSION VERSION_EQUAL "4.6" )
+ if( ANDROID_GOLD_LINKER AND (CMAKE_HOST_UNIX OR ANDROID_NDK_RELEASE STRGREATER "r8b") AND (ARMEABI OR ARMEABI_V7A OR X86) )
+  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=gold" )
+ elseif( ANDROID_NDK_RELEASE STRGREATER "r8b")
+  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=bfd" )
+ elseif( ANDROID_NDK_RELEASE STREQUAL "r8b" AND ARMEABI AND NOT _CMAKE_IN_TRY_COMPILE )
+  message( WARNING "The default bfd linker from arm GCC 4.6 toolchain can fail with 'unresolvable R_ARM_THM_CALL relocation' error message. See https://code.google.com/p/android/issues/detail?id=35342
+  On Linux and OS X host platform you can workaround this problem using gold linker (default).
+  Rerun cmake with -DANDROID_GOLD_LINKER=ON option in case of problems.
+" )
+ endif()
+endif() # version 4.6
+
+# non-executable stack: the compiler-side flag differs between clang and gcc, the linker flag is common
+if( ANDROID_NOEXECSTACK )
+ if( ANDROID_COMPILER_IS_CLANG )
+  set( ANDROID_CXX_FLAGS    "${ANDROID_CXX_FLAGS} -Xclang -mnoexecstack" )
+ else()
+  set( ANDROID_CXX_FLAGS    "${ANDROID_CXX_FLAGS} -Wa,--noexecstack" )
+ endif()
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,noexecstack" )
+endif()
+
+if( ANDROID_RELRO )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,relro -Wl,-z,now" )
+endif()
+
+# clang-only flags: target triple and, for NDK builds, the GCC toolchain location
+if( ANDROID_COMPILER_IS_CLANG )
+ set( ANDROID_CXX_FLAGS "-Qunused-arguments ${ANDROID_CXX_FLAGS}" )
+ if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD )
+  # release builds use the thumbv7 triple, debug builds use ANDROID_LLVM_TRIPLE
+  set( ANDROID_CXX_FLAGS_RELEASE "-target thumbv7-none-linux-androideabi ${ANDROID_CXX_FLAGS_RELEASE}" )
+  set( ANDROID_CXX_FLAGS_DEBUG   "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS_DEBUG}" )
+ else()
+  set( ANDROID_CXX_FLAGS "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS}" )
+ endif()
+ if( BUILD_WITH_ANDROID_NDK )
+  set( ANDROID_CXX_FLAGS "-gcc-toolchain ${ANDROID_TOOLCHAIN_ROOT} ${ANDROID_CXX_FLAGS}" )
+ endif()
+endif()
+
+# cache flags
+# default values for the user-editable cache entries
+set( CMAKE_CXX_FLAGS           ""                        CACHE STRING "c++ flags" )
+set( CMAKE_C_FLAGS             ""                        CACHE STRING "c flags" )
+set( CMAKE_CXX_FLAGS_RELEASE   "-O3 -DNDEBUG"            CACHE STRING "c++ Release flags" )
+set( CMAKE_C_FLAGS_RELEASE     "-O3 -DNDEBUG"            CACHE STRING "c Release flags" )
+set( CMAKE_CXX_FLAGS_DEBUG     "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c++ Debug flags" )
+set( CMAKE_C_FLAGS_DEBUG       "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c Debug flags" )
+set( CMAKE_SHARED_LINKER_FLAGS ""                        CACHE STRING "shared linker flags" )
+set( CMAKE_MODULE_LINKER_FLAGS ""                        CACHE STRING "module linker flags" )
+set( CMAKE_EXE_LINKER_FLAGS    "-Wl,-z,nocopyreloc"      CACHE STRING "executable linker flags" )
+
+# put flags to cache (for debug purpose only)
+set( ANDROID_CXX_FLAGS         "${ANDROID_CXX_FLAGS}"         CACHE INTERNAL "Android specific c/c++ flags" )
+set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE}" CACHE INTERNAL "Android specific c/c++ Release flags" )
+set( ANDROID_CXX_FLAGS_DEBUG   "${ANDROID_CXX_FLAGS_DEBUG}"   CACHE INTERNAL "Android specific c/c++ Debug flags" )
+set( ANDROID_LINKER_FLAGS      "${ANDROID_LINKER_FLAGS}"      CACHE INTERNAL "Android specific c/c++ linker flags" )
+
+# finish flags
+# the Android-specific flags are placed in front of the corresponding CMake flags;
+# C and C++ both receive ANDROID_CXX_FLAGS
+set( CMAKE_CXX_FLAGS           "${ANDROID_CXX_FLAGS} ${CMAKE_CXX_FLAGS}" )
+set( CMAKE_C_FLAGS             "${ANDROID_CXX_FLAGS} ${CMAKE_C_FLAGS}" )
+set( CMAKE_CXX_FLAGS_RELEASE   "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_RELEASE}" )
+set( CMAKE_C_FLAGS_RELEASE     "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_C_FLAGS_RELEASE}" )
+set( CMAKE_CXX_FLAGS_DEBUG     "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_CXX_FLAGS_DEBUG}" )
+set( CMAKE_C_FLAGS_DEBUG       "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_C_FLAGS_DEBUG}" )
+set( CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}" )
+set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}" )
+set( CMAKE_EXE_LINKER_FLAGS    "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" )
+
+# MIPS with NDK r8: pass explicit linker scripts from the toolchain directory
+# (mipself.xsc for shared libraries and modules, mipself.x for executables)
+if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" )
+ set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" )
+ set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" )
+ set( CMAKE_EXE_LINKER_FLAGS    "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" )
+endif()
+
+# configure rtti
+# applied only when ANDROID_STL_FORCE_FEATURES is on and ANDROID_RTTI is defined
+if( DEFINED ANDROID_RTTI AND ANDROID_STL_FORCE_FEATURES )
+ if( ANDROID_RTTI )
+  set( CMAKE_CXX_FLAGS "-frtti ${CMAKE_CXX_FLAGS}" )
+ else()
+  set( CMAKE_CXX_FLAGS "-fno-rtti ${CMAKE_CXX_FLAGS}" )
+ endif()
+endif()
+
+# configure exceptions
+# applied only when ANDROID_STL_FORCE_FEATURES is on and ANDROID_EXCEPTIONS is defined;
+# the flag goes to both the C++ and the C flags
+if( DEFINED ANDROID_EXCEPTIONS AND ANDROID_STL_FORCE_FEATURES )
+ if( ANDROID_EXCEPTIONS )
+  set( CMAKE_CXX_FLAGS "-fexceptions ${CMAKE_CXX_FLAGS}" )
+  set( CMAKE_C_FLAGS "-fexceptions ${CMAKE_C_FLAGS}" )
+ else()
+  set( CMAKE_CXX_FLAGS "-fno-exceptions ${CMAKE_CXX_FLAGS}" )
+  set( CMAKE_C_FLAGS "-fno-exceptions ${CMAKE_C_FLAGS}" )
+ endif()
+endif()
+
+# global includes and link directories
+include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} )
+link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
+
+# detect if need link crtbegin_so.o explicitly
+# The shared-library link rule is expanded by hand into a real command line
+# whose only object is crtbegin_so.o. When that command succeeds,
+# ANDROID_EXPLICIT_CRT_LINK is switched ON.
+if( NOT DEFINED ANDROID_EXPLICIT_CRT_LINK )
+ set( __cmd "${CMAKE_CXX_CREATE_SHARED_LIBRARY}" )
+ string( REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_C_COMPILER>"   "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}"   __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_CXX_FLAGS>" "${CMAKE_CXX_FLAGS}" __cmd "${__cmd}" )
+ string( REPLACE "<LANGUAGE_COMPILE_FLAGS>" "" __cmd "${__cmd}" )
+ string( REPLACE "<LINK_FLAGS>" "${CMAKE_SHARED_LINKER_FLAGS}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS>" "-shared" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG>" "" __cmd "${__cmd}" )
+ string( REPLACE "<TARGET_SONAME>" "" __cmd "${__cmd}" )
+ string( REPLACE "<TARGET>" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain_crtlink_test.so" __cmd "${__cmd}" )
+ string( REPLACE "<OBJECTS>" "\"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" __cmd "${__cmd}" )
+ string( REPLACE "<LINK_LIBRARIES>" "" __cmd "${__cmd}" )
+ # split the command line into a list of arguments
+ separate_arguments( __cmd )
+ # separate_arguments also splits paths that contain spaces; for each known root
+ # path the split form is replaced back by the original value
+ foreach( __var ANDROID_NDK ANDROID_NDK_TOOLCHAINS_PATH ANDROID_STANDALONE_TOOLCHAIN )
+  if( ${__var} )
+   set( __tmp "${${__var}}" )
+   separate_arguments( __tmp )
+   string( REPLACE "${__tmp}" "${${__var}}" __cmd "${__cmd}")
+  endif()
+ endforeach()
+ # strip all quote characters before running the command
+ string( REPLACE "'" "" __cmd "${__cmd}" )
+ string( REPLACE "\"" "" __cmd "${__cmd}" )
+ execute_process( COMMAND ${__cmd} RESULT_VARIABLE __cmd_result OUTPUT_QUIET ERROR_QUIET )
+ if( __cmd_result EQUAL 0 )
+  set( ANDROID_EXPLICIT_CRT_LINK ON )
+ else()
+  set( ANDROID_EXPLICIT_CRT_LINK OFF )
+ endif()
+endif()
+
+# when enabled, crtbegin_so.o is appended to the shared library and module link rules
+if( ANDROID_EXPLICIT_CRT_LINK )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE  "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+endif()
+
+# setup output directories
+set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" )
+set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" )
+
+# output paths are cached only when _CMAKE_IN_TRY_COMPILE is not set
+if(NOT _CMAKE_IN_TRY_COMPILE)
+ # projects with a jni/CMakeLists.txt get a per-ABI executable directory
+ if( EXISTS "${CMAKE_SOURCE_DIR}/jni/CMakeLists.txt" )
+  set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin/${ANDROID_NDK_ABI_NAME}" CACHE PATH "Output directory for applications" )
+ else()
+  set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin" CACHE PATH "Output directory for applications" )
+ endif()
+ set( LIBRARY_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}" CACHE PATH "path for android libs" )
+endif()
+
+# set these global flags for cmake client scripts to change behavior
+set( ANDROID True )
+set( BUILD_ANDROID True )
+
+# where is the target environment
+# roots: toolchain binaries, toolchain machine directory, sysroot and the install prefix
+set( CMAKE_FIND_ROOT_PATH "${ANDROID_TOOLCHAIN_ROOT}/bin" "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" "${ANDROID_SYSROOT}" "${CMAKE_INSTALL_PREFIX}" "${CMAKE_INSTALL_PREFIX}/share" )
+
+# only search for libraries and includes in the ndk toolchain
+# (the same ONLY mode is applied to programs as well)
+set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+
+
+# macro to find packages on the host OS
+# All arguments are forwarded unchanged to find_package(). During the call the
+# find-root restrictions are lifted and the host platform variables are shown
+# instead of the target ones; afterwards the target view is put back.
+macro( find_host_package )
+ # host platform identity (a host that is neither Windows nor Apple keeps the current values)
+ if( CMAKE_HOST_WIN32 )
+  unset( UNIX )
+  set( WIN32 1 )
+ elseif( CMAKE_HOST_APPLE )
+  unset( UNIX )
+  set( APPLE 1 )
+ endif()
+ # search without re-rooting into the toolchain/sysroot
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
+ find_package( ${ARGN} )
+ # back to the cross-compilation settings
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+ set( UNIX 1 )
+ unset( APPLE )
+ unset( WIN32 )
+endmacro()
+
+
+# macro to find programs on the host OS
+# All arguments are forwarded unchanged to find_program(). During the call the
+# find-root restrictions are lifted and the host platform variables are shown
+# instead of the target ones; afterwards the target view is put back.
+macro( find_host_program )
+ # host platform identity (a host that is neither Windows nor Apple keeps the current values)
+ if( CMAKE_HOST_WIN32 )
+  unset( UNIX )
+  set( WIN32 1 )
+ elseif( CMAKE_HOST_APPLE )
+  unset( UNIX )
+  set( APPLE 1 )
+ endif()
+ # search without re-rooting into the toolchain/sysroot
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
+ find_program( ${ARGN} )
+ # back to the cross-compilation settings
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+ set( UNIX 1 )
+ unset( APPLE )
+ unset( WIN32 )
+endmacro()
+
+
+# Translate a toolchain flag name into the matching ABI directory name.
+#   TOOLCHAIN_FLAG - one of ARMEABI, ARMEABI_V7A, X86, MIPS
+#   VAR            - name of the variable that receives "armeabi", "armeabi-v7a",
+#                    "x86", "mips", or "unknown" for any other flag
+# NOTE(review): both STREQUAL operands are quoted strings that may coincide with
+# variable names; depending on the CMake version/policies such operands can be
+# dereferenced by if() - confirm against the supported CMake versions.
+macro( ANDROID_GET_ABI_RAWNAME TOOLCHAIN_FLAG VAR )
+ if( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI" )
+  set( ${VAR} "armeabi" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI_V7A" )
+  set( ${VAR} "armeabi-v7a" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "X86" )
+  set( ${VAR} "x86" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "MIPS" )
+  set( ${VAR} "mips" )
+ else()
+  set( ${VAR} "unknown" )
+ endif()
+endmacro()
+
+
+# export toolchain settings for the try_compile() command: each defined option below is written as a set( ... CACHE INTERNAL ) line
+if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" )
+ set( __toolchain_config "")
+ foreach( __var NDK_CCACHE  LIBRARY_OUTPUT_PATH_ROOT  ANDROID_FORBID_SYGWIN  ANDROID_SET_OBSOLETE_VARIABLES
+                ANDROID_NDK_HOST_X64
+                ANDROID_NDK
+                ANDROID_NDK_LAYOUT
+                ANDROID_STANDALONE_TOOLCHAIN
+                ANDROID_TOOLCHAIN_NAME
+                ANDROID_ABI
+                ANDROID_NATIVE_API_LEVEL
+                ANDROID_STL
+                ANDROID_STL_FORCE_FEATURES
+                ANDROID_FORCE_ARM_BUILD
+                ANDROID_NO_UNDEFINED
+                ANDROID_SO_UNDEFINED
+                ANDROID_FUNCTION_LEVEL_LINKING
+                ANDROID_GOLD_LINKER
+                ANDROID_NOEXECSTACK
+                ANDROID_RELRO
+                ANDROID_LIBM_PATH
+                ANDROID_EXPLICIT_CRT_LINK
+                )
+  if( DEFINED ${__var} )
+   if( "${${__var}}" MATCHES " ")  # test the VALUE, not the variable name, so values containing spaces are written quoted
+    set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" CACHE INTERNAL \"\" )\n" )
+   else()
+    set( __toolchain_config "${__toolchain_config}set( ${__var} ${${__var}} CACHE INTERNAL \"\" )\n" )
+   endif()
+  endif()
+ endforeach()
+ file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/android.toolchain.config.cmake" "${__toolchain_config}" )  # skipped when configuring a try_compile project
+ unset( __toolchain_config )
+endif()
+
+
+# set some obsolete variables for backward compatibility (on by default, advanced option)
+set( ANDROID_SET_OBSOLETE_VARIABLES ON CACHE BOOL "Define obsolete Android-specific cmake variables" )
+mark_as_advanced( ANDROID_SET_OBSOLETE_VARIABLES )
+if( ANDROID_SET_OBSOLETE_VARIABLES )
+ set( ANDROID_API_LEVEL ${ANDROID_NATIVE_API_LEVEL} )  # legacy alias of ANDROID_NATIVE_API_LEVEL
+ set( ARM_TARGET "${ANDROID_ABI}" )  # legacy alias of ANDROID_ABI
+ set( ARMEABI_NDK_NAME "${ANDROID_NDK_ABI_NAME}" )  # legacy alias of ANDROID_NDK_ABI_NAME
+endif()
+
+
+# Variables controlling behavior or set by cmake toolchain:
+#   ANDROID_ABI : "armeabi-v7a" (default), "armeabi", "armeabi-v7a with NEON", "armeabi-v7a with VFPV3", "armeabi-v6 with VFP", "x86", "mips"
+#   ANDROID_NATIVE_API_LEVEL : 3,4,5,8,9,14 (depends on NDK version)
+#   ANDROID_STL : gnustl_static/gnustl_shared/stlport_static/stlport_shared/gabi++_static/gabi++_shared/system_re/system/none
+#   ANDROID_FORBID_SYGWIN : ON/OFF
+#   ANDROID_NO_UNDEFINED : ON/OFF
+#   ANDROID_SO_UNDEFINED : OFF/ON  (default depends on NDK version)
+#   ANDROID_FUNCTION_LEVEL_LINKING : ON/OFF
+#   ANDROID_GOLD_LINKER : ON/OFF
+#   ANDROID_NOEXECSTACK : ON/OFF
+#   ANDROID_RELRO : ON/OFF
+#   ANDROID_FORCE_ARM_BUILD : ON/OFF
+#   ANDROID_STL_FORCE_FEATURES : ON/OFF
+#   ANDROID_SET_OBSOLETE_VARIABLES : ON/OFF
+# Can be set only at the first run:
+#   ANDROID_NDK
+#   ANDROID_STANDALONE_TOOLCHAIN
+#   ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain
+#   ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems)
+#   ANDROID_NDK_LAYOUT : the inner NDK structure (RELEASE, LINARO, ANDROID)
+#   LIBRARY_OUTPUT_PATH_ROOT : <any valid path>
+#   NDK_CCACHE : <path to your ccache executable>
+# Obsolete:
+#   ANDROID_API_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL
+#   ARM_TARGET : superseded by ANDROID_ABI
+#   ARM_TARGETS : superseded by ANDROID_ABI (can be set only)
+#   ANDROID_NDK_TOOLCHAIN_ROOT : superseded by ANDROID_STANDALONE_TOOLCHAIN (can be set only)
+#   ANDROID_USE_STLPORT : superseded by ANDROID_STL=stlport_static
+#   ANDROID_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL (completely removed)
+#
+# Primary read-only variables:
+#   ANDROID : always TRUE
+#   ARMEABI : TRUE for arm v6 and older devices
+#   ARMEABI_V6 : TRUE for arm v6
+#   ARMEABI_V7A : TRUE for arm v7a
+#   NEON : TRUE if NEON unit is enabled
+#   VFPV3 : TRUE if VFP version 3 is enabled
+#   X86 : TRUE if configured for x86
+#   MIPS : TRUE if configured for mips
+#   BUILD_ANDROID : always TRUE
+#   BUILD_WITH_ANDROID_NDK : TRUE if NDK is used
+#   BUILD_WITH_STANDALONE_TOOLCHAIN : TRUE if standalone toolchain is used
+#   ANDROID_NDK_HOST_SYSTEM_NAME : "windows", "linux-x86" or "darwin-x86" depending on host platform
+#   ANDROID_NDK_ABI_NAME : "armeabi", "armeabi-v7a", "x86" or "mips" depending on ANDROID_ABI
+#   ANDROID_NDK_RELEASE : one of r5, r5b, r5c, r6, r6b, r7, r7b, r7c, r8, r8b, r8c, r8d, r8e; set only for NDK
+#   ANDROID_ARCH_NAME : "arm" or "x86" or "mips" depending on ANDROID_ABI
+#   ANDROID_SYSROOT : path to the compiler sysroot
+#   TOOL_OS_SUFFIX : "" or ".exe" depending on host platform
+#   ANDROID_COMPILER_IS_CLANG : TRUE if clang compiler is used
+# Obsolete:
+#   ARMEABI_NDK_NAME : superseded by ANDROID_NDK_ABI_NAME
+#
+# Secondary (less stable) read-only variables:
+#   ANDROID_COMPILER_VERSION : GCC version used
+#   ANDROID_CXX_FLAGS : C/C++ compiler flags required by Android platform
+#   ANDROID_SUPPORTED_ABIS : list of currently allowed values for ANDROID_ABI
+#   ANDROID_TOOLCHAIN_MACHINE_NAME : "arm-linux-androideabi", "arm-eabi" or "i686-android-linux"
+#   ANDROID_TOOLCHAIN_ROOT : path to the top level of toolchain (standalone or placed inside NDK)
+#   ANDROID_CLANG_TOOLCHAIN_ROOT : path to clang tools
+#   ANDROID_SUPPORTED_NATIVE_API_LEVELS : list of native API levels found inside NDK
+#   ANDROID_STL_INCLUDE_DIRS : stl include paths
+#   ANDROID_RTTI : if rtti is enabled by the runtime
+#   ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime
+#   ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used
+#   ANDROID_CLANG_VERSION : version of clang compiler if clang is used
+#   ANDROID_LIBM_PATH : path to libm.so (set to something like $(TOP)/out/target/product/<product_name>/obj/lib/libm.so) to work around unresolved `sincos`
+#
+# Defaults:
+#   ANDROID_DEFAULT_NDK_API_LEVEL
+#   ANDROID_DEFAULT_NDK_API_LEVEL_${ARCH}
+#   ANDROID_NDK_SEARCH_PATHS
+#   ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH
+#   ANDROID_SUPPORTED_ABIS_${ARCH}
+#   ANDROID_SUPPORTED_NDK_VERSIONS
similarity index 100%
rename from android/java.rst
rename to platforms/android/java.rst
similarity index 97%
rename from android/service/engine/CMakeLists.txt
rename to platforms/android/service/engine/CMakeLists.txt
index 8b88393..852a028 100644 (file)
@@ -24,7 +24,7 @@ else()
   message(WARNING "Can not automatically determine the value for ANDROID_PLATFORM_VERSION_CODE")
 endif()
 
-configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/android/service/engine/.build/${ANDROID_MANIFEST_FILE}"  @ONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/platforms/android/service/engine/.build/${ANDROID_MANIFEST_FILE}"  @ONLY)
 
 link_directories("${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib" "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib" "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}")
 
@@ -72,4 +72,3 @@ file(GLOB engine_test_files "jni/Tests/*.cpp")
 
 add_executable(opencv_test_engine ${engine_test_files} jni/Tests/gtest/gtest-all.cpp)
 target_link_libraries(opencv_test_engine z binder log utils android_runtime ${engine} ${engine}_jni)
-
@@ -137,5 +137,3 @@ TEST(PackageManager, GetPackagePathForMips)
 //     string path = pm.GetPackagePathByVersion("240", PLATFORM_TEGRA2, 0);
 //     EXPECT_STREQ("/data/data/org.opencv.lib_v24_tegra2/lib", path.c_str());
 // }
-
-
similarity index 99%
rename from android/service/test_native.py
rename to platforms/android/service/test_native.py
index 9a39032..328b9a8 100755 (executable)
@@ -34,4 +34,3 @@ if (__name__ ==  "__main__"):
     os.system("adb %s shell mkdir -p \"%s\"" % (DEVICE_STR, DEVICE_LOG_PATH))
 
     RunTestApp("OpenCVEngineTestApp")
-
diff --git a/platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh b/platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh
deleted file mode 100755 (executable)
index f8df785..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_hardfp
-cd build_hardfp
-
-cmake -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../..
-
diff --git a/platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh b/platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh
deleted file mode 100755 (executable)
index f4210fa..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_softfp
-cd build_softfp
-
-cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../..
-
similarity index 98%
rename from android/scripts/ABI_compat_generator.py
rename to platforms/scripts/ABI_compat_generator.py
index b492a70..fdabf00 100755 (executable)
@@ -6,9 +6,7 @@ import os
 
 
 architecture = 'armeabi'
-excludedHeaders = set(['hdf5.h', 'cap_ios.h', 
-    'eigen.hpp', 'cxeigen.hpp' #TOREMOVE
-    ])
+excludedHeaders = set(['hdf5.h', 'cap_ios.h', 'eigen.hpp', 'cxeigen.hpp']) #TOREMOVE
 systemIncludes = ['sources/cxx-stl/gnu-libstdc++/4.6/include', \
     '/opt/android-ndk-r8c/platforms/android-8/arch-arm', # TODO: check if this one could be passed as command line arg
     'sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include']
@@ -113,7 +111,7 @@ def FindHeaders():
             if f == m:
                 moduleHeaders += GetHeaderFiles(os.path.join(cppHeadersFolder, f))
                 if m == 'flann':
-                    flann = os.path.join(cppHeadersFolder, f, 'flann.hpp') 
+                    flann = os.path.join(cppHeadersFolder, f, 'flann.hpp')
                     moduleHeaders.remove(flann)
                     moduleHeaders.insert(0, flann)
                 cppHeaders += moduleHeaders
similarity index 90%
rename from android/scripts/cmake_android_all_cameras.py
rename to platforms/scripts/cmake_android_all_cameras.py
index afcab63..c160df0 100755 (executable)
@@ -49,7 +49,7 @@ for s in ConfFile.readlines():
 
     os.chdir(BuildDir)
     BuildLog = os.path.join(BuildDir, "build.log")
-    CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../../ > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog)
+    CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../.. > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog)
     MakeCmdLine = "make %s >> \"%s\" 2>&1" % (MakeTarget, BuildLog);
     #print(CmakeCmdLine)
     os.system(CmakeCmdLine)
similarity index 50%
rename from android/scripts/cmake_android.sh
rename to platforms/scripts/cmake_android_arm.sh
index 101ba3c..84c88a8 100755 (executable)
@@ -1,8 +1,7 @@
 #!/bin/sh
 cd `dirname $0`/..
 
-mkdir -p build
-cd build
-
-cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
+mkdir -p build_android_arm
+cd build_android_arm
 
+cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../..
diff --git a/platforms/scripts/cmake_android_mips.sh b/platforms/scripts/cmake_android_mips.sh
new file mode 100755 (executable)
index 0000000..6bc7944
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_android_mips
+cd build_android_mips
+
+cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../..
diff --git a/platforms/scripts/cmake_android_service.sh b/platforms/scripts/cmake_android_service.sh
new file mode 100755 (executable)
index 0000000..7ba8865
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_android_service
+cd build_android_service
+
+cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../..
diff --git a/platforms/scripts/cmake_android_x86.sh b/platforms/scripts/cmake_android_x86.sh
new file mode 100755 (executable)
index 0000000..8fb8abd
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+cd `dirname $0`/..
+
+mkdir -p build_android_x86
+cd build_android_x86
+
+cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../..
diff --git a/platforms/scripts/cmake_arm_gnueabi_hardfp.sh b/platforms/scripts/cmake_arm_gnueabi_hardfp.sh
new file mode 100755 (executable)
index 0000000..1fce4f9
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_linux_arm_hardfp
+cd build_linux_arm_hardfp
+
+cmake -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../..
diff --git a/platforms/scripts/cmake_arm_gnueabi_softfp.sh b/platforms/scripts/cmake_arm_gnueabi_softfp.sh
new file mode 100755 (executable)
index 0000000..7343489
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_linux_arm_softfp
+cd build_linux_arm_softfp
+
+cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../..
index 2372773..b06b2cc 100644 (file)
@@ -215,9 +215,9 @@ public class FdActivity extends Activity implements CvCameraViewListener2 {
         else if (item == mItemFace20)
             setMinFaceSize(0.2f);
         else if (item == mItemType) {
-            mDetectorType = (mDetectorType + 1) % mDetectorName.length;
-            item.setTitle(mDetectorName[mDetectorType]);
-            setDetectorType(mDetectorType);
+            int tmpDetectorType = (mDetectorType + 1) % mDetectorName.length;
+            item.setTitle(mDetectorName[tmpDetectorType]);
+            setDetectorType(tmpDetectorType);
         }
         return true;
     }
index 09687f3..44aadfe 100644 (file)
@@ -1,75 +1,61 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>\r
-<?fileVersion 4.0.0?>\r
-\r
-<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">\r
-       <storageModule moduleId="org.eclipse.cdt.core.settings">\r
-               <cconfiguration id="0.129633445">\r
-                       <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="0.129633445" moduleId="org.eclipse.cdt.core.settings" name="Default">\r
-                               <externalSettings/>\r
-                               <extensions>\r
-                                       <extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
-                                       <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
-                                       <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>\r
-                                       <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
-                                       <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
-                                       <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
-                               </extensions>\r
-                       </storageModule>\r
-                       <storageModule moduleId="cdtBuildSystem" version="4.0.0">\r
-                               <configuration artifactName="${ProjName}" buildProperties="" description="" id="0.129633445" name="Default" parent="org.eclipse.cdt.build.core.prefbase.cfg">\r
-                                       <folderInfo id="0.129633445." name="/" resourcePath="">\r
-                                               <toolChain id="org.eclipse.cdt.build.core.prefbase.toolchain.2006441180" name="No ToolChain" resourceTypeBasedDiscovery="false" superClass="org.eclipse.cdt.build.core.prefbase.toolchain">\r
-                                                       <targetPlatform id="org.eclipse.cdt.build.core.prefbase.toolchain.2006441180.527973180" name=""/>\r
-                                                       <builder autoBuildTarget="" command="${NDKROOT}/ndk-build.cmd" enableAutoBuild="true" enableCleanBuild="false" id="org.eclipse.cdt.build.core.settings.default.builder.180541221" incrementalBuildTarget="" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="org.eclipse.cdt.build.core.settings.default.builder"/>\r
-                                                       <tool id="org.eclipse.cdt.build.core.settings.holder.libs.791069665" name="holder for library settings" superClass="org.eclipse.cdt.build.core.settings.holder.libs"/>\r
-                                                       <tool id="org.eclipse.cdt.build.core.settings.holder.1894181736" name="Assembly" superClass="org.eclipse.cdt.build.core.settings.holder">\r
-                                                               <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.588929884" languageId="org.eclipse.cdt.core.assembly" languageName="Assembly" sourceContentType="org.eclipse.cdt.core.asmSource" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>\r
-                                                       </tool>\r
-                                                       <tool id="org.eclipse.cdt.build.core.settings.holder.303359177" name="GNU C++" superClass="org.eclipse.cdt.build.core.settings.holder">\r
-                                                               <option id="org.eclipse.cdt.build.core.settings.holder.incpaths.373249505" name="Include Paths" superClass="org.eclipse.cdt.build.core.settings.holder.incpaths" valueType="includePath">\r
-                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/platforms/android-9/arch-arm/usr/include&quot;"/>\r
-                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/include&quot;"/>\r
-                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include&quot;"/>\r
-                                                                       <listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../../sdk/native/jni/include&quot;"/>\r
-                                                               </option>\r
-                                                               <option id="org.eclipse.cdt.build.core.settings.holder.symbols.1424359063" name="Symbols" superClass="org.eclipse.cdt.build.core.settings.holder.symbols" valueType="definedSymbols">\r
-                                                                       <listOptionValue builtIn="false" value="ANDROID=1"/>\r
-                                                               </option>\r
-                                                               <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.360067880" languageId="org.eclipse.cdt.core.g++" languageName="GNU C++" sourceContentType="org.eclipse.cdt.core.cxxSource,org.eclipse.cdt.core.cxxHeader" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>\r
-                                                       </tool>\r
-                                                       <tool id="org.eclipse.cdt.build.core.settings.holder.1156172258" name="GNU C" superClass="org.eclipse.cdt.build.core.settings.holder">\r
-                                                               <option id="org.eclipse.cdt.build.core.settings.holder.incpaths.149918263" name="Include Paths" superClass="org.eclipse.cdt.build.core.settings.holder.incpaths" valueType="includePath">\r
-                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/platforms/android-9/arch-arm/usr/include&quot;"/>\r
-                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/include&quot;"/>\r
-                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include&quot;"/>\r
-                                                                       <listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../../sdk/native/jni/include&quot;"/>\r
-                                                               </option>\r
-                                                               <option id="org.eclipse.cdt.build.core.settings.holder.symbols.719752707" name="Symbols" superClass="org.eclipse.cdt.build.core.settings.holder.symbols" valueType="definedSymbols">\r
-                                                                       <listOptionValue builtIn="false" value="ANDROID=1"/>\r
-                                                               </option>\r
-                                                               <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.232493949" languageId="org.eclipse.cdt.core.gcc" languageName="GNU C" sourceContentType="org.eclipse.cdt.core.cSource,org.eclipse.cdt.core.cHeader" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>\r
-                                                       </tool>\r
-                                               </toolChain>\r
-                                       </folderInfo>\r
-                                       <sourceEntries>\r
-                                               <entry flags="VALUE_WORKSPACE_PATH" kind="sourcePath" name="jni"/>\r
-                                       </sourceEntries>\r
-                               </configuration>\r
-                       </storageModule>\r
-                       <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>\r
-               </cconfiguration>\r
-       </storageModule>\r
-       <storageModule moduleId="cdtBuildSystem" version="4.0.0">\r
-               <project id="OpenCV Sample - face-detection.null.1639518055" name="OpenCV Sample - face-detection"/>\r
-       </storageModule>\r
-       <storageModule moduleId="scannerConfiguration">\r
-               <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>\r
-               <scannerConfigBuildInfo instanceId="0.129633445">\r
-                       <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>\r
-               </scannerConfigBuildInfo>\r
-       </storageModule>\r
-       <storageModule moduleId="refreshScope" versionNumber="1">\r
-               <resource resourceType="PROJECT" workspacePath="/OpenCV Sample - face-detection"/>\r
-       </storageModule>\r
-       <storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>\r
-</cproject>\r
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+       <storageModule moduleId="org.eclipse.cdt.core.settings">
+               <cconfiguration id="0.882924228">
+                       <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="0.882924228" moduleId="org.eclipse.cdt.core.settings" name="Default">
+                               <externalSettings/>
+                               <extensions>
+                                       <extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+                                       <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+                                       <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+                                       <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+                                       <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+                                       <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+                               </extensions>
+                       </storageModule>
+                       <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+                               <configuration artifactName="${ProjName}" buildProperties="" description="" id="0.882924228" name="Default" parent="org.eclipse.cdt.build.core.prefbase.cfg">
+                                       <folderInfo id="0.882924228." name="/" resourcePath="">
+                                               <toolChain id="org.eclipse.cdt.build.core.prefbase.toolchain.1667980868" name="No ToolChain" resourceTypeBasedDiscovery="false" superClass="org.eclipse.cdt.build.core.prefbase.toolchain">
+                                                       <targetPlatform id="org.eclipse.cdt.build.core.prefbase.toolchain.1667980868.2108168132" name=""/>
+                                                       <builder autoBuildTarget="" command="&quot;${NDKROOT}/ndk-build.cmd&quot;" enableAutoBuild="true" enableCleanBuild="false" id="org.eclipse.cdt.build.core.settings.default.builder.328915772" incrementalBuildTarget="" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="org.eclipse.cdt.build.core.settings.default.builder"/>
+                                                       <tool id="org.eclipse.cdt.build.core.settings.holder.libs.630148311" name="holder for library settings" superClass="org.eclipse.cdt.build.core.settings.holder.libs"/>
+                                                       <tool id="org.eclipse.cdt.build.core.settings.holder.525090327" name="Assembly" superClass="org.eclipse.cdt.build.core.settings.holder">
+                                                               <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.1491216279" languageId="org.eclipse.cdt.core.assembly" languageName="Assembly" sourceContentType="org.eclipse.cdt.core.asmSource" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>
+                                                       </tool>
+                                                       <tool id="org.eclipse.cdt.build.core.settings.holder.1242729366" name="GNU C++" superClass="org.eclipse.cdt.build.core.settings.holder">
+                                                               <option id="org.eclipse.cdt.build.core.settings.holder.incpaths.881377735" name="Include Paths" superClass="org.eclipse.cdt.build.core.settings.holder.incpaths" valueType="includePath">
+                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/platforms/android-9/arch-arm/usr/include&quot;"/>
+                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/include&quot;"/>
+                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include&quot;"/>
+                                                                       <listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../../sdk/native/jni/include&quot;"/>
+                                                                       <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/android/native_app_glue&quot;"/>
+                                                               </option>
+                                                               <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.273216997" languageId="org.eclipse.cdt.core.g++" languageName="GNU C++" sourceContentType="org.eclipse.cdt.core.cxxSource,org.eclipse.cdt.core.cxxHeader" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>
+                                                       </tool>
+                                                       <tool id="org.eclipse.cdt.build.core.settings.holder.1779128177" name="GNU C" superClass="org.eclipse.cdt.build.core.settings.holder">
+                                                               <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.1778510041" languageId="org.eclipse.cdt.core.gcc" languageName="GNU C" sourceContentType="org.eclipse.cdt.core.cSource,org.eclipse.cdt.core.cHeader" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>
+                                                       </tool>
+                                               </toolChain>
+                                       </folderInfo>
+                               </configuration>
+                       </storageModule>
+                       <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+               </cconfiguration>
+       </storageModule>
+       <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+               <project id="CvNativeActivity.null.708321898" name="CvNativeActivity"/>
+       </storageModule>
+       <storageModule moduleId="scannerConfiguration">
+               <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+               <scannerConfigBuildInfo instanceId="0.882924228">
+                       <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+               </scannerConfigBuildInfo>
+       </storageModule>
+       <storageModule moduleId="refreshScope" versionNumber="1">
+               <resource resourceType="PROJECT" workspacePath="/CvNativeActivity"/>
+       </storageModule>
+       <storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
+</cproject>
index cf0823c..c20be83 100644 (file)
@@ -6,6 +6,64 @@
        </projects>
        <buildSpec>
                <buildCommand>
+                       <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+                       <triggers>auto,full,incremental,</triggers>
+                       <arguments>
+                               <dictionary>
+                                       <key>?name?</key>
+                                       <value></value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.append_environment</key>
+                                       <value>true</value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+                                       <value></value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.buildArguments</key>
+                                       <value></value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.buildCommand</key>
+                                       <value>&quot;${NDKROOT}/ndk-build.cmd&quot;</value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+                                       <value>clean</value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.contents</key>
+                                       <value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+                                       <value>true</value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+                                       <value>false</value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.enableFullBuild</key>
+                                       <value>true</value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+                                       <value></value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.stopOnError</key>
+                                       <value>true</value>
+                               </dictionary>
+                               <dictionary>
+                                       <key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+                                       <value>false</value>
+                               </dictionary>
+                       </arguments>
+               </buildCommand>
+               <buildCommand>
                        <name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
                        <arguments>
                        </arguments>
                        <arguments>
                        </arguments>
                </buildCommand>
+               <buildCommand>
+                       <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+                       <triggers>full,incremental,</triggers>
+                       <arguments>
+                       </arguments>
+               </buildCommand>
        </buildSpec>
        <natures>
                <nature>com.android.ide.eclipse.adt.AndroidNature</nature>
                <nature>org.eclipse.jdt.core.javanature</nature>
+               <nature>org.eclipse.cdt.core.cnature</nature>
+               <nature>org.eclipse.cdt.core.ccnature</nature>
+               <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+               <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
        </natures>
 </projectDescription>
index 66bc006..5cfb3a9 100644 (file)
@@ -9,7 +9,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
-#include <float.h>
 #include <queue>
 
 #include <opencv2/core/core.hpp>
@@ -60,7 +59,7 @@ static cv::Size calc_optimal_camera_resolution(const char* supported, int width,
             }
         }
 
-        idx++; // to skip coma symbol
+        idx++; // to skip comma symbol
 
     } while(supported[idx-1] != '\0');
 
@@ -86,9 +85,9 @@ static void engine_draw_frame(Engine* engine, const cv::Mat& frame)
 
     for (int yy = top_indent; yy < std::min(frame.rows+top_indent, buffer.height); yy++)
     {
-        unsigned char* line = (unsigned char*)pixels;
-        memcpy(line+left_indent*4*sizeof(unsigned char), frame.ptr<unsigned char>(yy),
-               std::min(frame.cols, buffer.width)*4*sizeof(unsigned char));
+        unsigned char* line = (unsigned char*)pixels + left_indent*4*sizeof(unsigned char);
+        size_t line_size = std::min(frame.cols, buffer.width)*4*sizeof(unsigned char);
+        memcpy(line, frame.ptr<unsigned char>(yy), line_size);
         // go to next line
         pixels = (int32_t*)pixels + buffer.stride;
     }
@@ -139,7 +138,7 @@ static void engine_handle_cmd(android_app* app, int32_t cmd)
                     return;
                 }
 
-                LOGI("Camera initialized at resoution %dx%d", camera_resolution.width, camera_resolution.height);
+                LOGI("Camera initialized at resolution %dx%d", camera_resolution.width, camera_resolution.height);
             }
             break;
         case APP_CMD_TERM_WINDOW:
@@ -157,7 +156,8 @@ void android_main(android_app* app)
     // Make sure glue isn't stripped.
     app_dummy();
 
-    memset(&engine, 0, sizeof(engine));
+    size_t engine_size = sizeof(engine); // for Eclipse CDT parser
+    memset((void*)&engine, 0, engine_size);
     app->userData = &engine;
     app->onAppCmd = engine_handle_cmd;
     engine.app = app;
index f4cde9b..ead7fd7 100644 (file)
@@ -70,7 +70,7 @@ int main( int argc, char** argv )
   std::vector< DMatch > good_matches;
 
   for( int i = 0; i < descriptors_1.rows; i++ )
-  { if( matches[i].distance < 2*min_dist )
+  { if( matches[i].distance <= 2*min_dist )
     { good_matches.push_back( matches[i]); }
   }
 
index a77d336..6963e75 100644 (file)
@@ -1,15 +1,10 @@
 #include <iostream>
 #include <string>
 
-#include "opencv2/opencv_modules.hpp"
 #include "opencv2/core/core.hpp"
 #include "opencv2/gpu/gpu.hpp"
 #include "opencv2/highgui/highgui.hpp"
 
-#ifdef HAVE_OPENCV_NONFREE
-#include "opencv2/nonfree/gpu.hpp"
-#endif
-
 using namespace std;
 using namespace cv;
 using namespace cv::gpu;
@@ -19,9 +14,6 @@ enum Method
     FGD_STAT,
     MOG,
     MOG2,
-#ifdef HAVE_OPENCV_NONFREE
-    VIBE,
-#endif
     GMG
 };
 
@@ -30,7 +22,7 @@ int main(int argc, const char** argv)
     cv::CommandLineParser cmd(argc, argv,
         "{ c | camera | false       | use camera }"
         "{ f | file   | 768x576.avi | input video file }"
-        "{ m | method | mog         | method (fgd, mog, mog2, vibe, gmg) }"
+        "{ m | method | mog         | method (fgd, mog, mog2, gmg) }"
         "{ h | help   | false       | print help message }");
 
     if (cmd.get<bool>("help"))
@@ -48,9 +40,6 @@ int main(int argc, const char** argv)
     if (method != "fgd"
         && method != "mog"
         && method != "mog2"
-    #ifdef HAVE_OPENCV_NONFREE
-        && method != "vibe"
-    #endif
         && method != "gmg")
     {
         cerr << "Incorrect method" << endl;
@@ -60,9 +49,6 @@ int main(int argc, const char** argv)
     Method m = method == "fgd" ? FGD_STAT :
                method == "mog" ? MOG :
                method == "mog2" ? MOG2 :
-            #ifdef HAVE_OPENCV_NONFREE
-               method == "vibe" ? VIBE :
-            #endif
                                   GMG;
 
     VideoCapture cap;
@@ -86,9 +72,6 @@ int main(int argc, const char** argv)
     FGDStatModel fgd_stat;
     MOG_GPU mog;
     MOG2_GPU mog2;
-#ifdef HAVE_OPENCV_NONFREE
-    VIBE_GPU vibe;
-#endif
     GMG_GPU gmg;
     gmg.numInitializationFrames = 40;
 
@@ -114,12 +97,6 @@ int main(int argc, const char** argv)
         mog2(d_frame, d_fgmask);
         break;
 
-#ifdef HAVE_OPENCV_NONFREE
-    case VIBE:
-        vibe.initialize(d_frame);
-        break;
-#endif
-
     case GMG:
         gmg.initialize(d_frame.size());
         break;
@@ -128,11 +105,7 @@ int main(int argc, const char** argv)
     namedWindow("image", WINDOW_NORMAL);
     namedWindow("foreground mask", WINDOW_NORMAL);
     namedWindow("foreground image", WINDOW_NORMAL);
-    if (m != GMG
-    #ifdef HAVE_OPENCV_NONFREE
-        && m != VIBE
-    #endif
-        )
+    if (m != GMG)
     {
         namedWindow("mean background image", WINDOW_NORMAL);
     }
@@ -165,12 +138,6 @@ int main(int argc, const char** argv)
             mog2.getBackgroundImage(d_bgimg);
             break;
 
-#ifdef HAVE_OPENCV_NONFREE
-        case VIBE:
-            vibe(d_frame, d_fgmask);
-            break;
-#endif
-
         case GMG:
             gmg(d_frame, d_fgmask);
             break;
index 99c95ab..98195b3 100644 (file)
@@ -17,12 +17,21 @@ using namespace std;
 using namespace cv;
 
 
-#if !defined(HAVE_CUDA)
+#if !defined(HAVE_CUDA) || defined(__arm__)
+
 int main( int, const char** )
 {
-    cout << "Please compile the library with CUDA support" << endl;
-    return -1;
+#if !defined(HAVE_CUDA)
+    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true)." << std::endl;
+#endif
+
+#if defined(__arm__)
+    std::cout << "Unsupported for ARM CUDA library." << std::endl;
+#endif
+
+    return 0;
 }
+
 #else
 
 
index 2d743f0..c829830 100644 (file)
@@ -11,7 +11,7 @@
 #include "opencv2/core/core.hpp"
 #include "opencv2/gpu/gpu.hpp"
 
-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)
+#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
 
 int main()
 {
@@ -23,6 +23,10 @@ int main()
     std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
 #endif
 
+#if defined(__arm__)
+    std::cout << "Unsupported for ARM CUDA library." << std::endl;
+#endif
+
     return 0;
 }
 
index 10c3974..d4d0af4 100644 (file)
@@ -13,7 +13,7 @@
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/gpu/gpu.hpp"
 
-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)
+#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
 
 int main()
 {
@@ -25,6 +25,10 @@ int main()
     std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
 #endif
 
+#if defined(__arm__)
+    std::cout << "Unsupported for ARM CUDA library." << std::endl;
+#endif
+
     return 0;
 }
 
diff --git a/samples/ocl/aloe-L.png b/samples/ocl/aloe-L.png
deleted file mode 100644 (file)
index 4758766..0000000
Binary files a/samples/ocl/aloe-L.png and /dev/null differ
diff --git a/samples/ocl/aloe-R.png b/samples/ocl/aloe-R.png
deleted file mode 100644 (file)
index 5d11c57..0000000
Binary files a/samples/ocl/aloe-R.png and /dev/null differ
diff --git a/samples/ocl/aloe-disp.png b/samples/ocl/aloe-disp.png
deleted file mode 100644 (file)
index dd4a499..0000000
Binary files a/samples/ocl/aloe-disp.png and /dev/null differ
index ec79339..684c2d9 100644 (file)
@@ -1,5 +1,3 @@
-//This sample is inherited from facedetect.cpp in smaple/c
-
 #include "opencv2/objdetect/objdetect.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -9,78 +7,84 @@
 
 using namespace std;
 using namespace cv;
+#define LOOP_NUM 10 
+
+const static Scalar colors[] =  { CV_RGB(0,0,255),
+        CV_RGB(0,128,255),
+        CV_RGB(0,255,255),
+        CV_RGB(0,255,0),
+        CV_RGB(255,128,0),
+        CV_RGB(255,255,0),
+        CV_RGB(255,0,0),
+        CV_RGB(255,0,255)} ;
 
-static void help()
+int64 work_begin = 0;
+int64 work_end = 0;
+
+static void workBegin() 
+{ 
+    work_begin = getTickCount();
+}
+static void workEnd()
 {
-    cout << "\nThis program demonstrates the cascade recognizer.\n"
-        "This classifier can recognize many ~rigid objects, it's most known use is for faces.\n"
-        "Usage:\n"
-        "./facedetect [--cascade=<cascade_path> this is the primary trained classifier such as frontal face]\n"
-        "   [--scale=<image scale greater or equal to 1, try 1.3 for example>\n"
-        "   [filename|camera_index]\n\n"
-        "see facedetect.cmd for one call:\n"
-        "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --scale=1.3 \n"
-        "Hit any key to quit.\n"
-        "Using OpenCV version " << CV_VERSION << "\n" << endl;
+    work_end += (getTickCount() - work_begin);
 }
-struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
-void detectAndDraw( Mat& img,
-    cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier& nestedCascade,
-    double scale);
+static double getTime(){
+    return work_end /((double)cvGetTickFrequency() * 1000.);
+}
+
+void detect( Mat& img, vector<Rect>& faces, 
+    cv::ocl::OclCascadeClassifierBuf& cascade, 
+    double scale, bool calTime);
 
-String cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
+void detectCPU( Mat& img, vector<Rect>& faces, 
+    CascadeClassifier& cascade, 
+    double scale, bool calTime);
+
+void Draw(Mat& img, vector<Rect>& faces, double scale);
+
+// This function test if gpu_rst matches cpu_rst.
+// If the two vectors are not equal, it will return the difference in vector size
+// Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
+double checkRectSimilarity(Size sz, std::vector<Rect>& cpu_rst, std::vector<Rect>& gpu_rst);
 
 int main( int argc, const char** argv )
 {
-    CvCapture* capture = 0;
-    Mat frame, frameCopy, image;
-    const String scaleOpt = "--scale=";
-    size_t scaleOptLen = scaleOpt.length();
-    const String cascadeOpt = "--cascade=";
-    size_t cascadeOptLen = cascadeOpt.length();
-    String inputName;
-
-    help();
-    cv::ocl::OclCascadeClassifier cascade;
-    CascadeClassifier  nestedCascade;
-    double scale = 1;
-
-    for( int i = 1; i < argc; i++ )
+    const char* keys =
+        "{ h | help       | false       | print help message }"
+        "{ i | input      |             | specify input image }"
+        "{ t | template   | ../../../data/haarcascades/haarcascade_frontalface_alt.xml  | specify template file }"
+        "{ c | scale      |   1.0       | scale image }"
+        "{ s | use_cpu    | false       | use cpu or gpu to process the image }";
+
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.get<bool>("help"))
     {
-        cout << "Processing " << i << " " <<  argv[i] << endl;
-        if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 )
-        {
-            cascadeName.assign( argv[i] + cascadeOptLen );
-            cout << "  from which we have cascadeName= " << cascadeName << endl;
-        }
-        else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 )
-        {
-            if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 )
-                scale = 1;
-            cout << " from which we read scale = " << scale << endl;
-        }
-        else if( argv[i][0] == '-' )
-        {
-            cerr << "WARNING: Unknown option %s" << argv[i] << endl;
-        }
-        else
-            inputName.assign( argv[i] );
+        cout << "Avaible options:" << endl;
+        cmd.printParams();
+        return 0;
     }
+    CvCapture* capture = 0;
+    Mat frame, frameCopy, image;
 
-    if( !cascade.load( cascadeName ) )
+    bool useCPU = cmd.get<bool>("s");
+    string inputName = cmd.get<string>("i");
+    string cascadeName = cmd.get<string>("t");
+    double scale = cmd.get<double>("c");
+    cv::ocl::OclCascadeClassifierBuf cascade;
+    CascadeClassifier  cpu_cascade;
+
+    if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
     {
         cerr << "ERROR: Could not load classifier cascade" << endl;
-        cerr << "Usage: facedetect [--cascade=<cascade_path>]\n"
-            "   [--scale[=<image scale>\n"
-            "   [filename|camera_index]\n" << endl ;
         return -1;
     }
 
-    if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
+    if( inputName.empty() )
     {
-        capture = cvCaptureFromCAM( inputName.empty() ? 0 : inputName.c_str()[0] - '0' );
-        int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0' ;
-        if(!capture) cout << "Capture from CAM " <<  c << " didn't work" << endl;
+        capture = cvCaptureFromCAM(0);
+        if(!capture) 
+            cout << "Capture from CAM 0 didn't work" << endl;
     }
     else if( inputName.size() )
     {
@@ -88,26 +92,30 @@ int main( int argc, const char** argv )
         if( image.empty() )
         {
             capture = cvCaptureFromAVI( inputName.c_str() );
-            if(!capture) cout << "Capture from AVI didn't work" << endl;
+            if(!capture) 
+                cout << "Capture from AVI didn't work" << endl;
+            return -1;
         }
     }
     else
     {
         image = imread( "lena.jpg", 1 );
-        if(image.empty()) cout << "Couldn't read lena.jpg" << endl;
+        if(image.empty()) 
+            cout << "Couldn't read lena.jpg" << endl;
+        return -1;
     }
 
     cvNamedWindow( "result", 1 );
     std::vector<cv::ocl::Info> oclinfo;
     int devnums = cv::ocl::getDevice(oclinfo);
-    if(devnums<1)
+    if( devnums < 1 )
     {
         std::cout << "no device found\n";
         return -1;
     }
     //if you want to use undefault device, set it here
     //setDevice(oclinfo[0]);
-    //setBinpath(CLBINPATH);
+    ocl::setBinpath("./");
     if( capture )
     {
         cout << "In capture ..." << endl;
@@ -115,15 +123,20 @@ int main( int argc, const char** argv )
         {
             IplImage* iplImg = cvQueryFrame( capture );
             frame = iplImg;
+            vector<Rect> faces;
             if( frame.empty() )
                 break;
             if( iplImg->origin == IPL_ORIGIN_TL )
                 frame.copyTo( frameCopy );
             else
                 flip( frame, frameCopy, 0 );
-
-            detectAndDraw( frameCopy, cascade, nestedCascade, scale );
-
+            if(useCPU){
+                detectCPU(frameCopy, faces, cpu_cascade, scale, false);
+            }
+            else{
+                detect(frameCopy, faces, cascade, scale, false);     
+            }
+            Draw(frameCopy, faces, scale);
             if( waitKey( 10 ) >= 0 )
                 goto _cleanup_;
         }
@@ -136,42 +149,34 @@ _cleanup_:
     else
     {
         cout << "In image read" << endl;
-        if( !image.empty() )
-        {
-            detectAndDraw( image, cascade, nestedCascade, scale );
-            waitKey(0);
-        }
-        else if( !inputName.empty() )
+        vector<Rect> faces;
+        vector<Rect> ref_rst;
+        double accuracy = 0.;
+        for(int i = 0; i <= LOOP_NUM;i ++) 
         {
-            /* assume it is a text file containing the
-            list of the image filenames to be processed - one per line */
-            FILE* f = fopen( inputName.c_str(), "rt" );
-            if( f )
+            cout << "loop" << i << endl;
+            if(useCPU){
+                detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);  
+            }
+            else{
+                detect(image, faces, cascade, scale, i==0?false:true);
+                if(i == 0){
+                    detectCPU(image, ref_rst, cpu_cascade, scale, false);
+                    accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
+                }                    
+            }
+            if (i == LOOP_NUM)
             {
-                char buf[1000+1];
-                while( fgets( buf, 1000, f ) )
-                {
-                    int len = (int)strlen(buf), c;
-                    while( len > 0 && isspace(buf[len-1]) )
-                        len--;
-                    buf[len] = '\0';
-                    cout << "file " << buf << endl;
-                    image = imread( buf, 1 );
-                    if( !image.empty() )
-                    {
-                        detectAndDraw( image, cascade, nestedCascade, scale );
-                        c = waitKey(0);
-                        if( c == 27 || c == 'q' || c == 'Q' )
-                            break;
-                    }
-                    else
-                    {
-                        cerr << "Aw snap, couldn't read image " << buf << endl;
-                    }
-                }
-                fclose(f);
+                if (useCPU)
+                    cout << "average CPU time (noCamera) : ";
+                else
+                    cout << "average GPU time (noCamera) : ";
+                cout << getTime() / LOOP_NUM << " ms" << endl;
+                cout << "accuracy value: " << accuracy <<endl;
             }
         }
+        Draw(image, faces, scale);
+        waitKey(0);
     }
 
     cvDestroyWindow("result");
@@ -179,44 +184,44 @@ _cleanup_:
     return 0;
 }
 
-void detectAndDraw( Mat& img,
-    cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier&,
-    double scale)
+void detect( Mat& img, vector<Rect>& faces, 
+    cv::ocl::OclCascadeClassifierBuf& cascade, 
+    double scale, bool calTime)
 {
-    int i = 0;
-    double t = 0;
-    vector<Rect> faces;
-    const static Scalar colors[] =  { CV_RGB(0,0,255),
-        CV_RGB(0,128,255),
-        CV_RGB(0,255,255),
-        CV_RGB(0,255,0),
-        CV_RGB(255,128,0),
-        CV_RGB(255,255,0),
-        CV_RGB(255,0,0),
-        CV_RGB(255,0,255)} ;
     cv::ocl::oclMat image(img);
     cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
-
+    if(calTime) workBegin();
     cv::ocl::cvtColor( image, gray, CV_BGR2GRAY );
     cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
     cv::ocl::equalizeHist( smallImg, smallImg );
 
-    CvSeq* _objects;
-    MemStorage storage(cvCreateMemStorage(0));
-    t = (double)cvGetTickCount();
-    _objects = cascade.oclHaarDetectObjects( smallImg, storage, 1.1,
+    cascade.detectMultiScale( smallImg, faces, 1.1,
         3, 0
         |CV_HAAR_SCALE_IMAGE
         , Size(30,30), Size(0, 0) );
-    vector<CvAvgComp> vecAvgComp;
-    Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
-    faces.resize(vecAvgComp.size());
-    std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
-    t = (double)cvGetTickCount() - t;
-    printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
+    if(calTime) workEnd();
+}
+
+void detectCPU( Mat& img, vector<Rect>& faces, 
+    CascadeClassifier& cascade, 
+    double scale, bool calTime)
+{
+    if(calTime) workBegin();
+    Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+    cvtColor(img, cpu_gray, CV_BGR2GRAY);
+    resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR);
+    equalizeHist(cpu_smallImg, cpu_smallImg);
+    cascade.detectMultiScale(cpu_smallImg, faces, 1.1,
+        3, 0 | CV_HAAR_SCALE_IMAGE,
+        Size(30, 30), Size(0, 0));
+    if(calTime) workEnd(); 
+}
+
+void Draw(Mat& img, vector<Rect>& faces, double scale)
+{
+    int i = 0;
     for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
     {
-        Mat smallImgROI;
         Point center;
         Scalar color = colors[i%8];
         int radius;
@@ -227,3 +232,42 @@ void detectAndDraw( Mat& img,
     }
     cv::imshow( "result", img );
 }
+
+double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
+{
+    double final_test_result = 0.0;
+    size_t sz1 = ob1.size();
+    size_t sz2 = ob2.size();
+
+    if(sz1 != sz2)
+        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+    else
+    {
+        cv::Mat cpu_result(sz, CV_8UC1);
+        cpu_result.setTo(0);
+
+        for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
+        {      
+            cv::Mat cpu_result_roi(cpu_result, *r);
+            cpu_result_roi.setTo(1);
+            cpu_result.copyTo(cpu_result);
+        }
+        int cpu_area = cv::countNonZero(cpu_result > 0);
+
+        cv::Mat gpu_result(sz, CV_8UC1);
+        gpu_result.setTo(0);
+        for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
+        {
+            cv::Mat gpu_result_roi(gpu_result, *r2);
+            gpu_result_roi.setTo(1);
+            gpu_result.copyTo(gpu_result);
+        }
+
+        cv::Mat result_;
+        multiply(cpu_result, gpu_result, result_);
+        int result = cv::countNonZero(result_ > 0);
+
+        final_test_result = 1.0 - (double)result/(double)cpu_area;
+    }
+    return final_test_result;
+}
index 76b6d28..28be6fa 100644 (file)
@@ -45,7 +45,6 @@ public:
     bool gamma_corr;
 };
 
-
 class App
 {
 public:
@@ -64,6 +63,13 @@ public:
 
     string message() const;
 
+// This function test if gpu_rst matches cpu_rst.
+// If the two vectors are not equal, it will return the difference in vector size
+// Else if will return 
+// (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
+    double checkRectSimilarity(Size sz, 
+                               std::vector<Rect>& cpu_rst, 
+                               std::vector<Rect>& gpu_rst);
 private:
     App operator=(App&);
 
@@ -290,6 +296,7 @@ void App::run()
         ocl::oclMat gpu_img;
 
         // Iterate over all frames
+        bool verify = false;
         while (running && !frame.empty())
         {
             workBegin();
@@ -316,7 +323,18 @@ void App::run()
                 gpu_img.upload(img);
                 gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
                                          Size(0, 0), scale, gr_threshold);
-            }
+                if (!verify)
+                {
+                    // verify if GPU output same objects with CPU at 1st run
+                    verify = true;
+                    vector<Rect> ref_rst;
+                    cvtColor(img, img, CV_BGRA2BGR);
+                    cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride,
+                                              Size(0, 0), scale, gr_threshold-2);
+                    double accuracy = checkRectSimilarity(img.size(), ref_rst, found);
+                    cout << "\naccuracy value: " << accuracy << endl;           
+                } 
+           }
             else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
                                           Size(0, 0), scale, gr_threshold);
             hogWorkEnd();
@@ -457,3 +475,45 @@ inline string App::workFps() const
     return ss.str();
 }
 
+double App::checkRectSimilarity(Size sz, 
+                                std::vector<Rect>& ob1, 
+                                std::vector<Rect>& ob2)
+{
+    double final_test_result = 0.0;
+    size_t sz1 = ob1.size();
+    size_t sz2 = ob2.size();
+
+    if(sz1 != sz2)
+        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+    else
+    {
+        cv::Mat cpu_result(sz, CV_8UC1);
+        cpu_result.setTo(0);
+
+        for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
+        {      
+            cv::Mat cpu_result_roi(cpu_result, *r);
+            cpu_result_roi.setTo(1);
+            cpu_result.copyTo(cpu_result);
+        }
+        int cpu_area = cv::countNonZero(cpu_result > 0);
+
+        cv::Mat gpu_result(sz, CV_8UC1);
+        gpu_result.setTo(0);
+        for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
+        {
+            cv::Mat gpu_result_roi(gpu_result, *r2);
+            gpu_result_roi.setTo(1);
+            gpu_result.copyTo(gpu_result);
+        }
+
+        cv::Mat result_;
+        multiply(cpu_result, gpu_result, result_);
+        int result = cv::countNonZero(result_ > 0);
+
+        final_test_result = 1.0 - (double)result/(double)cpu_area;
+    }
+    return final_test_result;
+
+}
+
diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp
new file mode 100644 (file)
index 0000000..cc8d886
--- /dev/null
@@ -0,0 +1,286 @@
+#include <iostream>
+#include <vector>
+#include <iomanip>
+
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/video/video.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+typedef unsigned char uchar;
+
+// Number of timed iterations in the still-image benchmark loop.
+#define LOOP_NUM 10
+
+// Tick count at the start of the section currently being timed.
+int64 work_begin = 0;
+// Ticks accumulated over all timed sections so far.
+int64 work_end = 0;
+
+// Marks the start of a timed section.
+static void workBegin()
+{
+    work_begin = getTickCount();
+}
+
+// Closes the current timed section and adds its duration to the running total.
+static void workEnd()
+{
+    const int64 elapsed = getTickCount() - work_begin;
+    work_end += elapsed;
+}
+
+// Returns the accumulated time of all timed sections in milliseconds.
+static double getTime()
+{
+    const double total_ms_ticks = work_end * 1000.;
+    return total_ms_ticks / getTickFrequency();
+}
+
+// Copies a device matrix of 2D points into a host vector.
+//
+// d_mat - device matrix; one point per column
+//         (NOTE(review): presumably a single-row CV_32FC2 matrix - confirm against callers)
+// vec   - receives d_mat.cols points; left empty when d_mat has no columns
+static void download(const oclMat& d_mat, vector<Point2f>& vec)
+{
+    vec.clear();
+    vec.resize(d_mat.cols);
+    // &vec[0] on an empty vector is undefined, and there is nothing to copy anyway.
+    if (vec.empty())
+        return;
+
+    // Wrap the vector's storage in a Mat header so the download writes straight into it.
+    Mat mat(1, d_mat.cols, CV_32FC2, (void*)&vec[0]);
+    d_mat.download(mat);
+}
+
+// Copies a device matrix of status bytes into a host vector.
+//
+// d_mat - device matrix; one byte per column
+//         (NOTE(review): presumably a single-row CV_8UC1 matrix - confirm against callers)
+// vec   - receives d_mat.cols bytes; left empty when d_mat has no columns
+static void download(const oclMat& d_mat, vector<uchar>& vec)
+{
+    vec.clear();
+    vec.resize(d_mat.cols);
+    // &vec[0] on an empty vector is undefined, and there is nothing to copy anyway.
+    if (vec.empty())
+        return;
+
+    // Wrap the vector's storage in a Mat header so the download writes straight into it.
+    Mat mat(1, d_mat.cols, CV_8UC1, (void*)&vec[0]);
+    d_mat.download(mat);
+}
+
+// Draws one arrow per successfully tracked point onto frame.
+//
+// frame      - image to draw on (modified in place)
+// prevPts    - point positions in the previous frame
+// nextPts    - matching positions in the next frame
+// status     - non-zero where the corresponding pair is valid
+// line_color - colour used for the arrows
+//
+// Only indices present in all three vectors are drawn; pairs that moved by
+// less than one pixel are skipped.
+static void drawArrows(Mat& frame, const vector<Point2f>& prevPts, const vector<Point2f>& nextPts, const vector<uchar>& status, Scalar line_color = Scalar(0, 0, 255))
+{
+    const int line_thickness = 1;
+
+    // The three vectors are indexed together, so never run past the shortest one.
+    const size_t count = std::min(prevPts.size(), std::min(nextPts.size(), status.size()));
+
+    for (size_t i = 0; i < count; ++i)
+    {
+        if (!status[i])
+            continue;
+
+        Point p = prevPts[i];
+        Point q = nextPts[i];
+
+        double angle = atan2((double) p.y - q.y, (double) p.x - q.x);
+
+        double hypotenuse = sqrt( (double)(p.y - q.y)*(p.y - q.y) + (double)(p.x - q.x)*(p.x - q.x) );
+
+        if (hypotenuse < 1.0)
+            continue;
+
+        // Here we lengthen the arrow by a factor of three.
+        q.x = (int) (p.x - 3 * hypotenuse * cos(angle));
+        q.y = (int) (p.y - 3 * hypotenuse * sin(angle));
+
+        // Now we draw the main line of the arrow.
+        line(frame, p, q, line_color, line_thickness);
+
+        // Now draw the tips of the arrow: two fixed-length strokes from the
+        // head, 45 degrees to either side of the shaft.
+        p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4));
+        p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4));
+        line(frame, p, q, line_color, line_thickness);
+
+        p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4));
+        p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4));
+        line(frame, p, q, line_color, line_thickness);
+    }
+}
+
+
+// Sparse pyramidal Lucas-Kanade optical flow sample.
+//
+// Works either on a pair of still images (--left / --right), which are processed
+// LOOP_NUM times to report an average run time, or on a camera / video stream
+// where flow is computed between consecutive frames. The stream is used when
+// capturing is requested or when the still images cannot be loaded.
+//
+// Returns 0 on success and -1 when no OpenCL device or no usable input exists.
+int main(int argc, const char* argv[])
+{
+    static std::vector<Info> ocl_info;
+    // Without at least one device, ocl_info[0] below would be out of range.
+    if (ocl::getDevice(ocl_info) == 0 || ocl_info.empty())
+    {
+        cout << "Error: Did not find a valid OpenCL device!" << endl;
+        return -1;
+    }
+    //if you want to use a non-default device, set it here
+    setDevice(ocl_info[0]);
+
+    //set this to save kernel compile time from second time you run
+    ocl::setBinpath("./");
+    const char* keys =
+        "{ h            | help           | false | print help message }"
+        "{ l            | left           |       | specify left image }"
+        "{ r            | right          |       | specify right image }"
+        "{ c            | camera         | 0     | enable camera capturing }"
+        "{ s            | use_cpu        | false | use cpu or gpu to process the image }"
+        "{ v            | video          |       | use video as input }"
+        "{ points       | points         | 1000  | specify points count [GoodFeatureToTrack] }"
+        "{ min_dist     | min_dist       | 0     | specify minimal distance between points [GoodFeatureToTrack] }";
+
+    CommandLineParser cmd(argc, argv, keys);
+
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Usage: pyrlk_optical_flow [options]" << endl;
+        cout << "Avaible options:" << endl;
+        cmd.printParams();
+        return 0;
+    }
+
+    bool defaultPicturesFail = false;
+    string fname0 = cmd.get<string>("left");
+    string fname1 = cmd.get<string>("right");
+    string vdofile = cmd.get<string>("video");
+    int points = cmd.get<int>("points");
+    double minDist = cmd.get<double>("min_dist");
+    bool useCPU = cmd.get<bool>("s");
+    bool useCamera = cmd.get<bool>("c");
+    int inputName = cmd.get<int>("c");
+
+    oclMat d_nextPts, d_status;
+    // NOTE(review): --min_dist is not forwarded to the OpenCL detector here - confirm whether it should be.
+    GoodFeaturesToTrackDetector_OCL d_features(points);
+    Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
+    Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
+    PyrLKOpticalFlow d_pyrLK;
+    vector<cv::Point2f> pts(points);
+    vector<cv::Point2f> nextPts(points);
+    vector<unsigned char> status(points);
+    vector<float> err;
+
+    if (frame0.empty() || frame1.empty())
+    {
+        // No usable still images: the only remaining input is the camera or the
+        // video file. The capture is opened exactly once, in the block below,
+        // which also reports the failure if neither source is available.
+        useCamera = true;
+        defaultPicturesFail = true;
+    }
+
+    cout << "Points count : " << points << endl << endl;
+
+    if (useCamera)
+    {
+        CvCapture* capture = 0;
+        Mat frame, frameCopy;
+        Mat frame0Gray, frame1Gray;
+        Mat ptr0, ptr1;
+
+        if(vdofile == "")
+            capture = cvCaptureFromCAM( inputName );
+        else
+            capture = cvCreateFileCapture(vdofile.c_str());
+
+        if(!capture)
+        {
+            if(vdofile == "")
+                cout << "Capture from CAM " << inputName << " didn't work" << endl;
+            else
+                cout << "Capture from file " << vdofile << " failed" <<endl;
+            if (defaultPicturesFail)
+            {
+                // Neither still images nor a stream: nothing left to process.
+                cout << "Can't load input images" << endl;
+                return -1;
+            }
+            // The still images did load, so fall back to processing them.
+            goto nocamera;
+        }
+
+        cout << "In capture ..." << endl;
+        for(int i = 0;; i++)
+        {
+            frame = cvQueryFrame( capture );
+            if( frame.empty() )
+                break;
+
+            if (i == 0)
+            {
+                // First frame: nothing to compare against yet.
+                frame.copyTo( frame0 );
+                cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+            }
+            else
+            {
+                // frame0 / frame1 alternate as the newest frame, so ptr0 always
+                // refers to the previous frame and ptr1 to the current one.
+                if (i%2 == 1)
+                {
+                    frame.copyTo(frame1);
+                    cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame0Gray;
+                    ptr1 = frame1Gray;
+                }
+                else
+                {
+                    frame.copyTo(frame0);
+                    cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame1Gray;
+                    ptr1 = frame0Gray;
+                }
+
+                if (useCPU)
+                {
+                    pts.clear();
+                    // Honour --min_dist here as the still-image path does.
+                    goodFeaturesToTrack(ptr0, pts, points, 0.01, minDist);
+                    if(pts.size() == 0)
+                        continue;
+                    calcOpticalFlowPyrLK(ptr0, ptr1, pts, nextPts, status, err);
+                }
+                else
+                {
+                    oclMat d_img(ptr0), d_prevPts;
+                    d_features(d_img, d_prevPts);
+                    if(!d_prevPts.rows || !d_prevPts.cols)
+                        continue;
+                    d_pyrLK.sparse(d_img, oclMat(ptr1), d_prevPts, d_nextPts, d_status);
+                    d_features.downloadPoints(d_prevPts,pts);
+                    download(d_nextPts, nextPts);
+                    download(d_status, status);
+                }
+                if (i%2 == 1)
+                    frame1.copyTo(frameCopy);
+                else
+                    frame0.copyTo(frameCopy);
+                drawArrows(frameCopy, pts, nextPts, status, Scalar(255, 0, 0));
+                imshow("PyrLK [Sparse]", frameCopy);
+            }
+
+            // Any key press stops the capture loop.
+            if( waitKey( 10 ) >= 0 )
+                goto _cleanup_;
+        }
+
+        waitKey(0);
+
+_cleanup_:
+        cvReleaseCapture( &capture );
+    }
+    else
+    {
+nocamera:
+        // Iteration 0 is an untimed warm-up; iterations 1..LOOP_NUM are timed.
+        for(int i = 0; i <= LOOP_NUM;i ++) 
+        {
+            cout << "loop" << i << endl;
+            if (i > 0) workBegin();     
+
+            if (useCPU)
+            {
+                goodFeaturesToTrack(frame0, pts, points, 0.01, minDist);
+                calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
+            }
+            else
+            {
+                oclMat d_img(frame0), d_prevPts;
+                d_features(d_img, d_prevPts);
+                d_pyrLK.sparse(d_img, oclMat(frame1), d_prevPts, d_nextPts, d_status);
+                d_features.downloadPoints(d_prevPts, pts);
+                download(d_nextPts, nextPts);
+                download(d_status, status);
+            }
+
+            if (i > 0)
+                workEnd();
+
+            if (i == LOOP_NUM)
+            {
+                if (useCPU)
+                    cout << "average CPU time (noCamera) : ";
+                else
+                    cout << "average GPU time (noCamera) : ";
+
+                cout << getTime() / LOOP_NUM << " ms" << endl;
+
+                drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
+
+                imshow("PyrLK [Sparse]", frame0);
+            }
+        }
+    }
+
+    waitKey();
+
+    return 0;
+}
diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp
new file mode 100644 (file)
index 0000000..7ac2c9a
--- /dev/null
@@ -0,0 +1,419 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <stdexcept>
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+using namespace cv;
+using namespace std;
+using namespace ocl;
+
+// Set by printHelp(); main() checks it to stop after the usage text was shown.
+bool help_showed = false;
+
+// Command-line settings of the stereo matching sample.
+struct Params
+{
+    Params();
+    // Builds a Params object from the program arguments.
+    static Params read(int argc, char** argv);
+
+    string left;   // path of the left view
+    string right;  // path of the right view
+
+    // Returns the short name of the selected method, or "" for an unknown value.
+    string method_str() const
+    {
+        if (method == BM)
+            return "BM";
+        if (method == BP)
+            return "BP";
+        if (method == CSBP)
+            return "CSBP";
+        return "";
+    }
+    enum {BM, BP, CSBP} method;
+    int ndisp; // Max disparity + 1
+    enum {GPU, CPU} type;
+};
+
+
+// Interactive stereo matching application: owns the images, the three
+// matchers and the timing state shown in the result window.
+struct App
+{
+    App(const Params& p);
+    void run();
+    void handleKey(char key);
+    void printParams() const;
+
+    // Records the tick count at which the timed section starts.
+    void workBegin()
+    {
+        work_begin = getTickCount();
+    }
+
+    // Turns the ticks elapsed since workBegin() into a frames-per-second value.
+    void workEnd()
+    {
+        const int64 elapsed_ticks = getTickCount() - work_begin;
+        const double ticks_per_second = getTickFrequency();
+        work_fps = ticks_per_second / elapsed_ticks;
+    }
+
+    // Returns the overlay text: current method name and measured FPS.
+    string text() const
+    {
+        stringstream out;
+        out << "(" << p.method_str() << ") FPS: ";
+        out << setiosflags(ios::left) << setprecision(4) << work_fps;
+        return out.str();
+    }
+private:
+    Params p;
+    bool running;
+
+    Mat left_src, right_src;   // images as loaded from disk
+    Mat left, right;           // images currently fed to the matcher
+    oclMat d_left, d_right;    // device copies of left / right
+
+    StereoBM_OCL bm;
+    StereoBeliefPropagation bp;
+    StereoConstantSpaceBP csbp;
+
+    int64 work_begin;
+    double work_fps;
+};
+
+// Prints the command-line usage and records that help was shown, so that
+// main() can exit instead of running the application.
+static void printHelp()
+{
+    // The banner printed by App names this sample stereo_match_ocl; use the same name here.
+    cout << "Usage: stereo_match_ocl\n"
+        << "\t--left <left_view> --right <right_view> # must be rectified\n"
+        << "\t--method <stereo_match_method> # BM | BP | CSBP\n"
+        << "\t--ndisp <number> # number of disparity levels\n"
+        << "\t--type <device_type> # cpu | CPU | gpu | GPU\n"
+        << "\t--help # print this message\n";
+    help_showed = true;
+}
+
+// Entry point of the stereo matching sample: parses the arguments, selects an
+// OpenCL device of the requested type and runs the interactive application.
+//
+// Returns 0 on success, 1 when called without arguments and -1 when help was
+// requested or an error occurred.
+int main(int argc, char** argv)
+{
+    try
+    {
+        if (argc < 2)
+        {
+            printHelp();
+            return 1;
+        }
+
+        Params args = Params::read(argc, argv);
+        if (help_showed)
+            return -1;
+
+        // Indexed by Params::type (GPU first, then CPU).
+        int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU };
+        vector<Info> info;
+
+        if(getDevice(info, flags[args.type]) == 0)
+        {
+            throw runtime_error("Error: Did not find a valid OpenCL device!");
+        }
+        cout << "Device name:" << info[0].DeviceName[0] << endl;
+
+        App app(args);
+        app.run();
+    }
+    catch (const exception& e)
+    {
+        cout << "error: " << e.what() << endl;
+        // Report the failure to the caller instead of exiting with success.
+        return -1;
+    }
+    return 0;
+}
+
+
+// Defaults: block matching with 64 disparity levels on a GPU device.
+Params::Params()
+    : method(BM), ndisp(64), type(GPU)
+{
+}
+
+
+// Returns the value that follows the option at argv[i] and advances i onto it.
+// Throws runtime_error when the option is the last argument.
+static const char* optionValue(int argc, char** argv, int& i)
+{
+    if (i + 1 >= argc)
+        throw runtime_error("missing value for key: " + string(argv[i]));
+    return argv[++i];
+}
+
+
+// Parses the command line into a Params object.
+//
+// Recognised keys: --left, --right, --method, --ndisp, --type (each followed by
+// a value) and --help. Keys that are not given keep their default.
+//
+// Throws runtime_error for an unknown key, an unknown method or device type,
+// or a key that is missing its value.
+Params Params::read(int argc, char** argv)
+{
+    Params p;
+
+    for (int i = 1; i < argc; i++)
+    {
+        // Copy the key first: optionValue() moves i on to the value.
+        const string key(argv[i]);
+
+        if (key == "--left") p.left = optionValue(argc, argv, i);
+        else if (key == "--right") p.right = optionValue(argc, argv, i);
+        else if (key == "--method")
+        {
+            const string m(optionValue(argc, argv, i));
+            if (m == "BM") p.method = BM;
+            else if (m == "BP") p.method = BP;
+            else if (m == "CSBP") p.method = CSBP;
+            else throw runtime_error("unknown stereo match method: " + m);
+        }
+        else if (key == "--ndisp") p.ndisp = atoi(optionValue(argc, argv, i));
+        else if (key == "--type")
+        {
+            const string t(optionValue(argc, argv, i));
+            if (t == "cpu" || t == "CPU")
+            {
+                p.type = CPU;
+            }
+            else if (t == "gpu" || t == "GPU")
+            {
+                p.type = GPU;
+            }
+            else throw runtime_error("unknown device type: " + t);
+        }
+        else if (key == "--help") printHelp();
+        else throw runtime_error("unknown key: " + key);
+    }
+
+    return p;
+}
+
+
+// Stores the parameters, puts the timing state into a defined initial state
+// and prints the keyboard controls.
+App::App(const Params& params)
+    : p(params), running(false), work_begin(0), work_fps(0.0)
+{
+    cout << "stereo_match_ocl sample\n";
+    cout << "\nControls:\n"
+        << "\tesc - exit\n"
+        << "\tp - print current parameters\n"
+        << "\tg - convert source images into gray\n"
+        << "\tm - change stereo match method\n"
+        << "\ts - change Sobel prefiltering flag (for BM only)\n"
+        << "\t1/q - increase/decrease maximum disparity\n"
+        << "\t2/w - increase/decrease window size (for BM only)\n"
+        << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
+        << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
+}
+
+
+// Loads the two views, then repeatedly computes and displays the disparity map
+// with the currently selected method until the user quits.
+// Throws runtime_error when one of the images cannot be opened.
+void App::run()
+{
+    // Read both views from disk
+    left_src = imread(p.left);
+    right_src = imread(p.right);
+    if (left_src.empty())
+        throw runtime_error("can't open file \"" + p.left + "\"");
+    if (right_src.empty())
+        throw runtime_error("can't open file \"" + p.right + "\"");
+
+    // Start with gray versions of both views
+    cvtColor(left_src, left, CV_BGR2GRAY);
+    cvtColor(right_src, right, CV_BGR2GRAY);
+
+    d_left.upload(left);
+    d_right.upload(right);
+
+    imshow("left", left);
+    imshow("right", right);
+
+    // All three matchers share the disparity count
+    bm.ndisp = p.ndisp;
+    bp.ndisp = p.ndisp;
+    csbp.ndisp = p.ndisp;
+
+    cout << endl;
+    printParams();
+
+    for (running = true; running; )
+    {
+        Mat disparity;
+        oclMat d_disparity;
+
+        workBegin();
+        if (p.method == Params::BM)
+        {
+            const bool has_color = d_left.channels() > 1 || d_right.channels() > 1;
+            if (has_color)
+            {
+                // Go back to gray input before running block matching
+                cout << "BM doesn't support color images\n";
+                cvtColor(left_src, left, CV_BGR2GRAY);
+                cvtColor(right_src, right, CV_BGR2GRAY);
+                cout << "image_channels: " << left.channels() << endl;
+                d_left.upload(left);
+                d_right.upload(right);
+                imshow("left", left);
+                imshow("right", right);
+            }
+            bm(d_left, d_right, d_disparity);
+        }
+        else if (p.method == Params::BP)
+        {
+            bp(d_left, d_right, d_disparity);
+        }
+        else if (p.method == Params::CSBP)
+        {
+            csbp(d_left, d_right, d_disparity);
+        }
+        ocl::finish();
+        workEnd();
+
+        // Fetch the result and display it with the FPS overlay
+        d_disparity.download(disparity);
+        if (p.method != Params::BM)
+        {
+            disparity.convertTo(disparity, CV_8U);
+        }
+        putText(disparity, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
+        imshow("disparity", disparity);
+
+        const char key = (char)waitKey(3);
+        handleKey(key);
+    }
+}
+
+
+// Prints the image properties and the settings of the active matching method.
+void App::printParams() const
+{
+    cout << "--- Parameters ---\n";
+    cout << "image_size: (" << left.cols << ", " << left.rows << ")\n";
+    cout << "image_channels: " << left.channels() << endl;
+    cout << "method: " << p.method_str() << endl;
+    cout << "ndisp: " << p.ndisp << endl;
+
+    // Method-specific settings
+    if (p.method == Params::BM)
+    {
+        cout << "win_size: " << bm.winSize << endl;
+        cout << "prefilter_sobel: " << bm.preset << endl;
+    }
+    else if (p.method == Params::BP)
+    {
+        cout << "iter_count: " << bp.iters << endl;
+        cout << "level_count: " << bp.levels << endl;
+    }
+    else if (p.method == Params::CSBP)
+    {
+        cout << "iter_count: " << csbp.iters << endl;
+        cout << "level_count: " << csbp.levels << endl;
+    }
+    cout << endl;
+}
+
+
+// Applies one keyboard command (see the control list printed by the
+// constructor) to the application state. Unknown keys are ignored.
+void App::handleKey(char key)
+{
+    switch (key)
+    {
+    case 27:
+        // esc: leave the main loop
+        running = false;
+        break;
+
+    case 'p': case 'P':
+        printParams();
+        break;
+
+    case 'g': case 'G':
+    {
+        // Colour input is only restored from gray, and never for BM.
+        const bool to_color = left.channels() == 1 && p.method != Params::BM;
+        if (!to_color)
+        {
+            cvtColor(left_src, left, CV_BGR2GRAY);
+            cvtColor(right_src, right, CV_BGR2GRAY);
+        }
+        else
+        {
+            left = left_src;
+            right = right_src;
+        }
+        d_left.upload(left);
+        d_right.upload(right);
+        cout << "image_channels: " << left.channels() << endl;
+        imshow("left", left);
+        imshow("right", right);
+        break;
+    }
+
+    case 'm': case 'M':
+        // Cycle BM -> BP -> CSBP -> BM
+        if (p.method == Params::BM)
+            p.method = Params::BP;
+        else if (p.method == Params::BP)
+            p.method = Params::CSBP;
+        else if (p.method == Params::CSBP)
+            p.method = Params::BM;
+        cout << "method: " << p.method_str() << endl;
+        break;
+
+    case 's': case 'S':
+        if (p.method != Params::BM)
+            break;
+        // Toggle between the two known presets; any other value is left alone.
+        if (bm.preset == StereoBM_OCL::BASIC_PRESET)
+            bm.preset = StereoBM_OCL::PREFILTER_XSOBEL;
+        else if (bm.preset == StereoBM_OCL::PREFILTER_XSOBEL)
+            bm.preset = StereoBM_OCL::BASIC_PRESET;
+        cout << "prefilter_sobel: " << bm.preset << endl;
+        break;
+
+    case '1':
+    case 'q': case 'Q':
+        // '1' raises the disparity count by 8, 'q' lowers it by 8 (never below 1).
+        if (key == '1')
+            p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8;
+        else
+            p.ndisp = max(p.ndisp - 8, 1);
+        cout << "ndisp: " << p.ndisp << endl;
+        bm.ndisp = p.ndisp;
+        bp.ndisp = p.ndisp;
+        csbp.ndisp = p.ndisp;
+        break;
+
+    case '2':
+    case 'w': case 'W':
+        if (p.method != Params::BM)
+            break;
+        // '2' grows the window (at most 51), 'w' shrinks it (at least 2).
+        if (key == '2')
+            bm.winSize = min(bm.winSize + 1, 51);
+        else
+            bm.winSize = max(bm.winSize - 1, 2);
+        cout << "win_size: " << bm.winSize << endl;
+        break;
+
+    case '3':
+    case 'e': case 'E':
+    {
+        // Pick the iteration counter of the active method, if it has one.
+        int* iters = 0;
+        if (p.method == Params::BP)
+            iters = &bp.iters;
+        else if (p.method == Params::CSBP)
+            iters = &csbp.iters;
+        if (!iters)
+            break;
+        if (key == '3')
+            *iters += 1;
+        else
+            *iters = max(*iters - 1, 1);
+        cout << "iter_count: " << *iters << endl;
+        break;
+    }
+
+    case '4':
+    case 'r': case 'R':
+    {
+        // Pick the level counter of the active method, if it has one.
+        int* levels = 0;
+        if (p.method == Params::BP)
+            levels = &bp.levels;
+        else if (p.method == Params::CSBP)
+            levels = &csbp.levels;
+        if (!levels)
+            break;
+        if (key == '4')
+            *levels += 1;
+        else
+            *levels = max(*levels - 1, 1);
+        cout << "level_count: " << *levels << endl;
+        break;
+    }
+    }
+}
+
+
index ea6ee97..038a8dc 100644 (file)
 #include <iostream>
 #include <stdio.h>
 #include "opencv2/core/core.hpp"
-#include "opencv2/features2d/features2d.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/ocl/ocl.hpp"
-#include "opencv2/nonfree/nonfree.hpp"
 #include "opencv2/nonfree/ocl.hpp"
 #include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/nonfree/nonfree.hpp"
 
-using namespace std;
 using namespace cv;
 using namespace cv::ocl;
 
-//#define USE_CPU_DESCRIPTOR // use cpu descriptor extractor until ocl descriptor extractor is fixed
-//#define USE_CPU_BFMATCHER
+const int LOOP_NUM = 10;
+const int GOOD_PTS_MAX = 50;
+const float GOOD_PORTION = 0.15f;
+
+namespace
+{
 void help();
 
 void help()
 {
-    cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << endl;
-    cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2>" << endl;
+    std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl;
+    std::cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2> [-c]" << std::endl;
+    std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl;
 }
 
+int64 work_begin = 0;
+int64 work_end = 0;
 
-////////////////////////////////////////////////////
-// This program demonstrates the usage of SURF_OCL.
-// use cpu findHomography interface to calculate the transformation matrix
-int main(int argc, char* argv[])
+void workBegin() 
+{ 
+    work_begin = getTickCount();
+}
+void workEnd()
 {
-    if (argc != 5 && argc != 1)
-    {
-        help();
-        return -1;
-    }
-    vector<cv::ocl::Info> info;
-    if(!cv::ocl::getDevice(info))
-    {
-        cout << "Error: Did not find a valid OpenCL device!" << endl;
-        return -1;
-    }
-    Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
-    oclMat img1, img2;
-    if(argc != 5)
+    work_end = getTickCount() - work_begin;
+}
+double getTime(){
+    return work_end /((double)cvGetTickFrequency() * 1000.);
+}
+
+template<class KPDetector>
+struct SURFDetector
+{
+    KPDetector surf;
+    SURFDetector(double hessian = 800.0)
+        :surf(hessian)
     {
-        cpu_img1 = imread("o.png");
-        cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
-        img1     = cpu_img1_grey;
-        CV_Assert(!img1.empty());
-
-        cpu_img2 = imread("r2.png");
-        cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
-        img2     = cpu_img2_grey;
     }
-    else
+    template<class T>
+    void operator()(const T& in, const T& mask, vector<cv::KeyPoint>& pts, T& descriptors, bool useProvided = false)
     {
-        for (int i = 1; i < argc; ++i)
-        {
-            if (string(argv[i]) == "--left")
-            {
-                cpu_img1 = imread(argv[++i]);
-                cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
-                img1     = cpu_img1_grey;
-                CV_Assert(!img1.empty());
-            }
-            else if (string(argv[i]) == "--right")
-            {
-                cpu_img2 = imread(argv[++i]);
-                cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
-                img2     = cpu_img2_grey;
-            }
-            else if (string(argv[i]) == "--help")
-            {
-                help();
-                return -1;
-            }
-        }
+        surf(in, mask, pts, descriptors, useProvided);
     }
+};
 
-    SURF_OCL surf;
-    //surf.hessianThreshold = 400.f;
-    //surf.extended = false;
-
-    // detecting keypoints & computing descriptors
-    oclMat keypoints1GPU, keypoints2GPU;
-    oclMat descriptors1GPU, descriptors2GPU;
-
-    // downloading results
-    vector<KeyPoint> keypoints1, keypoints2;
-    vector<DMatch> matches;
-
-
-#ifndef USE_CPU_DESCRIPTOR
-    surf(img1, oclMat(), keypoints1GPU, descriptors1GPU);
-    surf(img2, oclMat(), keypoints2GPU, descriptors2GPU);
-
-    surf.downloadKeypoints(keypoints1GPU, keypoints1);
-    surf.downloadKeypoints(keypoints2GPU, keypoints2);
-
-
-#ifdef USE_CPU_BFMATCHER
-    //BFMatcher
-    BFMatcher matcher(cv::NORM_L2);
-    matcher.match(Mat(descriptors1GPU), Mat(descriptors2GPU), matches);
-#else
-    BruteForceMatcher_OCL_base matcher(BruteForceMatcher_OCL_base::L2Dist);
-    matcher.match(descriptors1GPU, descriptors2GPU, matches);
-#endif
-
-#else
-    surf(img1, oclMat(), keypoints1GPU);
-    surf(img2, oclMat(), keypoints2GPU);
-    surf.downloadKeypoints(keypoints1GPU, keypoints1);
-    surf.downloadKeypoints(keypoints2GPU, keypoints2);
-
-    // use SURF_OCL to detect keypoints and use SURF to extract descriptors
-    SURF surf_cpu;
-    Mat descriptors1, descriptors2;
-    surf_cpu(cpu_img1, Mat(), keypoints1, descriptors1, true);
-    surf_cpu(cpu_img2, Mat(), keypoints2, descriptors2, true);
-    matcher.match(descriptors1, descriptors2, matches);
-#endif
-    cout << "OCL: FOUND " << keypoints1GPU.cols << " keypoints on first image" << endl;
-    cout << "OCL: FOUND " << keypoints2GPU.cols << " keypoints on second image" << endl;
-
-    double max_dist = 0; double min_dist = 100;
-    //-- Quick calculation of max and min distances between keypoints
-    for( size_t i = 0; i < keypoints1.size(); i++ )
+template<class KPMatcher>
+struct SURFMatcher
+{
+    KPMatcher matcher;
+    template<class T>
+    void match(const T& in1, const T& in2, vector<cv::DMatch>& matches)
     {
-        double dist = matches[i].distance;
-        if( dist < min_dist ) min_dist = dist;
-        if( dist > max_dist ) max_dist = dist;
+        matcher.match(in1, in2, matches);
     }
+};
 
-    printf("-- Max dist : %f \n", max_dist );
-    printf("-- Min dist : %f \n", min_dist );
-
-    //-- Draw only "good" matches (i.e. whose distance is less than 2.5*min_dist )
+Mat drawGoodMatches(
+    const Mat& cpu_img1,
+    const Mat& cpu_img2,
+    const vector<KeyPoint>& keypoints1, 
+    const vector<KeyPoint>& keypoints2, 
+    vector<DMatch>& matches,
+    vector<Point2f>& scene_corners_
+    )
+{
+    //-- Sort matches and preserve top 10% matches 
+    std::sort(matches.begin(), matches.end());
     std::vector< DMatch > good_matches;
+    double minDist = matches.front().distance,
+        maxDist = matches.back().distance;
 
-    for( size_t i = 0; i < keypoints1.size(); i++ )
+    const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION));
+    for( int i = 0; i < ptsPairs; i++ )
     {
-        if( matches[i].distance < 3*min_dist )
-        {
-            good_matches.push_back( matches[i]);
-        }
+        good_matches.push_back( matches[i] );
     }
+    std::cout << "\nMax distance: " << maxDist << std::endl;
+    std::cout << "Min distance: " << minDist << std::endl;
+
+    std::cout << "Calculating homography using " << ptsPairs << " point pairs." << std::endl;
 
     // drawing the results
     Mat img_matches;
     drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2,
         good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
-        vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
+        vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
 
     //-- Localize the object
     std::vector<Point2f> obj;
@@ -207,26 +152,238 @@ int main(int argc, char* argv[])
         obj.push_back( keypoints1[ good_matches[i].queryIdx ].pt );
         scene.push_back( keypoints2[ good_matches[i].trainIdx ].pt );
     }
-    Mat H = findHomography( obj, scene, CV_RANSAC );
-
     //-- Get the corners from the image_1 ( the object to be "detected" )
     std::vector<Point2f> obj_corners(4);
     obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( cpu_img1.cols, 0 );
     obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = cvPoint( 0, cpu_img1.rows );
     std::vector<Point2f> scene_corners(4);
-
+    
+    Mat H = findHomography( obj, scene, CV_RANSAC );
     perspectiveTransform( obj_corners, scene_corners, H);
 
+    scene_corners_ = scene_corners;
+    
     //-- Draw lines between the corners (the mapped object in the scene - image_2 )
-    line( img_matches, scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
-    line( img_matches, scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
-    line( img_matches, scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
-    line( img_matches, scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
+    line( img_matches, 
+        scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), 
+        Scalar( 0, 255, 0), 2, CV_AA );
+    line( img_matches, 
+        scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), 
+        Scalar( 0, 255, 0), 2, CV_AA );
+    line( img_matches, 
+        scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), 
+        Scalar( 0, 255, 0), 2, CV_AA );
+    line( img_matches, 
+        scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), 
+        Scalar( 0, 255, 0), 2, CV_AA );
+    return img_matches;
+}
+
+}
+////////////////////////////////////////////////////
+// This program demonstrates the usage of SURF_OCL.
+// use cpu findHomography interface to calculate the transformation matrix
+//
+// Command line:
+//   --left <file>    first image
+//   --right <file>   second image
+//   -c / -g / -a     run the CPU path, the OpenCL path, or both; running both
+//                    is also what happens when none of the three is given
+//   --help           print usage and exit
+//
+// Returns 0 on success and -1 when no OpenCL device is found, when help is
+// requested, or when the command line does not supply both images.
+int main(int argc, char* argv[])
+{
+    vector<cv::ocl::Info> info;
+    if(cv::ocl::getDevice(info) == 0)
+    {
+        std::cout << "Error: Did not find a valid OpenCL device!" << std::endl;
+        return -1;
+    }
+    ocl::setDevice(info[0]);
+
+    Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
+    oclMat img1, img2;
+    bool useCPU = false;
+    bool useGPU = false;
+    bool useALL = false;
+
+    for (int i = 1; i < argc; ++i)
+    {
+        if (string(argv[i]) == "--left")
+        {
+            // The flag needs a file name after it; stop here instead of
+            // reading past the last argument.
+            if (i + 1 >= argc)
+            {
+                help();
+                return -1;
+            }
+            cpu_img1 = imread(argv[++i]);
+            CV_Assert(!cpu_img1.empty());
+            cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
+            img1 = cpu_img1_grey;
+        }
+        else if (string(argv[i]) == "--right")
+        {
+            // Same guard as for --left.
+            if (i + 1 >= argc)
+            {
+                help();
+                return -1;
+            }
+            cpu_img2 = imread(argv[++i]);
+            CV_Assert(!cpu_img2.empty());
+            cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
+            img2 = cpu_img2_grey;
+        }
+        else if (string(argv[i]) == "-c")
+        {
+            useCPU = true;
+            useGPU = false;
+            useALL = false;
+        }else if(string(argv[i]) == "-g")
+        {
+            useGPU = true;
+            useCPU = false;
+            useALL = false;
+        }else if(string(argv[i]) == "-a")
+        {
+            useALL = true;
+            useCPU = false;
+            useGPU = false;
+        }
+        else if (string(argv[i]) == "--help")
+        {
+            help();
+            return -1;
+        }
+    }
+
+    // Every mode below works on both images, so refuse to continue when
+    // --left or --right was not given at all.
+    if (cpu_img1.empty() || cpu_img2.empty())
+    {
+        help();
+        return -1;
+    }
+
+    if(!useCPU)
+    {
+        std::cout
+            << "Device name:"
+            << info[0].DeviceName[0]
+        << std::endl;
+    }
+    double surf_time = 0.;
+
+    //declare input/output
+    vector<KeyPoint> keypoints1, keypoints2;
+    vector<DMatch> matches;
+
+    vector<KeyPoint> gpu_keypoints1;
+    vector<KeyPoint> gpu_keypoints2;
+    vector<DMatch> gpu_matches;
+
+    Mat descriptors1CPU, descriptors2CPU;
+
+    oclMat keypoints1GPU, keypoints2GPU;
+    oclMat descriptors1GPU, descriptors2GPU;
+
+    //instantiate detectors/matchers
+    SURFDetector<SURF>     cpp_surf;
+    SURFDetector<SURF_OCL> ocl_surf;
+
+    SURFMatcher<BFMatcher>      cpp_matcher;
+    SURFMatcher<BFMatcher_OCL>  ocl_matcher;
+
+    //-- start of timing section
+    // Each loop runs LOOP_NUM + 1 times; the timer is started at i == 1, so
+    // iteration 0 is left out of the measurement.
+    if (useCPU)
+    {
+        for (int i = 0; i <= LOOP_NUM; i++)
+        {
+            if(i == 1) workBegin();
+            cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU);
+            cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU);
+            cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches);
+        }
+        workEnd();
+        std::cout << "CPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+        std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+        surf_time = getTime();
+        std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+    }
+    else if(useGPU)
+    {
+        // OpenCL only: results go into keypoints1/keypoints2/matches so the
+        // common drawing code below can use them.
+        for (int i = 0; i <= LOOP_NUM; i++)
+        {
+            if(i == 1) workBegin();
+            ocl_surf(img1, oclMat(), keypoints1, descriptors1GPU);
+            ocl_surf(img2, oclMat(), keypoints2, descriptors2GPU);
+            ocl_matcher.match(descriptors1GPU, descriptors2GPU, matches);
+        }
+        workEnd();
+        std::cout << "OCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+        std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+        surf_time = getTime();
+        std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+    }else
+    {
+        //cpu runs
+        for (int i = 0; i <= LOOP_NUM; i++)
+        {
+            if(i == 1) workBegin();
+            cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU);
+            cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU);
+            cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches);
+        }
+        workEnd();
+        std::cout << "\nCPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+        std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+        surf_time = getTime();
+        std::cout << "(CPP)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl;
+
+        //gpu runs
+        for (int i = 0; i <= LOOP_NUM; i++)
+        {
+            if(i == 1) workBegin();
+            ocl_surf(img1, oclMat(), gpu_keypoints1, descriptors1GPU);
+            ocl_surf(img2, oclMat(), gpu_keypoints2, descriptors2GPU);
+            ocl_matcher.match(descriptors1GPU, descriptors2GPU, gpu_matches);
+        }
+        workEnd();
+        // Report the OpenCL results, which live in gpu_keypoints1/2 here
+        // (keypoints1/2 hold the CPU results in this branch).
+        std::cout << "\nOCL: FOUND " << gpu_keypoints1.size() << " keypoints on first image" << std::endl;
+        std::cout << "OCL: FOUND " << gpu_keypoints2.size() << " keypoints on second image" << std::endl;
+
+        surf_time = getTime();
+        std::cout << "(OCL)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+
+    }
+
+    //--------------------------------------------------------------------------
+    std::vector<Point2f> cpu_corner;
+    Mat img_matches = drawGoodMatches(cpu_img1, cpu_img2, keypoints1, keypoints2, matches, cpu_corner);
+
+    std::vector<Point2f> gpu_corner;
+    Mat ocl_img_matches;
+    if(useALL || (!useCPU&&!useGPU))
+    {
+        ocl_img_matches = drawGoodMatches(cpu_img1, cpu_img2, gpu_keypoints1, gpu_keypoints2, gpu_matches, gpu_corner);
+
+        //check accuracy
+        // "Passed" needs the same number of corners from both paths and every
+        // pair of corners within 10 units of each other in x and in y.
+        std::cout<<"\nCheck accuracy:\n";
+
+        if(cpu_corner.size()!=gpu_corner.size())
+            std::cout<<"Failed\n";
+        else
+        {
+            bool result = false;
+            for(size_t i = 0; i < cpu_corner.size(); i++)
+            {
+                if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10)
+                    ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
+                {
+                    std::cout<<"Failed\n";
+                    result = false;
+                    break;
+                }
+                result = true;
+            }
+            if(result)
+                std::cout<<"Passed\n";
+        }
+    }
 
     //-- Show detected matches
-    namedWindow("ocl surf matches", 0);
-    imshow("ocl surf matches", img_matches);
-    waitKey(0);
+    if (useCPU)
+    {
+        namedWindow("cpu surf matches", 0);
+        imshow("cpu surf matches", img_matches);
+    }
+    else if(useGPU)
+    {
+        namedWindow("ocl surf matches", 0);
+        imshow("ocl surf matches", img_matches);
+    }else
+    {
+        namedWindow("cpu surf matches", 0);
+        imshow("cpu surf matches", img_matches);
 
+        namedWindow("ocl surf matches", 0);
+        imshow("ocl surf matches", ocl_img_matches);
+    }
+    waitKey(0);
     return 0;
 }
diff --git a/samples/python2/grabcut.py b/samples/python2/grabcut.py
new file mode 100644 (file)
index 0000000..9fc1280
--- /dev/null
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+'''
+===============================================================================
+Interactive Image Segmentation using GrabCut algorithm.
+
+This sample shows interactive image segmentation using grabcut algorithm.
+
+USAGE :
+    python grabcut.py <filename>
+
+README FIRST:    
+    Two windows will show up, one for input and one for output.
+    
+    At first, in input window, draw a rectangle around the object using 
+mouse right button. Then press 'n' to segment the object (once or a few times)
+For any finer touch-ups, you can press any of the keys below and draw lines on 
+the areas you want. Then again press 'n' for updating the output.
+
+Key '0' - To select areas of sure background
+Key '1' - To select areas of sure foreground
+Key '2' - To select areas of probable background
+Key '3' - To select areas of probable foreground
+
+Key 'n' - To update the segmentation
+Key 'r' - To reset the setup
+Key 's' - To save the results
+===============================================================================
+'''
+
+import numpy as np
+import cv2
+import sys
+
+# Colors used for drawing, written in B,G,R order (BLUE is [255,0,0]).
+BLUE = [255,0,0]        # rectangle color
+RED = [0,0,255]         # PR BG
+GREEN = [0,255,0]       # PR FG
+BLACK = [0,0,0]         # sure BG
+WHITE = [255,255,255]   # sure FG
+
+# One brush per label: 'color' is what gets painted on the displayed image,
+# 'val' is the label value painted into the mask at the same position.
+DRAW_BG = {'color' : BLACK, 'val' : 0}
+DRAW_FG = {'color' : WHITE, 'val' : 1}
+DRAW_PR_FG = {'color' : GREEN, 'val' : 3}
+DRAW_PR_BG = {'color' : RED, 'val' : 2}
+
+# setting up flags
+rect = (0,0,1,1)        # selection rectangle handed to cv2.grabCut
+drawing = False         # flag for drawing curves
+rectangle = False       # flag for drawing rect
+rect_over = False       # flag to check if rect drawn
+# rect_or_mask: 0 -> next 'n' runs grabCut from the rectangle,
+#               1 -> next 'n' runs grabCut from the mask,
+#               100 -> no rectangle drawn yet, 'n' runs nothing
+rect_or_mask = 100      # flag for selecting rect or mask mode
+value = DRAW_FG         # drawing initialized to FG
+thickness = 3           # brush thickness
+
+def onmouse(event,x,y,flags,param):
+    '''Mouse callback: the right button drags the selection rectangle,
+    the left button paints touch-up strokes.
+
+    event        -- cv2.EVENT_* code of the mouse event
+    x, y         -- pointer position reported with the event
+    flags, param -- not used here
+
+    Works entirely through module globals: it redraws `img`, stores the
+    selection in `rect`, paints label values into `mask` and updates the
+    state flags (`rectangle`, `rect_over`, `drawing`, `rect_or_mask`).
+    '''
+    global img,img2,drawing,value,mask,rectangle,rect,rect_or_mask,ix,iy,rect_over
+
+    # Draw Rectangle
+    if event == cv2.EVENT_RBUTTONDOWN:
+        rectangle = True
+        ix,iy = x,y
+
+    elif event == cv2.EVENT_MOUSEMOVE:
+        if rectangle == True:
+            # restart from the clean copy so only the current rectangle shows
+            img = img2.copy()
+            cv2.rectangle(img,(ix,iy),(x,y),BLUE,2)
+            # rect is (x, y, width, height): anchor it at the top-left corner
+            # so that dragging up or to the left still covers the selection
+            rect = (min(ix,x),min(iy,y),abs(ix-x),abs(iy-y))
+            rect_or_mask = 0
+
+    elif event == cv2.EVENT_RBUTTONUP:
+        rectangle = False
+        rect_over = True
+        cv2.rectangle(img,(ix,iy),(x,y),BLUE,2)
+        rect = (min(ix,x),min(iy,y),abs(ix-x),abs(iy-y))
+        rect_or_mask = 0
+        print " Now press the key 'n' a few times until no further change \n"
+
+    # draw touchup curves
+    # (separate if-chain: a mouse move is checked for both rectangle and brush)
+
+    if event == cv2.EVENT_LBUTTONDOWN:
+        if rect_over == False:
+            print "first draw rectangle \n"
+        else:
+            drawing = True
+            cv2.circle(img,(x,y),thickness,value['color'],-1)
+            cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+    elif event == cv2.EVENT_MOUSEMOVE:
+        if drawing == True:
+            cv2.circle(img,(x,y),thickness,value['color'],-1)
+            cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+    elif event == cv2.EVENT_LBUTTONUP:
+        if drawing == True:
+            drawing = False
+            cv2.circle(img,(x,y),thickness,value['color'],-1)
+            cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+        
+# print documentation
+print __doc__
+
+# Loading images
+if len(sys.argv) == 2:
+    filename = sys.argv[1] # image path given on the command line
+else:
+    print "No input image given, so loading default image, lena.jpg \n"
+    print "Correct Usage : python grabcut.py <filename> \n"
+    filename = '../cpp/lena.jpg'
+
+img = cv2.imread(filename)
+if img is None:
+    # cv2.imread reports an unreadable file by returning None, not by raising;
+    # stop here with a clear message instead of failing on img.copy() below
+    print "Could not read image : " + filename + " \n"
+    sys.exit(1)
+
+img2 = img.copy()                               # a copy of original image
+mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask starts as all 0, the sure-BG value
+output = np.zeros(img.shape,np.uint8)           # output image to be shown
+
+# input and output windows
+cv2.namedWindow('output')
+cv2.namedWindow('input')
+cv2.setMouseCallback('input',onmouse)
+cv2.moveWindow('input',img.shape[1]+10,90)
+
+print " Instructions : \n"
+print " Draw a rectangle around the object using right mouse button \n"
+
+# Main UI loop: show both windows, poll the keyboard, act on the key, then
+# rebuild the output image from the current mask. Runs until Esc is pressed.
+while(1):
+
+    cv2.imshow('output',output)
+    cv2.imshow('input',img)
+    k = 0xFF & cv2.waitKey(1)   # keep only the low byte of the key code
+    
+    # key bindings
+    if k == 27:         # esc to exit
+        break
+    elif k == ord('0'): # BG drawing
+        print " mark background regions with left mouse button \n"
+        value = DRAW_BG
+    elif k == ord('1'): # FG drawing
+        print " mark foreground regions with left mouse button \n"
+        value = DRAW_FG
+    elif k == ord('2'): # PR_BG drawing
+        value = DRAW_PR_BG
+    elif k == ord('3'): # PR_FG drawing
+        value = DRAW_PR_FG
+    elif k == ord('s'): # save image
+        # saved picture: original, annotated input and output side by side,
+        # separated by 5-pixel-wide black bars
+        bar = np.zeros((img.shape[0],5,3),np.uint8)
+        res = np.hstack((img2,bar,img,bar,output))
+        cv2.imwrite('grabcut_output.png',res)
+        print " Result saved as image \n"
+    elif k == ord('r'): # reset everything
+        print "resetting \n"
+        rect = (0,0,1,1)
+        drawing = False         
+        rectangle = False       
+        rect_or_mask = 100 
+        rect_over = False     
+        value = DRAW_FG         
+        img = img2.copy()
+        mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask back to all 0, the sure-BG value
+        output = np.zeros(img.shape,np.uint8)           # output image to be shown
+    elif k == ord('n'): # segment the image
+        print """ For finer touchups, mark foreground and background after pressing keys 0-3
+        and again press 'n' \n"""
+        # each key press runs one grabCut iteration with fresh model arrays;
+        # nothing runs while rect_or_mask is still 100 (no rectangle yet)
+        if (rect_or_mask == 0):         # grabcut with rect
+            bgdmodel = np.zeros((1,65),np.float64)
+            fgdmodel = np.zeros((1,65),np.float64)    
+            cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_RECT)
+            rect_or_mask = 1            # later presses refine from the mask
+        elif rect_or_mask == 1:         # grabcut with mask
+            bgdmodel = np.zeros((1,65),np.float64)
+            fgdmodel = np.zeros((1,65),np.float64) 
+            cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_MASK)
+
+    # pixels labelled 1 (FG) or 3 (PR FG) become 255 in mask2, all others 0;
+    # the output keeps the original image only where mask2 is set
+    mask2 = np.where((mask==1) + (mask==3),255,0).astype('uint8')
+    output = cv2.bitwise_and(img2,img2,mask=mask2)   
+
+cv2.destroyAllWindows()