if(NOT ANDROID_TOOLS_Pkg_Revision GREATER 13)
message(WARNING "OpenCV requires Android SDK tools revision 14 or newer. Otherwise tests and samples will no be compiled.")
endif()
-elseif(ANT_EXECUTABLE)
+else()
find_package(JNI)
endif()
endif()
if(ANDROID)
- add_subdirectory(android/service)
+ add_subdirectory(platforms/android/service)
endif()
if(BUILD_ANDROID_PACKAGE)
- add_subdirectory(android/package)
+ add_subdirectory(platforms/android/package)
endif()
if (ANDROID)
- add_subdirectory(android/libinfo)
+ add_subdirectory(platforms/android/libinfo)
endif()
# ----------------------------------------------------------------------------
if(NOT ANDROID)
status(" JNI:" JNI_INCLUDE_DIRS THEN "${JNI_INCLUDE_DIRS}" ELSE NO)
endif()
-status(" Java tests:" BUILD_TESTS AND (NOT ANDROID OR CAN_BUILD_ANDROID_PROJECTS) THEN YES ELSE NO)
+status(" Java tests:" BUILD_TESTS AND (CAN_BUILD_ANDROID_PROJECTS OR HAVE_opencv_java) THEN YES ELSE NO)
# ========================== documentation ==========================
if(BUILD_DOCS)
+++ /dev/null
-We greatly appreciate your support and contributions and they are always welcomed!
-
-Github pull requests are the convenient way to contribute to OpenCV project. Good pull requests have all of these attributes:
-
-* Are scoped to one specific issue
-* Include a test to demonstrate the correctness
-* Update the docs if relevant
-* Match the [coding style guidelines](http://code.opencv.org/projects/opencv/wiki/CodingStyleGuide)
-* Don't messed by "oops" commits
-
-You can find more detailes about contributing process on http://opencv.org/contribute.html
\ No newline at end of file
Online docs: http://docs.opencv.org
Q&A forum: http://answers.opencv.org
Dev zone: http://code.opencv.org
+
+Please read before starting work on a pull request:
+ http://code.opencv.org/projects/opencv/wiki/How_to_contribute
+
+Summary of guidelines:
+
+* One pull request per issue;
+* Choose the right base branch;
+* Include tests and documentation;
+* Clean up "oops" commits before submitting;
+* Follow the coding style guide.
-# Copyright (c) 2010-2011, Ethan Rublee
+message(STATUS "Android toolchain was moved to platfroms/android!")
+message(STATUS "This file is depricated and will be removed!")
+
# Copyright (c) 2011-2013, Andrey Kamaev
# All rights reserved.
#
--- /dev/null
+All Android specific sources are moved to platforms/android.
\ No newline at end of file
+++ /dev/null
-@ECHO OFF
-
-:: enable command extensions
-VERIFY BADVALUE 2>NUL
-SETLOCAL ENABLEEXTENSIONS || (ECHO Unable to enable command extensions. & EXIT \B)
-
-:: build environment
-SET SOURCE_DIR=%cd%
-IF EXIST .\android.toolchain.cmake (SET BUILD_OPENCV=1) ELSE (SET BUILD_OPENCV=0)
-IF EXIST .\jni\nul (SET BUILD_JAVA_PART=1) ELSE (SET BUILD_JAVA_PART=0)
-
-:: load configuration
-PUSHD %~dp0
-SET SCRIPTS_DIR=%cd%
-IF EXIST .\wincfg.cmd CALL .\wincfg.cmd
-POPD
-
-:: inherit old names
-IF NOT DEFINED CMAKE SET CMAKE=%CMAKE_EXE%
-IF NOT DEFINED MAKE SET MAKE=%MAKE_EXE%
-
-:: defaults
-IF NOT DEFINED BUILD_DIR SET BUILD_DIR=build
-IF NOT DEFINED ANDROID_ABI SET ANDROID_ABI=armeabi-v7a
-SET OPENCV_BUILD_DIR=%SCRIPTS_DIR%\..\%BUILD_DIR%
-
-:: check that all required variables defined
-PUSHD .
-IF NOT DEFINED ANDROID_NDK (ECHO. & ECHO You should set an environment variable ANDROID_NDK to the full path to your copy of Android NDK & GOTO end)
-(CD "%ANDROID_NDK%") || (ECHO. & ECHO Directory "%ANDROID_NDK%" specified by ANDROID_NDK variable does not exist & GOTO end)
-
-IF NOT EXIST "%CMAKE%" (ECHO. & ECHO You should set an environment variable CMAKE to the full path to cmake executable & GOTO end)
-IF NOT EXIST "%MAKE%" (ECHO. & ECHO You should set an environment variable MAKE to the full path to native port of make executable & GOTO end)
-
-IF NOT %BUILD_JAVA_PART%==1 GOTO required_variables_checked
-
-IF NOT DEFINED ANDROID_SDK (ECHO. & ECHO You should set an environment variable ANDROID_SDK to the full path to your copy of Android SDK & GOTO end)
-(CD "%ANDROID_SDK%" 2>NUL) || (ECHO. & ECHO Directory "%ANDROID_SDK%" specified by ANDROID_SDK variable does not exist & GOTO end)
-
-IF NOT DEFINED ANT_DIR (ECHO. & ECHO You should set an environment variable ANT_DIR to the full path to Apache Ant root & GOTO end)
-(CD "%ANT_DIR%" 2>NUL) || (ECHO. & ECHO Directory "%ANT_DIR%" specified by ANT_DIR variable does not exist & GOTO end)
-
-IF NOT DEFINED JAVA_HOME (ECHO. & ECHO You should set an environment variable JAVA_HOME to the full path to JDK & GOTO end)
-(CD "%JAVA_HOME%" 2>NUL) || (ECHO. & ECHO Directory "%JAVA_HOME%" specified by JAVA_HOME variable does not exist & GOTO end)
-
-:required_variables_checked
-POPD
-
-:: check for ninja
-echo "%MAKE%"|findstr /i ninja >nul:
-IF %errorlevel%==1 (SET BUILD_WITH_NINJA=0) ELSE (SET BUILD_WITH_NINJA=1)
-IF %BUILD_WITH_NINJA%==1 (SET CMAKE_GENERATOR=Ninja) ELSE (SET CMAKE_GENERATOR=MinGW Makefiles)
-
-:: create build dir
-IF DEFINED REBUILD rmdir /S /Q "%BUILD_DIR%" 2>NUL
-MKDIR "%BUILD_DIR%" 2>NUL
-PUSHD "%BUILD_DIR%" || (ECHO. & ECHO Directory "%BUILD_DIR%" is not found & GOTO end)
-
-:: run cmake
-ECHO. & ECHO Runnning cmake...
-ECHO ANDROID_ABI=%ANDROID_ABI%
-ECHO.
-IF NOT %BUILD_OPENCV%==1 GOTO other-cmake
-:opencv-cmake
-("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DCMAKE_TOOLCHAIN_FILE="%SOURCE_DIR%"\android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%\..") && GOTO cmakefin
-ECHO. & ECHO cmake failed & GOTO end
-:other-cmake
-("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DOpenCV_DIR="%OPENCV_BUILD_DIR%" -DCMAKE_TOOLCHAIN_FILE="%OPENCV_BUILD_DIR%\..\android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%") && GOTO cmakefin
-ECHO. & ECHO cmake failed & GOTO end
-:cmakefin
-
-:: run make
-ECHO. & ECHO Building native libs...
-IF %BUILD_WITH_NINJA%==0 ("%MAKE%" -j %NUMBER_OF_PROCESSORS% VERBOSE=%VERBOSE%) || (ECHO. & ECHO make failed & GOTO end)
-IF %BUILD_WITH_NINJA%==1 ("%MAKE%") || (ECHO. & ECHO ninja failed & GOTO end)
-
-IF NOT %BUILD_JAVA_PART%==1 GOTO end
-POPD && PUSHD %SOURCE_DIR%
-
-:: configure java part
-ECHO. & ECHO Updating Android project...
-(CALL "%ANDROID_SDK%\tools\android" update project --name %PROJECT_NAME% --path .) || (ECHO. & ECHO failed to update android project & GOTO end)
-
-:: compile java part
-ECHO. & ECHO Compiling Android project...
-(CALL "%ANT_DIR%\bin\ant" debug) || (ECHO. & ECHO failed to compile android project & GOTO end)
-
-:end
-POPD
-ENDLOCAL
+++ /dev/null
-@ECHO OFF
-
-PUSHD %~dp0..
-CALL .\scripts\build.cmd %* -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
-POPD
\ No newline at end of file
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_armeabi
-cd build_armeabi
-
-cmake -DANDROID_ABI=armeabi -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_mips
-cd build_mips
-
-cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_neon
-cd build_neon
-
-cmake -DANDROID_ABI="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_service
-cd build_service
-
-cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../..
+++ /dev/null
-#!/bin/sh
-
-cd `dirname $0`/..
-
-mkdir -p build_x86
-cd build_x86
-
-cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
+++ /dev/null
-:: variables required for OpenCV build ::
-:: Note: all pathes should be specified without tailing slashes!
-SET ANDROID_NDK=C:\full\path\to\your\copy\of\android\NDK\android-ndk-r7b
-SET CMAKE_EXE=C:\full\path\to\cmake\utility\cmake.exe
-SET MAKE_EXE=%ANDROID_NDK%\prebuilt\windows\bin\make.exe
-
-:: variables required for android-opencv build ::
-SET ANDROID_SDK=C:\full\path\to\your\copy\of\android\SDK\android-sdk-windows
-SET ANT_DIR=C:\full\path\to\ant\directory\apache-ant-1.8.2
-SET JAVA_HOME=C:\full\path\to\JDK\jdk1.6.0_25
-
-:: configuration options ::
-:::: general ARM-V7 settings
-SET ANDROID_ABI=armeabi-v7a
-SET BUILD_DIR=build
-
-:::: uncomment following lines to compile for old emulator or old device
-::SET ANDROID_ABI=armeabi
-::SET BUILD_DIR=build_armeabi
-
-:::: uncomment following lines to compile for ARM-V7 with NEON support
-::SET ANDROID_ABI=armeabi-v7a with NEON
-::SET BUILD_DIR=build_neon
-
-:::: uncomment following lines to compile for x86
-::SET ANDROID_ABI=x86
-::SET BUILD_DIR=build_x86
-
-:::: other options
-::SET ANDROID_NATIVE_API_LEVEL=8 &:: android-3 is enough for native part of OpenCV but android-8 is required for Java API
+++ /dev/null
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = _build
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
-
-help:
- @echo "Please use \`make <target>' where <target> is one of"
- @echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
-
-clean:
- -rm -rf $(BUILDDIR)/*
-
-html:
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-pickle:
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-json:
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OpenCVEngine.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OpenCVEngine.qhc"
-
-latex:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
- "run these through (pdf)latex."
-
-changes:
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
}
-struct FeatureIdxOnlyPrecalc
+struct FeatureIdxOnlyPrecalc : ParallelLoopBody
{
FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
{
idst = _buf->data.i;
is_buf_16u = _is_buf_16u;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
cv::AutoBuffer<float> valCache(sample_count);
float* valCachePtr = (float*)valCache;
- for ( int fi = range.begin(); fi < range.end(); fi++)
+ for ( int fi = range.start; fi < range.end; fi++)
{
for( int si = 0; si < sample_count; si++ )
{
bool is_buf_16u;
};
-struct FeatureValAndIdxPrecalc
+struct FeatureValAndIdxPrecalc : ParallelLoopBody
{
FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
{
idst = _buf->data.i;
is_buf_16u = _is_buf_16u;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
- for ( int fi = range.begin(); fi < range.end(); fi++)
+ for ( int fi = range.start; fi < range.end; fi++)
{
for( int si = 0; si < sample_count; si++ )
{
bool is_buf_16u;
};
-struct FeatureValOnlyPrecalc
+struct FeatureValOnlyPrecalc : ParallelLoopBody
{
FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
{
valCache = _valCache;
sample_count = _sample_count;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
- for ( int fi = range.begin(); fi < range.end(); fi++)
+ for ( int fi = range.start; fi < range.end; fi++)
for( int si = 0; si < sample_count; si++ )
valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
}
int minNum = MIN( numPrecalcVal, numPrecalcIdx);
double proctime = -TIME( 0 );
- parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx),
- FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
- parallel_for( BlockedRange(0, minNum),
- FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
- parallel_for( BlockedRange(minNum, numPrecalcVal),
- FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
+ parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
+ FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
+ parallel_for_( Range(0, minNum),
+ FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
+ parallel_for_( Range(minNum, numPrecalcVal),
+ FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
}
set(HAVE_CUBLAS 1)
endif()
+ if(${CUDA_VERSION} VERSION_LESS "5.5")
+ find_cuda_helper_libs(npp)
+ else()
+ find_cuda_helper_libs(nppc)
+ find_cuda_helper_libs(nppi)
+ find_cuda_helper_libs(npps)
+ set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
+ endif()
+
if(WITH_NVCUVID)
find_cuda_helper_libs(nvcuvid)
set(HAVE_NVCUVID 1)
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)
- find_cuda_helper_libs(npp)
-
macro(ocv_cuda_compile VAR)
foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
set(${var}_backup_in_cuda_compile_ "${${var}}")
endif()
if(ANDROID)
- install(FILES "${OpenCV_SOURCE_DIR}/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/)
+ install(FILES "${OpenCV_SOURCE_DIR}/platforms/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/)
endif()
# --------------------------------------------------------------------------------------------
endif()
endforeach()
- file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/android/service/doc/*.rst")
- file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/android/service/doc/*.jpg")
+ file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.rst")
+ file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.jpg")
list(APPEND OPENCV_FILES_REF ${_OPENCV_FILES_REF})
list(APPEND OPENCV_FILES_REF_PICT ${_OPENCV_FILES_REF_PICT})
u'', 'manual'),
('doc/tutorials/tutorials', 'opencv_tutorials.tex', u'The OpenCV Tutorials',
u'', 'manual'),
- ('android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
+ ('platforms/android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
u'', 'manual'),
]
:maxdepth: 2
modules/refman.rst
- android/refman.rst
+ platforms/android/refman.rst
doc/user_guide/user_guide.rst
doc/tutorials/tutorials.rst
set(OPENCV_MODULE_TYPE STATIC)
ocv_define_module(androidcamera INTERNAL opencv_core log dl)
-ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/android/service/engine/jni/include")
+ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/platforms/android/service/engine/jni/include")
# Android source tree for native camera
SET (ANDROID_SOURCE_TREE "ANDROID_SOURCE_TREE-NOTFOUND" CACHE PATH
transform(points, modif_points, transformation);
}
- class Mutex
- {
- public:
- Mutex() {
- }
- void lock()
- {
-#ifdef HAVE_TBB
- resultsMutex.lock();
-#endif
- }
-
- void unlock()
- {
-#ifdef HAVE_TBB
- resultsMutex.unlock();
-#endif
- }
-
- private:
-#ifdef HAVE_TBB
- tbb::mutex resultsMutex;
-#endif
- };
-
struct CameraParameters
{
void init(Mat _intrinsics, Mat _distCoeffs)
};
-struct FindStereoCorrespInvoker
+struct FindStereoCorrespInvoker : ParallelLoopBody
{
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
Mat& _disp, CvStereoBMState* _state,
validDisparityRect = _validDisparityRect;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
int cols = left->cols, rows = left->rows;
- int _row0 = min(cvRound(range.begin() * rows / nstripes), rows);
- int _row1 = min(cvRound(range.end() * rows / nstripes), rows);
- uchar *ptr = state->slidingSumBuf->data.ptr + range.begin() * stripeBufSize;
+ int _row0 = min(cvRound(range.start * rows / nstripes), rows);
+ int _row1 = min(cvRound(range.end * rows / nstripes), rows);
+ uchar *ptr = state->slidingSumBuf->data.ptr + range.start * stripeBufSize;
int FILTERED = (state->minDisparity - 1)*16;
Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
const bool useShorts = false;
#endif
-#ifdef HAVE_TBB
const double SAD_overhead_coeff = 10.0;
double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread
double maxStripeSize = min(max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height);
int nstripes = cvCeil(height / maxStripeSize);
-#else
- const int nstripes = 1;
-#endif
int bufSize = max(bufSize0 * nstripes, max(bufSize1 * 2, bufSize2));
state->minDisparity, state->numberOfDisparities,
state->SADWindowSize);
- parallel_for(BlockedRange(0, nstripes),
- FindStereoCorrespInvoker(left, right, disp, state, nstripes,
- bufSize0, useShorts, validDisparityRect));
+ parallel_for_(Range(0, nstripes),
+ FindStereoCorrespInvoker(left, right, disp, state, nstripes,
+ bufSize0, useShorts, validDisparityRect));
if( state->speckleRange >= 0 && state->speckleWindowSize > 0 )
{
}
namespace {
-class GridAdaptedFeatureDetectorInvoker
+class GridAdaptedFeatureDetectorInvoker : public ParallelLoopBody
{
private:
int gridRows_, gridCols_;
const Mat& image_;
const Mat& mask_;
const Ptr<FeatureDetector>& detector_;
-#ifdef HAVE_TBB
- tbb::mutex* kptLock_;
-#endif
+ Mutex* kptLock_;
GridAdaptedFeatureDetectorInvoker& operator=(const GridAdaptedFeatureDetectorInvoker&); // to quiet MSVC
public:
- GridAdaptedFeatureDetectorInvoker(const Ptr<FeatureDetector>& detector, const Mat& image, const Mat& mask, vector<KeyPoint>& keypoints, int maxPerCell, int gridRows, int gridCols
-#ifdef HAVE_TBB
- , tbb::mutex* kptLock
-#endif
- ) : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell),
- keypoints_(keypoints), image_(image), mask_(mask), detector_(detector)
-#ifdef HAVE_TBB
- , kptLock_(kptLock)
-#endif
+ GridAdaptedFeatureDetectorInvoker(const Ptr<FeatureDetector>& detector, const Mat& image, const Mat& mask,
+ vector<KeyPoint>& keypoints, int maxPerCell, int gridRows, int gridCols,
+ cv::Mutex* kptLock)
+ : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell),
+ keypoints_(keypoints), image_(image), mask_(mask), detector_(detector),
+ kptLock_(kptLock)
{
}
- void operator() (const BlockedRange& range) const
+ void operator() (const Range& range) const
{
- for (int i = range.begin(); i < range.end(); ++i)
+ for (int i = range.start; i < range.end; ++i)
{
int celly = i / gridCols_;
int cellx = i - celly * gridCols_;
it->pt.x += col_range.start;
it->pt.y += row_range.start;
}
-#ifdef HAVE_TBB
- tbb::mutex::scoped_lock join_keypoints(*kptLock_);
-#endif
+
+ cv::AutoLock join_keypoints(*kptLock_);
keypoints_.insert( keypoints_.end(), sub_keypoints.begin(), sub_keypoints.end() );
}
}
keypoints.reserve(maxTotalKeypoints);
int maxPerCell = maxTotalKeypoints / (gridRows * gridCols);
-#ifdef HAVE_TBB
- tbb::mutex kptLock;
- cv::parallel_for(cv::BlockedRange(0, gridRows * gridCols),
+ cv::Mutex kptLock;
+ cv::parallel_for_(cv::Range(0, gridRows * gridCols),
GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols, &kptLock));
-#else
- GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols)(cv::BlockedRange(0, gridRows * gridCols));
-#endif
}
/*
ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda})
- set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
+ set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_npp_LIBRARY})
if(WITH_NVCUVID)
set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY})
return dst;
}
- __device__ __forceinline__ RGB2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-
- __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_)
- :unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2RGB() {}
+ __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
};
template <> struct RGB2RGB<uchar, 4, 4, 2> : unary_function<uint, uint>
return dst;
}
- __device__ __forceinline__ RGB2RGB():unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_):unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2RGB() {}
+ __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
};
}
return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
}
- __device__ __forceinline__ RGB2RGB5x5():unary_function<uchar3, ushort>(){}
- __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function<uchar3, ushort>(){}
+ __host__ __device__ __forceinline__ RGB2RGB5x5() {}
+ __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
};
template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx,green_bits> : unary_function<uint, ushort>
return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
}
- __device__ __forceinline__ RGB2RGB5x5():unary_function<uint, ushort>(){}
- __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function<uint, ushort>(){}
+ __host__ __device__ __forceinline__ RGB2RGB5x5() {}
+ __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
};
}
RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
return dst;
}
- __device__ __forceinline__ RGB5x52RGB():unary_function<ushort, uchar3>(){}
- __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function<ushort, uchar3>(){}
+ __host__ __device__ __forceinline__ RGB5x52RGB() {}
+ __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
};
RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
return dst;
}
- __device__ __forceinline__ RGB5x52RGB():unary_function<ushort, uint>(){}
- __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function<ushort, uint>(){}
+ __host__ __device__ __forceinline__ RGB5x52RGB() {}
+ __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ Gray2RGB():unary_function<T, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_)
- : unary_function<T, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ Gray2RGB() {}
+ __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
};
template <> struct Gray2RGB<uchar, 4> : unary_function<uchar, uint>
return dst;
}
- __device__ __forceinline__ Gray2RGB():unary_function<uchar, uint>(){}
- __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_):unary_function<uchar, uint>(){}
+ __host__ __device__ __forceinline__ Gray2RGB() {}
+ __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
};
}
return Gray2RGB5x5Converter<green_bits>::cvt(src);
}
- __device__ __forceinline__ Gray2RGB5x5():unary_function<uchar, ushort>(){}
- __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5& other_):unary_function<uchar, ushort>(){}
+ __host__ __device__ __forceinline__ Gray2RGB5x5() {}
+ __host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {}
};
}
{
return RGB5x52GrayConverter<green_bits>::cvt(src);
}
- __device__ __forceinline__ RGB5x52Gray() : unary_function<ushort, uchar>(){}
- __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray& other_) : unary_function<ushort, uchar>(){}
+ __host__ __device__ __forceinline__ RGB5x52Gray() {}
+ __host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {}
};
}
{
return RGB2GrayConvert<bidx>(&src.x);
}
- __device__ __forceinline__ RGB2Gray() : unary_function<typename TypeVec<T, scn>::vec_type, T>(){}
- __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, T>(){}
+ __host__ __device__ __forceinline__ RGB2Gray() {}
+ __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
};
template <int bidx> struct RGB2Gray<uchar, 4, bidx> : unary_function<uint, uchar>
{
return RGB2GrayConvert<bidx>(src);
}
- __device__ __forceinline__ RGB2Gray() : unary_function<uint, uchar>(){}
- __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_) : unary_function<uint, uchar>(){}
+ __host__ __device__ __forceinline__ RGB2Gray() {}
+ __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
};
}
RGB2YUVConvert<bidx>(&src.x, dst);
return dst;
}
- __device__ __forceinline__ RGB2YUV()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2YUV(const RGB2YUV& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2YUV() {}
+ __host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {}
};
}
return dst;
}
- __device__ __forceinline__ YUV2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ YUV2RGB() {}
+ __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
};
template <int bidx> struct YUV2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
{
return YUV2RGBConvert<bidx>(src);
}
- __device__ __forceinline__ YUV2RGB() : unary_function<uint, uint>(){}
- __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ YUV2RGB() {}
+ __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
};
}
RGB2YCrCbConvert<bidx>(&src.x, dst);
return dst;
}
- __device__ __forceinline__ RGB2YCrCb()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2YCrCb() {}
+ __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
};
template <int bidx> struct RGB2YCrCb<uchar, 4, 4, bidx> : unary_function<uint, uint>
return RGB2YCrCbConvert<bidx>(src);
}
- __device__ __forceinline__ RGB2YCrCb() : unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2YCrCb() {}
+ __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
};
}
return dst;
}
- __device__ __forceinline__ YCrCb2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ YCrCb2RGB() {}
+ __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
};
template <int bidx> struct YCrCb2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
{
return YCrCb2RGBConvert<bidx>(src);
}
- __device__ __forceinline__ YCrCb2RGB() : unary_function<uint, uint>(){}
- __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ YCrCb2RGB() {}
+ __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2XYZ()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2XYZ() {}
+ __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
};
template <int bidx> struct RGB2XYZ<uchar, 4, 4, bidx> : unary_function<uint, uint>
{
return RGB2XYZConvert<bidx>(src);
}
- __device__ __forceinline__ RGB2XYZ() : unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2XYZ() {}
+ __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
};
}
return dst;
}
- __device__ __forceinline__ XYZ2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ XYZ2RGB() {}
+ __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
};
template <int bidx> struct XYZ2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
{
return XYZ2RGBConvert<bidx>(src);
}
- __device__ __forceinline__ XYZ2RGB() : unary_function<uint, uint>(){}
- __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ XYZ2RGB() {}
+ __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2HSV()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2HSV() {}
+ __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
};
template <int bidx, int hr> struct RGB2HSV<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
{
return RGB2HSVConvert<bidx, hr>(src);
}
- __device__ __forceinline__ RGB2HSV():unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_):unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2HSV() {}
+ __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
};
}
return dst;
}
- __device__ __forceinline__ HSV2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ HSV2RGB() {}
+ __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
};
template <int bidx, int hr> struct HSV2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
{
return HSV2RGBConvert<bidx, hr>(src);
}
- __device__ __forceinline__ HSV2RGB():unary_function<uint, uint>(){}
- __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_):unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ HSV2RGB() {}
+ __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2HLS()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2HLS() {}
+ __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
};
template <int bidx, int hr> struct RGB2HLS<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
{
return RGB2HLSConvert<bidx, hr>(src);
}
- __device__ __forceinline__ RGB2HLS() : unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2HLS() {}
+ __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
};
}
return dst;
}
- __device__ __forceinline__ HLS2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ HLS2RGB() {}
+ __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
};
template <int bidx, int hr> struct HLS2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
{
return HLS2RGBConvert<bidx, hr>(src);
}
- __device__ __forceinline__ HLS2RGB() : unary_function<uint, uint>(){}
- __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ HLS2RGB() {}
+ __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2Lab() {}
- __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {}
+ __host__ __device__ __forceinline__ RGB2Lab() {}
+ __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
};
template <int scn, int dcn, bool srgb, int blueIdx>
struct RGB2Lab<float, scn, dcn, srgb, blueIdx>
return dst;
}
- __device__ __forceinline__ RGB2Lab() {}
- __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {}
+ __host__ __device__ __forceinline__ RGB2Lab() {}
+ __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
};
}
return dst;
}
- __device__ __forceinline__ Lab2RGB() {}
- __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {}
+ __host__ __device__ __forceinline__ Lab2RGB() {}
+ __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
};
template <int scn, int dcn, bool srgb, int blueIdx>
struct Lab2RGB<float, scn, dcn, srgb, blueIdx>
return dst;
}
- __device__ __forceinline__ Lab2RGB() {}
- __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {}
+ __host__ __device__ __forceinline__ Lab2RGB() {}
+ __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2Luv() {}
- __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {}
+ __host__ __device__ __forceinline__ RGB2Luv() {}
+ __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
};
template <int scn, int dcn, bool srgb, int blueIdx>
struct RGB2Luv<float, scn, dcn, srgb, blueIdx>
return dst;
}
- __device__ __forceinline__ RGB2Luv() {}
- __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {}
+ __host__ __device__ __forceinline__ RGB2Luv() {}
+ __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
};
}
return dst;
}
- __device__ __forceinline__ Luv2RGB() {}
- __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {}
+ __host__ __device__ __forceinline__ Luv2RGB() {}
+ __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
};
template <int scn, int dcn, bool srgb, int blueIdx>
struct Luv2RGB<float, scn, dcn, srgb, blueIdx>
return dst;
}
- __device__ __forceinline__ Luv2RGB() {}
- __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {}
+ __host__ __device__ __forceinline__ Luv2RGB() {}
+ __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
};
}
{
return a + b;
}
- __device__ __forceinline__ plus(const plus& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ plus():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ plus() {}
+ __host__ __device__ __forceinline__ plus(const plus&) {}
};
template <typename T> struct minus : binary_function<T, T, T>
{
return a - b;
}
- __device__ __forceinline__ minus(const minus& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ minus():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ minus() {}
+ __host__ __device__ __forceinline__ minus(const minus&) {}
};
template <typename T> struct multiplies : binary_function<T, T, T>
{
return a * b;
}
- __device__ __forceinline__ multiplies(const multiplies& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ multiplies():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ multiplies() {}
+ __host__ __device__ __forceinline__ multiplies(const multiplies&) {}
};
template <typename T> struct divides : binary_function<T, T, T>
{
return a / b;
}
- __device__ __forceinline__ divides(const divides& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ divides():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ divides() {}
+ __host__ __device__ __forceinline__ divides(const divides&) {}
};
template <typename T> struct modulus : binary_function<T, T, T>
{
return a % b;
}
- __device__ __forceinline__ modulus(const modulus& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ modulus():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ modulus() {}
+ __host__ __device__ __forceinline__ modulus(const modulus&) {}
};
template <typename T> struct negate : unary_function<T, T>
{
return -a;
}
- __device__ __forceinline__ negate(const negate& other):unary_function<T,T>(){}
- __device__ __forceinline__ negate():unary_function<T,T>(){}
+ __host__ __device__ __forceinline__ negate() {}
+ __host__ __device__ __forceinline__ negate(const negate&) {}
};
// Comparison Operations
{
return a == b;
}
- __device__ __forceinline__ equal_to(const equal_to& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ equal_to():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ equal_to() {}
+ __host__ __device__ __forceinline__ equal_to(const equal_to&) {}
};
template <typename T> struct not_equal_to : binary_function<T, T, bool>
{
return a != b;
}
- __device__ __forceinline__ not_equal_to(const not_equal_to& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ not_equal_to():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ not_equal_to() {}
+ __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
};
template <typename T> struct greater : binary_function<T, T, bool>
{
return a > b;
}
- __device__ __forceinline__ greater(const greater& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ greater():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ greater() {}
+ __host__ __device__ __forceinline__ greater(const greater&) {}
};
template <typename T> struct less : binary_function<T, T, bool>
{
return a < b;
}
- __device__ __forceinline__ less(const less& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ less():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ less() {}
+ __host__ __device__ __forceinline__ less(const less&) {}
};
template <typename T> struct greater_equal : binary_function<T, T, bool>
{
return a >= b;
}
- __device__ __forceinline__ greater_equal(const greater_equal& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ greater_equal():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ greater_equal() {}
+ __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
};
template <typename T> struct less_equal : binary_function<T, T, bool>
{
return a <= b;
}
- __device__ __forceinline__ less_equal(const less_equal& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ less_equal():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ less_equal() {}
+ __host__ __device__ __forceinline__ less_equal(const less_equal&) {}
};
// Logical Operations
{
return a && b;
}
- __device__ __forceinline__ logical_and(const logical_and& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ logical_and():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ logical_and() {}
+ __host__ __device__ __forceinline__ logical_and(const logical_and&) {}
};
template <typename T> struct logical_or : binary_function<T, T, bool>
{
return a || b;
}
- __device__ __forceinline__ logical_or(const logical_or& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ logical_or():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ logical_or() {}
+ __host__ __device__ __forceinline__ logical_or(const logical_or&) {}
};
template <typename T> struct logical_not : unary_function<T, bool>
{
return !a;
}
- __device__ __forceinline__ logical_not(const logical_not& other):unary_function<T,bool>(){}
- __device__ __forceinline__ logical_not():unary_function<T,bool>(){}
+ __host__ __device__ __forceinline__ logical_not() {}
+ __host__ __device__ __forceinline__ logical_not(const logical_not&) {}
};
// Bitwise Operations
{
return a & b;
}
- __device__ __forceinline__ bit_and(const bit_and& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ bit_and():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ bit_and() {}
+ __host__ __device__ __forceinline__ bit_and(const bit_and&) {}
};
template <typename T> struct bit_or : binary_function<T, T, T>
{
return a | b;
}
- __device__ __forceinline__ bit_or(const bit_or& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ bit_or():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ bit_or() {}
+ __host__ __device__ __forceinline__ bit_or(const bit_or&) {}
};
template <typename T> struct bit_xor : binary_function<T, T, T>
{
return a ^ b;
}
- __device__ __forceinline__ bit_xor(const bit_xor& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ bit_xor():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ bit_xor() {}
+ __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
};
template <typename T> struct bit_not : unary_function<T, T>
{
return ~v;
}
- __device__ __forceinline__ bit_not(const bit_not& other):unary_function<T,T>(){}
- __device__ __forceinline__ bit_not():unary_function<T,T>(){}
+ __host__ __device__ __forceinline__ bit_not() {}
+ __host__ __device__ __forceinline__ bit_not(const bit_not&) {}
};
// Generalized Identity Operations
{
return x;
}
- __device__ __forceinline__ identity(const identity& other):unary_function<T,T>(){}
- __device__ __forceinline__ identity():unary_function<T,T>(){}
+ __host__ __device__ __forceinline__ identity() {}
+ __host__ __device__ __forceinline__ identity(const identity&) {}
};
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
{
return lhs;
}
- __device__ __forceinline__ project1st(const project1st& other):binary_function<T1,T2,T1>(){}
- __device__ __forceinline__ project1st():binary_function<T1,T2,T1>(){}
+ __host__ __device__ __forceinline__ project1st() {}
+ __host__ __device__ __forceinline__ project1st(const project1st&) {}
};
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
{
return rhs;
}
- __device__ __forceinline__ project2nd(const project2nd& other):binary_function<T1,T2,T2>(){}
- __device__ __forceinline__ project2nd():binary_function<T1,T2,T2>(){}
+ __host__ __device__ __forceinline__ project2nd() {}
+ __host__ __device__ __forceinline__ project2nd(const project2nd&) {}
};
// Min/Max Operations
template <> struct name<type> : binary_function<type, type, type> \
{ \
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
- __device__ __forceinline__ name() {}\
- __device__ __forceinline__ name(const name&) {}\
+ __host__ __device__ __forceinline__ name() {}\
+ __host__ __device__ __forceinline__ name(const name&) {}\
};
template <typename T> struct maximum : binary_function<T, T, T>
{
return max(lhs, rhs);
}
- __device__ __forceinline__ maximum() {}
- __device__ __forceinline__ maximum(const maximum&) {}
+ __host__ __device__ __forceinline__ maximum() {}
+ __host__ __device__ __forceinline__ maximum(const maximum&) {}
};
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max)
{
return min(lhs, rhs);
}
- __device__ __forceinline__ minimum() {}
- __device__ __forceinline__ minimum(const minimum&) {}
+ __host__ __device__ __forceinline__ minimum() {}
+ __host__ __device__ __forceinline__ minimum(const minimum&) {}
};
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min)
#undef OPENCV_GPU_IMPLEMENT_MINMAX
// Math functions
-///bound=========================================
template <typename T> struct abs_func : unary_function<T, T>
{
return abs(x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
{
return x;
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
{
return ::abs((int)x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<char> : unary_function<char, char>
{
return ::abs((int)x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
{
return x;
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<short> : unary_function<short, short>
{
return ::abs((int)x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
{
return x;
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<int> : unary_function<int, int>
{
return ::abs(x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<float> : unary_function<float, float>
{
return ::fabsf(x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<double> : unary_function<double, double>
{
return ::fabs(x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
#define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \
{ \
return func ## f(v); \
} \
- __device__ __forceinline__ name ## _func() {} \
- __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+ __host__ __device__ __forceinline__ name ## _func() {} \
+ __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : unary_function<double, double> \
{ \
{ \
return func(v); \
} \
- __device__ __forceinline__ name ## _func() {} \
- __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+ __host__ __device__ __forceinline__ name ## _func() {} \
+ __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
#define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \
{ \
return func ## f(v1, v2); \
} \
+ __host__ __device__ __forceinline__ name ## _func() {} \
+ __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : binary_function<double, double, double> \
{ \
{ \
return func(v1, v2); \
} \
+ __host__ __device__ __forceinline__ name ## _func() {} \
+ __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
{
return src1 * src1 + src2 * src2;
}
- __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func& other) : binary_function<T, T, float>(){}
- __device__ __forceinline__ hypot_sqr_func() : binary_function<T, T, float>(){}
+ __host__ __device__ __forceinline__ hypot_sqr_func() {}
+ __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
};
// Saturate Cast Functor
{
return saturate_cast<D>(v);
}
- __device__ __forceinline__ saturate_cast_func(const saturate_cast_func& other):unary_function<T, D>(){}
- __device__ __forceinline__ saturate_cast_func():unary_function<T, D>(){}
+ __host__ __device__ __forceinline__ saturate_cast_func() {}
+ __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
};
// Threshold Functors
return (src > thresh) * maxVal;
}
- __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
- : unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
-
- __device__ __forceinline__ thresh_binary_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_binary_func() {}
+ __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
+ : thresh(other.thresh), maxVal(other.maxVal) {}
const T thresh;
const T maxVal;
return (src <= thresh) * maxVal;
}
- __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
- : unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
-
- __device__ __forceinline__ thresh_binary_inv_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_binary_inv_func() {}
+ __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
+ : thresh(other.thresh), maxVal(other.maxVal) {}
const T thresh;
const T maxVal;
return minimum<T>()(src, thresh);
}
- __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
- : unary_function<T, T>(), thresh(other.thresh){}
-
- __device__ __forceinline__ thresh_trunc_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_trunc_func() {}
+ __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
+ : thresh(other.thresh) {}
const T thresh;
};
{
return (src > thresh) * src;
}
- __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
- : unary_function<T, T>(), thresh(other.thresh){}
- __device__ __forceinline__ thresh_to_zero_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_to_zero_func() {}
+ __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
+ : thresh(other.thresh) {}
const T thresh;
};
{
return (src <= thresh) * src;
}
- __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
- : unary_function<T, T>(), thresh(other.thresh){}
- __device__ __forceinline__ thresh_to_zero_inv_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
+ __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
+ : thresh(other.thresh) {}
const T thresh;
};
-//bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============>
+
// Function Object Adaptors
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
{
return !pred(x);
}
- __device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function<typename Predicate::argument_type, bool>(){}
- __device__ __forceinline__ unary_negate() : unary_function<typename Predicate::argument_type, bool>(){}
+ __host__ __device__ __forceinline__ unary_negate() {}
+ __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
const Predicate pred;
};
{
return !pred(x,y);
}
- __device__ __forceinline__ binary_negate(const binary_negate& other)
- : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
- __device__ __forceinline__ binary_negate() :
- binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
+ __host__ __device__ __forceinline__ binary_negate() {}
+ __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
const Predicate pred;
};
return op(arg1, a);
}
- __device__ __forceinline__ binder1st(const binder1st& other) :
- unary_function<typename Op::second_argument_type, typename Op::result_type>(){}
+ __host__ __device__ __forceinline__ binder1st() {}
+ __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
const Op op;
const typename Op::first_argument_type arg1;
return op(a, arg2);
}
- __device__ __forceinline__ binder2nd(const binder2nd& other) :
- unary_function<typename Op::first_argument_type, typename Op::result_type>(), op(other.op), arg2(other.arg2){}
+ __host__ __device__ __forceinline__ binder2nd() {}
+ __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
const Op op;
const typename Op::second_argument_type arg2;
struct WithOutMask
{
- __device__ __forceinline__ WithOutMask(){}
- __device__ __forceinline__ WithOutMask(const WithOutMask& mask){}
+ __host__ __device__ __forceinline__ WithOutMask(){}
+ __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
__device__ __forceinline__ void next() const
{
}
// Computes rotation, translation pair for small subsets if the input data
- class TransformHypothesesGenerator
+ class TransformHypothesesGenerator : public ParallelLoopBody
{
public:
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
transl_vectors(transl_vectors_) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
// Input data for generation of the current hypothesis
vector<int> subset_indices(subset_size);
Mat rot_mat(3, 3, CV_64F);
Mat transl_vec(1, 3, CV_64F);
- for (int iter = range.begin(); iter < range.end(); ++iter)
+ for (int iter = range.start; iter < range.end; ++iter)
{
selectRandom(subset_size, num_points, subset_indices);
for (int i = 0; i < subset_size; ++i)
// Generate set of hypotheses using small subsets of the input data
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
num_points, subset_size, rot_matrices, transl_vectors);
- parallel_for(BlockedRange(0, num_iters), body);
+ parallel_for_(Range(0, num_iters), body);
// Compute scores (i.e. number of inliers) for each hypothesis
GpuMat d_object(object);
crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
}
- __device__ __forceinline__ TransformOp() {}
- __device__ __forceinline__ TransformOp(const TransformOp&) {}
+ __host__ __device__ __forceinline__ TransformOp() {}
+ __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {}
};
void call(const PtrStepSz<float3> src, const float* rot,
(cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
(cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
}
- __device__ __forceinline__ ProjectOp() {}
- __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
+ __host__ __device__ __forceinline__ ProjectOp() {}
+ __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
};
void call(const PtrStepSz<float3> src, const float* rot,
return ::abs(x) + ::abs(y);
}
- __device__ __forceinline__ L1() {}
- __device__ __forceinline__ L1(const L1&) {}
+ __host__ __device__ __forceinline__ L1() {}
+ __host__ __device__ __forceinline__ L1(const L1&) {}
};
struct L2 : binary_function<int, int, float>
{
return ::sqrtf(x * x + y * y);
}
- __device__ __forceinline__ L2() {}
- __device__ __forceinline__ L2(const L2&) {}
+ __host__ __device__ __forceinline__ L2() {}
+ __host__ __device__ __forceinline__ L2(const L2&) {}
};
}
return (uchar)(-(e >> 1));
}
- __device__ __forceinline__ GetEdges() {}
- __device__ __forceinline__ GetEdges(const GetEdges&) {}
+ __host__ __device__ __forceinline__ GetEdges() {}
+ __host__ __device__ __forceinline__ GetEdges(const GetEdges&) {}
};
}
return vadd4(a, b);
}
- __device__ __forceinline__ VAdd4() {}
- __device__ __forceinline__ VAdd4(const VAdd4& other) {}
+ __host__ __device__ __forceinline__ VAdd4() {}
+ __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
};
////////////////////////////////////
return vadd2(a, b);
}
- __device__ __forceinline__ VAdd2() {}
- __device__ __forceinline__ VAdd2(const VAdd2& other) {}
+ __host__ __device__ __forceinline__ VAdd2() {}
+ __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
};
////////////////////////////////////
return saturate_cast<D>(a + b);
}
- __device__ __forceinline__ AddMat() {}
- __device__ __forceinline__ AddMat(const AddMat& other) {}
+ __host__ __device__ __forceinline__ AddMat() {}
+ __host__ __device__ __forceinline__ AddMat(const AddMat&) {}
};
}
return vsub4(a, b);
}
- __device__ __forceinline__ VSub4() {}
- __device__ __forceinline__ VSub4(const VSub4& other) {}
+ __host__ __device__ __forceinline__ VSub4() {}
+ __host__ __device__ __forceinline__ VSub4(const VSub4&) {}
};
////////////////////////////////////
return vsub2(a, b);
}
- __device__ __forceinline__ VSub2() {}
- __device__ __forceinline__ VSub2(const VSub2& other) {}
+ __host__ __device__ __forceinline__ VSub2() {}
+ __host__ __device__ __forceinline__ VSub2(const VSub2&) {}
};
////////////////////////////////////
return saturate_cast<D>(a - b);
}
- __device__ __forceinline__ SubMat() {}
- __device__ __forceinline__ SubMat(const SubMat& other) {}
+ __host__ __device__ __forceinline__ SubMat() {}
+ __host__ __device__ __forceinline__ SubMat(const SubMat&) {}
};
}
return res;
}
- __device__ __forceinline__ Mul_8uc4_32f() {}
- __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
+ __host__ __device__ __forceinline__ Mul_8uc4_32f() {}
+ __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
};
struct Mul_16sc4_32f : binary_function<short4, float, short4>
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
}
- __device__ __forceinline__ Mul_16sc4_32f() {}
- __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
+ __host__ __device__ __forceinline__ Mul_16sc4_32f() {}
+ __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
};
template <typename T, typename D> struct Mul : binary_function<T, T, D>
return saturate_cast<D>(a * b);
}
- __device__ __forceinline__ Mul() {}
- __device__ __forceinline__ Mul(const Mul& other) {}
+ __host__ __device__ __forceinline__ Mul() {}
+ __host__ __device__ __forceinline__ Mul(const Mul&) {}
};
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
return b != 0 ? saturate_cast<D>(a / b) : 0;
}
- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, float> : binary_function<T, T, float>
{
return b != 0 ? static_cast<float>(a) / b : 0;
}
- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, double> : binary_function<T, T, double>
{
return b != 0 ? static_cast<double>(a) / b : 0;
}
- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
return vabsdiff4(a, b);
}
- __device__ __forceinline__ VAbsDiff4() {}
- __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
+ __host__ __device__ __forceinline__ VAbsDiff4() {}
+ __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
};
////////////////////////////////////
return vabsdiff2(a, b);
}
- __device__ __forceinline__ VAbsDiff2() {}
- __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
+ __host__ __device__ __forceinline__ VAbsDiff2() {}
+ __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
};
////////////////////////////////////
return saturate_cast<T>(_abs(a - b));
}
- __device__ __forceinline__ AbsDiffMat() {}
- __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
+ __host__ __device__ __forceinline__ AbsDiffMat() {}
+ __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
};
}
return saturate_cast<T>(x * x);
}
- __device__ __forceinline__ Sqr() {}
- __device__ __forceinline__ Sqr(const Sqr& other) {}
+ __host__ __device__ __forceinline__ Sqr() {}
+ __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
};
}
return saturate_cast<T>(f(x));
}
- __device__ __forceinline__ Exp() {}
- __device__ __forceinline__ Exp(const Exp& other) {}
+ __host__ __device__ __forceinline__ Exp() {}
+ __host__ __device__ __forceinline__ Exp(const Exp&) {}
};
}
return vcmpeq4(a, b);
}
- __device__ __forceinline__ VCmpEq4() {}
- __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
+ __host__ __device__ __forceinline__ VCmpEq4() {}
+ __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
};
struct VCmpNe4 : binary_function<uint, uint, uint>
{
return vcmpne4(a, b);
}
- __device__ __forceinline__ VCmpNe4() {}
- __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
+ __host__ __device__ __forceinline__ VCmpNe4() {}
+ __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
};
struct VCmpLt4 : binary_function<uint, uint, uint>
{
return vcmplt4(a, b);
}
- __device__ __forceinline__ VCmpLt4() {}
- __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
+ __host__ __device__ __forceinline__ VCmpLt4() {}
+ __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
};
struct VCmpLe4 : binary_function<uint, uint, uint>
{
return vcmple4(a, b);
}
- __device__ __forceinline__ VCmpLe4() {}
- __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
+ __host__ __device__ __forceinline__ VCmpLe4() {}
+ __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
};
////////////////////////////////////
return vmin4(a, b);
}
- __device__ __forceinline__ VMin4() {}
- __device__ __forceinline__ VMin4(const VMin4& other) {}
+ __host__ __device__ __forceinline__ VMin4() {}
+ __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
};
////////////////////////////////////
return vmin2(a, b);
}
- __device__ __forceinline__ VMin2() {}
- __device__ __forceinline__ VMin2(const VMin2& other) {}
+ __host__ __device__ __forceinline__ VMin2() {}
+ __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
};
}
return vmax4(a, b);
}
- __device__ __forceinline__ VMax4() {}
- __device__ __forceinline__ VMax4(const VMax4& other) {}
+ __host__ __device__ __forceinline__ VMax4() {}
+ __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
};
////////////////////////////////////
return vmax2(a, b);
}
- __device__ __forceinline__ VMax2() {}
- __device__ __forceinline__ VMax2(const VMax2& other) {}
+ __host__ __device__ __forceinline__ VMax2() {}
+ __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
};
}
const ErrorEntry npp_errors [] =
{
- error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
- error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
- error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
-
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
+#if NPP_VERSION < 5500
error_entry( NPP_BAD_ARG_ERROR ),
- error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
- error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ),
- error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
- error_entry( NPP_NOT_EVEN_STEP_ERROR ),
- error_entry( NPP_INTERPOLATION_ERROR ),
- error_entry( NPP_RESIZE_FACTOR_ERROR ),
- error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
- error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
+ error_entry( NPP_POINTER_ERROR ),
+ error_entry( NPP_WARNING ),
+ error_entry( NPP_ODD_ROI_WARNING ),
+#else
+ error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
+ error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
+ error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
+ error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
+ error_entry( NPP_MEMFREE_ERROR ),
+ error_entry( NPP_MEMSET_ERROR ),
+ error_entry( NPP_QUALITY_INDEX_ERROR ),
+ error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
+ error_entry( NPP_CHANNEL_ORDER_ERROR ),
+ error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
+ error_entry( NPP_QUADRANGLE_ERROR ),
+ error_entry( NPP_RECTANGLE_ERROR ),
+ error_entry( NPP_COEFFICIENT_ERROR ),
+ error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
+ error_entry( NPP_COI_ERROR ),
+ error_entry( NPP_DIVISOR_ERROR ),
+ error_entry( NPP_CHANNEL_ERROR ),
+ error_entry( NPP_STRIDE_ERROR ),
+ error_entry( NPP_ANCHOR_ERROR ),
+ error_entry( NPP_MASK_SIZE_ERROR ),
+ error_entry( NPP_MIRROR_FLIP_ERROR ),
+ error_entry( NPP_MOMENT_00_ZERO_ERROR ),
+ error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
+ error_entry( NPP_THRESHOLD_ERROR ),
+ error_entry( NPP_CONTEXT_MATCH_ERROR ),
+ error_entry( NPP_FFT_FLAG_ERROR ),
+ error_entry( NPP_FFT_ORDER_ERROR ),
+ error_entry( NPP_SCALE_RANGE_ERROR ),
+ error_entry( NPP_DATA_TYPE_ERROR ),
+ error_entry( NPP_OUT_OFF_RANGE_ERROR ),
+ error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
+ error_entry( NPP_MEMORY_ALLOCATION_ERR ),
+ error_entry( NPP_RANGE_ERROR ),
+ error_entry( NPP_BAD_ARGUMENT_ERROR ),
+ error_entry( NPP_NO_MEMORY_ERROR ),
+ error_entry( NPP_ERROR_RESERVED ),
+ error_entry( NPP_NO_OPERATION_WARNING ),
+ error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
+ error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
+#endif
+
+ error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
+ error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
+ error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
+ error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
+ error_entry( NPP_TEXTURE_BIND_ERROR ),
+ error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
+ error_entry( NPP_NOT_EVEN_STEP_ERROR ),
+ error_entry( NPP_INTERPOLATION_ERROR ),
+ error_entry( NPP_RESIZE_FACTOR_ERROR ),
+ error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
+ error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ),
- error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ),
- error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
- error_entry( NPP_DOUBLE_SIZE_WARNING ),
- error_entry( NPP_ODD_ROI_WARNING )
+ error_entry( NPP_DOUBLE_SIZE_WARNING )
};
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
- typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
- NppiSize oSizeROI, Npp64f* pRetVal);
+#if CUDA_VERSION < 5050
+ typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
- static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+ static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+#else
+ typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
+ NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
+
+ typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
+
+ static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+
+ static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
+#endif
NppiSize sz;
sz.width = src1.cols;
DeviceBuffer dbuf;
- nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#if CUDA_VERSION < 5050
+ nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#else
+ int bufSize;
+ buf_size_funcs[funcIdx](sz, &bufSize);
+
+ GpuMat buf(1, bufSize, CV_8UC1);
+
+ nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
+#endif
cudaSafeCall( cudaDeviceSynchronize() );
#define CUDART_MINIMUM_REQUIRED_VERSION 4010
#define NPP_MINIMUM_REQUIRED_VERSION 4100
+ #define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
+
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it."
#endif
- #if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
+ #if (NPP_VERSION < NPP_MINIMUM_REQUIRED_VERSION)
#error "Insufficient NPP version, please update it."
#endif
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::add(mat, val, dst_gold, cv::noArray(), depth.second);
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::add(mat, val, dst_gold, mask, depth.second);
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::subtract(mat, val, dst_gold, mask, depth.second);
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
cv::Mat dst_gold = cv::min(src, val);
- EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
}
}
cv::Mat dst_gold = cv::max(src, val);
- EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
}
}
The function can do the following transformations:
*
+ RGB :math:`\leftrightarrow` GRAY ( ``CV_BGR2GRAY, CV_RGB2GRAY, CV_GRAY2BGR, CV_GRAY2RGB`` )
Transformations within RGB space like adding/removing the alpha channel, reversing the channel order, conversion to/from 16-bit RGB color (R5:G6:B5 or R5:G5:B5), as well as conversion to/from grayscale using:
.. math::
* **GC_PR_BGD** defines a possible background pixel.
- * **GC_PR_BGD** defines a possible foreground pixel.
+ * **GC_PR_FGD** defines a possible foreground pixel.
:param rect: ROI containing a segmented object. The pixels outside of the ROI are marked as "obvious background". The parameter is only used when ``mode==GC_INIT_WITH_RECT`` .
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+// ----------------------------------------------------------------------
+// CLAHE
+
+namespace
+{
+ class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
+ {
+ public:
+ CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) :
+ src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale)
+ {
+ }
+
+ void operator ()(const cv::Range& range) const;
+
+ private:
+ cv::Mat src_;
+ mutable cv::Mat lut_;
+
+ cv::Size tileSize_;
+ int tilesX_;
+ int tilesY_;
+ int clipLimit_;
+ float lutScale_;
+ };
+
+ void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const
+ {
+ const int histSize = 256;
+
+ uchar* tileLut = lut_.ptr(range.start);
+ const size_t lut_step = lut_.step;
+
+ for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
+ {
+ const int ty = k / tilesX_;
+ const int tx = k % tilesX_;
+
+ // retrieve tile submatrix
+
+ cv::Rect tileROI;
+ tileROI.x = tx * tileSize_.width;
+ tileROI.y = ty * tileSize_.height;
+ tileROI.width = tileSize_.width;
+ tileROI.height = tileSize_.height;
+
+ const cv::Mat tile = src_(tileROI);
+
+ // calc histogram
+
+ int tileHist[histSize] = {0, };
+
+ int height = tileROI.height;
+ const size_t sstep = tile.step;
+ for (const uchar* ptr = tile.ptr<uchar>(0); height--; ptr += sstep)
+ {
+ int x = 0;
+ for (; x <= tileROI.width - 4; x += 4)
+ {
+ int t0 = ptr[x], t1 = ptr[x+1];
+ tileHist[t0]++; tileHist[t1]++;
+ t0 = ptr[x+2]; t1 = ptr[x+3];
+ tileHist[t0]++; tileHist[t1]++;
+ }
+
+ for (; x < tileROI.width; ++x)
+ tileHist[ptr[x]]++;
+ }
+
+ // clip histogram
+
+ if (clipLimit_ > 0)
+ {
+ // how many pixels were clipped
+ int clipped = 0;
+ for (int i = 0; i < histSize; ++i)
+ {
+ if (tileHist[i] > clipLimit_)
+ {
+ clipped += tileHist[i] - clipLimit_;
+ tileHist[i] = clipLimit_;
+ }
+ }
+
+ // redistribute clipped pixels
+ int redistBatch = clipped / histSize;
+ int residual = clipped - redistBatch * histSize;
+
+ for (int i = 0; i < histSize; ++i)
+ tileHist[i] += redistBatch;
+
+ for (int i = 0; i < residual; ++i)
+ tileHist[i]++;
+ }
+
+ // calc Lut
+
+ int sum = 0;
+ for (int i = 0; i < histSize; ++i)
+ {
+ sum += tileHist[i];
+ tileLut[i] = cv::saturate_cast<uchar>(sum * lutScale_);
+ }
+ }
+ }
+
+ class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
+ {
+ public:
+ CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) :
+ src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
+ {
+ }
+
+ void operator ()(const cv::Range& range) const;
+
+ private:
+ cv::Mat src_;
+ mutable cv::Mat dst_;
+ cv::Mat lut_;
+
+ cv::Size tileSize_;
+ int tilesX_;
+ int tilesY_;
+ };
+
+ void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const
+ {
+ const size_t lut_step = lut_.step;
+
+ for (int y = range.start; y < range.end; ++y)
+ {
+ const uchar* srcRow = src_.ptr<uchar>(y);
+ uchar* dstRow = dst_.ptr<uchar>(y);
+
+ const float tyf = (static_cast<float>(y) / tileSize_.height) - 0.5f;
+
+ int ty1 = cvFloor(tyf);
+ int ty2 = ty1 + 1;
+
+ const float ya = tyf - ty1;
+
+ ty1 = std::max(ty1, 0);
+ ty2 = std::min(ty2, tilesY_ - 1);
+
+ const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_);
+ const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_);
+
+ for (int x = 0; x < src_.cols; ++x)
+ {
+ const float txf = (static_cast<float>(x) / tileSize_.width) - 0.5f;
+
+ int tx1 = cvFloor(txf);
+ int tx2 = tx1 + 1;
+
+ const float xa = txf - tx1;
+
+ tx1 = std::max(tx1, 0);
+ tx2 = std::min(tx2, tilesX_ - 1);
+
+ const int srcVal = srcRow[x];
+
+ const size_t ind1 = tx1 * lut_step + srcVal;
+ const size_t ind2 = tx2 * lut_step + srcVal;
+
+ float res = 0;
+
+ res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya));
+ res += lutPlane1[ind2] * ((xa) * (1.0f - ya));
+ res += lutPlane2[ind1] * ((1.0f - xa) * (ya));
+ res += lutPlane2[ind2] * ((xa) * (ya));
+
+ dstRow[x] = cv::saturate_cast<uchar>(res);
+ }
+ }
+ }
+
+ class CLAHE_Impl : public cv::CLAHE
+ {
+ public:
+ CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
+
+ cv::AlgorithmInfo* info() const;
+
+ void apply(cv::InputArray src, cv::OutputArray dst);
+
+ void setClipLimit(double clipLimit);
+ double getClipLimit() const;
+
+ void setTilesGridSize(cv::Size tileGridSize);
+ cv::Size getTilesGridSize() const;
+
+ void collectGarbage();
+
+ private:
+ double clipLimit_;
+ int tilesX_;
+ int tilesY_;
+
+ cv::Mat srcExt_;
+ cv::Mat lut_;
+ };
+
+ CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
+ clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
+ {
+ }
+
+ CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE",
+ obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
+ obj.info()->addParam(obj, "tilesX", obj.tilesX_);
+ obj.info()->addParam(obj, "tilesY", obj.tilesY_))
+
+ void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
+ {
+ cv::Mat src = _src.getMat();
+
+ CV_Assert( src.type() == CV_8UC1 );
+
+ _dst.create( src.size(), src.type() );
+ cv::Mat dst = _dst.getMat();
+
+ const int histSize = 256;
+
+ lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
+
+ cv::Size tileSize;
+ cv::Mat srcForLut;
+
+ if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
+ {
+ tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
+ srcForLut = src;
+ }
+ else
+ {
+ cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
+
+ tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
+ srcForLut = srcExt_;
+ }
+
+ const int tileSizeTotal = tileSize.area();
+ const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
+
+ int clipLimit = 0;
+ if (clipLimit_ > 0.0)
+ {
+ clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
+ clipLimit = std::max(clipLimit, 1);
+ }
+
+ CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
+ cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
+
+ CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_);
+ cv::parallel_for_(cv::Range(0, src.rows), interpolationBody);
+ }
+
+ void CLAHE_Impl::setClipLimit(double clipLimit)
+ {
+ clipLimit_ = clipLimit;
+ }
+
+ double CLAHE_Impl::getClipLimit() const
+ {
+ return clipLimit_;
+ }
+
+ void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
+ {
+ tilesX_ = tileGridSize.width;
+ tilesY_ = tileGridSize.height;
+ }
+
+ cv::Size CLAHE_Impl::getTilesGridSize() const
+ {
+ return cv::Size(tilesX_, tilesY_);
+ }
+
+ void CLAHE_Impl::collectGarbage()
+ {
+ srcExt_.release();
+ lut_.release();
+ }
+}
+
+cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
+{
+ return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
+}
const int ITUR_BT_601_CBV = -74448;
template<int bIdx, int uIdx>
-struct YUV420sp2RGB888Invoker
+struct YUV420sp2RGB888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* my1, *muv;
YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
: dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin() * 2;
- int rangeEnd = range.end() * 2;
+ int rangeBegin = range.start * 2;
+ int rangeEnd = range.end * 2;
//R = 1.164(Y - 16) + 1.596(V - 128)
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
};
template<int bIdx, int uIdx>
-struct YUV420sp2RGBA8888Invoker
+struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* my1, *muv;
YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
: dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin() * 2;
- int rangeEnd = range.end() * 2;
+ int rangeBegin = range.start * 2;
+ int rangeEnd = range.end * 2;
//R = 1.164(Y - 16) + 1.596(V - 128)
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
};
template<int bIdx>
-struct YUV420p2RGB888Invoker
+struct YUV420p2RGB888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* my1, *mu, *mv;
YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
: dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- const int rangeBegin = range.begin() * 2;
- const int rangeEnd = range.end() * 2;
+ const int rangeBegin = range.start * 2;
+ const int rangeEnd = range.end * 2;
size_t uvsteps[2] = {width/2, stride - width/2};
int usIdx = ustepIdx, vsIdx = vstepIdx;
const uchar* y1 = my1 + rangeBegin * stride;
- const uchar* u1 = mu + (range.begin() / 2) * stride;
- const uchar* v1 = mv + (range.begin() / 2) * stride;
+ const uchar* u1 = mu + (range.start / 2) * stride;
+ const uchar* v1 = mv + (range.start / 2) * stride;
- if(range.begin() % 2 == 1)
+ if(range.start % 2 == 1)
{
u1 += uvsteps[(usIdx++) & 1];
v1 += uvsteps[(vsIdx++) & 1];
};
template<int bIdx>
-struct YUV420p2RGBA8888Invoker
+struct YUV420p2RGBA8888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* my1, *mu, *mv;
YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
: dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin() * 2;
- int rangeEnd = range.end() * 2;
+ int rangeBegin = range.start * 2;
+ int rangeEnd = range.end * 2;
size_t uvsteps[2] = {width/2, stride - width/2};
int usIdx = ustepIdx, vsIdx = vstepIdx;
const uchar* y1 = my1 + rangeBegin * stride;
- const uchar* u1 = mu + (range.begin() / 2) * stride;
- const uchar* v1 = mv + (range.begin() / 2) * stride;
+ const uchar* u1 = mu + (range.start / 2) * stride;
+ const uchar* v1 = mv + (range.start / 2) * stride;
- if(range.begin() % 2 == 1)
+ if(range.start % 2 == 1)
{
u1 += uvsteps[(usIdx++) & 1];
v1 += uvsteps[(vsIdx++) & 1];
inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
{
YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows/2), converter);
+ parallel_for_(Range(0, _dst.rows/2), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows/2));
+ converter(Range(0, _dst.rows/2));
}
template<int bIdx, int uIdx>
inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
{
YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows/2), converter);
+ parallel_for_(Range(0, _dst.rows/2), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows/2));
+ converter(Range(0, _dst.rows/2));
}
template<int bIdx>
inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
{
YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows/2), converter);
+ parallel_for_(Range(0, _dst.rows/2), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows/2));
+ converter(Range(0, _dst.rows/2));
}
template<int bIdx>
inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
{
YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows/2), converter);
+ parallel_for_(Range(0, _dst.rows/2), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows/2));
+ converter(Range(0, _dst.rows/2));
}
///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
template<int bIdx, int uIdx, int yIdx>
-struct YUV422toRGB888Invoker
+struct YUV422toRGB888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* src;
YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
: dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin();
- int rangeEnd = range.end();
+ int rangeBegin = range.start;
+ int rangeEnd = range.end;
const int uidx = 1 - yIdx + uIdx * 2;
const int vidx = (2 + uidx) % 4;
};
template<int bIdx, int uIdx, int yIdx>
-struct YUV422toRGBA8888Invoker
+struct YUV422toRGBA8888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* src;
YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
: dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin();
- int rangeEnd = range.end();
+ int rangeBegin = range.start;
+ int rangeEnd = range.end;
const int uidx = 1 - yIdx + uIdx * 2;
const int vidx = (2 + uidx) % 4;
inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
{
YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows), converter);
+ parallel_for_(Range(0, _dst.rows), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows));
+ converter(Range(0, _dst.rows));
}
template<int bIdx, int uIdx, int yIdx>
inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
{
YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows), converter);
+ parallel_for_(Range(0, _dst.rows), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows));
+ converter(Range(0, _dst.rows));
}
/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
namespace cv
{
-struct DTColumnInvoker
+struct DTColumnInvoker : ParallelLoopBody
{
DTColumnInvoker( const CvMat* _src, CvMat* _dst, const int* _sat_tab, const float* _sqr_tab)
{
sqr_tab = _sqr_tab;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
- int i, i1 = range.begin(), i2 = range.end();
+ int i, i1 = range.start, i2 = range.end;
int m = src->rows;
size_t sstep = src->step, dstep = dst->step/sizeof(float);
AutoBuffer<int> _d(m);
};
-struct DTRowInvoker
+struct DTRowInvoker : ParallelLoopBody
{
DTRowInvoker( CvMat* _dst, const float* _sqr_tab, const float* _inv_tab )
{
inv_tab = _inv_tab;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
const float inf = 1e15f;
- int i, i1 = range.begin(), i2 = range.end();
+ int i, i1 = range.start, i2 = range.end;
int n = dst->cols;
AutoBuffer<uchar> _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int));
float* f = (float*)(uchar*)_buf;
for( ; i <= m*3; i++ )
sat_tab[i] = i - shift;
- cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab));
+ cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab));
// stage 2: compute modified distance transform for each row
float* inv_tab = sqr_tab + n;
sqr_tab[i] = (float)(i*i);
}
- cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));
+ cv::parallel_for_(cv::Range(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));
}
}
}
-class EqualizeHistCalcHist_Invoker
+class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody
{
public:
enum {HIST_SZ = 256};
-#ifdef HAVE_TBB
- typedef tbb::mutex* MutextPtr;
-#else
- typedef void* MutextPtr;
-#endif
-
- EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, MutextPtr histogramLock)
+ EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock)
: src_(src), globalHistogram_(histogram), histogramLock_(histogramLock)
{ }
- void operator()( const cv::BlockedRange& rowRange ) const
+ void operator()( const cv::Range& rowRange ) const
{
int localHistogram[HIST_SZ] = {0, };
const size_t sstep = src_.step;
int width = src_.cols;
- int height = rowRange.end() - rowRange.begin();
+ int height = rowRange.end - rowRange.start;
if (src_.isContinuous())
{
height = 1;
}
- for (const uchar* ptr = src_.ptr<uchar>(rowRange.begin()); height--; ptr += sstep)
+ for (const uchar* ptr = src_.ptr<uchar>(rowRange.start); height--; ptr += sstep)
{
int x = 0;
for (; x <= width - 4; x += 4)
localHistogram[ptr[x]]++;
}
-#ifdef HAVE_TBB
- tbb::mutex::scoped_lock lock(*histogramLock_);
-#endif
+ cv::AutoLock lock(*histogramLock_);
for( int i = 0; i < HIST_SZ; i++ )
globalHistogram_[i] += localHistogram[i];
static bool isWorthParallel( const cv::Mat& src )
{
-#ifdef HAVE_TBB
return ( src.total() >= 640*480 );
-#else
- (void)src;
- return false;
-#endif
}
private:
cv::Mat& src_;
int* globalHistogram_;
- MutextPtr histogramLock_;
+ cv::Mutex* histogramLock_;
};
-class EqualizeHistLut_Invoker
+class EqualizeHistLut_Invoker : public cv::ParallelLoopBody
{
public:
EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut )
lut_(lut)
{ }
- void operator()( const cv::BlockedRange& rowRange ) const
+ void operator()( const cv::Range& rowRange ) const
{
const size_t sstep = src_.step;
const size_t dstep = dst_.step;
int width = src_.cols;
- int height = rowRange.end() - rowRange.begin();
+ int height = rowRange.end - rowRange.start;
int* lut = lut_;
if (src_.isContinuous() && dst_.isContinuous())
height = 1;
}
- const uchar* sptr = src_.ptr<uchar>(rowRange.begin());
- uchar* dptr = dst_.ptr<uchar>(rowRange.begin());
+ const uchar* sptr = src_.ptr<uchar>(rowRange.start);
+ uchar* dptr = dst_.ptr<uchar>(rowRange.start);
for (; height--; sptr += sstep, dptr += dstep)
{
static bool isWorthParallel( const cv::Mat& src )
{
-#ifdef HAVE_TBB
return ( src.total() >= 640*480 );
-#else
- (void)src;
- return false;
-#endif
}
private:
if(src.empty())
return;
-#ifdef HAVE_TBB
- tbb::mutex histogramLockInstance;
- EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = &histogramLockInstance;
-#else
- EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = 0;
-#endif
+ Mutex histogramLockInstance;
const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ;
int hist[hist_sz] = {0,};
int lut[hist_sz];
- EqualizeHistCalcHist_Invoker calcBody(src, hist, histogramLock);
+ EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance);
EqualizeHistLut_Invoker lutBody(src, dst, lut);
- cv::BlockedRange heightRange(0, src.rows);
+ cv::Range heightRange(0, src.rows);
if(EqualizeHistCalcHist_Invoker::isWorthParallel(src))
- parallel_for(heightRange, calcBody);
+ parallel_for_(heightRange, calcBody);
else
calcBody(heightRange);
}
if(EqualizeHistLut_Invoker::isWorthParallel(src))
- parallel_for(heightRange, lutBody);
+ parallel_for_(heightRange, lutBody);
else
lutBody(heightRange);
}
// ----------------------------------------------------------------------
-// CLAHE
-
-namespace
-{
- class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
- {
- public:
- CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) :
- src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale)
- {
- }
-
- void operator ()(const cv::Range& range) const;
-
- private:
- cv::Mat src_;
- mutable cv::Mat lut_;
-
- cv::Size tileSize_;
- int tilesX_;
- int tilesY_;
- int clipLimit_;
- float lutScale_;
- };
-
- void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const
- {
- const int histSize = 256;
-
- uchar* tileLut = lut_.ptr(range.start);
- const size_t lut_step = lut_.step;
-
- for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
- {
- const int ty = k / tilesX_;
- const int tx = k % tilesX_;
-
- // retrieve tile submatrix
-
- cv::Rect tileROI;
- tileROI.x = tx * tileSize_.width;
- tileROI.y = ty * tileSize_.height;
- tileROI.width = tileSize_.width;
- tileROI.height = tileSize_.height;
-
- const cv::Mat tile = src_(tileROI);
-
- // calc histogram
-
- int tileHist[histSize] = {0, };
-
- int height = tileROI.height;
- const size_t sstep = tile.step;
- for (const uchar* ptr = tile.ptr<uchar>(0); height--; ptr += sstep)
- {
- int x = 0;
- for (; x <= tileROI.width - 4; x += 4)
- {
- int t0 = ptr[x], t1 = ptr[x+1];
- tileHist[t0]++; tileHist[t1]++;
- t0 = ptr[x+2]; t1 = ptr[x+3];
- tileHist[t0]++; tileHist[t1]++;
- }
-
- for (; x < tileROI.width; ++x)
- tileHist[ptr[x]]++;
- }
-
- // clip histogram
-
- if (clipLimit_ > 0)
- {
- // how many pixels were clipped
- int clipped = 0;
- for (int i = 0; i < histSize; ++i)
- {
- if (tileHist[i] > clipLimit_)
- {
- clipped += tileHist[i] - clipLimit_;
- tileHist[i] = clipLimit_;
- }
- }
-
- // redistribute clipped pixels
- int redistBatch = clipped / histSize;
- int residual = clipped - redistBatch * histSize;
-
- for (int i = 0; i < histSize; ++i)
- tileHist[i] += redistBatch;
-
- for (int i = 0; i < residual; ++i)
- tileHist[i]++;
- }
-
- // calc Lut
-
- int sum = 0;
- for (int i = 0; i < histSize; ++i)
- {
- sum += tileHist[i];
- tileLut[i] = cv::saturate_cast<uchar>(sum * lutScale_);
- }
- }
- }
-
- class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
- {
- public:
- CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) :
- src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
- {
- }
-
- void operator ()(const cv::Range& range) const;
-
- private:
- cv::Mat src_;
- mutable cv::Mat dst_;
- cv::Mat lut_;
-
- cv::Size tileSize_;
- int tilesX_;
- int tilesY_;
- };
-
- void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const
- {
- const size_t lut_step = lut_.step;
-
- for (int y = range.start; y < range.end; ++y)
- {
- const uchar* srcRow = src_.ptr<uchar>(y);
- uchar* dstRow = dst_.ptr<uchar>(y);
-
- const float tyf = (static_cast<float>(y) / tileSize_.height) - 0.5f;
-
- int ty1 = cvFloor(tyf);
- int ty2 = ty1 + 1;
-
- const float ya = tyf - ty1;
-
- ty1 = std::max(ty1, 0);
- ty2 = std::min(ty2, tilesY_ - 1);
-
- const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_);
- const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_);
-
- for (int x = 0; x < src_.cols; ++x)
- {
- const float txf = (static_cast<float>(x) / tileSize_.width) - 0.5f;
-
- int tx1 = cvFloor(txf);
- int tx2 = tx1 + 1;
-
- const float xa = txf - tx1;
-
- tx1 = std::max(tx1, 0);
- tx2 = std::min(tx2, tilesX_ - 1);
-
- const int srcVal = srcRow[x];
-
- const size_t ind1 = tx1 * lut_step + srcVal;
- const size_t ind2 = tx2 * lut_step + srcVal;
-
- float res = 0;
-
- res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya));
- res += lutPlane1[ind2] * ((xa) * (1.0f - ya));
- res += lutPlane2[ind1] * ((1.0f - xa) * (ya));
- res += lutPlane2[ind2] * ((xa) * (ya));
-
- dstRow[x] = cv::saturate_cast<uchar>(res);
- }
- }
- }
-
- class CLAHE_Impl : public cv::CLAHE
- {
- public:
- CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
-
- cv::AlgorithmInfo* info() const;
-
- void apply(cv::InputArray src, cv::OutputArray dst);
-
- void setClipLimit(double clipLimit);
- double getClipLimit() const;
-
- void setTilesGridSize(cv::Size tileGridSize);
- cv::Size getTilesGridSize() const;
-
- void collectGarbage();
-
- private:
- double clipLimit_;
- int tilesX_;
- int tilesY_;
-
- cv::Mat srcExt_;
- cv::Mat lut_;
- };
-
- CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
- clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
- {
- }
-
- CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE",
- obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
- obj.info()->addParam(obj, "tilesX", obj.tilesX_);
- obj.info()->addParam(obj, "tilesY", obj.tilesY_))
-
- void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
- {
- cv::Mat src = _src.getMat();
-
- CV_Assert( src.type() == CV_8UC1 );
-
- _dst.create( src.size(), src.type() );
- cv::Mat dst = _dst.getMat();
-
- const int histSize = 256;
-
- lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
-
- cv::Size tileSize;
- cv::Mat srcForLut;
-
- if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
- {
- tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
- srcForLut = src;
- }
- else
- {
- cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
-
- tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
- srcForLut = srcExt_;
- }
-
- const int tileSizeTotal = tileSize.area();
- const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
-
- int clipLimit = 0;
- if (clipLimit_ > 0.0)
- {
- clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
- clipLimit = std::max(clipLimit, 1);
- }
-
- CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
- cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
-
- CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_);
- cv::parallel_for_(cv::Range(0, src.rows), interpolationBody);
- }
-
- void CLAHE_Impl::setClipLimit(double clipLimit)
- {
- clipLimit_ = clipLimit;
- }
-
- double CLAHE_Impl::getClipLimit() const
- {
- return clipLimit_;
- }
-
- void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
- {
- tilesX_ = tileGridSize.width;
- tilesY_ = tileGridSize.height;
- }
-
- cv::Size CLAHE_Impl::getTilesGridSize() const
- {
- return cv::Size(tilesX_, tilesY_);
- }
-
- void CLAHE_Impl::collectGarbage()
- {
- srcExt_.release();
- lut_.release();
- }
-}
-
-cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
-{
- return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
-}
-
-// ----------------------------------------------------------------------
/* Implementation of RTTI and Generic Functions for CvHistogram */
#define CV_TYPE_NAME_HIST "opencv-hist"
namespace cv
{
-class MorphologyRunner
+class MorphologyRunner : public ParallelLoopBody
{
public:
MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations,
columnBorderType = _columnBorderType;
}
- void operator () ( const BlockedRange& range ) const
+ void operator () ( const Range& range ) const
{
- int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows);
- int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows);
+ int row0 = min(cvRound(range.start * src.rows / nStripes), src.rows);
+ int row1 = min(cvRound(range.end * src.rows / nStripes), src.rows);
/*if(0)
printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
- src.rows, src.cols, range.begin(), range.end(), row0, row1);*/
+ src.rows, src.cols, range.start, range.end, row0, row1);*/
Mat srcStripe = src.rowRange(row0, row1);
Mat dstStripe = dst.rowRange(row0, row1);
}
int nStripes = 1;
-#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION
+#if defined HAVE_TEGRA_OPTIMIZATION
if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing
(borderType & BORDER_ISOLATED) == 0 && //TODO: check border types
src.rows >= 64 ) //NOTE: just heuristics
nStripes = 4;
#endif
- parallel_for(BlockedRange(0, nStripes),
- MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
+ parallel_for_(Range(0, nStripes),
+ MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
//Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(),
// kernel, anchor, borderType, borderType, borderValue );
#include "precomp.hpp"
-#ifdef HAVE_TBB
-#include <tbb/tbb.h>
-#endif
-
CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
{
term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
return iter;
}
-struct rprop_loop {
+struct rprop_loop : cv::ParallelLoopBody {
rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0,
int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count,
CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz)
int buf_sz;
- void operator()( const cv::BlockedRange& range ) const
+ void operator()( const cv::Range& range ) const
{
double* buf_ptr;
double** x = 0;
buf_ptr += (df[i] - x[i])*2;
}
- for(int si = range.begin(); si < range.end(); si++ )
+ for(int si = range.start; si < range.end; si++ )
{
if (si % dcount0 != 0) continue;
int n1, n2, k;
}
// backward pass, update dEdw
- #ifdef HAVE_TBB
- static tbb::spin_mutex mutex;
- tbb::spin_mutex::scoped_lock lock;
- #endif
+ static cv::Mutex mutex;
+
for(int i = l_count-1; i > 0; i-- )
{
n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
cvMul( grad1, &_df, grad1 );
- #ifdef HAVE_TBB
- lock.acquire(mutex);
- #endif
- cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
- cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
- cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
-
- // update bias part of dEdw
- for( k = 0; k < dcount; k++ )
- {
- double* dst = _dEdw.data.db + n1*n2;
- const double* src = grad1->data.db + k*n2;
- for(int j = 0; j < n2; j++ )
- dst[j] += src[j];
+
+ {
+ cv::AutoLock lock(mutex);
+ cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
+ cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
+ cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
+
+ // update bias part of dEdw
+ for( k = 0; k < dcount; k++ )
+ {
+ double* dst = _dEdw.data.db + n1*n2;
+ const double* src = grad1->data.db + k*n2;
+ for(int j = 0; j < n2; j++ )
+ dst[j] += src[j];
+ }
+
+ if (i > 1)
+ cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
}
- if (i > 1)
- cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
- #ifdef HAVE_TBB
- lock.release();
- #endif
cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
if( i > 1 )
cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
double E = 0;
// first, iterate through all the samples and compute dEdw
- cv::parallel_for(cv::BlockedRange(0, count),
+ cv::parallel_for_(cv::Range(0, count),
rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes,
ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz)
);
}
-class Tree_predictor
+class Tree_predictor : public cv::ParallelLoopBody
{
private:
pCvSeq* weak;
const CvMat* missing;
const float shrinkage;
-#ifdef HAVE_TBB
- static tbb::spin_mutex SumMutex;
-#endif
+ static cv::Mutex SumMutex;
public:
Tree_predictor& operator=( const Tree_predictor& )
{ return *this; }
- virtual void operator()(const cv::BlockedRange& range) const
+ virtual void operator()(const cv::Range& range) const
{
-#ifdef HAVE_TBB
- tbb::spin_mutex::scoped_lock lock;
-#endif
CvSeqReader reader;
- int begin = range.begin();
- int end = range.end();
+ int begin = range.start;
+ int end = range.end;
int weak_count = end - begin;
CvDTree* tree;
tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
}
}
-#ifdef HAVE_TBB
- lock.acquire(SumMutex);
- sum[i] += tmp_sum;
- lock.release();
-#else
- sum[i] += tmp_sum;
-#endif
+
+ {
+ cv::AutoLock lock(SumMutex);
+ sum[i] += tmp_sum;
+ }
}
} // Tree_predictor::operator()
}; // class Tree_predictor
-
-#ifdef HAVE_TBB
-tbb::spin_mutex Tree_predictor::SumMutex;
-#endif
-
+cv::Mutex Tree_predictor::SumMutex;
float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
Tree_predictor predictor = Tree_predictor(weak_seq, class_count,
params.shrinkage, _sample, _missing, sum);
-//#ifdef HAVE_TBB
-// tbb::parallel_for(cv::BlockedRange(begin, end), predictor,
-// tbb::auto_partitioner());
-//#else
- cv::parallel_for(cv::BlockedRange(begin, end), predictor);
-//#endif
+ cv::parallel_for_(cv::Range(begin, end), predictor);
for (int i=0; i<class_count; ++i)
sum[i] = sum[i] /** params.shrinkage*/ + base_value;
//===========================================================================
-class Sample_predictor
+class Sample_predictor : public cv::ParallelLoopBody
{
private:
const CvGBTrees* gbt;
{}
- virtual void operator()(const cv::BlockedRange& range) const
+ virtual void operator()(const cv::Range& range) const
{
- int begin = range.begin();
- int end = range.end();
+ int begin = range.start;
+ int end = range.end;
CvMat x;
CvMat miss;
Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(),
_data->get_missing(), _sample_idx);
-//#ifdef HAVE_TBB
-// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner());
-//#else
- cv::parallel_for(cv::BlockedRange(0,n), predictor);
-//#endif
+ cv::parallel_for_(cv::Range(0,n), predictor);
int* sidx = _sample_idx ? _sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(response->type) ?
return result;
}
-struct P1 {
+struct P1 : cv::ParallelLoopBody {
P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
{
float* result;
int buf_sz;
- void operator()( const cv::BlockedRange& range ) const
+ void operator()( const cv::Range& range ) const
{
cv::AutoBuffer<float> buf(buf_sz);
- for(int i = range.begin(); i < range.end(); i += 1 )
+ for(int i = range.start; i < range.end; i += 1 )
{
float* neighbor_responses = &buf[0];
float* dist = neighbor_responses + 1*k;
int k1 = get_sample_count();
k1 = MIN( k1, k );
- cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
- _results, _neighbor_responses, _dist, &result)
+ cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
+ _results, _neighbor_responses, _dist, &result)
);
return result;
return result;
}
-struct predict_body {
+struct predict_body : cv::ParallelLoopBody {
predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
CvMat* _results, float* _value, int _var_count1
float* value;
int var_count1;
- void operator()( const cv::BlockedRange& range ) const
+ void operator()( const cv::Range& range ) const
{
int cls = -1;
cv::AutoBuffer<double> buffer(nclasses + var_count1);
CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
- for(int k = range.begin(); k < range.end(); k += 1 )
+ for(int k = range.start; k < range.end; k += 1 )
{
int ival;
double opt = FLT_MAX;
const int* vidx = var_idx ? var_idx->data.i : 0;
- cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
- vidx, cls_labels, results, &value, var_count
- ));
+ cv::parallel_for_(cv::Range(0, samples->rows),
+ predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
+ vidx, cls_labels, results, &value, var_count));
return value;
}
return result;
}
-struct predict_body_svm {
+struct predict_body_svm : ParallelLoopBody {
predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results)
{
pointer = _pointer;
const CvMat* samples;
CvMat* results;
- void operator()( const cv::BlockedRange& range ) const
+ void operator()( const cv::Range& range ) const
{
- for(int i = range.begin(); i < range.end(); i++ )
+ for(int i = range.start; i < range.end; i++ )
{
CvMat sample;
cvGetRow( samples, &sample, i );
float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const
{
float result = 0;
- cv::parallel_for(cv::BlockedRange(0, samples->rows),
+ cv::parallel_for_(cv::Range(0, samples->rows),
predict_body_svm(this, &result, samples, results)
);
return result;
}
// Multi-threaded construction of the scale-space pyramid
-struct SURFBuildInvoker
+struct SURFBuildInvoker : ParallelLoopBody
{
SURFBuildInvoker( const Mat& _sum, const vector<int>& _sizes,
const vector<int>& _sampleSteps,
traces = &_traces;
}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- for( int i=range.begin(); i<range.end(); i++ )
+ for( int i=range.start; i<range.end; i++ )
calcLayerDetAndTrace( *sum, (*sizes)[i], (*sampleSteps)[i], (*dets)[i], (*traces)[i] );
}
};
// Multi-threaded search of the scale-space pyramid for keypoints
-struct SURFFindInvoker
+struct SURFFindInvoker : ParallelLoopBody
{
SURFFindInvoker( const Mat& _sum, const Mat& _mask_sum,
const vector<Mat>& _dets, const vector<Mat>& _traces,
const vector<int>& sizes, vector<KeyPoint>& keypoints,
int octave, int layer, float hessianThreshold, int sampleStep );
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- for( int i=range.begin(); i<range.end(); i++ )
+ for( int i=range.start; i<range.end; i++ )
{
int layer = (*middleIndices)[i];
int octave = i / nOctaveLayers;
int nOctaveLayers;
float hessianThreshold;
-#ifdef HAVE_TBB
- static tbb::mutex findMaximaInLayer_m;
-#endif
+ static Mutex findMaximaInLayer_m;
};
-#ifdef HAVE_TBB
-tbb::mutex SURFFindInvoker::findMaximaInLayer_m;
-#endif
+Mutex SURFFindInvoker::findMaximaInLayer_m;
/*
if( interp_ok )
{
/*printf( "KeyPoint %f %f %d\n", point.pt.x, point.pt.y, point.size );*/
-#ifdef HAVE_TBB
- tbb::mutex::scoped_lock lock(findMaximaInLayer_m);
-#endif
+ cv::AutoLock lock(findMaximaInLayer_m);
keypoints.push_back(kpt);
}
}
}
// Calculate hessian determinant and trace samples in each layer
- parallel_for( BlockedRange(0, nTotalLayers),
- SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
+ parallel_for_( Range(0, nTotalLayers),
+ SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
// Find maxima in the determinant of the hessian
- parallel_for( BlockedRange(0, nMiddleLayers),
- SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
- sampleSteps, middleIndices, keypoints,
- nOctaveLayers, hessianThreshold) );
+ parallel_for_( Range(0, nMiddleLayers),
+ SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
+ sampleSteps, middleIndices, keypoints,
+ nOctaveLayers, hessianThreshold) );
std::sort(keypoints.begin(), keypoints.end(), KeypointGreater());
}
-struct SURFInvoker
+struct SURFInvoker : ParallelLoopBody
{
enum { ORI_RADIUS = 6, ORI_WIN = 60, PATCH_SZ = 20 };
}
}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
/* X and Y gradient wavelet data */
const int NX=2, NY=2;
int dsize = extended ? 128 : 64;
- int k, k1 = range.begin(), k2 = range.end();
+ int k, k1 = range.start, k2 = range.end;
float maxSize = 0;
for( k = k1; k < k2; k++ )
{
// we call SURFInvoker in any case, even if we do not need descriptors,
// since it computes orientation of each feature.
- parallel_for(BlockedRange(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
+ parallel_for_(Range(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
// remove keypoints that were marked for deletion
for( i = j = 0; i < N; i++ )
int stripCount, stripSize;
- #ifdef HAVE_TBB
const int PTS_PER_THREAD = 1000;
stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
- #else
- stripCount = 1;
- stripSize = processingRectSize.height;
- #endif
if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates,
rejectLevels, levelWeights, outputRejectLevels ) )
// For each component perform searching
for (i = 0; i < kComponents; i++)
{
-#ifdef HAVE_TBB
int error = searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
b[i], maxXBorder, maxYBorder, scoreThreshold,
&(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
free(partsDisplacementArr);
return LATENT_SVM_SEARCH_OBJECT_FAILED;
}
-#else
- (void)numThreads;
- searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
- b[i], maxXBorder, maxYBorder, scoreThreshold,
- &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
- &(scoreArr[i]), &(partsDisplacementArr[i]));
-#endif
estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i],
filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i]));
componentIndex += (kPartFilters[i] + 1);
CV_EXPORTS void setBinpath(const char *path);
//The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
+ //returns cl_context *
CV_EXPORTS void* getoclContext();
-
+ //returns cl_command_queue *
CV_EXPORTS void* getoclCommandQueue();
//explicit call clFinish. The global command queue will be used.
// support all C1 types
CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+ CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
//! finds global minimum and maximum array elements and returns their values with locations
// support all C1 types
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
OclCascadeClassifierBuf() :
m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
- ~OclCascadeClassifierBuf() {}
+ ~OclCascadeClassifierBuf() { release(); }
void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
public:
explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
};
+
+ class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
+ {
+ public:
+ explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+ int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
+
+ //! return 1 rows matrix with CV_32FC2 type
+ void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
+ //! download points of type Point2f to a vector. The vector's content will be erased.
+ void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
+
+ int maxCorners;
+ double qualityLevel;
+ double minDistance;
+
+ int blockSize;
+ bool useHarrisDetector;
+ double harrisK;
+ void releaseMemory()
+ {
+ Dx_.release();
+ Dy_.release();
+ eig_.release();
+ minMaxbuf_.release();
+ tmpCorners_.release();
+ }
+ private:
+ oclMat Dx_;
+ oclMat Dy_;
+ oclMat eig_;
+ oclMat minMaxbuf_;
+ oclMat tmpCorners_;
+ };
+
+ inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
+ int blockSize_, bool useHarrisDetector_, double harrisK_)
+ {
+ maxCorners = maxCorners_;
+ qualityLevel = qualityLevel_;
+ minDistance = minDistance_;
+ blockSize = blockSize_;
+ useHarrisDetector = useHarrisDetector_;
+ harrisK = harrisK_;
+ }
+
/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
class CV_EXPORTS PyrLKOpticalFlow
{
#include "opencv2/ocl/ocl.hpp"
#if defined __APPLE__
-#include <OpenCL/OpenCL.h>
+#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
void CV_EXPORTS releaseTexture(cl_mem& texture);
+ //Represents an image texture object
+ class CV_EXPORTS TextureCL
+ {
+ public:
+ TextureCL(cl_mem tex, int r, int c, int t)
+ : tex_(tex), rows(r), cols(c), type(t) {}
+ ~TextureCL()
+ {
+ openCLFree(tex_);
+ }
+ operator cl_mem()
+ {
+ return tex_;
+ }
+ cl_mem const tex_;
+ const int rows;
+ const int cols;
+ const int type;
+ private:
+ //disable assignment
+ void operator=(const TextureCL&);
+ };
+ // bind oclMat to OpenCL image textures and returns a TextureCL object
+ // note:
+ // for faster clamping, there is no buffer padding for the constructed texture
+ Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
+
// returns whether the current context supports image2d_t format or not
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
};
template<DEVICE_INFO _it, typename _ty>
_ty queryDeviceInfo(cl_kernel kernel = NULL);
- //info should have been pre-allocated
+
template<>
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
template<>
///////////// Lut ////////////////////////
PERFTEST(lut)
{
- Mat src, lut, dst;
+ Mat src, lut, dst, ocl_dst;
ocl::oclMat d_src, d_lut, d_dst;
int all_type[] = {CV_8UC1, CV_8UC3};
ocl::LUT(d_src, d_lut, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0));
-
GPU_ON;
ocl::LUT(d_src, d_lut, d_dst);
GPU_OFF;
d_src.upload(src);
d_lut.upload(lut);
ocl::LUT(d_src, d_lut, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0);
}
}
///////////// Exp ////////////////////////
PERFTEST(Exp)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
ocl::exp(d_src, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 2));
-
GPU_ON;
ocl::exp(d_src, d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::exp(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 2);
}
}
///////////// LOG ////////////////////////
PERFTEST(Log)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
ocl::log(d_src, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1));
-
GPU_ON;
ocl::log(d_src, d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::log(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
}
}
///////////// Add ////////////////////////
PERFTEST(Add)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_32FC1};
CPU_ON;
add(src1, src2, dst);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
ocl::add(d_src1, d_src2, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
-
GPU_ON;
ocl::add(d_src1, d_src2, d_dst);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::add(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
///////////// Mul ////////////////////////
PERFTEST(Mul)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
ocl::multiply(d_src1, d_src2, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
-
GPU_ON;
ocl::multiply(d_src1, d_src2, d_dst);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::multiply(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
///////////// Div ////////////////////////
PERFTEST(Div)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
CPU_ON;
divide(src1, src2, dst);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
ocl::divide(d_src1, d_src2, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1));
-
GPU_ON;
ocl::divide(d_src1, d_src2, d_dst);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::divide(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
}
}
///////////// Absdiff ////////////////////////
PERFTEST(Absdiff)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
CPU_ON;
absdiff(src1, src2, dst);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
ocl::absdiff(d_src1, d_src2, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
-
GPU_ON;
ocl::absdiff(d_src1, d_src2, d_dst);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::absdiff(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
///////////// CartToPolar ////////////////////////
PERFTEST(CartToPolar)
{
- Mat src1, src2, dst, dst1;
+ Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
int all_type[] = {CV_32FC1};
CPU_ON;
cartToPolar(src1, src2, dst, dst1, 1);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- cv::Mat ocl_mat_dst1;
- d_dst1.download(ocl_mat_dst1);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst1, dst1, 0.5)&&ExpectedMatNear(ocl_mat_dst, dst, 0.5));
-
GPU_ON;
ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
- d_dst.download(dst);
- d_dst1.download(dst1);
+ d_dst.download(ocl_dst);
+ d_dst1.download(ocl_dst1);
GPU_FULL_OFF;
+
+ double diff1 = checkNorm(ocl_dst1, dst1);
+ double diff2 = checkNorm(ocl_dst, dst);
+ double max_diff = max(diff1, diff2);
+ TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
+
}
}
///////////// PolarToCart ////////////////////////
PERFTEST(PolarToCart)
{
- Mat src1, src2, dst, dst1;
+ Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
int all_type[] = {CV_32FC1};
ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- cv::Mat ocl_mat_dst1;
- d_dst1.download(ocl_mat_dst1);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst1, dst1, 0.5)&&ExpectedMatNear(ocl_mat_dst, dst, 0.5));
-
GPU_ON;
ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
- d_dst.download(dst);
- d_dst1.download(dst1);
+ d_dst.download(ocl_dst);
+ d_dst1.download(ocl_dst1);
GPU_FULL_OFF;
+
+ double diff1 = checkNorm(ocl_dst1, dst1);
+ double diff2 = checkNorm(ocl_dst, dst);
+ double max_diff = max(diff1, diff2);
+ TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
+
}
}
///////////// Magnitude ////////////////////////
PERFTEST(magnitude)
{
- Mat x, y, mag;
+ Mat x, y, mag, ocl_mag;
ocl::oclMat d_x, d_y, d_mag;
int all_type[] = {CV_32FC1};
ocl::magnitude(d_x, d_y, d_mag);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_mag.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, mag, 1e-5));
-
GPU_ON;
ocl::magnitude(d_x, d_y, d_mag);
GPU_OFF;
d_x.upload(x);
d_y.upload(y);
ocl::magnitude(d_x, d_y, d_mag);
- d_mag.download(mag);
+ d_mag.download(ocl_mag);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_mag, mag, 1e-5);
}
}
///////////// Transpose ////////////////////////
PERFTEST(Transpose)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
ocl::transpose(d_src, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5));
-
GPU_ON;
ocl::transpose(d_src, d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::transpose(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
///////////// Flip ////////////////////////
PERFTEST(Flip)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
ocl::flip(d_src, d_dst, 0);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5));
-
GPU_ON;
ocl::flip(d_src, d_dst, 0);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::flip(d_src, d_dst, 0);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
ocl::minMax(d_src, &min_val_, &max_val_);
WARMUP_OFF;
- TestSystem::instance().setAccurate(EeceptDoubleEQ<double>(max_val_, max_val)&&EeceptDoubleEQ<double>(min_val_, min_val));
+ if(EeceptDoubleEQ<double>(max_val_, max_val) && EeceptDoubleEQ<double>(min_val_, min_val))
+ TestSystem::instance().setAccurate(1, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
+ else
+ TestSystem::instance().setAccurate(0, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
GPU_ON;
ocl::minMax(d_src, &min_val, &max_val);
minlocVal_ = src.at<unsigned char>(min_loc_);
maxlocVal = src.at<unsigned char>(max_loc);
maxlocVal_ = src.at<unsigned char>(max_loc_);
- error0 = ::abs(src.at<unsigned char>(min_loc_) - src.at<unsigned char>(min_loc));
- error1 = ::abs(src.at<unsigned char>(max_loc_) - src.at<unsigned char>(max_loc));
}
if(src.depth() == 1)
{
minlocVal_ = src.at<signed char>(min_loc_);
maxlocVal = src.at<signed char>(max_loc);
maxlocVal_ = src.at<signed char>(max_loc_);
- error0 = ::abs(src.at<signed char>(min_loc_) - src.at<signed char>(min_loc));
- error1 = ::abs(src.at<signed char>(max_loc_) - src.at<signed char>(max_loc));
}
if(src.depth() == 2)
{
minlocVal_ = src.at<unsigned short>(min_loc_);
maxlocVal = src.at<unsigned short>(max_loc);
maxlocVal_ = src.at<unsigned short>(max_loc_);
- error0 = ::abs(src.at<unsigned short>(min_loc_) - src.at<unsigned short>(min_loc));
- error1 = ::abs(src.at<unsigned short>(max_loc_) - src.at<unsigned short>(max_loc));
}
if(src.depth() == 3)
{
minlocVal_ = src.at<signed short>(min_loc_);
maxlocVal = src.at<signed short>(max_loc);
maxlocVal_ = src.at<signed short>(max_loc_);
- error0 = ::abs(src.at<signed short>(min_loc_) - src.at<signed short>(min_loc));
- error1 = ::abs(src.at<signed short>(max_loc_) - src.at<signed short>(max_loc));
}
if(src.depth() == 4)
{
minlocVal_ = src.at<int>(min_loc_);
maxlocVal = src.at<int>(max_loc);
maxlocVal_ = src.at<int>(max_loc_);
- error0 = ::abs(src.at<int>(min_loc_) - src.at<int>(min_loc));
- error1 = ::abs(src.at<int>(max_loc_) - src.at<int>(max_loc));
}
if(src.depth() == 5)
{
minlocVal_ = src.at<float>(min_loc_);
maxlocVal = src.at<float>(max_loc);
maxlocVal_ = src.at<float>(max_loc_);
- error0 = ::abs(src.at<float>(min_loc_) - src.at<float>(min_loc));
- error1 = ::abs(src.at<float>(max_loc_) - src.at<float>(max_loc));
}
if(src.depth() == 6)
{
minlocVal_ = src.at<double>(min_loc_);
maxlocVal = src.at<double>(max_loc);
maxlocVal_ = src.at<double>(max_loc_);
- error0 = ::abs(src.at<double>(min_loc_) - src.at<double>(min_loc));
- error1 = ::abs(src.at<double>(max_loc_) - src.at<double>(max_loc));
}
-
- TestSystem::instance().setAccurate(EeceptDoubleEQ<double>(error1, 0.0)
- &&EeceptDoubleEQ<double>(error0, 0.0)
- &&EeceptDoubleEQ<double>(maxlocVal_, maxlocVal)
+ error0 = ::abs(minlocVal_ - minlocVal);
+ error1 = ::abs(maxlocVal_ - maxlocVal);
+ if( EeceptDoubleEQ<double>(maxlocVal_, maxlocVal)
&&EeceptDoubleEQ<double>(minlocVal_, minlocVal)
&&EeceptDoubleEQ<double>(max_val_, max_val)
- &&EeceptDoubleEQ<double>(min_val_, min_val));
+ &&EeceptDoubleEQ<double>(min_val_, min_val))
+ TestSystem::instance().setAccurate(1, 0.);
+ else
+ TestSystem::instance().setAccurate(0, max(error0, error1));
GPU_ON;
ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
gpures = ocl::sum(d_src);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExceptDoubleNear(cpures[3], gpures[3], 0.1)
- &&ExceptDoubleNear(cpures[2], gpures[2], 0.1)
- &&ExceptDoubleNear(cpures[1], gpures[1], 0.1)
- &&ExceptDoubleNear(cpures[0], gpures[0], 0.1));
-
+ vector<double> diffs(4);
+ diffs[3] = fabs(cpures[3] - gpures[3]);
+ diffs[2] = fabs(cpures[2] - gpures[2]);
+ diffs[1] = fabs(cpures[1] - gpures[1]);
+ diffs[0] = fabs(cpures[0] - gpures[0]);
+ double max_diff = *max_element(diffs.begin(), diffs.end());
+ TestSystem::instance().setAccurate(max_diff<0.1?1:0, max_diff);
GPU_ON;
gpures = ocl::sum(d_src);
gpures = ocl::countNonZero(d_src);
WARMUP_OFF;
- TestSystem::instance().setAccurate((EeceptDoubleEQ<double>((double)cpures, (double)gpures)));
+ int diff = abs(cpures - gpures);
+ if(diff == 0)
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, diff);
GPU_ON;
ocl::countNonZero(d_src);
///////////// Phase ////////////////////////
PERFTEST(Phase)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_32FC1};
gen(src2, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
phase(src1, src2, dst, 1);
CPU_ON;
phase(src1, src2, dst, 1);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
ocl::phase(d_src1, d_src2, d_dst, 1);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-2));
-
GPU_ON;
ocl::phase(d_src1, d_src2, d_dst, 1);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::phase(d_src1, d_src2, d_dst, 1);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-2);
}
}
///////////// bitwise_and////////////////////////
PERFTEST(bitwise_and)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_32SC1};
gen(src2, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
bitwise_and(src1, src2, dst);
CPU_ON;
ocl::bitwise_and(d_src1, d_src2, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
-
GPU_ON;
ocl::bitwise_and(d_src1, d_src2, d_dst);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::bitwise_and(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
///////////// bitwise_not////////////////////////
PERFTEST(bitwise_not)
{
- Mat src1, dst;
+ Mat src1, dst, ocl_dst;
ocl::oclMat d_src1, d_dst;
int all_type[] = {CV_8UC1, CV_32SC1};
gen(src1, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
bitwise_not(src1, dst);
CPU_ON;
ocl::bitwise_not(d_src1, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
-
GPU_ON;
ocl::bitwise_not(d_src1, d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
ocl::bitwise_not(d_src1, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
///////////// compare////////////////////////
PERFTEST(compare)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int CMP_EQ = 0;
gen(src2, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
compare(src1, src2, dst, CMP_EQ);
CPU_ON;
compare(src1, src2, dst, CMP_EQ);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
-
GPU_ON;
ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
///////////// pow ////////////////////////
PERFTEST(pow)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_32FC1};
ocl::pow(d_src, -2.0, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1.0));
-
GPU_ON;
ocl::pow(d_src, -2.0, d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::pow(d_src, -2.0, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
}
}
///////////// MagnitudeSqr////////////////////////
PERFTEST(MagnitudeSqr)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_32FC1};
gen(src2, size, size, all_type[t], 0, 256);
gen(dst, size, size, all_type[t], 0, 256);
-
+ CPU_ON;
for (int i = 0; i < src1.rows; ++i)
-
for (int j = 0; j < src1.cols; ++j)
{
float val1 = src1.at<float>(i, j);
float val2 = src2.at<float>(i, j);
-
((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
}
+ CPU_OFF;
- CPU_ON;
-
- for (int i = 0; i < src1.rows; ++i)
- for (int j = 0; j < src1.cols; ++j)
- {
- float val1 = src1.at<float>(i, j);
- float val2 = src2.at<float>(i, j);
-
- ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
-
- }
-
- CPU_OFF;
- d_src1.upload(src1);
- d_src2.upload(src2);
-
- WARMUP_ON;
- ocl::magnitudeSqr(d_src1, d_src2, d_dst);
- WARMUP_OFF;
+ d_src1.upload(src1);
+ d_src2.upload(src2);
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
+ WARMUP_ON;
+ ocl::magnitudeSqr(d_src1, d_src2, d_dst);
+ WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1.0));
+ GPU_ON;
+ ocl::magnitudeSqr(d_src1, d_src2, d_dst);
+ GPU_OFF;
- GPU_ON;
- ocl::magnitudeSqr(d_src1, d_src2, d_dst);
- GPU_OFF;
+ GPU_FULL_ON;
+ d_src1.upload(src1);
+ d_src2.upload(src2);
+ ocl::magnitudeSqr(d_src1, d_src2, d_dst);
+ d_dst.download(ocl_dst);
+ GPU_FULL_OFF;
- GPU_FULL_ON;
- d_src1.upload(src1);
- d_src2.upload(src2);
- ocl::magnitudeSqr(d_src1, d_src2, d_dst);
- d_dst.download(dst);
- GPU_FULL_OFF;
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
}
}
///////////// AddWeighted////////////////////////
PERFTEST(AddWeighted)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
double alpha = 2.0, beta = 1.0, gama = 3.0;
ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat_dst;
- d_dst.download(ocl_mat_dst);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5));
-
GPU_ON;
ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
GPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
}
PERFTEST(blend)
{
- Mat src1, src2, weights1, weights2, dst;
+ Mat src1, src2, weights1, weights2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat;
- d_dst.download(ocl_mat);
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 1.f));
-
GPU_ON;
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
GPU_OFF;
d_weights1.upload(weights1);
d_weights2.upload(weights2);
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f);
}
}
}
\ No newline at end of file
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
WARMUP_OFF;
- d_matcher.match(d_query, d_train, d_matches[0]);
- TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
-
GPU_ON;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
GPU_OFF;
GPU_FULL_ON;
d_query.upload(query);
d_train.upload(train);
- d_matcher.match(d_query, d_train, matches[0]);
+ d_matcher.match(d_query, d_train, d_matches[0]);
GPU_FULL_OFF;
+ int diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+ if(diff == 0)
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, diff);
+
SUBTEST << size << "; knnMatch";
matcher.knnMatch(query, train, matches, 2);
d_matcher.knnMatch(d_query, d_train, d_matches, 2);
GPU_FULL_OFF;
- TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
+ diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+ if(diff == 0)
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, diff);
SUBTEST << size << "; radiusMatch";
d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance);
GPU_FULL_OFF;
- TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
+ diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+ if(diff == 0)
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, diff);
}
}
\ No newline at end of file
SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";
- Mat edges(img.size(), CV_8UC1);
+ Mat edges(img.size(), CV_8UC1), ocl_edges;
CPU_ON;
Canny(img, edges, 50.0, 100.0);
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExceptedMatSimilar(edges, d_edges, 2e-2));
-
GPU_ON;
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
GPU_OFF;
GPU_FULL_ON;
d_img.upload(img);
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
- d_edges.download(edges);
+ d_edges.download(ocl_edges);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2);
}
\ No newline at end of file
///////////// cvtColor////////////////////////
PERFTEST(cvtColor)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC4};
ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
WARMUP_OFF;
- cv::Mat ocl_mat;
- d_dst.download(ocl_mat);
- TestSystem::instance().setAccurate(ExceptedMatSimilar(dst, ocl_mat, 1e-5));
-
GPU_ON;
ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExceptedMatSimilar(dst, ocl_dst, 1e-5);
}
///////////// columnSum////////////////////////
PERFTEST(columnSum)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
dst.at<float>(0, j) = src.at<float>(0, j);
for (int i = 1; i < src.rows; ++i)
- {for (int j = 0; j < src.cols; ++j)
- {
+ for (int j = 0; j < src.cols; ++j)
dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
- }
- }
-
CPU_OFF;
d_src.upload(src);
+
WARMUP_ON;
ocl::columnSum(d_src, d_dst);
WARMUP_OFF;
- cv::Mat ocl_mat;
- d_dst.download(ocl_mat);
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 5e-1));
-
GPU_ON;
ocl::columnSum(d_src, d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::columnSum(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1);
}
}
\ No newline at end of file
///////////// dft ////////////////////////
PERFTEST(dft)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_32FC2};
ocl::dft(d_src, d_dst, Size(size, size));
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), src.size().area() * 1e-4));
-
GPU_ON;
ocl::dft(d_src, d_dst, Size(size, size));
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::dft(d_src, d_dst, Size(size, size));
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, src.size().area() * 1e-4);
}
}
///////////// Blur////////////////////////
PERFTEST(Blur)
{
- Mat src1, dst;
+ Mat src1, dst, ocl_dst;
ocl::oclMat d_src1, d_dst;
Size ksize = Size(3, 3);
gen(src1, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
blur(src1, dst, ksize, Point(-1, -1), bordertype);
CPU_ON;
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0));
-
GPU_ON;
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
}
}
///////////// Laplacian////////////////////////
PERFTEST(Laplacian)
{
- Mat src1, dst;
+ Mat src1, dst, ocl_dst;
ocl::oclMat d_src1, d_dst;
int ksize = 3;
gen(src1, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
Laplacian(src1, dst, -1, ksize, 1);
CPU_ON;
ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));
-
GPU_ON;
ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
///////////// Erode ////////////////////
PERFTEST(Erode)
{
- Mat src, dst, ker;
+ Mat src, dst, ker, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
ocl::erode(d_src, d_dst, ker);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));
-
GPU_ON;
ocl::erode(d_src, d_dst, ker);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::erode(d_src, d_dst, ker);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
///////////// Sobel ////////////////////////
PERFTEST(Sobel)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int dx = 1;
ocl::Sobel(d_src, d_dst, -1, dx, dy);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1));
-
GPU_ON;
ocl::Sobel(d_src, d_dst, -1, dx, dy);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::Sobel(d_src, d_dst, -1, dx, dy);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
}
}
///////////// Scharr ////////////////////////
PERFTEST(Scharr)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int dx = 1;
ocl::Scharr(d_src, d_dst, -1, dx, dy);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1));
-
GPU_ON;
ocl::Scharr(d_src, d_dst, -1, dx, dy);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::Scharr(d_src, d_dst, -1, dx, dy);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
}
}
///////////// GaussianBlur ////////////////////////
PERFTEST(GaussianBlur)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0));
-
-
GPU_ON;
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
}
}
Mat kernel;
gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
- Mat dst(src);
+ Mat dst, ocl_dst;
dst.setTo(0);
cv::filter2D(src, dst, -1, kernel);
cv::filter2D(src, dst, -1, kernel);
CPU_OFF;
- ocl::oclMat d_src(src);
- ocl::oclMat d_dst(d_src);
- d_dst.setTo(0);
+ ocl::oclMat d_src(src), d_dst;
WARMUP_ON;
ocl::filter2D(d_src, d_dst, -1, kernel);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));
-
-
GPU_ON;
ocl::filter2D(d_src, d_dst, -1, kernel);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::filter2D(d_src, d_dst, -1, kernel);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
///////////// gemm ////////////////////////
PERFTEST(gemm)
{
- Mat src1, src2, src3, dst;
+ Mat src1, src2, src3, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_src3, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
WARMUP_ON;
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, src1.cols * src1.rows * 1e-4));
GPU_ON;
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
d_src2.upload(src2);
d_src3.upload(src3);
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4);
}
}
\ No newline at end of file
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
WARMUP_OFF;
- //Testing whether the expected is equal to the actual.
- TestSystem::instance().setAccurate(ExpectedEQ<vector<Rect>::size_type, vector<Rect>::size_type>(faces.size(), oclfaces.size()));
+ if(faces.size() == oclfaces.size())
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, abs((int)faces.size() - (int)oclfaces.size()));
faces.clear();
}
}
- cv::Mat ocl_mat;
- ocl_mat = cv::Mat(d_comp);
- ocl_mat.convertTo(ocl_mat, cv::Mat(comp).type());
- TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat, cv::Mat(comp), 3));
+ cv::Mat gpu_rst(d_comp), cpu_rst(comp);
+ TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3);
GPU_ON;
ocl_hog.detectMultiScale(d_src, found_locations);
///////////// equalizeHist ////////////////////////
PERFTEST(equalizeHist)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1};
std::string type_name[] = {"CV_8UC1"};
ocl::equalizeHist(d_src, d_dst);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.1));
-
-
GPU_ON;
ocl::equalizeHist(d_src, d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::equalizeHist(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.1);
}
}
/////////// CopyMakeBorder //////////////////////
PERFTEST(CopyMakeBorder)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_dst;
int bordertype = BORDER_CONSTANT;
ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
-
-
GPU_ON;
ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
///////////// cornerMinEigenVal ////////////////////////
PERFTEST(cornerMinEigenVal)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_dst;
int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4);
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
-
gen(src, size, size, all_type[j], 0, 256);
cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
-
-
GPU_ON;
ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
///////////// cornerHarris ////////////////////////
PERFTEST(cornerHarris)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_32FC1};
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
-
GPU_ON;
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
///////////// integral ////////////////////////
PERFTEST(integral)
{
- Mat src, sum;
+ Mat src, sum, ocl_sum;
ocl::oclMat d_src, d_sum, d_buf;
int all_type[] = {CV_8UC1};
ocl::integral(d_src, d_sum);
WARMUP_OFF;
- cv::Mat ocl_mat;
- d_sum.download(ocl_mat);
- if(sum.type() == ocl_mat.type()) //we won't test accuracy when cpu function overlow
- TestSystem::instance().setAccurate(ExpectedMatNear(sum, ocl_mat, 0.0));
-
-
GPU_ON;
ocl::integral(d_src, d_sum);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::integral(d_src, d_sum);
- d_sum.download(sum);
+ d_sum.download(ocl_sum);
GPU_FULL_OFF;
+
+    if(sum.type() == ocl_sum.type()) //we won't test accuracy when the cpu function overflows
+ TestSystem::instance().ExpectedMatNear(sum, ocl_sum, 0.0);
+
}
}
///////////// WarpAffine ////////////////////////
PERFTEST(WarpAffine)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
static const double coeffs[2][3] =
ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
-
GPU_ON;
ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
///////////// WarpPerspective ////////////////////////
PERFTEST(WarpPerspective)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
static const double coeffs[3][3] =
ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
-
GPU_ON;
ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
///////////// resize ////////////////////////
PERFTEST(resize)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
-
-
GPU_ON;
ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
-
GPU_ON;
ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
///////////// threshold////////////////////////
PERFTEST(threshold)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
-
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY";
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
-
-
GPU_ON;
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
-
GPU_ON;
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
///////////// meanShiftFiltering////////////////////////
PERFTEST(meanShiftFiltering)
{
int sp = 5, sr = 6;
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
WARMUP_OFF;
- cv::Mat ocl_mat;
- d_dst.download(ocl_mat);
-
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 0.0));
-
GPU_ON;
ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
///////////// meanShiftProc////////////////////////
}
PERFTEST(meanShiftProc)
{
- Mat src, dst, dstCoor_roi;
- ocl::oclMat d_src, d_dst, d_dstCoor_roi;
+ Mat src;
+ vector<Mat> dst(2), ocl_dst(2);
+ ocl::oclMat d_src, d_dst, d_dstCoor;
TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1);
SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 ";
gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
- gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
- gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256));
+ gen(dst[0], size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
+ gen(dst[1], size, size, CV_16SC2, Scalar::all(0), Scalar::all(256));
- meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
+ meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
CPU_ON;
- meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
+ meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
CPU_OFF;
d_src.upload(src);
WARMUP_ON;
- ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
+ ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dstCoor_roi, cv::Mat(d_dstCoor_roi), 0.0)
- &&ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
-
GPU_ON;
- ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
+ ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
- ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
- d_dst.download(dst);
- d_dstCoor_roi.download(dstCoor_roi);
+ ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
+ d_dst.download(ocl_dst[0]);
+ d_dstCoor.download(ocl_dst[1]);
GPU_FULL_OFF;
+ vector<double> eps(2, 0.);
+ TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
}
}
///////////// remap////////////////////////
PERFTEST(remap)
{
- Mat src, dst, xmap, ymap;
+ Mat src, dst, xmap, ymap, ocl_dst;
ocl::oclMat d_src, d_dst, d_xmap, d_ymap;
int all_type[] = {CV_8UC1, CV_8UC4};
}
}
-
remap(src, dst, xmap, ymap, interpolation, borderMode);
CPU_ON;
ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
WARMUP_OFF;
- if(interpolation == 0)
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
- else
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 2.0));
-
-
GPU_ON;
ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 2.0);
}
}
PERFTEST(matchTemplate)
{
//InitMatchTemplate();
-
- Mat src, templ, dst;
+ Mat src, templ, dst, ocl_dst;
int templ_size = 5;
-
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
int all_type[] = {CV_32FC1, CV_32FC4};
matchTemplate(src, templ, dst, CV_TM_CCORR);
CPU_OFF;
- ocl::oclMat d_src(src), d_templ, d_dst;
-
- d_templ.upload(templ);
+ ocl::oclMat d_src(src), d_templ(templ), d_dst;
WARMUP_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1));
-
GPU_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
GPU_OFF;
d_src.upload(src);
d_templ.upload(templ);
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
}
}
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1));
-
GPU_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
GPU_OFF;
d_src.upload(src);
d_templ.upload(templ);
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
}
}
}
///////////// ConvertTo////////////////////////
PERFTEST(ConvertTo)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
d_src.convertTo(d_dst, CV_32FC1);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
-
-
GPU_ON;
d_src.convertTo(d_dst, CV_32FC1);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_src.convertTo(d_dst, CV_32FC1);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
///////////// copyTo////////////////////////
PERFTEST(copyTo)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
d_src.copyTo(d_dst);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
-
-
GPU_ON;
d_src.copyTo(d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_src.copyTo(d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
///////////// setTo////////////////////////
PERFTEST(setTo)
{
- Mat src, dst;
+ Mat src, ocl_src;
Scalar val(1, 2, 3, 4);
- ocl::oclMat d_src, d_dst;
+ ocl::oclMat d_src;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
d_src.setTo(val);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_src), 1.0));
+ d_src.download(ocl_src);
+ TestSystem::instance().ExpectedMatNear(src, ocl_src, 1.0);
-
- GPU_ON;
+ GPU_ON;;
d_src.setTo(val);
GPU_OFF;
///////////// norm////////////////////////
PERFTEST(norm)
{
- Mat src, buf;
- ocl::oclMat d_src, d_buf;
-
+ Mat src1, src2, ocl_src1;
+ ocl::oclMat d_src1, d_src2;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";
- gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
- gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+ gen(src1, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+ gen(src2, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
- norm(src, NORM_INF);
+ norm(src1, src2, NORM_INF);
CPU_ON;
- norm(src, NORM_INF);
+ norm(src1, src2, NORM_INF);
CPU_OFF;
- d_src.upload(src);
- d_buf.upload(buf);
+ d_src1.upload(src1);
+ d_src2.upload(src2);
WARMUP_ON;
- ocl::norm(d_src, d_buf, NORM_INF);
+ ocl::norm(d_src1, d_src2, NORM_INF);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_buf), .5));
+ d_src1.download(ocl_src1);
+ TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);
GPU_ON;
- ocl::norm(d_src, d_buf, NORM_INF);
+ ocl::norm(d_src1, d_src2, NORM_INF);
GPU_OFF;
GPU_FULL_ON;
- d_src.upload(src);
- ocl::norm(d_src, d_buf, NORM_INF);
+ d_src1.upload(src1);
+ d_src2.upload(src2);
+ ocl::norm(d_src1, d_src2, NORM_INF);
GPU_FULL_OFF;
}
}
\ No newline at end of file
SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points";
else
SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points";
- Mat nextPts_cpu;
- Mat status_cpu;
+ Mat ocl_nextPts;
+ Mat ocl_status;
vector<Point2f> pts;
goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0);
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
WARMUP_OFF;
- std::vector<cv::Point2f> ocl_nextPts(d_nextPts.cols);
- std::vector<unsigned char> ocl_status(d_status.cols);
- TestSystem::instance().setAccurate(AssertEQ<size_t>(nextPts.size(), ocl_nextPts.size()));
- TestSystem::instance().setAccurate(AssertEQ<size_t>(status.size(), ocl_status.size()));
-
-
GPU_ON;
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
GPU_OFF;
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
if (!d_nextPts.empty())
- {
- d_nextPts.download(nextPts_cpu);
- }
+ d_nextPts.download(ocl_nextPts);
if (!d_status.empty())
+ d_status.download(ocl_status);
+ GPU_FULL_OFF;
+
+ size_t mismatch = 0;
+ for (int i = 0; i < (int)nextPts.size(); ++i)
{
- d_status.download(status_cpu);
+ if(status[i] != ocl_status.at<unsigned char>(0, i)){
+ mismatch++;
+ continue;
+ }
+ if(status[i]){
+ Point2f gpu_rst = ocl_nextPts.at<Point2f>(0, i);
+ Point2f cpu_rst = nextPts[i];
+ if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.)
+ mismatch++;
+ }
}
-
- GPU_FULL_OFF;
+ double ratio = (double)mismatch / (double)nextPts.size();
+ if(ratio < .02)
+ TestSystem::instance().setAccurate(1, ratio);
+ else
+ TestSystem::instance().setAccurate(0, ratio);
}
}
}
+
+
+// Perf test for dual TV-L1 optical flow: times the CPU implementation
+// (cv::createOptFlow_DualTVL1) against the OCL one on the rubberwhale
+// image pair, then checks both flow components for accuracy after the
+// timed full-pipeline (upload + compute + download) run.
+PERFTEST(tvl1flow)
+{
+    cv::Mat frame0 = imread("rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    assert(!frame0.empty());
+
+    cv::Mat frame1 = imread("rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    assert(!frame1.empty());
+
+    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
+    cv::ocl::oclMat d_flowx(frame0.size(), CV_32FC1);
+    cv::ocl::oclMat d_flowy(frame1.size(), CV_32FC1);
+
+    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
+    cv::Mat flow;
+
+
+    SUBTEST << frame0.cols << 'x' << frame0.rows << "; rubberwhale1.png; "<<frame1.cols<<'x'<<frame1.rows<<"; rubberwhale2.png";
+
+    // untimed CPU warm-up run before the measured CPU_ON/CPU_OFF section
+    alg->calc(frame0, frame1, flow);
+
+    CPU_ON;
+    alg->calc(frame0, frame1, flow);
+    CPU_OFF;
+
+    // split the 2-channel CPU flow into x/y planes so each can be compared
+    // against d_flowx / d_flowy separately
+    cv::Mat gold[2];
+    cv::split(flow, gold);
+
+    cv::ocl::oclMat d0(frame0.size(), CV_32FC1);
+    d0.upload(frame0);
+    cv::ocl::oclMat d1(frame1.size(), CV_32FC1);
+    d1.upload(frame1);
+
+    WARMUP_ON;
+    d_alg(d0, d1, d_flowx, d_flowy);
+    WARMUP_OFF;
+/*
+    double diff1 = 0.0, diff2 = 0.0;
+    if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1
+        &&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1)
+        TestSystem::instance().setAccurate(1);
+    else
+        TestSystem::instance().setAccurate(0);
+
+    TestSystem::instance().setDiff(diff1);
+    TestSystem::instance().setDiff(diff2);
+*/
+
+
+    GPU_ON;
+    d_alg(d0, d1, d_flowx, d_flowy);
+    d_alg.collectGarbage();
+    GPU_OFF;
+
+
+    cv::Mat flowx, flowy;
+
+    GPU_FULL_ON;
+    d0.upload(frame0);
+    d1.upload(frame1);
+    d_alg(d0, d1, d_flowx, d_flowy);
+    d_alg.collectGarbage();
+    d_flowx.download(flowx);
+    d_flowy.download(flowy);
+    GPU_FULL_OFF;
+
+    // accuracy check outside the timed sections; tolerance 3e-3 per component
+    TestSystem::instance().ExceptedMatSimilar(gold[0], flowx, 3e-3);
+    TestSystem::instance().ExceptedMatSimilar(gold[1], flowy, 3e-3);
+}
\ No newline at end of file
///////////// pyrDown //////////////////////
PERFTEST(pyrDown)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
ocl::pyrDown(d_src, d_dst);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), dst.depth() == CV_32F ? 1e-4f : 1.0f));
-
-
GPU_ON;
ocl::pyrDown(d_src, d_dst);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::pyrDown(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
+ GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, dst.depth() == CV_32F ? 1e-4f : 1.0f);
+ }
+ }
+}
+
+///////////// pyrUp ////////////////////////
+// Perf test for pyrUp: times cv::pyrUp against ocl::pyrUp on 8UC1/8UC4
+// inputs from 500^2 to 2000^2 and checks the downloaded OCL result
+// against the CPU result after the full-pipeline run.
+PERFTEST(pyrUp)
+{
+    Mat src, dst, ocl_dst;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+
+    for (int size = 500; size <= 2000; size *= 2)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            // untimed warm-up run before the measured CPU section
+            pyrUp(src, dst);
+
+            CPU_ON;
+            pyrUp(src, dst);
+            CPU_OFF;
+
+            ocl::oclMat d_src(src);
+            ocl::oclMat d_dst;
+
+            WARMUP_ON;
+            ocl::pyrUp(d_src, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::pyrUp(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::pyrUp(d_src, d_dst);
+            d_dst.download(ocl_dst);
             GPU_FULL_OFF;
+
+            // tight tolerance for float input, 1.0 for integer types
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, (src.depth() == CV_32F ? 1e-4f : 1.0));
         }
     }
 }
\ No newline at end of file
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Fangfang Bai, fangfang@multicorewareinc.com
-// Jin Ma, jin@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-
-///////////// pyrUp ////////////////////////
-PERFTEST(pyrUp)
-{
- Mat src, dst;
- int all_type[] = {CV_8UC1, CV_8UC4};
- std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
-
- for (int size = 500; size <= 2000; size *= 2)
- {
- for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
- {
- SUBTEST << size << 'x' << size << "; " << type_name[j] ;
-
- gen(src, size, size, all_type[j], 0, 256);
-
- pyrUp(src, dst);
-
- CPU_ON;
- pyrUp(src, dst);
- CPU_OFF;
-
- ocl::oclMat d_src(src);
- ocl::oclMat d_dst;
-
- WARMUP_ON;
- ocl::pyrUp(d_src, d_dst);
- WARMUP_OFF;
-
- TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), (src.depth() == CV_32F ? 1e-4f : 1.0)));
-
- GPU_ON;
- ocl::pyrUp(d_src, d_dst);
- GPU_OFF;
-
- GPU_FULL_ON;
- d_src.upload(src);
- ocl::pyrUp(d_src, d_dst);
- d_dst.download(dst);
- GPU_FULL_OFF;
- }
- }
-}
\ No newline at end of file
///////////// Merge////////////////////////
PERFTEST(Merge)
{
- Mat dst;
+ Mat dst, ocl_dst;
ocl::oclMat d_dst;
int channels = 4;
ocl::merge(d_src, d_dst);
WARMUP_OFF;
- TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(dst), cv::Mat(d_dst), 0.0));
-
GPU_ON;
ocl::merge(d_src, d_dst);
GPU_OFF;
GPU_FULL_ON;
-
for (int i = 0; i < channels; ++i)
{
- d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i));
+ d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
}
-
ocl::merge(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4));
- std::vector<cv::Mat> dst;
+ std::vector<cv::Mat> dst, ocl_dst(4);
split(src, dst);
WARMUP_ON;
ocl::split(d_src, d_dst);
- WARMUP_OFF;
-
- if(d_dst.size() == dst.size())
- {
- TestSystem::instance().setAccurate(1);
- for(size_t i = 0; i < dst.size(); i++)
- {
- if(ExpectedMatNear(dst[i], cv::Mat(d_dst[i]), 0.0) == 0)
- {
- TestSystem::instance().setAccurate(0);
- break;
- }
- }
- }else
- TestSystem::instance().setAccurate(0);
-
+ WARMUP_OFF;
GPU_ON;
ocl::split(d_src, d_dst);
GPU_FULL_ON;
d_src.upload(src);
ocl::split(d_src, d_dst);
+ for(size_t i = 0; i < dst.size(); i++)
+ d_dst[i].download(ocl_dst[i]);
GPU_FULL_OFF;
+
+ vector<double> eps(4, 0.);
+ TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
}
}
return;
}
- int is_accurate = is_accurate_;
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0;
deviation = std::sqrt(sum / gpu_times_.size());
}
- printMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
- writeMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
+ printMetrics(is_accurate_, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
+ writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
num_subtests_called_++;
resetCurrentSubtest();
}
}
- fprintf(record_, "NAME,DESCRIPTION,ACCURACY,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
+ fprintf(record_, "NAME,DESCRIPTION,ACCURACY,DIFFERENCE,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
fflush(record_);
}
#endif
}
-void TestSystem::writeMetrics(int is_accurate, double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
+void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
{
if (!record_)
{
string _is_accurate_;
- if(is_accurate == 1)
+ if(is_accurate_ == 1)
_is_accurate_ = "Pass";
- else if(is_accurate == 0)
+ else if(is_accurate_ == 0)
_is_accurate_ = "Fail";
- else if(is_accurate == -1)
+ else if(is_accurate_ == -1)
_is_accurate_ = " ";
else
{
- std::cout<<"is_accurate errer: "<<is_accurate<<"\n";
+ std::cout<<"is_accurate errer: "<<is_accurate_<<"\n";
exit(-1);
}
- fprintf(record_, "%s,%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
+ fprintf(record_, "%s,%s,%s,%.2f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n",
+ itname_changed_ ? itname_.c_str() : "",
cur_subtest_description_.str().c_str(),
- _is_accurate_.c_str(), cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
+ _is_accurate_.c_str(),
+ accurate_diff_,
+ cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
gpu_min, gpu_max, std_dev);
if (itname_changed_)
RNG rng(0);
rng.fill(mat, RNG::UNIFORM, low, high);
}
-#if 0
-void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n)
-{
- assert(n > 0&&n <= cols * rows);
- assert(type == CV_8UC1||type == CV_8UC3||type == CV_8UC4
- ||type == CV_32FC1||type == CV_32FC3||type == CV_32FC4);
-
- RNG rng;
- //generate random position without duplication
- std::vector<int> pos;
- for(int i = 0; i < cols * rows; i++)
- {
- pos.push_back(i);
- }
-
- for(int i = 0; i < cols * rows; i++)
- {
- int temp = i + rng.uniform(0, cols * rows - 1 - i);
- int temp1 = pos[temp];
- pos[temp]= pos[i];
- pos[i] = temp1;
- }
-
- std::vector<int> selected_pos;
- for(int i = 0; i < n; i++)
- {
- selected_pos.push_back(pos[i]);
- }
-
- pos.clear();
- //end of generating random y without duplication
-
- if(type == CV_8UC1)
- {
- typedef struct coorStruct_
- {
- int x;
- int y;
- uchar xy;
- }coorStruct;
-
- coorStruct coor_struct;
-
- std::vector<coorStruct> coor;
-
- for(int i = 0; i < n; i++)
- {
- coor_struct.x = -1;
- coor_struct.y = -1;
- coor_struct.xy = (uchar)rng.uniform(low, high);
- coor.push_back(coor_struct);
- }
-
- for(int i = 0; i < n; i++)
- {
- coor[i].y = selected_pos[i]/cols;
- coor[i].x = selected_pos[i]%cols;
- }
- selected_pos.clear();
-
- mat.create(rows, cols, type);
- mat.setTo(0);
-
- for(int i = 0; i < n; i++)
- {
- mat.at<unsigned char>(coor[i].y, coor[i].x) = coor[i].xy;
- }
- }
-
- if(type == CV_8UC4 || type == CV_8UC3)
- {
- mat.create(rows, cols, type);
- mat.setTo(0);
-
- typedef struct Coor
- {
- int x;
- int y;
-
- uchar r;
- uchar g;
- uchar b;
- uchar alpha;
- }coor;
-
- std::vector<coor> coor_vect;
-
- coor xy_coor;
-
- for(int i = 0; i < n; i++)
- {
- xy_coor.r = (uchar)rng.uniform(low, high);
- xy_coor.g = (uchar)rng.uniform(low, high);
- xy_coor.b = (uchar)rng.uniform(low, high);
- if(type == CV_8UC4)
- xy_coor.alpha = (uchar)rng.uniform(low, high);
-
- coor_vect.push_back(xy_coor);
- }
-
- for(int i = 0; i < n; i++)
- {
- coor_vect[i].y = selected_pos[i]/((int)mat.step1()/mat.elemSize());
- coor_vect[i].x = selected_pos[i]%((int)mat.step1()/mat.elemSize());
- //printf("coor_vect[%d] = (%d, %d)\n", i, coor_vect[i].y, coor_vect[i].x);
- }
-
- if(type == CV_8UC4)
- {
- for(int i = 0; i < n; i++)
- {
- mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x) = coor_vect[i].r;
- mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 1) = coor_vect[i].g;
- mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 2) = coor_vect[i].b;
- mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 3) = coor_vect[i].alpha;
- }
- }else if(type == CV_8UC3)
- {
- for(int i = 0; i < n; i++)
- {
- mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x) = coor_vect[i].r;
- mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x + 1) = coor_vect[i].g;
- mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x + 2) = coor_vect[i].b;
- }
- }
- }
-}
-#endif
string abspath(const string &relpath)
{
}
-int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps)
-{
- assert(dst.type() == cpu_dst.type());
- assert(dst.size() == cpu_dst.size());
- if(checkNorm(cv::Mat(dst), cv::Mat(cpu_dst)) < eps ||checkNorm(cv::Mat(dst), cv::Mat(cpu_dst)) == eps)
- return 1;
- return 0;
-}
-
-int ExceptDoubleNear(double val1, double val2, double abs_error)
-{
- const double diff = fabs(val1 - val2);
- if (diff <= abs_error)
- return 1;
-
- return 0;
-}
-
-int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps)
-{
- assert(dst.type() == cpu_dst.type());
- assert(dst.size() == cpu_dst.size());
- if(checkSimilarity(cv::Mat(cpu_dst), cv::Mat(dst)) <= eps)
- return 1;
- return 0;
-}
-
itname_changed_ = true;
}
- void setAccurate(int is_accurate = -1)
+ void setAccurate(int accurate, double diff)
{
- is_accurate_ = is_accurate;
+ is_accurate_ = accurate;
+ accurate_diff_ = diff;
+ }
+
+    // Accuracy check for multi-output subtests: compares each gold/OCL
+    // matrix pair with its own epsilon via checkNorm(). Records the worst
+    // per-pair difference in accurate_diff_ (for the report) and marks the
+    // subtest inaccurate if any pair exceeds its eps.
+    void ExpectMatsNear(vector<Mat>& dst, vector<Mat>& cpu_dst, vector<double>& eps)
+    {
+        assert(dst.size() == cpu_dst.size());
+        assert(cpu_dst.size() == eps.size());
+        is_accurate_ = 1;
+        for(size_t i=0; i<dst.size(); i++)
+        {
+            double cur_diff = checkNorm(dst[i], cpu_dst[i]);
+            // keep the largest difference seen across all pairs
+            accurate_diff_ = max(accurate_diff_, cur_diff);
+            if(cur_diff > eps[i])
+                is_accurate_ = 0;
+        }
+    }
+
+    // Accuracy check for single-output subtests: the subtest passes when
+    // checkNorm() of the gold result vs. the OCL result is within eps.
+    // The measured difference is stored in accurate_diff_ for the report.
+    void ExpectedMatNear(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
+    {
+        assert(dst.type() == cpu_dst.type());
+        assert(dst.size() == cpu_dst.size());
+        accurate_diff_ = checkNorm(dst, cpu_dst);
+        if(accurate_diff_ <= eps)
+            is_accurate_ = 1;
+        else
+            is_accurate_ = 0;
+    }
+
+    // Similarity-based accuracy check (the "Excepted" spelling is the
+    // historical name used by callers, e.g. the tvl1flow test): passes when
+    // checkSimilarity() of the two results does not exceed eps; the measured
+    // value is stored in accurate_diff_ for the report.
+    void ExceptedMatSimilar(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
+    {
+        assert(dst.type() == cpu_dst.type());
+        assert(dst.size() == cpu_dst.size());
+        accurate_diff_ = checkSimilarity(cpu_dst, dst);
+        if(accurate_diff_ <= eps)
+            is_accurate_ = 1;
+        else
+            is_accurate_ = 0;
+    }
std::stringstream &getCurSubtestDescription()
num_iters_(10), cpu_num_iters_(2),
gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
record_(0), recordname_("performance"), itname_changed_(true),
- is_accurate_(-1)
+ is_accurate_(-1), accurate_diff_(0.)
{
cpu_times_.reserve(num_iters_);
gpu_times_.reserve(num_iters_);
gpu_times_.clear();
gpu_full_times_.clear();
is_accurate_ = -1;
+ accurate_diff_ = 0.;
}
double meanTime(const std::vector<int64> &samples);
void writeHeading();
void writeSummary();
- void writeMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
+ void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
double speedup = 0.0f, double fullspeedup = 0.0f,
double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f);
bool itname_changed_;
int is_accurate_;
+ double accurate_diff_;
};
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ float f_scalar = (float)scalar;
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
args.push_back( make_pair( sizeof(cl_double), (void *)&scalar ));
else
{
- float f_scalar = (float)scalar;
args.push_back( make_pair( sizeof(cl_float), (void *)&f_scalar));
}
}
}
-template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
+template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
+ const oclMat &mask, oclMat &buf)
{
size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0);
groupnum = groupnum * 2;
int vlen = 8;
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
- Context *clCxt = src.clCxt;
- cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
- *minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
+
+ ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
+
+ cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
+
if (mask.empty())
{
- arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax");
+ arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax");
}
else
{
- arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask");
+ arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask");
}
- T *p = new T[groupnum * vlen * 2];
- memset(p, 0, dbsize);
- openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
- if(minVal != NULL){
+
+ Mat matbuf = Mat(buf);
+ T *p = matbuf.ptr<T>();
+ if(minVal != NULL)
+ {
+ *minVal = std::numeric_limits<double>::max();
for(int i = 0; i < vlen * (int)groupnum; i++)
{
*minVal = *minVal < p[i] ? *minVal : p[i];
}
}
- if(maxVal != NULL){
+ if(maxVal != NULL)
+ {
+ *maxVal = -std::numeric_limits<double>::max();
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
{
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
}
}
- delete[] p;
- openCLFree(dstBuffer);
}
-typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
+typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{
+ oclMat buf;
+ minMax_buf(src, minVal, maxVal, mask, buf);
+}
+void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
+{
CV_Assert(src.oclchannels() == 1);
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
};
minMaxFunc func;
func = functab[src.depth()];
- func(src, minVal, maxVal, mask);
+ func(src, minVal, maxVal, mask, buf);
}
//////////////////////////////////////////////////////////////////////////////
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ T scalar;
if(_scalar != NULL)
{
double scalar1 = *((double *)_scalar);
- T scalar = (T)scalar1;
+ scalar = (T)scalar1;
args.push_back( make_pair( sizeof(T), (void *)&scalar ));
}
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ float pf = p;
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
{
- float pf = p;
args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
}
else
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
}
- else if (query.cols <= 128)
+ else if (query.cols <= 128 && !is_cpu)
{
matchUnrolledCached<16, 128>(query, train, tempMask, trainIdx, distance, distType);
}
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
}
- else if (query.cols <= 128)
+ else if (query.cols <= 128 && !is_cpu)
{
matchUnrolledCached<16, 128>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
}
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
}
- else if (query.cols <= 128)
+ else if (query.cols <= 128 && !is_cpu)
{
matchUnrolledCached<16, 128>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
}
static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (query.cols <= 64)
{
knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
}
- else if (query.cols <= 128)
+ else if (query.cols <= 128 && !is_cpu)
{
knn_matchUnrolledCached<16, 128>(query, train, mask, trainIdx, distance, distType);
}
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
}
}
- ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
+ ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
{
using namespace ::cv::ocl::canny;
- calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+ oclMat mapBuf = buf.edgeBuf(Rect(0, buf.edgeBuf.rows / 2, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
- edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
+ calcMap_gpu(buf.dx, buf.dy, magBuf, mapBuf, dst.rows, dst.cols, low_thresh, high_thresh);
- edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
+ edgesHysteresisLocal_gpu(mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
- getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
+ edgesHysteresisGlobal_gpu(mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
+
+ getEdges_gpu(mapBuf, dst, dst.rows, dst.cols);
}
}
buf.create(src.size(), apperture_size);
buf.edgeBuf.setTo(Scalar::all(0));
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+
if (apperture_size == 3)
{
calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
- calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+ calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
}
else
{
buf.filterDX->apply(src, buf.dx);
buf.filterDY->apply(src, buf.dy);
- calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+ calcMagnitude_gpu(buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
}
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
buf.dy = dy;
buf.create(dx.size(), -1);
buf.edgeBuf.setTo(Scalar::all(0));
- calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
+
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+
+ calcMagnitude_gpu(buf.dx, buf.dy, magBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- char build_options [15] = "";
- if(L2Grad)
- {
- strcat(build_options, "-D L2GRAD");
- }
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ const char * build_options = L2Grad ? "-D L2GRAD":"";
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
{
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- char build_options [15] = "";
- if(L2Grad)
- {
- strcat(build_options, "-D L2GRAD");
- }
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ const char * build_options = L2Grad ? "-D L2GRAD":"";
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
string kernelName = "calcMap";
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s",
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
- rectKernel?"-D RECTKERNEL":"",
- s);
+ s, rectKernel?"-D RECTKERNEL":"");
vector< pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include <iomanip>
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::ocl;
+
+static bool use_cpu_sorter = true;
+
+namespace cv
+{
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *imgproc_gfft;
+ }
+}
+
+namespace
+{
+// Strategy used to order the detected corners by eigenvalue response.
+enum SortMethod
+{
+    CPU_STL,  // download to host and sort with std::sort (reference path, see use_cpu_sorter)
+    BITONIC,  // GPU bitonic network; output is only valid when the count is a power of two
+    SELECTION // GPU selection sort; handles arbitrary counts
+};
+
+// Work-group size shared by the GPU sorting kernels below.
+const int GROUP_SIZE = 256;
+
+// Primary template is intentionally empty: only the explicit
+// specializations below (CPU_STL, BITONIC, SELECTION) are ever used.
+template<SortMethod method>
+struct Sorter
+{
+    //typedef EigType;
+};
+
+//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
+// Host-side fallback sorter: downloads both the corner list and the
+// eigenvalue map, sorts with std::sort, then uploads the result back.
+template<>
+struct Sorter<CPU_STL>
+{
+    typedef oclMat EigType;
+    // The comparator is a plain function pointer and cannot carry state, so the
+    // eigenvalue map lives in a static; `cs` serializes concurrent callers.
+    static cv::Mutex cs;
+    static Mat mat_eig;
+
+    //prototype
+    // Comparator: true (non-zero) when pt1's eigenvalue response is larger,
+    // i.e. the list is sorted in descending response order. Each point is a
+    // cl_float2 with s[0] = x (column) and s[1] = y (row).
+    static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
+    {
+        float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
+        float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
+        return v1 > v2;
+    }
+    // Sort the first `count` corners in `corners` by descending eigenvalue.
+    // Round-trips through host memory (oclMat -> Mat and back).
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        cv::AutoLock lock(cs);
+        //temporarily use STL's sort function
+        Mat mat_corners = corners;
+        mat_eig = eig_tex;
+        std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
+        corners = mat_corners;
+    }
+};
+cv::Mutex Sorter<CPU_STL>::cs;
+cv::Mat Sorter<CPU_STL>::mat_eig;
+
+// GPU bitonic-network sorter. Intended to produce the same ordering as the
+// CPU_STL path (descending eigenvalue response — TODO confirm against the
+// sortCorners_bitonicSort kernel in imgproc_gfft).
+template<>
+struct Sorter<BITONIC>
+{
+    typedef TextureCL EigType;
+
+    // Sort the first `count` corners. NOTE: `count` must be a power of two
+    // (see the stage-count note below); the caller in operator() checks this
+    // before dispatching here.
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        Context * cxt = Context::getContext();
+        // A bitonic network compares pairs, so one work-item handles two elements.
+        size_t globalThreads[3] = {count / 2, 1, 1};
+        size_t localThreads[3] = {GROUP_SIZE, 1, 1};
+
+        // 2^numStages should be equal to count or the output is invalid
+        int numStages = 0;
+        for(int i = count; i > 1; i >>= 1)
+        {
+            ++numStages;
+        }
+        const int argc = 5;
+        std::vector< std::pair<size_t, const void *> > args(argc);
+        std::string kernelname = "sortCorners_bitonicSort";
+        args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
+        args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
+        args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
+        // One kernel launch per (stage, pass) of the bitonic schedule;
+        // args[3]/args[4] are rewritten in place before each launch.
+        for(int stage = 0; stage < numStages; ++stage)
+        {
+            args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
+            for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
+            {
+                args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
+                openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+            }
+        }
+    }
+};
+
+// GPU selection sorter for counts that are not a power of two. Runs two
+// kernels: a per-work-group pass that sorts locally using LDS scratch, then
+// a final pass that completes the ordering across work-groups.
+template<>
+struct Sorter<SELECTION>
+{
+    typedef TextureCL EigType;
+
+    // Sort the first `count` corners in `corners` by eigenvalue response.
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        Context * cxt = Context::getContext();
+
+        size_t globalThreads[3] = {count, 1, 1};
+        size_t localThreads[3] = {GROUP_SIZE, 1, 1};
+
+        std::vector< std::pair<size_t, const void *> > args;
+        //local pass: last argument is an un-initialized local-memory buffer
+        std::string kernelname = "sortCorners_selectionSortLocal";
+        int lds_size = GROUP_SIZE * sizeof(cl_float2);
+        args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
+        args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
+        args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
+        args.push_back( std::make_pair( lds_size, (void*)NULL) );
+
+        openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+
+        //final pass takes the same arguments minus the local-memory buffer
+        kernelname = "sortCorners_selectionSortFinal";
+        args.pop_back();
+        openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+    }
+};
+
+// Launch the "findCorners" kernel over the eigenvalue texture `eig`. The
+// kernel appends candidate corner coordinates to `corners` — presumably the
+// pixels whose response exceeds `threshold` (and, when `mask` is non-empty,
+// that pass the mask; the kernel is built with -D WITH_MASK in that case) —
+// counting them in a device-side atomic counter.
+// Returns the number of corners stored, clamped to `max_count`.
+int findCorners_caller(
+    const TextureCL& eig,
+    const float threshold,
+    const oclMat& mask,
+    oclMat& corners,
+    const int max_count)
+{
+    Context * cxt = Context::getContext();
+
+    std::vector< std::pair<size_t, const void*> > args;
+    std::string kernelname = "findCorners";
+
+    // Mask row stride in elements (mask is CV_8UC1, see the caller's assert).
+    const int mask_strip = mask.step / mask.elemSize1();
+
+    // Device-side counter of corners written so far, zero-initialized.
+    oclMat g_counter(1, 1, CV_32SC1);
+    g_counter.setTo(0);
+
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&eig ));
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data ));
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip));
+    args.push_back(make_pair( sizeof(cl_float), (void*)&threshold ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&max_count ));
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
+
+    // One work-item per pixel of the eigenvalue map.
+    size_t globalThreads[3] = {eig.cols, eig.rows, 1};
+    size_t localThreads[3] = {16, 16, 1};
+
+    const char * opt = mask.empty() ? "" : "-D WITH_MASK";
+    openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt);
+    // Download the counter; the kernel may have tried to write past max_count.
+    return std::min(Mat(g_counter).at<int>(0), max_count);
+}
+}//unnamed namespace
+
+// Detect up to `maxCorners` strong corners in `image` (optionally restricted
+// by `mask`, CV_8UC1 of the same size) and return them in `corners` as a
+// 1 x N CV_32FC2 row of (x, y) points. Pipeline:
+//   1. compute the per-pixel eigenvalue (or Harris) response into eig_;
+//   2. threshold at maxVal * qualityLevel and collect candidates on the GPU;
+//   3. sort candidates by descending response (CPU or GPU sorter);
+//   4. if minDistance >= 1, greedily enforce the minimum distance on the CPU.
+void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
+{
+    CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
+    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
+
+    // The findCorners kernel reads eig_ through an image2d texture.
+    CV_DbgAssert(support_image2d());
+
+    ensureSizeIsEnough(image.size(), CV_32F, eig_);
+
+    // Harris and min-eigenvalue share the same Sobel-based entry point;
+    // harrisK only matters for the Harris variant.
+    if (useHarrisDetector)
+        cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
+    else
+        cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
+
+    // Only the maximum response is needed to derive the quality threshold.
+    double maxVal = 0;
+    minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_);
+
+    // Candidate buffer: 5% of the image area, but at least 1000 slots.
+    ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
+
+    Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
+    int total = findCorners_caller(
+        *eig_tex,
+        static_cast<float>(maxVal * qualityLevel),
+        mask,
+        tmpCorners_,
+        tmpCorners_.cols);
+
+    if (total == 0)
+    {
+        corners.release();
+        return;
+    }
+    // Sort candidates so that truncation below keeps the strongest responses.
+    if(use_cpu_sorter)
+    {
+        Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
+    }
+    else
+    {
+        //if total is power of 2 (bitonic sort is only valid for such counts)
+        if(((total - 1) & (total)) == 0)
+        {
+            Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
+        }
+        else
+        {
+            Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
+        }
+    }
+
+    if (minDistance < 1)
+    {
+        // No spacing constraint: just take the strongest maxCorners (or all).
+        corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1));
+    }
+    else
+    {
+        // Enforce the minimum distance on the host with a uniform grid of
+        // minDistance-sized cells: a point only needs to be checked against
+        // points already accepted in its own and the 8 neighboring cells.
+        vector<Point2f> tmp(total);
+        downloadPoints(tmpCorners_, tmp);
+
+        vector<Point2f> tmp2;
+        tmp2.reserve(total);
+
+        const int cell_size = cvRound(minDistance);
+        const int grid_width = (image.cols + cell_size - 1) / cell_size;
+        const int grid_height = (image.rows + cell_size - 1) / cell_size;
+
+        std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
+
+        // Candidates arrive strongest-first, so this greedy pass keeps the
+        // strongest point of each crowded neighborhood.
+        for (int i = 0; i < total; ++i)
+        {
+            Point2f p = tmp[i];
+
+            bool good = true;
+
+            int x_cell = static_cast<int>(p.x / cell_size);
+            int y_cell = static_cast<int>(p.y / cell_size);
+
+            int x1 = x_cell - 1;
+            int y1 = y_cell - 1;
+            int x2 = x_cell + 1;
+            int y2 = y_cell + 1;
+
+            // boundary check
+            x1 = std::max(0, x1);
+            y1 = std::max(0, y1);
+            x2 = std::min(grid_width - 1, x2);
+            y2 = std::min(grid_height - 1, y2);
+
+            for (int yy = y1; yy <= y2; yy++)
+            {
+                for (int xx = x1; xx <= x2; xx++)
+                {
+                    vector<Point2f>& m = grid[yy * grid_width + xx];
+
+                    if (!m.empty())
+                    {
+                        for(size_t j = 0; j < m.size(); j++)
+                        {
+                            float dx = p.x - m[j].x;
+                            float dy = p.y - m[j].y;
+
+                            if (dx * dx + dy * dy < minDistance * minDistance)
+                            {
+                                good = false;
+                                // goto is the cheapest exit from the doubly
+                                // nested neighbor scan.
+                                goto break_out;
+                            }
+                        }
+                    }
+                }
+            }
+
+            break_out:
+
+            if(good)
+            {
+                grid[y_cell * grid_width + x_cell].push_back(p);
+
+                tmp2.push_back(p);
+
+                if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
+                    break;
+            }
+        }
+
+        // tmp2 is non-empty here: the first candidate always passes because
+        // every grid cell starts out empty (total > 0 was checked above).
+        corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
+    }
+}
+// Synchronously copy `points.cols` CV_32FC2 corner coordinates from the
+// device buffer into `points_v`. Blocking (CL_TRUE), so the data is ready
+// when this returns.
+void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector<Point2f> &points_v)
+{
+    CV_DbgAssert(points.type() == CV_32FC2);
+    points_v.resize(points.cols);
+    openCLSafeCall(clEnqueueReadBuffer(
+        *reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
+        reinterpret_cast<cl_mem>(points.data),
+        CL_TRUE, // blocking read
+        0,
+        points.cols * sizeof(Point2f),
+        &points_v[0],
+        0,
+        NULL,
+        NULL));
+}
+
+
};
typedef struct
{
- //int rows;
- //int ystep;
int width_height;
- //int height;
int grpnumperline_totalgrp;
- //int totalgrp;
int imgoff;
float factor;
} detect_piramid_info;
-
-#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__
+#ifdef WIN32
#define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
-typedef _ALIGNED_ON(128) struct GpuHidHaarFeature
-{
- _ALIGNED_ON(32) struct
- {
- _ALIGNED_ON(4) int p0 ;
- _ALIGNED_ON(4) int p1 ;
- _ALIGNED_ON(4) int p2 ;
- _ALIGNED_ON(4) int p3 ;
- _ALIGNED_ON(4) float weight ;
- }
- /*_ALIGNED_ON(32)*/ rect[CV_HAAR_FEATURE_MAX] ;
-}
-GpuHidHaarFeature;
-
typedef _ALIGNED_ON(128) struct GpuHidHaarTreeNode
{
_ALIGNED_ON(64) int p[CV_HAAR_FEATURE_MAX][4];
- //_ALIGNED_ON(16) int p1[CV_HAAR_FEATURE_MAX] ;
- //_ALIGNED_ON(16) int p2[CV_HAAR_FEATURE_MAX] ;
- //_ALIGNED_ON(16) int p3[CV_HAAR_FEATURE_MAX] ;
- /*_ALIGNED_ON(16)*/
float weight[CV_HAAR_FEATURE_MAX] ;
- /*_ALIGNED_ON(4)*/
float threshold ;
- _ALIGNED_ON(8) float alpha[2] ;
+ _ALIGNED_ON(16) float alpha[3] ;
_ALIGNED_ON(4) int left ;
_ALIGNED_ON(4) int right ;
- // GpuHidHaarFeature feature __attribute__((aligned (128)));
}
GpuHidHaarTreeNode;
typedef _ALIGNED_ON(32) struct GpuHidHaarClassifier
{
_ALIGNED_ON(4) int count;
- //CvHaarFeature* orig_feature;
_ALIGNED_ON(8) GpuHidHaarTreeNode *node ;
_ALIGNED_ON(8) float *alpha ;
}
_ALIGNED_ON(4) int p2 ;
_ALIGNED_ON(4) int p3 ;
_ALIGNED_ON(4) float inv_window_area ;
- // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
} GpuHidHaarClassifierCascade;
#else
#define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
-typedef struct _ALIGNED_ON(128) GpuHidHaarFeature
-{
- struct _ALIGNED_ON(32)
-{
- int p0 _ALIGNED_ON(4);
- int p1 _ALIGNED_ON(4);
- int p2 _ALIGNED_ON(4);
- int p3 _ALIGNED_ON(4);
- float weight _ALIGNED_ON(4);
-}
-rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32);
-}
-GpuHidHaarFeature;
-
-
typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode
{
int p[CV_HAAR_FEATURE_MAX][4] _ALIGNED_ON(64);
float weight[CV_HAAR_FEATURE_MAX];// _ALIGNED_ON(16);
float threshold;// _ALIGNED_ON(4);
- float alpha[2] _ALIGNED_ON(8);
+ float alpha[3] _ALIGNED_ON(16);
int left _ALIGNED_ON(4);
int right _ALIGNED_ON(4);
}
int p2 _ALIGNED_ON(4);
int p3 _ALIGNED_ON(4);
float inv_window_area _ALIGNED_ON(4);
- // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
} GpuHidHaarClassifierCascade;
#endif
const float icv_stage_threshold_bias = 0.0001f;
double globaltime = 0;
-
-// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count )
-// {
-// CvHaarClassifierCascade *cascade = 0;
-
-// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier);
-
-// if( stage_count <= 0 )
-// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" );
-
-// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size );
-// memset( cascade, 0, block_size );
-
-// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1);
-// cascade->flags = CV_HAAR_MAGIC_VAL;
-// cascade->count = stage_count;
-
-// return cascade;
-// }
-
-//static int globalcounter = 0;
-
-// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade )
-// {
-// if( _cascade && *_cascade )
-// {
-// cvFree( _cascade );
-// }
-// }
-
/* create more efficient internal representation of haar classifier cascade */
static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier)
{
hid_stage_classifier->two_rects = 1;
haar_classifier_ptr += stage_classifier->count;
- /*
- hid_stage_classifier->parent = (stage_classifier->parent == -1)
- ? NULL : stage_classifier_ptr + stage_classifier->parent;
- hid_stage_classifier->next = (stage_classifier->next == -1)
- ? NULL : stage_classifier_ptr + stage_classifier->next;
- hid_stage_classifier->child = (stage_classifier->child == -1)
- ? NULL : stage_classifier_ptr + stage_classifier->child;
-
- out->is_tree |= hid_stage_classifier->next != NULL;
- */
-
for( j = 0; j < stage_classifier->count; j++ )
{
CvHaarClassifier *classifier = stage_classifier->classifier + j;
GpuHidHaarClassifier *hid_classifier = hid_stage_classifier->classifier + j;
int node_count = classifier->count;
- // float* alpha_ptr = (float*)(haar_node_ptr + node_count);
float *alpha_ptr = &haar_node_ptr->alpha[0];
hid_classifier->count = node_count;
node->p[2][3] = 0;
node->weight[2] = 0;
}
- // memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) );
else
hid_stage_classifier->two_rects = 0;
- }
-
- memcpy( alpha_ptr, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
- haar_node_ptr = haar_node_ptr + 1;
- // (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*));
- // (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1);
+ memcpy( node->alpha, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
+ haar_node_ptr = haar_node_ptr + 1;
+ }
out->is_stump_based &= node_count == 1;
}
}
#define sum_elem_ptr(sum,row,col) \
- ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
+ ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
#define sqsum_elem_ptr(sqsum,row,col) \
- ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
+ ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
#define calc_sum(rect,offset) \
- ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
+ ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade,
- /* const CvArr* _sum,
- const CvArr* _sqsum,
- const CvArr* _tilted_sum,*/
double scale,
int step)
{
- // CvMat sum_stub, *sum = (CvMat*)_sum;
- // CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
- // CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
GpuHidHaarClassifierCascade *cascade;
int coi0 = 0, coi1 = 0;
int i;
if( scale <= 0 )
CV_Error( CV_StsOutOfRange, "Scale must be positive" );
- // sum = cvGetMat( sum, &sum_stub, &coi0 );
- // sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
-
if( coi0 || coi1 )
CV_Error( CV_BadCOI, "COI is not supported" );
- // if( !CV_ARE_SIZES_EQ( sum, sqsum ))
- // CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
-
- // if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
- // CV_MAT_TYPE(sum->type) != CV_32SC1 )
- // CV_Error( CV_StsUnsupportedFormat,
- // "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
-
if( !_cascade->hid_cascade )
gpuCreateHidHaarClassifierCascade(_cascade, &datasize, &total);
cascade = (GpuHidHaarClassifierCascade *) _cascade->hid_cascade;
stage_classifier = (GpuHidHaarStageClassifier *) (cascade + 1);
- if( cascade->has_tilted_features )
- {
- // tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
-
- // if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
- // CV_Error( CV_StsUnsupportedFormat,
- // "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
-
- // if( sum->step != tilted->step )
- // CV_Error( CV_StsUnmatchedSizes,
- // "Sum and tilted_sum must have the same stride (step, widthStep)" );
-
- // if( !CV_ARE_SIZES_EQ( sum, tilted ))
- // CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
- // cascade->tilted = *tilted;
- }
-
_cascade->scale = scale;
_cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale );
_cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale );
- //cascade->sum = *sum;
- //cascade->sqsum = *sqsum;
-
equRect.x = equRect.y = cvRound(scale);
equRect.width = cvRound((_cascade->orig_window_size.width - 2) * scale);
equRect.height = cvRound((_cascade->orig_window_size.height - 2) * scale);
weight_scale = 1. / (equRect.width * equRect.height);
cascade->inv_window_area = weight_scale;
- // cascade->pq0 = equRect.y * step + equRect.x;
- // cascade->pq1 = equRect.y * step + equRect.x + equRect.width ;
- // cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x;
- // cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ;
-
cascade->pq0 = equRect.x;
cascade->pq1 = equRect.y;
cascade->pq2 = equRect.x + equRect.width;
{
CvHaarFeature *feature =
&_cascade->stage_classifier[i].classifier[j].haar_feature[l];
- /* GpuHidHaarClassifier* classifier =
- cascade->stage_classifier[i].classifier + j; */
- //GpuHidHaarFeature* hidfeature =
- // &cascade->stage_classifier[i].classifier[j].node[l].feature;
GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l];
double sum0 = 0, area0 = 0;
CvRect r[3];
/* align blocks */
for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
{
- //if( !hidfeature->rect[k].p0 )
- // break;
if(!hidnode->p[k][0])
break;
r[k] = feature->rect[k].r;
if( !feature->tilted )
{
- /* hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x;
- hidfeature->rect[k].p1 = tr.y * sum->cols + tr.x + tr.width;
- hidfeature->rect[k].p2 = (tr.y + tr.height) * sum->cols + tr.x;
- hidfeature->rect[k].p3 = (tr.y + tr.height) * sum->cols + tr.x + tr.width;
- */
- /*hidnode->p0[k] = tr.y * step + tr.x;
- hidnode->p1[k] = tr.y * step + tr.x + tr.width;
- hidnode->p2[k] = (tr.y + tr.height) * step + tr.x;
- hidnode->p3[k] = (tr.y + tr.height) * step + tr.x + tr.width;*/
hidnode->p[k][0] = tr.x;
hidnode->p[k][1] = tr.y;
hidnode->p[k][2] = tr.x + tr.width;
}
else
{
- /* hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width;
- hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * tilted->cols + tr.x + tr.width - tr.height;
- hidfeature->rect[k].p0 = tr.y * tilted->cols + tr.x;
- hidfeature->rect[k].p1 = (tr.y + tr.height) * tilted->cols + tr.x - tr.height;
- */
-
hidnode->p[k][2] = (tr.y + tr.width) * step + tr.x + tr.width;
hidnode->p[k][3] = (tr.y + tr.width + tr.height) * step + tr.x + tr.width - tr.height;
hidnode->p[k][0] = tr.y * step + tr.x;
hidnode->p[k][1] = (tr.y + tr.height) * step + tr.x - tr.height;
}
-
- //hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
if( k == 0 )
area0 = tr.width * tr.height;
else
- //sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
sum0 += hidnode->weight[k] * tr.width * tr.height;
}
-
- // hidfeature->rect[0].weight = (float)(-sum0/area0);
hidnode->weight[0] = (float)(-sum0 / area0);
} /* l */
} /* j */
}
}
-static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
- /*double scale=0.0,*/
- /*int step*/)
+static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade)
{
GpuHidHaarClassifierCascade *cascade;
int i;
if(!hidnode->p[k][0])
break;
r[k] = feature->rect[k].r;
- // base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
- // base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
- // base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
- // base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
- }
+ }
nr = k;
for( k = 0; k < nr; k++ )
hidnode->p[k][3] = tr.height;
hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
}
- //hidnode->weight[0]=(float)(-sum0/area0);
} /* l */
} /* j */
}
const double GROUP_EPS = 0.2;
CvSeq *result_seq = 0;
- cv::Ptr<CvMemStorage> temp_storage;
cv::ConcurrentRectVector allCandidates;
std::vector<cv::Rect> rectList;
if( gimg.cols < minSize.width || gimg.rows < minSize.height )
CV_Error(CV_StsError, "Image too small");
+ cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
if( (flags & CV_HAAR_SCALE_IMAGE) )
{
CvSize winSize0 = cascade->orig_window_size;
size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 };
- size_t globalThreads[3] = { grp_per_CU * gsum.clCxt->computeUnits() *localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0],
localThreads[1], 1
};
int outputsz = 256 * globalThreads[0] / localThreads[0];
gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
- cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
openCLSafeCall(clReleaseMemObject(nodebuffer));
openCLSafeCall(clReleaseMemObject(candidatebuffer));
+
}
else
{
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
nodenum * sizeof(GpuHidHaarTreeNode));
- cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL));
args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
size_t globalThreads2[3] = {nodenum, 1, 1};
-
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
}
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
-
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
int blocksize = 8;
int grp_per_CU = 12;
size_t localThreads[3] = { blocksize, blocksize, 1 };
- size_t globalThreads[3] = { grp_per_CU * Context::getContext()->computeUnits() * localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0],
localThreads[1],
1 };
int outputsz = 256 * globalThreads[0] / localThreads[0];
CvHaarClassifierCascade *cascade = oldCascade;
GpuHidHaarClassifierCascade *gcascade;
GpuHidHaarStageClassifier *stage;
- GpuHidHaarClassifier *classifier;
- GpuHidHaarTreeNode *node;
if( CV_MAT_DEPTH(gimg.type()) != CV_8U )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
}
int *candidate;
-
+ cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
if( (flags & CV_HAAR_SCALE_IMAGE) )
{
int indexy = 0;
gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
- classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
- node = (GpuHidHaarTreeNode *)(classifier->node);
-
- gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
-
- cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
- sizeof(GpuHidHaarStageClassifier) * gcascade->count,
- stage, 0, NULL, NULL));
-
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
- m_nodenum * sizeof(GpuHidHaarTreeNode),
- node, 0, NULL, NULL));
int startstage = 0;
int endstage = gcascade->count;
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
candidate = (int *)malloc(4 * sizeof(int) * outputsz);
memset(candidate, 0, 4 * sizeof(int) * outputsz);
+
openCLReadBuffer( gsum.clCxt, ((OclBuffers *)buffers)->candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
for(int i = 0; i < outputsz; i++)
+ {
if(candidate[4 * i + 2] != 0)
+ {
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
candidate[4 * i + 2], candidate[4 * i + 3]));
-
+ }
+ }
free((void *)candidate);
candidate = NULL;
}
{
cv::ocl::integral(gimg, gsum, gsqsum);
- gpuSetHaarClassifierCascade(cascade);
-
gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
- stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
- classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
- node = (GpuHidHaarTreeNode *)(classifier->node);
-
- cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
- m_nodenum * sizeof(GpuHidHaarTreeNode),
- node, 0, NULL, NULL));
-
- cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount);
- float *correction = (float *)malloc(sizeof(float) * m_loopcount);
- int startstage = 0;
- int endstage = gcascade->count;
- double factor;
- for(int i = 0; i < m_loopcount; i++)
- {
- factor = scalev[i];
- int equRect_x = (int)(factor * gcascade->p0 + 0.5);
- int equRect_y = (int)(factor * gcascade->p1 + 0.5);
- int equRect_w = (int)(factor * gcascade->p3 + 0.5);
- int equRect_h = (int)(factor * gcascade->p2 + 0.5);
- p[i].s[0] = equRect_x;
- p[i].s[1] = equRect_y;
- p[i].s[2] = equRect_x + equRect_w;
- p[i].s[3] = equRect_y + equRect_h;
- correction[i] = 1. / (equRect_w * equRect_h);
- int startnodenum = m_nodenum * i;
- float factor2 = (float)factor;
-
- vector<pair<size_t, const void *> > args1;
- args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
- args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
- args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
- args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
- args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
-
- size_t globalThreads2[3] = {m_nodenum, 1, 1};
-
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
- }
int step = gsum.step / 4;
int startnode = 0;
int splitstage = 3;
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
+
+ int startstage = 0;
+ int endstage = gcascade->count;
vector<pair<size_t, const void *> > args;
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer ));
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum ));
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL);
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
candidate[4 * i + 2], candidate[4 * i + 3]));
}
-
- free(p);
- free(correction);
clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0);
}
-
rectList.resize(allCandidates.size());
if(!allCandidates.empty())
std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
const int outputsz, const size_t localThreads[],
CvSize minSize, CvSize maxSize)
{
+ if(initialized)
+ {
+ return; // we only allow one time initialization
+ }
CvHaarClassifierCascade *cascade = oldCascade;
if( !CV_IS_HAAR_CLASSIFIER(cascade) )
int totalclassifier=0;
if( !cascade->hid_cascade )
+ {
gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
+ }
if( maxSize.height == 0 || maxSize.width == 0 )
{
m_minSize = minSize;
m_maxSize = maxSize;
+ // initialize nodes
+ GpuHidHaarClassifierCascade *gcascade;
+ GpuHidHaarStageClassifier *stage;
+ GpuHidHaarClassifier *classifier;
+ GpuHidHaarTreeNode *node;
+ cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
+ if( (flags & CV_HAAR_SCALE_IMAGE) )
+ {
+ gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
+ stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
+ classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
+ node = (GpuHidHaarTreeNode *)(classifier->node);
+
+ gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
+
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
+ sizeof(GpuHidHaarStageClassifier) * gcascade->count,
+ stage, 0, NULL, NULL));
+
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
+ m_nodenum * sizeof(GpuHidHaarTreeNode),
+ node, 0, NULL, NULL));
+ }
+ else
+ {
+ gpuSetHaarClassifierCascade(cascade);
+
+ gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
+ stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
+ classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
+ node = (GpuHidHaarTreeNode *)(classifier->node);
+
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
+ m_nodenum * sizeof(GpuHidHaarTreeNode),
+ node, 0, NULL, NULL));
+
+ cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount);
+ float *correction = (float *)malloc(sizeof(float) * m_loopcount);
+ double factor;
+ for(int i = 0; i < m_loopcount; i++)
+ {
+ factor = scalev[i];
+ int equRect_x = (int)(factor * gcascade->p0 + 0.5);
+ int equRect_y = (int)(factor * gcascade->p1 + 0.5);
+ int equRect_w = (int)(factor * gcascade->p3 + 0.5);
+ int equRect_h = (int)(factor * gcascade->p2 + 0.5);
+ p[i].s[0] = equRect_x;
+ p[i].s[1] = equRect_y;
+ p[i].s[2] = equRect_x + equRect_w;
+ p[i].s[3] = equRect_y + equRect_h;
+ correction[i] = 1. / (equRect_w * equRect_h);
+ int startnodenum = m_nodenum * i;
+ float factor2 = (float)factor;
+
+ vector<pair<size_t, const void *> > args1;
+ args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
+ args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
+ args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
+ args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
+ args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
+
+ size_t globalThreads2[3] = {m_nodenum, 1, 1};
+
+ openCLExecuteKernel(Context::getContext(), &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
+ }
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
+
+ free(p);
+ free(correction);
+ }
initialized = true;
}
CvSize sz;
CvSize winSize0 = oldCascade->orig_window_size;
detect_piramid_info *scaleinfo;
+ cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
if (flags & CV_HAAR_SCALE_IMAGE)
{
for(factor = 1.f;; factor *= scaleFactor)
((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
}
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
sizeof(detect_piramid_info)*loopcount,
scaleinfo, 0, NULL, NULL));
free(scaleinfo);
const std::vector<cv::Rect> &rectList,
const std::vector<int> &rweights)
{
- CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), cvCreateMemStorage(0) );
+ MemStorage tempStorage(cvCreateMemStorage(0));
+ CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), tempStorage );
if( findBiggestObject && rectList.size() )
{
void cv::ocl::OclCascadeClassifierBuf::release()
{
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
-
- if( (m_flags & CV_HAAR_SCALE_IMAGE) )
- {
- cvFree(&oldCascade->hid_cascade);
- }
- else
+ if(initialized)
{
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
- }
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
+
+ if( (m_flags & CV_HAAR_SCALE_IMAGE) )
+ {
+ cvFree(&oldCascade->hid_cascade);
+ }
+ else
+ {
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
+ }
- free(buffers);
- buffers = NULL;
+ free(buffers);
+ buffers = NULL;
+ initialized = false;
+ }
}
#ifndef _MAX_PATH
#define _MAX_PATH 1024
#endif
-
-
-/****************************************************************************************\
-* Persistence functions *
-\****************************************************************************************/
-
-/* field names */
-
-#define ICV_HAAR_SIZE_NAME "size"
-#define ICV_HAAR_STAGES_NAME "stages"
-#define ICV_HAAR_TREES_NAME "trees"
-#define ICV_HAAR_FEATURE_NAME "feature"
-#define ICV_HAAR_RECTS_NAME "rects"
-#define ICV_HAAR_TILTED_NAME "tilted"
-#define ICV_HAAR_THRESHOLD_NAME "threshold"
-#define ICV_HAAR_LEFT_NODE_NAME "left_node"
-#define ICV_HAAR_LEFT_VAL_NAME "left_val"
-#define ICV_HAAR_RIGHT_NODE_NAME "right_node"
-#define ICV_HAAR_RIGHT_VAL_NAME "right_val"
-#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
-#define ICV_HAAR_PARENT_NAME "parent"
-#define ICV_HAAR_NEXT_NAME "next"
-
-static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */)
-{
- return 1;
-}
-
-namespace cv
-{
-namespace ocl
-{
-
-struct gpuHaarDetectObjects_ScaleImage_Invoker
-{
- gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade,
- int _stripSize, double _factor,
- const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1,
- Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec )
- {
- cascade = _cascade;
- stripSize = _stripSize;
- factor = _factor;
- sum1 = _sum1;
- sqsum1 = _sqsum1;
- norm1 = _norm1;
- mask1 = _mask1;
- equRect = _equRect;
- vec = &_vec;
- }
-
- void operator()( const BlockedRange &range ) const
- {
- Size winSize0 = cascade->orig_window_size;
- Size winSize(cvRound(winSize0.width * factor), cvRound(winSize0.height * factor));
- int y1 = range.begin() * stripSize, y2 = min(range.end() * stripSize, sum1.rows - 1 - winSize0.height);
- Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1);
- int x, y, ystep = factor > 2 ? 1 : 2;
-
- for( y = y1; y < y2; y += ystep )
- for( x = 0; x < ssz.width; x += ystep )
- {
- if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 )
- vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
- winSize.width, winSize.height));
- }
- }
-
- const CvHaarClassifierCascade *cascade;
- int stripSize;
- double factor;
- Mat sum1, sqsum1, *norm1, *mask1;
- Rect equRect;
- ConcurrentRectVector *vec;
-};
-
-
-struct gpuHaarDetectObjects_ScaleCascade_Invoker
-{
- gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade,
- Size _winsize, const Range &_xrange, double _ystep,
- size_t _sumstep, const int **_p, const int **_pq,
- ConcurrentRectVector &_vec )
- {
- cascade = _cascade;
- winsize = _winsize;
- xrange = _xrange;
- ystep = _ystep;
- sumstep = _sumstep;
- p = _p;
- pq = _pq;
- vec = &_vec;
- }
-
- void operator()( const BlockedRange &range ) const
- {
- int iy, startY = range.begin(), endY = range.end();
- const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3];
- const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3];
- bool doCannyPruning = p0 != 0;
- int sstep = (int)(sumstep / sizeof(p0[0]));
-
- for( iy = startY; iy < endY; iy++ )
- {
- int ix, y = cvRound(iy * ystep), ixstep = 1;
- for( ix = xrange.start; ix < xrange.end; ix += ixstep )
- {
- int x = cvRound(ix * ystep); // it should really be ystep, not ixstep
-
- if( doCannyPruning )
- {
- int offset = y * sstep + x;
- int s = p0[offset] - p1[offset] - p2[offset] + p3[offset];
- int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset];
- if( s < 100 || sq < 20 )
- {
- ixstep = 2;
- continue;
- }
- }
-
- int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */);
- if( result > 0 )
- vec->push_back(Rect(x, y, winsize.width, winsize.height));
- ixstep = result != 0 ? 1 : 2;
- }
- }
- }
-
- const CvHaarClassifierCascade *cascade;
- double ystep;
- size_t sumstep;
- Size winsize;
- Range xrange;
- const int **p;
- const int **pq;
- ConcurrentRectVector *vec;
-};
-
-}
-}
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
-
+ float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
vector< pair<size_t, const void *> > args;
if(map1.channels() == 2)
{
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
- float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
-
- if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
+
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
}
}
else
{
- float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
}
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
double k, int borderType)
{
+ oclMat dx, dy;
+ cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType);
+ }
+
+ void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
+ double k, int borderType)
+ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
- oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
- extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
+ extractCovData(src, dx, dy, blockSize, ksize, borderType);
dst.create(src.size(), CV_32F);
- corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), Dx, Dy, dst, borderType);
+ corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
}
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
{
+ oclMat dx, dy;
+ cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType);
+ }
+
+ void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
+ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
- oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
- extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
+ extractCovData(src, dx, dy, blockSize, ksize, borderType);
dst.create(src.size(), CV_32F);
- corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType);
+ corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
}
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
oclinfo.push_back(ocltmpinfo);
}
}
+ if(devcienums > 0)
+ {
+ setDevice(oclinfo[0]);
+ }
return devcienums;
}
//
//M*/
-#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include "precomp.hpp"
+#ifdef __GNUC__
+#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402
+#define GCC_DIAG_STR(s) #s
+#define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y)
+# define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x)
+# define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x)
+# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
+# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \
+GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
+# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop)
+# else
+# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
+# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x))
+# endif
+#else
+# define GCC_DIAG_OFF(x)
+# define GCC_DIAG_ON(x)
+#endif
+#endif /* __GNUC__ */
+
using namespace std;
namespace cv
build_options, finish_mode);
}
+#ifdef __GNUC__
+ GCC_DIAG_OFF(deprecated-declarations)
+#endif
cl_mem bindTexture(const oclMat &mat)
{
cl_mem texture;
format.image_channel_order = CL_RGBA;
break;
default:
- CV_Error(-1, "Image forma is not supported");
+ CV_Error(-1, "Image format is not supported");
break;
}
#ifdef CL_VERSION_1_2
else
#endif
{
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
texture = clCreateImage2D(
(cl_context)mat.clCxt->oclContext(),
CL_MEM_READ_WRITE,
0,
NULL,
&err);
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
}
size_t origin[] = { 0, 0, 0 };
size_t region[] = { mat.cols, mat.rows, 1 };
openCLSafeCall(err);
return texture;
}
+#ifdef __GNUC__
+ GCC_DIAG_ON(deprecated-declarations)
+#endif
+
+ Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
+ {
+ return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type()));
+ }
void releaseTexture(cl_mem& texture)
{
openCLFree(texture);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 3)
+#define dst_align ((dst_offset / 2) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 3)
+#define dst_align ((dst_offset / 2) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
int gidy = get_global_id(1);
int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
- if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3)==0)
+ if(gidx+3<cols && gidy<rows && ((dst_offset_in_pixel&3)==0))
{
*(__global uchar4*)&dst[out_addr] = res;
}
// Wang Weiyan, wangweiyanster@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Nathan, liujun@multicorewareinc.com
+// Peng Xiao, pengxiao@outlook.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
typedef int sumtype;
typedef float sqsumtype;
-typedef struct __attribute__((aligned (128))) GpuHidHaarFeature
-{
- struct __attribute__((aligned (32)))
-{
- int p0 __attribute__((aligned (4)));
- int p1 __attribute__((aligned (4)));
- int p2 __attribute__((aligned (4)));
- int p3 __attribute__((aligned (4)));
- float weight __attribute__((aligned (4)));
-}
-rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32)));
-}
-GpuHidHaarFeature;
-
+#ifndef STUMP_BASED
+#define STUMP_BASED 1
+#endif
typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode
{
int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned (64)));
- float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
- float threshold /*__attribute__((aligned (4)))*/;
- float alpha[2] __attribute__((aligned (8)));
+ float weight[CV_HAAR_FEATURE_MAX];
+ float threshold;
+ float alpha[3] __attribute__((aligned (16)));
int left __attribute__((aligned (4)));
int right __attribute__((aligned (4)));
}
float inv_window_area __attribute__((aligned (4)));
} GpuHidHaarClassifierCascade;
-
__kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(
global GpuHidHaarStageClassifier * stagecascadeptr,
global int4 * info,
float stage_sum = 0.f;
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
float stagethreshold = as_float(stageinfo.y);
- for(int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++ )
+ for(int nodeloop = 0; nodeloop < stageinfo.x; )
{
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter);
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
- float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
+ float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
+
float nodethreshold = w.w * variance_norm_factor;
info1.x +=lcl_off;
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
- stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
+ bool passThres = classsum >= nodethreshold;
+#if STUMP_BASED
+ stage_sum += passThres ? alpha3.y : alpha3.x;
nodecounter++;
+ nodeloop++;
+#else
+ bool isRootNode = (nodecounter & 1) == 0;
+ if(isRootNode)
+ {
+ if( (passThres && currentnodeptr->right) ||
+ (!passThres && currentnodeptr->left))
+ {
+ nodecounter ++;
+ }
+ else
+ {
+ stage_sum += alpha3.x;
+ nodecounter += 2;
+ nodeloop ++;
+ }
+ }
+ else
+ {
+ stage_sum += passThres ? alpha3.z : alpha3.y;
+ nodecounter ++;
+ nodeloop ++;
+ }
+#endif
}
result = (stage_sum >= stagethreshold);
if(lcl_compute_win_id < queuecount)
{
-
int tempnodecounter = lcl_compute_id;
float part_sum = 0.f;
- for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
+ const int stump_factor = STUMP_BASED ? 1 : 2;
+ int root_offset = 0;
+ for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;)
{
- __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
+ __global GpuHidHaarTreeNode* currentnodeptr =
+ nodeptr + (nodecounter + tempnodecounter) * stump_factor + root_offset;
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
- float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
+ float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
float nodethreshold = w.w * variance_norm_factor;
info1.x +=queue_pixel;
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
- part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
- tempnodecounter +=lcl_compute_win;
+ bool passThres = classsum >= nodethreshold;
+#if STUMP_BASED
+ part_sum += passThres ? alpha3.y : alpha3.x;
+ tempnodecounter += lcl_compute_win;
+ lcl_loop++;
+#else
+ if(root_offset == 0)
+ {
+ if( (passThres && currentnodeptr->right) ||
+ (!passThres && currentnodeptr->left))
+ {
+ root_offset = 1;
+ }
+ else
+ {
+ part_sum += alpha3.x;
+ tempnodecounter += lcl_compute_win;
+ lcl_loop++;
+ }
+ }
+ else
+ {
+ part_sum += passThres ? alpha3.z : alpha3.y;
+ tempnodecounter += lcl_compute_win;
+ lcl_loop++;
+ root_offset = 0;
+ }
+#endif
}//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
partialsum[lcl_id]=part_sum;
}
}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-/*
-if(stagecascade->two_rects)
-{
- #pragma unroll
- for( n = 0; n < stagecascade->count; n++ )
- {
- t1 = *(node + counter);
- t = t1.threshold * variance_norm_factor;
- classsum = calc_sum1(t1,p_offset,0) * t1.weight[0];
-
- classsum += calc_sum1(t1, p_offset,1) * t1.weight[1];
- stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0];
-
- counter++;
- }
-}
-else
-{
- #pragma unroll
- for( n = 0; n < stagecascade->count; n++ )
- {
- t = node[counter].threshold*variance_norm_factor;
- classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0];
- classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1];
-
- if( node[counter].p0[2] )
- classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2];
-
- stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify
-
- counter++;
- }
-}
-*/
-/*
-__kernel void gpuRunHaarClassifierCascade_ScaleWindow(
- constant GpuHidHaarClassifierCascade * _cascade,
- global GpuHidHaarStageClassifier * stagecascadeptr,
- //global GpuHidHaarClassifier * classifierptr,
- global GpuHidHaarTreeNode * nodeptr,
- global int * sum,
- global float * sqsum,
- global int * _candidate,
- int pixel_step,
- int cols,
- int rows,
- int start_stage,
- int end_stage,
- //int counts,
- int nodenum,
- int ystep,
- int detect_width,
- //int detect_height,
- int loopcount,
- int outputstep)
- //float scalefactor)
-{
-unsigned int x1 = get_global_id(0);
-unsigned int y1 = get_global_id(1);
-int p_offset;
-int m, n;
-int result;
-int counter;
-float mean, variance_norm_factor;
-for(int i=0;i<loopcount;i++)
-{
-constant GpuHidHaarClassifierCascade * cascade = _cascade + i;
-global int * candidate = _candidate + i*outputstep;
-int window_width = cascade->p1 - cascade->p0;
-int window_height = window_width;
-result = 1;
-counter = 0;
-unsigned int x = mul24(x1,ystep);
-unsigned int y = mul24(y1,ystep);
-if((x < cols - window_width - 1) && (y < rows - window_height -1))
-{
-global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage;
-//global GpuHidHaarClassifier *classifier = classifierptr;
-global GpuHidHaarTreeNode *node = nodeptr + nodenum*i;
-
-p_offset = mad24(y, pixel_step, x);// modify
-
-mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) -
- *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3))
- *cascade->inv_window_area;
-
-variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) -
- *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset);
-variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean;
-variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify
-
-// if( cascade->is_stump_based )
-//{
-for( m = start_stage; m < end_stage; m++ )
-{
-float stage_sum = 0.f;
-float t, classsum;
-GpuHidHaarTreeNode t1;
-
-//#pragma unroll
-for( n = 0; n < stagecascade->count; n++ )
-{
- t1 = *(node + counter);
- t = t1.threshold * variance_norm_factor;
- classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1];
-
- if((t1.p0[2]) && (!stagecascade->two_rects))
- classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2];
-
- stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify
- counter++;
-}
-
-if (stage_sum < stagecascade->threshold)
-{
- result = 0;
- break;
-}
-
-stagecascade++;
-
-}
-if(result)
-{
- candidate[4 * (y1 * detect_width + x1)] = x;
- candidate[4 * (y1 * detect_width + x1) + 1] = y;
- candidate[4 * (y1 * detect_width + x1)+2] = window_width;
- candidate[4 * (y1 * detect_width + x1) + 3] = window_height;
-}
-//}
-}
-}
-}
-*/
-
-
-
-
// @Authors
// Wu Xinglong, wxl370@126.com
// Sen Liu, swjtuls1987@126.com
-//
+// Peng Xiao, pengxiao@outlook.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
#define CV_HAAR_FEATURE_MAX 3
typedef int sumtype;
typedef float sqsumtype;
-typedef struct __attribute__((aligned(128))) GpuHidHaarFeature
-{
- struct __attribute__((aligned(32)))
-{
- int p0 __attribute__((aligned(4)));
- int p1 __attribute__((aligned(4)));
- int p2 __attribute__((aligned(4)));
- int p3 __attribute__((aligned(4)));
- float weight __attribute__((aligned(4)));
-}
-rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32)));
-}
-GpuHidHaarFeature;
+
typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode
{
int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned(64)));
float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
float threshold /*__attribute__((aligned (4)))*/;
- float alpha[2] __attribute__((aligned(8)));
+ float alpha[3] __attribute__((aligned(16)));
int left __attribute__((aligned(4)));
int right __attribute__((aligned(4)));
}
const int p_offset = mad24(y, step, x);
cascadeinfo.x += p_offset;
cascadeinfo.z += p_offset;
- mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
- sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
+ mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
+ - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
+ sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
+ + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
* correction_t;
- variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
- sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
+ variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
+ - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
+ sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
+ + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
variance_norm_factor = variance_norm_factor * correction_t - mean * mean;
variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f;
bool result = true;
nodecounter = startnode + nodecount * scalei;
-
for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
{
float stage_sum = 0.f;
int stagecount = stagecascadeptr[stageloop].count;
- for (int nodeloop = 0; nodeloop < stagecount; nodeloop++)
+ for (int nodeloop = 0; nodeloop < stagecount;)
{
__global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
- float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0]));
+ float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0]));
float nodethreshold = w.w * variance_norm_factor;
+
info1.x += p_offset;
info1.z += p_offset;
info2.x += p_offset;
info2.z += p_offset;
- float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
- sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
- classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
- sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
info3.x += p_offset;
info3.z += p_offset;
- classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
- sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
- stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
+ float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)]
+ - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
+ sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)]
+ + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
+ classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)]
+ - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
+ sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)]
+ + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
+ classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)]
+ - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
+ sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
+ + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
+
+ bool passThres = classsum >= nodethreshold;
+
+#if STUMP_BASED
+ stage_sum += passThres ? alpha3.y : alpha3.x;
nodecounter++;
+ nodeloop++;
+#else
+ bool isRootNode = (nodecounter & 1) == 0;
+ if(isRootNode)
+ {
+ if( (passThres && currentnodeptr->right) ||
+ (!passThres && currentnodeptr->left))
+ {
+ nodecounter ++;
+ }
+ else
+ {
+ stage_sum += alpha3.x;
+ nodecounter += 2;
+ nodeloop ++;
+ }
+ }
+ else
+ {
+ stage_sum += (passThres ? alpha3.z : alpha3.y);
+ nodecounter ++;
+ nodeloop ++;
+ }
+#endif
}
- result = (bool)(stage_sum >= stagecascadeptr[stageloop].threshold);
+ result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold);
}
barrier(CLK_LOCAL_MEM_FENCE);
int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex] = (y << 16) | x;
}
-
barrier(CLK_LOCAL_MEM_FENCE);
int queuecount = lclcount[0];
newnode[counter].threshold = t1.threshold;
newnode[counter].alpha[0] = t1.alpha[0];
newnode[counter].alpha[1] = t1.alpha[1];
+ newnode[counter].alpha[2] = t1.alpha[2];
}
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ mag += mag_offset;
+ map += map_offset;
+
__local float smem[18][18];
int gidx = get_global_id(0);
(
__global int * map,
__global ushort2 * st,
- volatile __global unsigned int * counter,
+ __global unsigned int * counter,
int rows,
int cols,
int map_step,
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ map += map_offset;
+
__local int smem[18][18];
int gidx = get_global_id(0);
if(ly < 14)
{
smem[ly][lx] =
- map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset];
+ map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
}
if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
{
smem[ly + 14][lx] =
- map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset];
+ map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
}
barrier(CLK_LOCAL_MEM_FENCE);
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
+
#define stack_size 512
__kernel
-void edgesHysteresisGlobal
+void
+__attribute__((reqd_work_group_size(128,1,1)))
+edgesHysteresisGlobal
(
__global int * map,
__global ushort2 * st1,
__global ushort2 * st2,
- volatile __global int * counter,
+ __global int * counter,
int rows,
int cols,
int count,
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ map += map_offset;
+
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int grp_idx = get_group_id(0);
int grp_idy = get_group_id(1);
- volatile __local unsigned int s_counter;
+ __local unsigned int s_counter;
__local unsigned int s_ind;
__local ushort2 s_st[stack_size];
pos.x += c_dx[lidx & 7];
pos.y += c_dy[lidx & 7];
- if (map[pos.x + map_offset + pos.y * map_step] == 1)
+ if (map[pos.x + pos.y * map_step] == 1)
{
- map[pos.x + map_offset + pos.y * map_step] = 2;
+ map[pos.x + pos.y * map_step] = 2;
ind = atomic_inc(&s_counter);
if(gidy < rows && gidx < cols)
{
- dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1));
+ dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
}
}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef WITH_MASK
+#define WITH_MASK 0
+#endif
+
+__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
+{
+ return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
+}
+
+inline float ELEM_FLT2(image2d_t _eig, float2 pt)
+{
+ return read_imagef(_eig, sampler, pt).x;
+}
+
+__kernel
+ void findCorners
+ (
+ image2d_t eig,
+ __global const char * mask,
+ __global float2 * corners,
+ const int mask_strip,// in pixels
+ const float threshold,
+ const int rows,
+ const int cols,
+ const int max_count,
+ __global int * g_counter
+ )
+{
+ const int j = get_global_id(0);
+ const int i = get_global_id(1);
+
+ if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1
+#if WITH_MASK
+ && mask[i * mask_strip + j] != 0
+#endif
+ )
+ {
+ const float val = ELEM_INT2(eig, j, i);
+
+ if (val > threshold)
+ {
+ float maxVal = val;
+
+ maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
+
+ maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal);
+
+ maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal);
+ maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal);
+
+ if (val == maxVal)
+ {
+ const int ind = atomic_inc(g_counter);
+
+ if (ind < max_count)
+ corners[ind] = (float2)(j, i);
+ }
+ }
+ }
+}
+
+//bitonic sort
+__kernel
+ void sortCorners_bitonicSort
+ (
+ image2d_t eig,
+ __global float2 * corners,
+ const int count,
+ const int stage,
+ const int passOfStage
+ )
+{
+ const int threadId = get_global_id(0);
+ if(threadId >= count / 2)
+ {
+ return;
+ }
+
+    const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 means descending order
+
+ const int pairDistance = 1 << (stage - passOfStage);
+ const int blockWidth = 2 * pairDistance;
+
+ const int leftId = min( (threadId % pairDistance)
+ + (threadId / pairDistance) * blockWidth, count );
+
+ const int rightId = min( leftId + pairDistance, count );
+
+ const float2 leftPt = corners[leftId];
+ const float2 rightPt = corners[rightId];
+
+ const float leftVal = ELEM_FLT2(eig, leftPt);
+ const float rightVal = ELEM_FLT2(eig, rightPt);
+
+ const bool compareResult = leftVal > rightVal;
+
+ float2 greater = compareResult ? leftPt:rightPt;
+ float2 lesser = compareResult ? rightPt:leftPt;
+
+ corners[leftId] = sortOrder ? lesser : greater;
+ corners[rightId] = sortOrder ? greater : lesser;
+}
+
+//selection sort for gfft
+//kernel is ported from Bolt library:
+//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
+// Local sort will firstly sort elements of each workgroup using selection sort
+// its performance is O(n)
+__kernel
+ void sortCorners_selectionSortLocal
+ (
+ image2d_t eig,
+ __global float2 * corners,
+ const int count,
+ __local float2 * scratch
+ )
+{
+ int i = get_local_id(0); // index in workgroup
+    int numOfGroups = get_num_groups(0); // total number of workgroups
+ int groupID = get_group_id(0);
+ int wg = get_local_size(0); // workgroup size = block size
+ int n; // number of elements to be processed for this work group
+
+ int offset = groupID * wg;
+ int same = 0;
+ corners += offset;
+ n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
+ float2 pt1, pt2;
+
+ pt1 = corners[min(i, n)];
+ scratch[i] = pt1;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if(i >= n)
+ {
+ return;
+ }
+
+ float val1 = ELEM_FLT2(eig, pt1);
+ float val2;
+
+ int pos = 0;
+ for (int j=0;j<n;++j)
+ {
+ pt2 = scratch[j];
+ val2 = ELEM_FLT2(eig, pt2);
+ if(val2 > val1)
+ pos++;//calculate the rank of this element in this work group
+ else
+ {
+ if(val1 > val2)
+ continue;
+ else
+ {
+ // val1 and val2 are same
+ same++;
+ }
+ }
+ }
+ for (int j=0; j< same; j++)
+ corners[pos + j] = pt1;
+}
+__kernel
+ void sortCorners_selectionSortFinal
+ (
+ image2d_t eig,
+ __global float2 * corners,
+ const int count
+ )
+{
+ const int i = get_local_id(0); // index in workgroup
+    const int numOfGroups = get_num_groups(0); // total number of workgroups
+ const int groupID = get_group_id(0);
+ const int wg = get_local_size(0); // workgroup size = block size
+ int pos = 0, same = 0;
+ const int offset = get_group_id(0) * wg;
+ const int remainder = count - wg*(numOfGroups-1);
+
+ if((offset + i ) >= count)
+ return;
+ float2 pt1, pt2;
+ pt1 = corners[groupID*wg + i];
+
+ float val1 = ELEM_FLT2(eig, pt1);
+ float val2;
+
+ for(int j=0; j<numOfGroups-1; j++ )
+ {
+ for(int k=0; k<wg; k++)
+ {
+ pt2 = corners[j*wg + k];
+ val2 = ELEM_FLT2(eig, pt2);
+ if(val1 > val2)
+ break;
+ else
+ {
+ //Increment only if the value is not the same.
+ if( val2 > val1 )
+ pos++;
+ else
+ same++;
+ }
+ }
+ }
+
+ for(int k=0; k<remainder; k++)
+ {
+ pt2 = corners[(numOfGroups-1)*wg + k];
+ val2 = ELEM_FLT2(eig, pt2);
+ if(val1 > val2)
+ break;
+ else
+ {
+ //Don't increment if the value is the same.
+ //Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
+ if(val2 > val1)
+ pos++;
+ else
+ same++;
+ }
+ }
+ for (int j=0; j< same; j++)
+ corners[pos + j] = pt1;
+}
+
int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3);
float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart);
int4 con = dpos >= 0 && dpos < dst_cols;
- ddata = convert_float4(con) != 0 ? ddata : dVal;
+ ddata = convert_float4(con) != (float4)(0) ? ddata : dVal;
if(dstart < dst_cols)
{
*(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata;
#if defined (HAVE_OPENCL)
+#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include "opencv2/ocl/private/util.hpp"
#include "safe_call.hpp"
#define __OPENCV_OPENCL_SAFE_CALL_HPP__
#if defined __APPLE__
-#include <OpenCL/OpenCL.h>
+#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include "precomp.hpp"
#ifdef HAVE_OPENCL
-#define SHOW_RESULT 0
////////////////////////////////////////////////////////
// Canny
bool useL2gradient;
cv::Mat edges_gold;
- //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
}
};
double low_thresh = 50.0;
double high_thresh = 100.0;
- cv::resize(img, img, cv::Size(512, 384));
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
cv::ocl::oclMat edges;
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
- char filename [100];
- sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
-
cv::Mat edges_gold;
cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
-#if SHOW_RESULT
- cv::Mat edges_x2, ocl_edges(edges);
- edges_x2.create(edges.rows, edges.cols * 2, edges.type());
- edges_x2.setTo(0);
- cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
- cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
- cv::namedWindow("Canny result (left: cpu, right: ocl)");
- cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
- cv::waitKey();
-#endif //OUTPUT_RESULT
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
}
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
+INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
#endif
\ No newline at end of file
using namespace std;
using namespace cv;
extern string workdir;
+
+namespace
+{
+IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
+CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
+CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
struct getRect
{
Rect operator ()(const CvAvgComp &e) const
return e.rect;
}
};
+}
-PARAM_TEST_CASE(Haar, double, int)
+PARAM_TEST_CASE(Haar, double, int, CascadeName)
{
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
- cv::ocl::OclCascadeClassifierBuf cascadebuf;
cv::CascadeClassifier cpucascade, cpunestedCascade;
double scale;
int flags;
+ std::string cascadeName;
virtual void SetUp()
{
scale = GET_PARAM(0);
flags = GET_PARAM(1);
- string cascadeName = workdir + "../../data/haarcascades/haarcascade_frontalface_alt.xml";
+ cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(2));
- if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) || (!cascadebuf.load( cascadeName )))
+ if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) )
{
cout << "ERROR: Could not load classifier cascade" << endl;
return;
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
oclfaces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
-
+
cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0) );
vector<Rect> faces, oclfaces;
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
- MemStorage storage(cvCreateMemStorage(0));
cvtColor( img, gray, CV_BGR2GRAY );
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
equalizeHist( smallImg, smallImg );
cv::ocl::oclMat image;
image.upload(smallImg);
+ cv::ocl::OclCascadeClassifierBuf cascadebuf;
+ if( !cascadebuf.load( cascadeName ) )
+ {
+ cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl;
+ return;
+ }
cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0) );
- cascadebuf.release();
cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0) );
EXPECT_EQ(faces.size(), oclfaces.size());
+
+ // intentionally run ocl facedetect again and check if it still works after the first run
+ cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
+ flags,
+ Size(30, 30));
+ cascadebuf.release();
+ EXPECT_EQ(faces.size(), oclfaces.size());
}
INSTANTIATE_TEST_CASE_P(FaceDetect, Haar,
Combine(Values(1.0),
- Values(CV_HAAR_SCALE_IMAGE, 0)));
+ Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
#endif // HAVE_OPENCL
using namespace std;
extern string workdir;
+
+
+//////////////////////////////////////////////////////
+// GoodFeaturesToTrack
+namespace
+{
+ IMPLEMENT_PARAM_CLASS(MinDistance, double)
+}
+PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance)
+{
+ double minDistance;
+
+ virtual void SetUp()
+ {
+ minDistance = GET_PARAM(0);
+ }
+};
+
+TEST_P(GoodFeaturesToTrack, Accuracy)
+{
+ cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+ ASSERT_FALSE(frame.empty());
+
+ int maxCorners = 1000;
+ double qualityLevel = 0.01;
+
+ cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+
+ cv::ocl::oclMat d_pts;
+ detector(oclMat(frame), d_pts);
+
+ ASSERT_FALSE(d_pts.empty());
+
+ std::vector<cv::Point2f> pts(d_pts.cols);
+
+ detector.downloadPoints(d_pts, pts);
+
+ std::vector<cv::Point2f> pts_gold;
+ cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance);
+
+ ASSERT_EQ(pts_gold.size(), pts.size());
+
+ size_t mistmatch = 0;
+ for (size_t i = 0; i < pts.size(); ++i)
+ {
+ cv::Point2i a = pts_gold[i];
+ cv::Point2i b = pts[i];
+
+ bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
+
+ if (!eq)
+ ++mistmatch;
+ }
+
+ double bad_ratio = static_cast<double>(mistmatch) / pts.size();
+
+ ASSERT_LE(bad_ratio, 0.01);
+}
+
+TEST_P(GoodFeaturesToTrack, EmptyCorners)
+{
+ int maxCorners = 1000;
+ double qualityLevel = 0.01;
+
+ cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+
+ cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
+ cv::ocl::oclMat corners(1, maxCorners, CV_32FC2);
+
+ detector(src, corners);
+
+ ASSERT_TRUE(corners.empty());
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
+ testing::Values(MinDistance(0.0), MinDistance(3.0)));
+
//////////////////////////////////////////////////////////////////////////
PARAM_TEST_CASE(TVL1, bool)
{
switch (src.type()) {
case CV_8U:
- parallel_for(cv::BlockedRange(0, src.rows),
+ parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<uchar>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC2:
- parallel_for(cv::BlockedRange(0, src.rows),
+ parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec2b>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC3:
- parallel_for(cv::BlockedRange(0, src.rows),
+ parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec3b>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
switch (srcImgs[0].type()) {
case CV_8U:
- parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+ parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC2:
- parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+ parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC3:
- parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+ parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
using namespace cv;
template <typename T>
-struct FastNlMeansDenoisingInvoker {
+struct FastNlMeansDenoisingInvoker : ParallelLoopBody {
public:
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
int template_window_size, int search_window_size, const float h);
- void operator() (const BlockedRange& range) const;
+ void operator() (const Range& range) const;
private:
void operator= (const FastNlMeansDenoisingInvoker&);
}
template <class T>
-void FastNlMeansDenoisingInvoker<T>::operator() (const BlockedRange& range) const {
- int row_from = range.begin();
- int row_to = range.end() - 1;
+void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
+ int row_from = range.start;
+ int row_to = range.end - 1;
Array2d<int> dist_sums(search_window_size_, search_window_size_);
using namespace cv;
template <typename T>
-struct FastNlMeansMultiDenoisingInvoker {
+struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody {
public:
FastNlMeansMultiDenoisingInvoker(
const std::vector<Mat>& srcImgs, int imgToDenoiseIndex, int temporalWindowSize,
Mat& dst, int template_window_size, int search_window_size, const float h);
- void operator() (const BlockedRange& range) const;
+ void operator() (const Range& range) const;
private:
void operator= (const FastNlMeansMultiDenoisingInvoker&);
}
template <class T>
-void FastNlMeansMultiDenoisingInvoker<T>::operator() (const BlockedRange& range) const {
- int row_from = range.begin();
- int row_to = range.end() - 1;
+void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const {
+ int row_from = range.start;
+ int row_to = range.end - 1;
Array3d<int> dist_sums(temporal_window_size_, search_window_size_, search_window_size_);
};
-struct MatchPairsBody
+struct MatchPairsBody : ParallelLoopBody
{
MatchPairsBody(const MatchPairsBody& other)
: matcher(other.matcher), features(other.features),
: matcher(_matcher), features(_features),
pairwise_matches(_pairwise_matches), near_pairs(_near_pairs) {}
- void operator ()(const BlockedRange &r) const
+ void operator ()(const Range &r) const
{
const int num_images = static_cast<int>(features.size());
- for (int i = r.begin(); i < r.end(); ++i)
+ for (int i = r.start; i < r.end; ++i)
{
int from = near_pairs[i].first;
int to = near_pairs[i].second;
MatchPairsBody body(*this, features, pairwise_matches, near_pairs);
if (is_thread_safe_)
- parallel_for(BlockedRange(0, static_cast<int>(near_pairs.size())), body);
+ parallel_for_(Range(0, static_cast<int>(near_pairs.size())), body);
else
- body(BlockedRange(0, static_cast<int>(near_pairs.size())));
+ body(Range(0, static_cast<int>(near_pairs.size())));
LOGLN_CHAT("");
}
//IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004
//http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf
-struct MOG2Invoker
+struct MOG2Invoker : ParallelLoopBody
{
MOG2Invoker(const Mat& _src, Mat& _dst,
GMM* _gmm, float* _mean,
cvtfunc = src->depth() != CV_32F ? getConvertFunc(src->depth(), CV_32F) : 0;
}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int y0 = range.begin(), y1 = range.end();
+ int y0 = range.start, y1 = range.end;
int ncols = src->cols, nchannels = src->channels();
AutoBuffer<float> buf(src->cols*nchannels);
float alpha1 = 1.f - alphaT;
learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./min( 2*nframes, history );
CV_Assert(learningRate >= 0);
- parallel_for(BlockedRange(0, image.rows),
- MOG2Invoker(image, fgmask,
- (GMM*)bgmodel.data,
- (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols),
- bgmodelUsedModes.data, nmixtures, (float)learningRate,
- (float)varThreshold,
- backgroundRatio, varThresholdGen,
- fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
- bShadowDetection, nShadowDetection));
+ parallel_for_(Range(0, image.rows),
+ MOG2Invoker(image, fgmask,
+ (GMM*)bgmodel.data,
+ (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols),
+ bgmodelUsedModes.data, nmixtures, (float)learningRate,
+ (float)varThreshold,
+ backgroundRatio, varThresholdGen,
+ fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
+ bShadowDetection, nShadowDetection));
}
void BackgroundSubtractorMOG2::getBackgroundImage(OutputArray backgroundImage) const
minEigThreshold = _minEigThreshold;
}
-void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const
+void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
{
Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
const Mat& I = *prevImg;
Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
- for( int ptidx = range.begin(); ptidx < range.end(); ptidx++ )
+ for( int ptidx = range.start; ptidx < range.end; ptidx++ )
{
Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
Point2f nextPt;
typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
#endif
- parallel_for(BlockedRange(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
- nextPyr[level * lvlStep2], prevPts, nextPts,
- status, err,
- winSize, criteria, level, maxLevel,
- flags, (float)minEigThreshold));
+ parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
+ nextPyr[level * lvlStep2], prevPts, nextPts,
+ status, err,
+ winSize, criteria, level, maxLevel,
+ flags, (float)minEigThreshold));
}
}
typedef short deriv_type;
- struct LKTrackerInvoker
+ struct LKTrackerInvoker : ParallelLoopBody
{
LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
const Point2f* _prevPts, Point2f* _nextPts,
Size _winSize, TermCriteria _criteria,
int _level, int _maxLevel, int _flags, float _minEigThreshold );
- void operator()(const BlockedRange& range) const;
+ void operator()(const Range& range) const;
const Mat* prevImg;
const Mat* nextImg;
--- /dev/null
+# Copyright (c) 2010-2011, Ethan Rublee
+# Copyright (c) 2011-2013, Andrey Kamaev
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. The name of the copyright holders may be used to endorse or promote
+# products derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# ------------------------------------------------------------------------------
+# Android CMake toolchain file, for use with the Android NDK r5-r8
+# Requires cmake 2.6.3 or newer (2.8.5 or newer is recommended).
+# See home page: https://github.com/taka-no-me/android-cmake
+#
+# The file is maintained by the OpenCV project. The latest version can be found at
+# http://code.opencv.org/projects/opencv/repository/revisions/master/changes/android/android.toolchain.cmake
+#
+# Usage Linux:
+# $ export ANDROID_NDK=/absolute/path/to/the/android-ndk
+# $ mkdir build && cd build
+# $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake ..
+# $ make -j8
+#
+# Usage Linux (using standalone toolchain):
+# $ export ANDROID_STANDALONE_TOOLCHAIN=/absolute/path/to/android-toolchain
+# $ mkdir build && cd build
+# $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake ..
+# $ make -j8
+#
+# Usage Windows:
+# You need native port of make to build your project.
+# Android NDK r7 (or newer) already has make.exe on board.
+# For older NDK you have to install it separately.
+# For example, this one: http://gnuwin32.sourceforge.net/packages/make.htm
+#
+# $ SET ANDROID_NDK=C:\absolute\path\to\the\android-ndk
+# $ mkdir build && cd build
+# $ cmake.exe -G"MinGW Makefiles"
+# -DCMAKE_TOOLCHAIN_FILE=path\to\the\android.toolchain.cmake
+# -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%\prebuilt\windows\bin\make.exe" ..
+# $ cmake.exe --build .
+#
+#
+# Options (can be set as cmake parameters: -D<option_name>=<value>):
+# ANDROID_NDK=/opt/android-ndk - path to the NDK root.
+# Can be set as environment variable. Can be set only at first cmake run.
+#
+# ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain - path to the
+# standalone toolchain. This option is not used if full NDK is found
+# (ignored if ANDROID_NDK is set).
+# Can be set as environment variable. Can be set only at first cmake run.
+#
+# ANDROID_ABI=armeabi-v7a - specifies the target Application Binary
+# Interface (ABI). This option nearly matches to the APP_ABI variable
+# used by ndk-build tool from Android NDK.
+#
+# Possible targets are:
+# "armeabi" - matches to the NDK ABI with the same name.
+# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+# "armeabi-v7a" - matches to the NDK ABI with the same name.
+# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+# "armeabi-v7a with NEON" - same as armeabi-v7a, but
+# sets NEON as floating-point unit
+# "armeabi-v7a with VFPV3" - same as armeabi-v7a, but
+# sets VFPV3 as floating-point unit (has 32 registers instead of 16).
+# "armeabi-v6 with VFP" - tuned for ARMv6 processors having VFP.
+# "x86" - matches to the NDK ABI with the same name.
+# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+# "mips" - matches to the NDK ABI with the same name
+#      (It is not tested on real devices by the authors of this toolchain)
+# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+#
+# ANDROID_NATIVE_API_LEVEL=android-8 - level of Android API compile for.
+# Option is read-only when standalone toolchain is used.
+#
+# ANDROID_TOOLCHAIN_NAME=arm-linux-androideabi-4.6 - the name of compiler
+# toolchain to be used. The list of possible values depends on the NDK
+# version. For NDK r8c the possible values are:
+#
+# * arm-linux-androideabi-4.4.3
+# * arm-linux-androideabi-4.6
+# * arm-linux-androideabi-clang3.1
+# * mipsel-linux-android-4.4.3
+# * mipsel-linux-android-4.6
+# * mipsel-linux-android-clang3.1
+# * x86-4.4.3
+# * x86-4.6
+# * x86-clang3.1
+#
+# ANDROID_FORCE_ARM_BUILD=OFF - set ON to generate 32-bit ARM instructions
+# instead of Thumb. Is not available for "x86" (inapplicable) and
+# "armeabi-v6 with VFP" (is forced to be ON) ABIs.
+#
+# ANDROID_NO_UNDEFINED=ON - set ON to show all undefined symbols as linker
+# errors even if they are not used.
+#
+# ANDROID_SO_UNDEFINED=OFF - set ON to allow undefined symbols in shared
+# libraries. Automatically turned for NDK r5x and r6x due to GLESv2
+# problems.
+#
+# LIBRARY_OUTPUT_PATH_ROOT=${CMAKE_SOURCE_DIR} - where to output binary
+# files. See additional details below.
+#
+# ANDROID_SET_OBSOLETE_VARIABLES=ON - if set, then toolchain defines some
+# obsolete variables which were used by previous versions of this file for
+# backward compatibility.
+#
+# ANDROID_STL=gnustl_static - specify the runtime to use.
+#
+# Possible values are:
+# none -> Do not configure the runtime.
+# system -> Use the default minimal system C++ runtime library.
+# Implies -fno-rtti -fno-exceptions.
+# Is not available for standalone toolchain.
+# system_re -> Use the default minimal system C++ runtime library.
+# Implies -frtti -fexceptions.
+# Is not available for standalone toolchain.
+# gabi++_static -> Use the GAbi++ runtime as a static library.
+# Implies -frtti -fno-exceptions.
+# Available for NDK r7 and newer.
+# Is not available for standalone toolchain.
+# gabi++_shared -> Use the GAbi++ runtime as a shared library.
+# Implies -frtti -fno-exceptions.
+# Available for NDK r7 and newer.
+# Is not available for standalone toolchain.
+# stlport_static -> Use the STLport runtime as a static library.
+# Implies -fno-rtti -fno-exceptions for NDK before r7.
+# Implies -frtti -fno-exceptions for NDK r7 and newer.
+# Is not available for standalone toolchain.
+# stlport_shared -> Use the STLport runtime as a shared library.
+# Implies -fno-rtti -fno-exceptions for NDK before r7.
+# Implies -frtti -fno-exceptions for NDK r7 and newer.
+# Is not available for standalone toolchain.
+# gnustl_static -> Use the GNU STL as a static library.
+# Implies -frtti -fexceptions.
+# gnustl_shared -> Use the GNU STL as a shared library.
+# Implies -frtti -fno-exceptions.
+# Available for NDK r7b and newer.
+# Silently degrades to gnustl_static if not available.
+#
+# ANDROID_STL_FORCE_FEATURES=ON - turn rtti and exceptions support based on
+# chosen runtime. If disabled, then the user is responsible for settings
+# these options.
+#
+# What?:
+# android-cmake toolchain searches for NDK/toolchain in the following order:
+# ANDROID_NDK - cmake parameter
+# ANDROID_NDK - environment variable
+# ANDROID_STANDALONE_TOOLCHAIN - cmake parameter
+# ANDROID_STANDALONE_TOOLCHAIN - environment variable
+# ANDROID_NDK - default locations
+# ANDROID_STANDALONE_TOOLCHAIN - default locations
+#
+# Make sure to do the following in your scripts:
+# SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${my_cxx_flags}" )
+# SET( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${my_cxx_flags}" )
+#    The flags will be prepopulated with critical flags, so don't lose them.
+# Also be aware that toolchain also sets configuration-specific compiler
+# flags and linker flags.
+#
+# ANDROID and BUILD_ANDROID will be set to true, you may test any of these
+# variables to make necessary Android-specific configuration changes.
+#
+# Also ARMEABI or ARMEABI_V7A or X86 or MIPS will be set true, mutually
+# exclusive. NEON option will be set true if VFP is set to NEON.
+#
+# LIBRARY_OUTPUT_PATH_ROOT should be set in cache to determine where Android
+# libraries will be installed.
+# Default is ${CMAKE_SOURCE_DIR}, and the android libs will always be
+# under the ${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}
+# (depending on the target ABI). This is convenient for Android packaging.
+#
+# Change Log:
+# - initial version December 2010
+# - April 2011
+# [+] added possibility to build with NDK (without standalone toolchain)
+# [+] support cross-compilation on Windows (native, no cygwin support)
+# [+] added compiler option to force "char" type to be signed
+# [+] added toolchain option to compile to 32-bit ARM instructions
+# [+] added toolchain option to disable SWIG search
+# [+] added platform "armeabi-v7a with VFPV3"
+# [~] ARM_TARGETS renamed to ARM_TARGET
+# [+] EXECUTABLE_OUTPUT_PATH is set by toolchain (required on Windows)
+# [~] Fixed bug with ANDROID_API_LEVEL variable
+# [~] turn off SWIG search if it is not found first time
+# - May 2011
+# [~] ANDROID_LEVEL is renamed to ANDROID_API_LEVEL
+# [+] ANDROID_API_LEVEL is detected by toolchain if not specified
+# [~] added guard to prevent changing of output directories on the first
+# cmake pass
+# [~] toolchain exits with error if ARM_TARGET is not recognized
+# - June 2011
+# [~] default NDK path is updated for version r5c
+# [+] variable CMAKE_SYSTEM_PROCESSOR is set based on ARM_TARGET
+# [~] toolchain install directory is added to linker paths
+# [-] removed SWIG-related stuff from toolchain
+# [+] added macro find_host_package, find_host_program to search
+# packages/programs on the host system
+# [~] fixed path to STL library
+# - July 2011
+# [~] fixed options caching
+# [~] search for all supported NDK versions
+# [~] allowed spaces in NDK path
+# - September 2011
+# [~] updated for NDK r6b
+# - November 2011
+# [*] rewritten for NDK r7
+# [+] x86 toolchain support (experimental)
+# [+] added "armeabi-v6 with VFP" ABI for ARMv6 processors.
+# [~] improved compiler and linker flags management
+# [+] support different build flags for Release and Debug configurations
+# [~] by default compiler flags the same as used by ndk-build (but only
+# where reasonable)
+#     [~] ANDROID_NDK_TOOLCHAIN_ROOT is split into ANDROID_STANDALONE_TOOLCHAIN
+# and ANDROID_TOOLCHAIN_ROOT
+# [~] ARM_TARGET is renamed to ANDROID_ABI
+# [~] ARMEABI_NDK_NAME is renamed to ANDROID_NDK_ABI_NAME
+# [~] ANDROID_API_LEVEL is renamed to ANDROID_NATIVE_API_LEVEL
+# - January 2012
+# [+] added stlport_static support (experimental)
+# [+] added special check for cygwin
+# [+] filtered out hidden files (starting with .) while globbing inside NDK
+# [+] automatically applied GLESv2 linkage fix for NDK revisions 5-6
+# [+] added ANDROID_GET_ABI_RAWNAME to get NDK ABI names by CMake flags
+# - February 2012
+# [+] updated for NDK r7b
+# [~] fixed cmake try_compile() command
+# [~] Fix for missing install_name_tool on OS X
+# - March 2012
+# [~] fixed incorrect C compiler flags
+# [~] fixed CMAKE_SYSTEM_PROCESSOR change on ANDROID_ABI change
+# [+] improved toolchain loading speed
+# [+] added assembler language support (.S)
+# [+] allowed preset search paths and extra search suffixes
+# - April 2012
+# [+] updated for NDK r7c
+# [~] fixed most of problems with compiler/linker flags and caching
+# [+] added option ANDROID_FUNCTION_LEVEL_LINKING
+# - May 2012
+# [+] updated for NDK r8
+# [+] added mips architecture support
+# - August 2012
+# [+] updated for NDK r8b
+# [~] all intermediate files generated by toolchain are moved to CMakeFiles
+# [~] libstdc++ and libsupc are removed from explicit link libraries
+# [+] added CCache support (via NDK_CCACHE environment or cmake variable)
+# [+] added gold linker support for NDK r8b
+# [~] fixed mips linker flags for NDK r8b
+# - September 2012
+# [+] added NDK release name detection (see ANDROID_NDK_RELEASE)
+# [+] added support for all C++ runtimes from NDK
+# (system, gabi++, stlport, gnustl)
+# [+] improved warnings on known issues of NDKs
+# [~] use gold linker as default if available (NDK r8b)
+# [~] globally turned off rpath
+# [~] compiler options are aligned with NDK r8b
+# - October 2012
+# [~] fixed C++ linking: explicitly link with math library (OpenCV #2426)
+# - November 2012
+# [+] updated for NDK r8c
+# [+] added support for clang compiler
+# - December 2012
+# [+] suppress warning about unused CMAKE_TOOLCHAIN_FILE variable
+# [+] adjust API level to closest compatible as NDK does
+# [~] fixed ccache full path search
+# [+] updated for NDK r8d
+# [~] compiler options are aligned with NDK r8d
+# - March 2013
+# [+] updated for NDK r8e (x86 version)
+# [+] support x86_64 version of NDK
+# ------------------------------------------------------------------------------
+
+cmake_minimum_required( VERSION 2.6.3 )
+
+if( DEFINED CMAKE_CROSSCOMPILING )
+ # subsequent toolchain loading is not really needed
+ return()
+endif()
+
+if( CMAKE_TOOLCHAIN_FILE )
+ # touch toolchain variable only to suppress "unused variable" warning
+endif()
+
+get_property( _CMAKE_IN_TRY_COMPILE GLOBAL PROPERTY IN_TRY_COMPILE )
+if( _CMAKE_IN_TRY_COMPILE )
+ include( "${CMAKE_CURRENT_SOURCE_DIR}/../android.toolchain.config.cmake" OPTIONAL )
+endif()
+
+# this one is important
+set( CMAKE_SYSTEM_NAME Linux )
+# this one not so much
+set( CMAKE_SYSTEM_VERSION 1 )
+
+# rpath makes little sense for Android
+set( CMAKE_SKIP_RPATH TRUE CACHE BOOL "If set, runtime paths are not added when using shared libraries." )
+
+set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" )
+if(NOT DEFINED ANDROID_NDK_SEARCH_PATHS)
+ if( CMAKE_HOST_WIN32 )
+ file( TO_CMAKE_PATH "$ENV{PROGRAMFILES}" ANDROID_NDK_SEARCH_PATHS )
+ set( ANDROID_NDK_SEARCH_PATHS "${ANDROID_NDK_SEARCH_PATHS}/android-ndk" "$ENV{SystemDrive}/NVPACK/android-ndk" )
+ else()
+ file( TO_CMAKE_PATH "$ENV{HOME}" ANDROID_NDK_SEARCH_PATHS )
+ set( ANDROID_NDK_SEARCH_PATHS /opt/android-ndk "${ANDROID_NDK_SEARCH_PATHS}/NVPACK/android-ndk" )
+ endif()
+endif()
+if(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH)
+ set( ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH /opt/android-toolchain )
+endif()
+
+set( ANDROID_SUPPORTED_ABIS_arm "armeabi-v7a;armeabi;armeabi-v7a with NEON;armeabi-v7a with VFPV3;armeabi-v6 with VFP" )
+set( ANDROID_SUPPORTED_ABIS_x86 "x86" )
+set( ANDROID_SUPPORTED_ABIS_mipsel "mips" )
+
+set( ANDROID_DEFAULT_NDK_API_LEVEL 8 )
+set( ANDROID_DEFAULT_NDK_API_LEVEL_x86 9 )
+set( ANDROID_DEFAULT_NDK_API_LEVEL_mips 9 )
+
+
+macro( __LIST_FILTER listvar regex )
+ if( ${listvar} )
+ foreach( __val ${${listvar}} )
+ if( __val MATCHES "${regex}" )
+ list( REMOVE_ITEM ${listvar} "${__val}" )
+ endif()
+ endforeach()
+ endif()
+endmacro()
+
+macro( __INIT_VARIABLE var_name )
+ set( __test_path 0 )
+ foreach( __var ${ARGN} )
+ if( __var STREQUAL "PATH" )
+ set( __test_path 1 )
+ break()
+ endif()
+ endforeach()
+ if( __test_path AND NOT EXISTS "${${var_name}}" )
+ unset( ${var_name} CACHE )
+ endif()
+ if( "${${var_name}}" STREQUAL "" )
+ set( __values 0 )
+ foreach( __var ${ARGN} )
+ if( __var STREQUAL "VALUES" )
+ set( __values 1 )
+ elseif( NOT __var STREQUAL "PATH" )
+ set( __obsolete 0 )
+ if( __var MATCHES "^OBSOLETE_.*$" )
+ string( REPLACE "OBSOLETE_" "" __var "${__var}" )
+ set( __obsolete 1 )
+ endif()
+ if( __var MATCHES "^ENV_.*$" )
+ string( REPLACE "ENV_" "" __var "${__var}" )
+ set( __value "$ENV{${__var}}" )
+ elseif( DEFINED ${__var} )
+ set( __value "${${__var}}" )
+ else()
+ if( __values )
+ set( __value "${__var}" )
+ else()
+ set( __value "" )
+ endif()
+ endif()
+ if( NOT "${__value}" STREQUAL "" )
+ if( __test_path )
+ if( EXISTS "${__value}" )
+ file( TO_CMAKE_PATH "${__value}" ${var_name} )
+ if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE )
+ message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." )
+ endif()
+ break()
+ endif()
+ else()
+ set( ${var_name} "${__value}" )
+ if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE )
+ message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." )
+ endif()
+ break()
+ endif()
+ endif()
+ endif()
+ endforeach()
+ unset( __value )
+ unset( __values )
+ unset( __obsolete )
+ elseif( __test_path )
+ file( TO_CMAKE_PATH "${${var_name}}" ${var_name} )
+ endif()
+ unset( __test_path )
+endmacro()
+
+macro( __DETECT_NATIVE_API_LEVEL _var _path )
+ SET( __ndkApiLevelRegex "^[\t ]*#define[\t ]+__ANDROID_API__[\t ]+([0-9]+)[\t ]*$" )
+ FILE( STRINGS ${_path} __apiFileContent REGEX "${__ndkApiLevelRegex}" )
+ if( NOT __apiFileContent )
+ message( SEND_ERROR "Could not get Android native API level. Probably you have specified invalid level value, or your copy of NDK/toolchain is broken." )
+ endif()
+ string( REGEX REPLACE "${__ndkApiLevelRegex}" "\\1" ${_var} "${__apiFileContent}" )
+ unset( __apiFileContent )
+ unset( __ndkApiLevelRegex )
+endmacro()
+
+macro( __DETECT_TOOLCHAIN_MACHINE_NAME _var _root )
+ if( EXISTS "${_root}" )
+ file( GLOB __gccExePath RELATIVE "${_root}/bin/" "${_root}/bin/*-gcc${TOOL_OS_SUFFIX}" )
+ __LIST_FILTER( __gccExePath "^[.].*" )
+ list( LENGTH __gccExePath __gccExePathsCount )
+ if( NOT __gccExePathsCount EQUAL 1 AND NOT _CMAKE_IN_TRY_COMPILE )
+ message( WARNING "Could not determine machine name for compiler from ${_root}" )
+ set( ${_var} "" )
+ else()
+ get_filename_component( __gccExeName "${__gccExePath}" NAME_WE )
+ string( REPLACE "-gcc" "" ${_var} "${__gccExeName}" )
+ endif()
+ unset( __gccExePath )
+ unset( __gccExePathsCount )
+ unset( __gccExeName )
+ else()
+ set( ${_var} "" )
+ endif()
+endmacro()
+
+
+# fight against cygwin
+set( ANDROID_FORBID_SYGWIN TRUE CACHE BOOL "Prevent cmake from working under cygwin and using cygwin tools")
+mark_as_advanced( ANDROID_FORBID_SYGWIN )
+if( ANDROID_FORBID_SYGWIN )
+ if( CYGWIN )
+ message( FATAL_ERROR "Android NDK and android-cmake toolchain are not welcome Cygwin. It is unlikely that this cmake toolchain will work under cygwin. But if you want to try then you can set cmake variable ANDROID_FORBID_SYGWIN to FALSE and rerun cmake." )
+ endif()
+
+ if( CMAKE_HOST_WIN32 )
+ # remove cygwin from PATH
+ set( __new_path "$ENV{PATH}")
+ __LIST_FILTER( __new_path "cygwin" )
+ set(ENV{PATH} "${__new_path}")
+ unset(__new_path)
+ endif()
+endif()
+
+
+# detect current host platform
+if( NOT DEFINED ANDROID_NDK_HOST_X64 AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64|AMD64")
+ set( ANDROID_NDK_HOST_X64 1 CACHE BOOL "Try to use 64-bit compiler toolchain" )
+ mark_as_advanced( ANDROID_NDK_HOST_X64 )
+endif()
+
+set( TOOL_OS_SUFFIX "" )
+if( CMAKE_HOST_APPLE )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "darwin-x86_64" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "darwin-x86" )
+elseif( CMAKE_HOST_WIN32 )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "windows-x86_64" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "windows" )
+ set( TOOL_OS_SUFFIX ".exe" )
+elseif( CMAKE_HOST_UNIX )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "linux-x86_64" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "linux-x86" )
+else()
+ message( FATAL_ERROR "Cross-compilation on your platform is not supported by this cmake toolchain" )
+endif()
+
+if( NOT ANDROID_NDK_HOST_X64 )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+endif()
+
+# see if we have path to Android NDK
+__INIT_VARIABLE( ANDROID_NDK PATH ENV_ANDROID_NDK )
+if( NOT ANDROID_NDK )
+ # see if we have path to Android standalone toolchain
+ __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ENV_ANDROID_STANDALONE_TOOLCHAIN OBSOLETE_ANDROID_NDK_TOOLCHAIN_ROOT OBSOLETE_ENV_ANDROID_NDK_TOOLCHAIN_ROOT )
+
+ if( NOT ANDROID_STANDALONE_TOOLCHAIN )
+  #try to find Android NDK in one of the default locations
+ set( __ndkSearchPaths )
+ foreach( __ndkSearchPath ${ANDROID_NDK_SEARCH_PATHS} )
+ foreach( suffix ${ANDROID_SUPPORTED_NDK_VERSIONS} )
+ list( APPEND __ndkSearchPaths "${__ndkSearchPath}${suffix}" )
+ endforeach()
+ endforeach()
+ __INIT_VARIABLE( ANDROID_NDK PATH VALUES ${__ndkSearchPaths} )
+ unset( __ndkSearchPaths )
+
+ if( ANDROID_NDK )
+ message( STATUS "Using default path for Android NDK: ${ANDROID_NDK}" )
+ message( STATUS " If you prefer to use a different location, please define a cmake or environment variable: ANDROID_NDK" )
+ else()
+    #try to find Android standalone toolchain in one of the default locations
+ __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH )
+
+ if( ANDROID_STANDALONE_TOOLCHAIN )
+ message( STATUS "Using default path for standalone toolchain ${ANDROID_STANDALONE_TOOLCHAIN}" )
+ message( STATUS " If you prefer to use a different location, please define the variable: ANDROID_STANDALONE_TOOLCHAIN" )
+ endif( ANDROID_STANDALONE_TOOLCHAIN )
+ endif( ANDROID_NDK )
+ endif( NOT ANDROID_STANDALONE_TOOLCHAIN )
+endif( NOT ANDROID_NDK )
+# remember found paths
+if( ANDROID_NDK )
+ get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE )
+ # try to detect change
+ if( CMAKE_AR )
+ string( LENGTH "${ANDROID_NDK}" __length )
+ string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
+ if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK )
+ message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first.
+ " )
+ endif()
+ unset( __androidNdkPreviousPath )
+ unset( __length )
+ endif()
+ set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE )
+ set( BUILD_WITH_ANDROID_NDK True )
+ file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? )
+ string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" )
+elseif( ANDROID_STANDALONE_TOOLCHAIN )
+ get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE )
+ # try to detect change
+ if( CMAKE_AR )
+ string( LENGTH "${ANDROID_STANDALONE_TOOLCHAIN}" __length )
+ string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidStandaloneToolchainPreviousPath )
+ if( NOT __androidStandaloneToolchainPreviousPath STREQUAL ANDROID_STANDALONE_TOOLCHAIN )
+ message( FATAL_ERROR "It is not possible to change path to the Android standalone toolchain on subsequent run." )
+ endif()
+ unset( __androidStandaloneToolchainPreviousPath )
+ unset( __length )
+ endif()
+ set( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" CACHE INTERNAL "Path of the Android standalone toolchain" FORCE )
+ set( BUILD_WITH_STANDALONE_TOOLCHAIN True )
+else()
+ list(GET ANDROID_NDK_SEARCH_PATHS 0 ANDROID_NDK_SEARCH_PATH)
+ message( FATAL_ERROR "Could not find neither Android NDK nor Android standalone toolchain.
+ You should either set an environment variable:
+ export ANDROID_NDK=~/my-android-ndk
+ or
+ export ANDROID_STANDALONE_TOOLCHAIN=~/my-android-toolchain
+ or put the toolchain or NDK in the default path:
+ sudo ln -s ~/my-android-ndk ${ANDROID_NDK_SEARCH_PATH}
+ sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" )
+endif()
+
+# get all the details about standalone toolchain
+if( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" )
+ set( ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ set( __availableToolchains "standalone" )
+ __DETECT_TOOLCHAIN_MACHINE_NAME( __availableToolchainMachines "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ if( NOT __availableToolchainMachines )
+ message( FATAL_ERROR "Could not determine machine name of your toolchain. Probably your Android standalone toolchain is broken." )
+ endif()
+ if( __availableToolchainMachines MATCHES i686 )
+ set( __availableToolchainArchs "x86" )
+ elseif( __availableToolchainMachines MATCHES arm )
+ set( __availableToolchainArchs "arm" )
+ elseif( __availableToolchainMachines MATCHES mipsel )
+ set( __availableToolchainArchs "mipsel" )
+ endif()
+ execute_process( COMMAND "${ANDROID_STANDALONE_TOOLCHAIN}/bin/${__availableToolchainMachines}-gcc${TOOL_OS_SUFFIX}" -dumpversion
+ OUTPUT_VARIABLE __availableToolchainCompilerVersions OUTPUT_STRIP_TRAILING_WHITESPACE )
+ string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?" __availableToolchainCompilerVersions "${__availableToolchainCompilerVersions}" )
+ if( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/bin/clang${TOOL_OS_SUFFIX}" )
+ list( APPEND __availableToolchains "standalone-clang" )
+ list( APPEND __availableToolchainMachines ${__availableToolchainMachines} )
+ list( APPEND __availableToolchainArchs ${__availableToolchainArchs} )
+ list( APPEND __availableToolchainCompilerVersions ${__availableToolchainCompilerVersions} )
+ endif()
+endif()
+
+macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __host_system_name )
+ foreach( __toolchain ${${__availableToolchainsLst}} )
+ if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK}/toolchains/${__toolchain}/prebuilt/" )
+ string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" )
+ else()
+ set( __gcc_toolchain "${__toolchain}" )
+ endif()
+ __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${__host_system_name}" )
+ if( __machine )
+ string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?$" __version "${__gcc_toolchain}" )
+ string( REGEX MATCH "^[^-]+" __arch "${__gcc_toolchain}" )
+ list( APPEND __availableToolchainMachines "${__machine}" )
+ list( APPEND __availableToolchainArchs "${__arch}" )
+ list( APPEND __availableToolchainCompilerVersions "${__version}" )
+ list( APPEND ${__availableToolchainsVar} "${__toolchain}" )
+ endif()
+ unset( __gcc_toolchain )
+ endforeach()
+endmacro()
+
+# get all the details about NDK
+if( BUILD_WITH_ANDROID_NDK )
+ file( GLOB ANDROID_SUPPORTED_NATIVE_API_LEVELS RELATIVE "${ANDROID_NDK}/platforms" "${ANDROID_NDK}/platforms/android-*" )
+ string( REPLACE "android-" "" ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_SUPPORTED_NATIVE_API_LEVELS}" )
+ set( __availableToolchains "" )
+ set( __availableToolchainMachines "" )
+ set( __availableToolchainArchs "" )
+ set( __availableToolchainCompilerVersions "" )
+ if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK}/toolchains/${ANDROID_TOOLCHAIN_NAME}/" )
+ # do not go through all toolchains if we know the name
+ set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} )
+ if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+ if( __availableToolchains )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+ endif()
+ endif()
+ endif()
+ if( NOT __availableToolchains )
+ file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" )
+ if( __availableToolchains )
+ list(SORT __availableToolchainsLst) # we need clang to go after gcc
+ endif()
+ __LIST_FILTER( __availableToolchainsLst "^[.]" )
+ __LIST_FILTER( __availableToolchainsLst "llvm" )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} )
+ if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+ if( __availableToolchains )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+ endif()
+ endif()
+ endif()
+ if( NOT __availableToolchains )
+ message( FATAL_ERROR "Could not find any working toolchain in the NDK. Probably your Android NDK is broken." )
+ endif()
+endif()
+
+# build list of available ABIs
+set( ANDROID_SUPPORTED_ABIS "" )
+set( __uniqToolchainArchNames ${__availableToolchainArchs} )
+list( REMOVE_DUPLICATES __uniqToolchainArchNames )
+list( SORT __uniqToolchainArchNames )
+foreach( __arch ${__uniqToolchainArchNames} )
+ list( APPEND ANDROID_SUPPORTED_ABIS ${ANDROID_SUPPORTED_ABIS_${__arch}} )
+endforeach()
+unset( __uniqToolchainArchNames )
+if( NOT ANDROID_SUPPORTED_ABIS )
+ message( FATAL_ERROR "No one of known Android ABIs is supported by this cmake toolchain." )
+endif()
+
+# choose target ABI
+__INIT_VARIABLE( ANDROID_ABI OBSOLETE_ARM_TARGET OBSOLETE_ARM_TARGETS VALUES ${ANDROID_SUPPORTED_ABIS} )
+# verify that target ABI is supported
+list( FIND ANDROID_SUPPORTED_ABIS "${ANDROID_ABI}" __androidAbiIdx )
+if( __androidAbiIdx EQUAL -1 )
+ string( REPLACE ";" "\", \"", PRINTABLE_ANDROID_SUPPORTED_ABIS "${ANDROID_SUPPORTED_ABIS}" )
+ message( FATAL_ERROR "Specified ANDROID_ABI = \"${ANDROID_ABI}\" is not supported by this cmake toolchain or your NDK/toolchain.
+ Supported values are: \"${PRINTABLE_ANDROID_SUPPORTED_ABIS}\"
+ " )
+endif()
+unset( __androidAbiIdx )
+
+# set target ABI options
+if( ANDROID_ABI STREQUAL "x86" )
+ set( X86 true )
+ set( ANDROID_NDK_ABI_NAME "x86" )
+ set( ANDROID_ARCH_NAME "x86" )
+ set( ANDROID_ARCH_FULLNAME "x86" )
+ set( ANDROID_LLVM_TRIPLE "i686-none-linux-android" )
+ set( CMAKE_SYSTEM_PROCESSOR "i686" )
+elseif( ANDROID_ABI STREQUAL "mips" )
+ set( MIPS true )
+ set( ANDROID_NDK_ABI_NAME "mips" )
+ set( ANDROID_ARCH_NAME "mips" )
+ set( ANDROID_ARCH_FULLNAME "mipsel" )
+ set( ANDROID_LLVM_TRIPLE "mipsel-none-linux-android" )
+ set( CMAKE_SYSTEM_PROCESSOR "mips" )
+elseif( ANDROID_ABI STREQUAL "armeabi" )
+ set( ARMEABI true )
+ set( ANDROID_NDK_ABI_NAME "armeabi" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv5te" )
+elseif( ANDROID_ABI STREQUAL "armeabi-v6 with VFP" )
+ set( ARMEABI_V6 true )
+ set( ANDROID_NDK_ABI_NAME "armeabi" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv6" )
+  # always need to fall back to the older platform
+ set( ARMEABI true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a")
+ set( ARMEABI_V7A true )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a with VFPV3" )
+ set( ARMEABI_V7A true )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+ set( VFPV3 true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a with NEON" )
+ set( ARMEABI_V7A true )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+ set( VFPV3 true )
+ set( NEON true )
+else()
+ message( SEND_ERROR "Unknown ANDROID_ABI=\"${ANDROID_ABI}\" is specified." )
+endif()
+
+# CMake caches CMAKE_SYSTEM_PROCESSOR into CMakeSystem.cmake on the first
+# configure run; appending an override is the workaround used here to update it
+# when ANDROID_ABI changes in an already-configured build tree.
+if( CMAKE_BINARY_DIR AND EXISTS "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" )
+ # really dirty hack
+ # it is not possible to change CMAKE_SYSTEM_PROCESSOR after the first run...
+ file( APPEND "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" "SET(CMAKE_SYSTEM_PROCESSOR \"${CMAKE_SYSTEM_PROCESSOR}\")\n" )
+endif()
+
+# Offer ANDROID_FORCE_ARM_BUILD (ARM vs Thumb code generation) only where the
+# choice exists: ARM targets other than armeabi-v6, which is always built in
+# ARM mode (see the NDK flags section below).
+if( ANDROID_ARCH_NAME STREQUAL "arm" AND NOT ARMEABI_V6 )
+ __INIT_VARIABLE( ANDROID_FORCE_ARM_BUILD OBSOLETE_FORCE_ARM VALUES OFF )
+ set( ANDROID_FORCE_ARM_BUILD ${ANDROID_FORCE_ARM_BUILD} CACHE BOOL "Use 32-bit ARM instructions instead of Thumb-1" FORCE )
+ mark_as_advanced( ANDROID_FORCE_ARM_BUILD )
+else()
+ unset( ANDROID_FORCE_ARM_BUILD CACHE )
+endif()
+
+# choose toolchain
+# Either validate a user-supplied ANDROID_TOOLCHAIN_NAME against the toolchains
+# discovered in the NDK, or auto-select the one with the highest compiler
+# version whose architecture matches ANDROID_ARCH_FULLNAME.
+if( ANDROID_TOOLCHAIN_NAME )
+ # user-specified toolchain: it must exist in this NDK...
+ list( FIND __availableToolchains "${ANDROID_TOOLCHAIN_NAME}" __toolchainIdx )
+ if( __toolchainIdx EQUAL -1 )
+  list( SORT __availableToolchains )
+  string( REPLACE ";" "\n * " toolchains_list "${__availableToolchains}" )
+  set( toolchains_list " * ${toolchains_list}")
+  message( FATAL_ERROR "Specified toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is missing in your NDK or broken. Please verify that your NDK is working or select another compiler toolchain.
+To configure the toolchain set CMake variable ANDROID_TOOLCHAIN_NAME to one of the following values:\n${toolchains_list}\n" )
+ endif()
+ # ...and it must target the selected architecture
+ list( GET __availableToolchainArchs ${__toolchainIdx} __toolchainArch )
+ if( NOT __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
+  message( SEND_ERROR "Selected toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is not able to compile binaries for the \"${ANDROID_ARCH_NAME}\" platform." )
+ endif()
+else()
+ # auto-selection: scan parallel lists (__availableToolchains /
+ # __availableToolchainArchs / __availableToolchainCompilerVersions) by index
+ # and keep the highest compiler version for the target architecture.
+ set( __toolchainIdx -1 )
+ # NOTE(review): __applicableToolchains appears to be set but never read here
+ # — presumably a leftover; verify before removing.
+ set( __applicableToolchains "" )
+ set( __toolchainMaxVersion "0.0.0" )
+ list( LENGTH __availableToolchains __availableToolchainsCount )
+ math( EXPR __availableToolchainsCount "${__availableToolchainsCount}-1" )
+ foreach( __idx RANGE ${__availableToolchainsCount} )
+  list( GET __availableToolchainArchs ${__idx} __toolchainArch )
+  if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
+   list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion )
+   if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion )
+    set( __toolchainMaxVersion "${__toolchainVersion}" )
+    set( __toolchainIdx ${__idx} )
+   endif()
+  endif()
+ endforeach()
+ unset( __availableToolchainsCount )
+ unset( __toolchainMaxVersion )
+ unset( __toolchainVersion )
+endif()
+unset( __toolchainArch )
+if( __toolchainIdx EQUAL -1 )
+ message( FATAL_ERROR "No one of available compiler toolchains is able to compile for ${ANDROID_ARCH_NAME} platform." )
+endif()
+# export the selected toolchain's name, machine triplet and compiler version,
+# then drop the discovery lists
+list( GET __availableToolchains ${__toolchainIdx} ANDROID_TOOLCHAIN_NAME )
+list( GET __availableToolchainMachines ${__toolchainIdx} ANDROID_TOOLCHAIN_MACHINE_NAME )
+list( GET __availableToolchainCompilerVersions ${__toolchainIdx} ANDROID_COMPILER_VERSION )
+
+unset( __toolchainIdx )
+unset( __availableToolchains )
+unset( __availableToolchainMachines )
+unset( __availableToolchainArchs )
+unset( __availableToolchainCompilerVersions )
+
+# choose native API level
+# Resolution order: ANDROID_NATIVE_API_LEVEL (cmake var or environment), legacy
+# ANDROID_API_LEVEL, the standalone toolchain's level, then NDK defaults.
+__INIT_VARIABLE( ANDROID_NATIVE_API_LEVEL ENV_ANDROID_NATIVE_API_LEVEL ANDROID_API_LEVEL ENV_ANDROID_API_LEVEL ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME} ANDROID_DEFAULT_NDK_API_LEVEL )
+string( REGEX MATCH "[0-9]+" ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" )
+# adjust API level
+# snap the requested level down to the highest supported level that does not
+# exceed it (but never below the per-arch default)
+set( __real_api_level ${ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME}} )
+foreach( __level ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ if( NOT __level GREATER ANDROID_NATIVE_API_LEVEL AND NOT __level LESS __real_api_level )
+  set( __real_api_level ${__level} )
+ endif()
+endforeach()
+if( __real_api_level AND NOT ANDROID_NATIVE_API_LEVEL EQUAL __real_api_level )
+ message( STATUS "Adjusting Android API level 'android-${ANDROID_NATIVE_API_LEVEL}' to 'android-${__real_api_level}'")
+ set( ANDROID_NATIVE_API_LEVEL ${__real_api_level} )
+endif()
+unset(__real_api_level)
+# validate
+list( FIND ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_NATIVE_API_LEVEL}" __levelIdx )
+if( __levelIdx EQUAL -1 )
+ message( SEND_ERROR "Specified Android native API level 'android-${ANDROID_NATIVE_API_LEVEL}' is not supported by your NDK/toolchain." )
+else()
+ if( BUILD_WITH_ANDROID_NDK )
+  # cross-check against the api-level.h shipped in the NDK platform directory
+  __DETECT_NATIVE_API_LEVEL( __realApiLevel "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}/usr/include/android/api-level.h" )
+  if( NOT __realApiLevel EQUAL ANDROID_NATIVE_API_LEVEL )
+   message( SEND_ERROR "Specified Android API level (${ANDROID_NATIVE_API_LEVEL}) does not match to the level found (${__realApiLevel}). Probably your copy of NDK is broken." )
+  endif()
+  unset( __realApiLevel )
+ endif()
+ set( ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" CACHE STRING "Android API level for native code" FORCE )
+ if( CMAKE_VERSION VERSION_GREATER "2.8" )
+  # expose the supported levels as a drop-down in cmake-gui
+  list( SORT ANDROID_SUPPORTED_NATIVE_API_LEVELS )
+  set_property( CACHE ANDROID_NATIVE_API_LEVEL PROPERTY STRINGS ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ endif()
+endif()
+unset( __levelIdx )
+
+
+# remember target ABI
+set( ANDROID_ABI "${ANDROID_ABI}" CACHE STRING "The target ABI for Android. If arm, then armeabi-v7a is recommended for hardware floating point." FORCE )
+if( CMAKE_VERSION VERSION_GREATER "2.8" )
+ list( SORT ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME} )
+ set_property( CACHE ANDROID_ABI PROPERTY STRINGS ${ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME}} )
+endif()
+
+
+# runtime choice (STL, rtti, exceptions)
+# Default is gnustl_static; the obsolete boolean ANDROID_USE_STLPORT is still
+# honored for backward compatibility (with a deprecation warning).
+if( NOT ANDROID_STL )
+ # honor legacy ANDROID_USE_STLPORT
+ if( DEFINED ANDROID_USE_STLPORT )
+  if( ANDROID_USE_STLPORT )
+   set( ANDROID_STL stlport_static )
+  endif()
+  message( WARNING "You are using an obsolete variable ANDROID_USE_STLPORT to select the STL variant. Use -DANDROID_STL=stlport_static instead." )
+ endif()
+ if( NOT ANDROID_STL )
+  set( ANDROID_STL gnustl_static )
+ endif()
+endif()
+set( ANDROID_STL "${ANDROID_STL}" CACHE STRING "C++ runtime" )
+set( ANDROID_STL_FORCE_FEATURES ON CACHE BOOL "automatically configure rtti and exceptions support based on C++ runtime" )
+mark_as_advanced( ANDROID_STL ANDROID_STL_FORCE_FEATURES )
+
+# validate ANDROID_STL against what the chosen build environment can provide:
+# the NDK ships several runtimes, a standalone toolchain only gnustl
+if( BUILD_WITH_ANDROID_NDK )
+ if( NOT "${ANDROID_STL}" MATCHES "^(none|system|system_re|gabi\\+\\+_static|gabi\\+\\+_shared|stlport_static|stlport_shared|gnustl_static|gnustl_shared)$")
+  message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\".
+The possible values are:
+  none -> Do not configure the runtime.
+  system -> Use the default minimal system C++ runtime library.
+  system_re -> Same as system but with rtti and exceptions.
+  gabi++_static -> Use the GAbi++ runtime as a static library.
+  gabi++_shared -> Use the GAbi++ runtime as a shared library.
+  stlport_static -> Use the STLport runtime as a static library.
+  stlport_shared -> Use the STLport runtime as a shared library.
+  gnustl_static -> (default) Use the GNU STL as a static library.
+  gnustl_shared -> Use the GNU STL as a shared library.
+" )
+ endif()
+elseif( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ if( NOT "${ANDROID_STL}" MATCHES "^(none|gnustl_static|gnustl_shared)$")
+  message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\".
+The possible values are:
+  none -> Do not configure the runtime.
+  gnustl_static -> (default) Use the GNU STL as a static library.
+  gnustl_shared -> Use the GNU STL as a shared library.
+" )
+ endif()
+endif()
+
+# reset derived runtime state; the NDK / standalone-toolchain sections below
+# fill these in for the selected ANDROID_STL
+unset( ANDROID_RTTI )
+unset( ANDROID_EXCEPTIONS )
+unset( ANDROID_STL_INCLUDE_DIRS )
+unset( __libstl )
+unset( __libsupcxx )
+
+# warn about known-broken NDK releases for the selected configuration
+if( NOT _CMAKE_IN_TRY_COMPILE AND ANDROID_NDK_RELEASE STREQUAL "r7b" AND ARMEABI_V7A AND NOT VFPV3 AND ANDROID_STL MATCHES "gnustl" )
+ message( WARNING "The GNU STL armeabi-v7a binaries from NDK r7b can crash non-NEON devices. The files provided with NDK r7b were not configured properly, resulting in crashes on Tegra2-based devices and others when trying to use certain floating-point functions (e.g., cosf, sinf, expf).
+You are strongly recommended to switch to another NDK release.
+" )
+endif()
+
+if( NOT _CMAKE_IN_TRY_COMPILE AND X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" )
+ message( WARNING "The x86 system header file from NDK r6 has incorrect definition for ptrdiff_t. You are recommended to upgrade to a newer NDK release or manually patch the header:
+See https://android.googlesource.com/platform/development.git f907f4f9d4e56ccc8093df6fee54454b8bcab6c2
+ diff --git a/ndk/platforms/android-9/arch-x86/include/machine/_types.h b/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+ index 5e28c64..65892a1 100644
+ --- a/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+ +++ b/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+ @@ -51,7 +51,11 @@ typedef long int ssize_t;
+ #endif
+ #ifndef _PTRDIFF_T
+ #define _PTRDIFF_T
+ -typedef long ptrdiff_t;
+ +# ifdef __ANDROID__
+ +  typedef int ptrdiff_t;
+ +# else
+ +   typedef long ptrdiff_t;
+ +# endif
+ #endif
+" )
+endif()
+
+
+# setup paths and STL for standalone toolchain
+# Derives sysroot, GNU STL include directories and the runtime libraries
+# (__libstl / __libsupcxx) from the standalone toolchain layout; per-mode
+# subdirectories (armv7-a / thumb) are preferred when present.
+if( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ set( ANDROID_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot" )
+
+ if( NOT ANDROID_STL STREQUAL "none" )
+  # locate GNU STL headers; prefer the ABI/mode-specific bits directory
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/include/c++/${ANDROID_COMPILER_VERSION}" )
+  if( ARMEABI_V7A AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}/bits" )
+   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}" )
+  elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb/bits" )
+   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb" )
+  else()
+   list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" )
+  endif()
+  # always search static GNU STL to get the location of libsupc++.a
+  if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" )
+   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb" )
+  elseif( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" )
+   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}" )
+  elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libstdc++.a" )
+   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb" )
+  elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libstdc++.a" )
+   set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib" )
+  endif()
+  if( __libstl )
+   set( __libsupcxx "${__libstl}/libsupc++.a" )
+   set( __libstl "${__libstl}/libstdc++.a" )
+  endif()
+  if( NOT EXISTS "${__libsupcxx}" )
+   # BUGFIX(review): the error message named "libstdsupc++.a", a library that
+   # does not exist; the file being checked (and the copy hint below) is
+   # libsupc++.a.
+   message( FATAL_ERROR "The required libsupc++.a is missing in your standalone toolchain.
+ Usually it happens because of bug in make-standalone-toolchain.sh script from NDK r7, r7b and r7c.
+ You need to either upgrade to newer NDK or manually copy
+ $ANDROID_NDK/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a
+ to
+ ${__libsupcxx}
+ " )
+  endif()
+  if( ANDROID_STL STREQUAL "gnustl_shared" )
+   # shared gnustl: point __libstl at the .so instead of the static archive
+   if( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" )
+    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" )
+   elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" )
+    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" )
+   elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" )
+    set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" )
+   endif()
+  endif()
+ endif()
+endif()
+
+# clang
+# Detect whether the selected toolchain is clang-based: either a standalone
+# clang toolchain, or an NDK "-clang3.x" toolchain backed by an llvm prebuilt.
+if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" )
+ set( ANDROID_COMPILER_IS_CLANG 1 )
+ # query the clang binary itself for its version (e.g. "3.2")
+ execute_process( COMMAND "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/clang${TOOL_OS_SUFFIX}" --version OUTPUT_VARIABLE ANDROID_CLANG_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE )
+ string( REGEX MATCH "[0-9]+[.][0-9]+" ANDROID_CLANG_VERSION "${ANDROID_CLANG_VERSION}")
+elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" )
+ string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}")
+ # NOTE(review): the companion GCC toolchain (used for binutils/sysroot paths)
+ # is hardcoded to "-4.6" — presumably the version bundled with the clang
+ # toolchains of the NDKs this script supports; verify for newer NDKs.
+ string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
+ if( NOT EXISTS "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}/bin/clang${TOOL_OS_SUFFIX}" )
+  message( FATAL_ERROR "Could not find the Clang compiler driver" )
+ endif()
+ set( ANDROID_COMPILER_IS_CLANG 1 )
+ set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+else()
+ set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
+ unset( ANDROID_COMPILER_IS_CLANG CACHE )
+endif()
+
+# prefer a versioned driver name (e.g. clang31) and fall back to plain "clang"
+string( REPLACE "." "" _clang_name "clang${ANDROID_CLANG_VERSION}" )
+if( NOT EXISTS "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" )
+ set( _clang_name "clang" )
+endif()
+
+
+# setup paths and STL for NDK
+# Per-runtime configuration: rtti/exceptions defaults, STL include directories
+# and the runtime archive (__libstl) for the ANDROID_STL selected above.
+if( BUILD_WITH_ANDROID_NDK )
+ set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+ set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" )
+
+ if( ANDROID_STL STREQUAL "none" )
+  # do nothing
+ elseif( ANDROID_STL STREQUAL "system" )
+  set( ANDROID_RTTI OFF )
+  set( ANDROID_EXCEPTIONS OFF )
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" )
+ elseif( ANDROID_STL STREQUAL "system_re" )
+  set( ANDROID_RTTI ON )
+  set( ANDROID_EXCEPTIONS ON )
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" )
+ elseif( ANDROID_STL MATCHES "gabi" )
+  if( ANDROID_NDK_RELEASE STRLESS "r7" )
+   # BUGFIX(review): fixed typo "awailable" in this user-facing error message
+   message( FATAL_ERROR "gabi++ is not available in your NDK. You have to upgrade to NDK r7 or newer to use gabi++.")
+  endif()
+  set( ANDROID_RTTI ON )
+  set( ANDROID_EXCEPTIONS OFF )
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/gabi++/include" )
+  set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gabi++/libs/${ANDROID_NDK_ABI_NAME}/libgabi++_static.a" )
+ elseif( ANDROID_STL MATCHES "stlport" )
+  # STLport capabilities depend on the NDK release: exceptions since r8d,
+  # rtti since r7
+  if( NOT ANDROID_NDK_RELEASE STRLESS "r8d" )
+   set( ANDROID_EXCEPTIONS ON )
+  else()
+   set( ANDROID_EXCEPTIONS OFF )
+  endif()
+  if( ANDROID_NDK_RELEASE STRLESS "r7" )
+   set( ANDROID_RTTI OFF )
+  else()
+   set( ANDROID_RTTI ON )
+  endif()
+  set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/stlport/stlport" )
+  set( __libstl "${ANDROID_NDK}/sources/cxx-stl/stlport/libs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" )
+ elseif( ANDROID_STL MATCHES "gnustl" )
+  set( ANDROID_EXCEPTIONS ON )
+  set( ANDROID_RTTI ON )
+  if( EXISTS "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" )
+   if( ARMEABI_V7A AND ANDROID_COMPILER_VERSION VERSION_EQUAL "4.7" AND ANDROID_NDK_RELEASE STREQUAL "r8d" )
+    # gnustl binary for 4.7 compiler is buggy :(
+    # TODO: look for right fix
+    set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.6" )
+   else()
+    set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" )
+   endif()
+  else()
+   set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++" )
+  endif()
+  set( ANDROID_STL_INCLUDE_DIRS "${__libstl}/include" "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/include" )
+  if( EXISTS "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" )
+   set( __libstl "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" )
+  else()
+   set( __libstl "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
+  endif()
+ else()
+  message( FATAL_ERROR "Unknown runtime: ${ANDROID_STL}" )
+ endif()
+ # find libsupc++.a - rtti & exceptions
+ # location varies by NDK release: per-compiler-version since r8b, a single
+ # copy in r7..r8, and inside the toolchain directory before r7
+ if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" )
+  if( ANDROID_NDK_RELEASE STRGREATER "r8" ) # r8b
+   set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
+  elseif( NOT ANDROID_NDK_RELEASE STRLESS "r7" AND ANDROID_NDK_RELEASE STRLESS "r8b")
+   set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
+  else() # before r7
+   if( ARMEABI_V7A )
+    if( ANDROID_FORCE_ARM_BUILD )
+     set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" )
+    else()
+     set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" )
+    endif()
+   elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD )
+    set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" )
+   else()
+    set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" )
+   endif()
+  endif()
+  if( NOT EXISTS "${__libsupcxx}")
+   # BUGFIX(review): this used message( ERROR ... ) — "ERROR" is not a valid
+   # message() mode, so the word was printed as part of the text and
+   # configuration continued silently. SEND_ERROR makes the configure fail.
+   message( SEND_ERROR "Could not find libsupc++.a for a chosen platform. Either your NDK is not supported or is broken.")
+  endif()
+ endif()
+endif()
+
+
+# case of shared STL linkage
+# convert the discovered static archive path to its shared counterpart and copy
+# the .so next to the build outputs so it can be packaged with the application
+if( ANDROID_STL MATCHES "shared" AND DEFINED __libstl )
+ string( REPLACE "_static.a" "_shared.so" __libstl "${__libstl}" )
+ if( NOT _CMAKE_IN_TRY_COMPILE AND __libstl MATCHES "[.]so$" )
+  get_filename_component( __libstlname "${__libstl}" NAME )
+  execute_process( COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__libstl}" "${LIBRARY_OUTPUT_PATH}/${__libstlname}" RESULT_VARIABLE __fileCopyProcess )
+  if( NOT __fileCopyProcess EQUAL 0 OR NOT EXISTS "${LIBRARY_OUTPUT_PATH}/${__libstlname}")
+   message( SEND_ERROR "Failed copying of ${__libstl} to the ${LIBRARY_OUTPUT_PATH}/${__libstlname}" )
+  endif()
+  unset( __fileCopyProcess )
+  unset( __libstlname )
+ endif()
+endif()
+
+
+# ccache support
+# NDK_CCACHE (cmake variable or environment) may name a ccache binary; when
+# set, the compiler setup below routes compilation through it.
+__INIT_VARIABLE( _ndk_ccache NDK_CCACHE ENV_NDK_CCACHE )
+if( _ndk_ccache )
+ # BUGFIX(review): the check was `NOT EXISTS NDK_CCACHE`, which tests the
+ # literal path "NDK_CCACHE" (if(EXISTS) does not dereference variable names),
+ # so a valid cached path was discarded on every run. Test the value instead.
+ if( DEFINED NDK_CCACHE AND NOT EXISTS "${NDK_CCACHE}" )
+  # drop a stale cached path so find_program can search again
+  unset( NDK_CCACHE CACHE )
+ endif()
+ find_program( NDK_CCACHE "${_ndk_ccache}" DOC "The path to ccache binary")
+else()
+ unset( NDK_CCACHE CACHE )
+endif()
+unset( _ndk_ccache )
+
+
+# setup the cross-compiler
+# When ccache is enabled, CMAKE_{C,CXX}_COMPILER point at ccache and the real
+# driver is passed as its first argument (CMAKE_*_COMPILER_ARG1). binutils are
+# always taken from the GCC toolchain, even for clang builds.
+if( NOT CMAKE_C_COMPILER )
+ if( NDK_CCACHE )
+  set( CMAKE_C_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" )
+  set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" )
+  if( ANDROID_COMPILER_IS_CLANG )
+   set( CMAKE_C_COMPILER_ARG1 "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" CACHE PATH "C compiler")
+   set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+  else()
+   set( CMAKE_C_COMPILER_ARG1 "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "C compiler")
+   set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+  endif()
+ else()
+  if( ANDROID_COMPILER_IS_CLANG )
+   set( CMAKE_C_COMPILER "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" CACHE PATH "C compiler")
+   set( CMAKE_CXX_COMPILER "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+  else()
+   set( CMAKE_C_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "C compiler" )
+   set( CMAKE_CXX_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler" )
+  endif()
+ endif()
+ set( CMAKE_ASM_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "assembler" )
+ set( CMAKE_STRIP "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-strip${TOOL_OS_SUFFIX}" CACHE PATH "strip" )
+ set( CMAKE_AR "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ar${TOOL_OS_SUFFIX}" CACHE PATH "archive" )
+ set( CMAKE_LINKER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ld${TOOL_OS_SUFFIX}" CACHE PATH "linker" )
+ set( CMAKE_NM "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-nm${TOOL_OS_SUFFIX}" CACHE PATH "nm" )
+ set( CMAKE_OBJCOPY "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objcopy${TOOL_OS_SUFFIX}" CACHE PATH "objcopy" )
+ set( CMAKE_OBJDUMP "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objdump${TOOL_OS_SUFFIX}" CACHE PATH "objdump" )
+ set( CMAKE_RANLIB "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ranlib${TOOL_OS_SUFFIX}" CACHE PATH "ranlib" )
+endif()
+
+set( _CMAKE_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_MACHINE_NAME}-" )
+if( CMAKE_VERSION VERSION_LESS 2.8.5 )
+ set( CMAKE_ASM_COMPILER_ARG1 "-c" )
+endif()
+if( APPLE )
+ find_program( CMAKE_INSTALL_NAME_TOOL NAMES install_name_tool )
+ if( NOT CMAKE_INSTALL_NAME_TOOL )
+  message( FATAL_ERROR "Could not find install_name_tool, please check your installation." )
+ endif()
+ mark_as_advanced( CMAKE_INSTALL_NAME_TOOL )
+endif()
+
+# Force set compilers because standard identification works badly for us
+# (hand-set compiler identity, pointer size and ABI to skip the try-compile
+# based detection, which is unreliable for this cross setup)
+include( CMakeForceCompiler )
+CMAKE_FORCE_C_COMPILER( "${CMAKE_C_COMPILER}" GNU )
+if( ANDROID_COMPILER_IS_CLANG )
+ set( CMAKE_C_COMPILER_ID Clang)
+endif()
+set( CMAKE_C_PLATFORM_ID Linux )
+set( CMAKE_C_SIZEOF_DATA_PTR 4 )
+set( CMAKE_C_HAS_ISYSROOT 1 )
+set( CMAKE_C_COMPILER_ABI ELF )
+CMAKE_FORCE_CXX_COMPILER( "${CMAKE_CXX_COMPILER}" GNU )
+if( ANDROID_COMPILER_IS_CLANG )
+ set( CMAKE_CXX_COMPILER_ID Clang)
+endif()
+set( CMAKE_CXX_PLATFORM_ID Linux )
+set( CMAKE_CXX_SIZEOF_DATA_PTR 4 )
+set( CMAKE_CXX_HAS_ISYSROOT 1 )
+set( CMAKE_CXX_COMPILER_ABI ELF )
+set( CMAKE_CXX_SOURCE_FILE_EXTENSIONS cc cp cxx cpp CPP c++ C )
+# force ASM compiler (required for CMake < 2.8.5)
+set( CMAKE_ASM_COMPILER_ID_RUN TRUE )
+set( CMAKE_ASM_COMPILER_ID GNU )
+set( CMAKE_ASM_COMPILER_WORKS TRUE )
+set( CMAKE_ASM_COMPILER_FORCED TRUE )
+set( CMAKE_COMPILER_IS_GNUASM 1)
+set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm )
+
+# flags and definitions
+remove_definitions( -DANDROID )
+add_definitions( -DANDROID )
+
+# embed the sysroot into ANDROID_CXX_FLAGS; quote it only when the path
+# contains characters that would otherwise split the argument
+if(ANDROID_SYSROOT MATCHES "[ ;\"]")
+ set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
+ if( NOT _CMAKE_IN_TRY_COMPILE )
+  # quotes will break try_compile and compiler identification
+  message(WARNING "Your Android system root has non-alphanumeric symbols. It can break compiler features detection and the whole build.")
+ endif()
+else()
+ set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
+endif()
+
+# NDK flags
+# Per-architecture compiler flags, mirroring the defaults the NDK build system
+# uses; Release builds prefer Thumb (where allowed) and omit frame pointers.
+if( ARMEABI OR ARMEABI_V7A )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -funwind-tables" )
+ if( NOT ANDROID_FORCE_ARM_BUILD AND NOT ARMEABI_V6 )
+  set( ANDROID_CXX_FLAGS_RELEASE "-mthumb -fomit-frame-pointer -fno-strict-aliasing" )
+  set( ANDROID_CXX_FLAGS_DEBUG "-marm -fno-omit-frame-pointer -fno-strict-aliasing" )
+  if( NOT ANDROID_COMPILER_IS_CLANG )
+   set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -finline-limit=64" )
+  endif()
+ else()
+  # always compile ARMEABI_V6 in arm mode; otherwise there is no difference from ARMEABI
+  set( ANDROID_CXX_FLAGS_RELEASE "-marm -fomit-frame-pointer -fstrict-aliasing" )
+  set( ANDROID_CXX_FLAGS_DEBUG "-marm -fno-omit-frame-pointer -fno-strict-aliasing" )
+  if( NOT ANDROID_COMPILER_IS_CLANG )
+   set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" )
+  endif()
+ endif()
+elseif( X86 )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funwind-tables" )
+ if( NOT ANDROID_COMPILER_IS_CLANG )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" )
+ else()
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fPIC" )
+ endif()
+ set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer -fstrict-aliasing" )
+ set( ANDROID_CXX_FLAGS_DEBUG "-fno-omit-frame-pointer -fno-strict-aliasing" )
+elseif( MIPS )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -fno-strict-aliasing -finline-functions -ffunction-sections -funwind-tables -fmessage-length=0" )
+ set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer" )
+ set( ANDROID_CXX_FLAGS_DEBUG "-fno-omit-frame-pointer" )
+ if( NOT ANDROID_COMPILER_IS_CLANG )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fno-inline-functions-called-once -fgcse-after-reload -frerun-cse-after-loop -frename-registers" )
+  set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} -funswitch-loops -finline-limit=300" )
+ endif()
+else()
+ # BUGFIX(review): this fallback was written as "elseif()" — an elseif with an
+ # empty condition is always false, so the per-config flags were never cleared
+ # for unknown architectures. Use a plain else().
+ set( ANDROID_CXX_FLAGS_RELEASE "" )
+ set( ANDROID_CXX_FLAGS_DEBUG "" )
+endif()
+
+set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fsigned-char" ) # good/necessary when porting desktop libraries
+
+# -Wno-psabi silences GCC's va_list ABI-change notes; clang and x86 GCC do not
+# know this option
+if( NOT X86 AND NOT ANDROID_COMPILER_IS_CLANG )
+ set( ANDROID_CXX_FLAGS "-Wno-psabi ${ANDROID_CXX_FLAGS}" )
+endif()
+
+if( NOT ANDROID_COMPILER_VERSION VERSION_LESS "4.6" )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -no-canonical-prefixes" ) # see https://android-review.googlesource.com/#/c/47564/
+endif()
+
+# ABI-specific flags
+# FPU selection for armeabi-v7a follows the VFPV3/NEON switches set during ABI
+# parsing; default is the 16-register vfpv3-d16 required by the ABI
+if( ARMEABI_V7A )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv7-a -mfloat-abi=softfp" )
+ if( NEON )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=neon" )
+ elseif( VFPV3 )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=vfpv3" )
+ else()
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=vfpv3-d16" )
+ endif()
+elseif( ARMEABI_V6 )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv6 -mfloat-abi=softfp -mfpu=vfp" ) # vfp == vfpv2
+elseif( ARMEABI )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" )
+endif()
+
+# STL
+# Inject the selected STL (and libsupc++ for rtti/exception support) directly
+# into CMake's link rule templates; the Android toolchains do not add a C++
+# runtime implicitly.
+if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" )
+ if( ANDROID_STL MATCHES "gnustl" )
+  # for gnustl, drive C++ links through the C compiler so the runtime being
+  # linked is exactly what is appended below
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+  set( CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+ else()
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+  set( CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+ endif()
+ if ( X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" )
+  # workaround "undefined reference to `__dso_handle'" problem
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+ endif()
+ if( EXISTS "${__libstl}" )
+  # append the runtime library to every C++ link
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" )
+  set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libstl}\"" )
+ endif()
+ if( EXISTS "${__libsupcxx}" )
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" )
+  set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
+  # C objects:
+  set( CMAKE_C_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+  set( CMAKE_C_CREATE_SHARED_MODULE "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+  set( CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+  set( CMAKE_C_CREATE_SHARED_LIBRARY "${CMAKE_C_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" )
+  set( CMAKE_C_CREATE_SHARED_MODULE "${CMAKE_C_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" )
+  set( CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
+ endif()
+ if( ANDROID_STL MATCHES "gnustl" )
+  # gnustl references libm symbols; link it explicitly
+  set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} -lm" )
+  set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} -lm" )
+  set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lm" )
+ endif()
+endif()
+
+# variables controlling optional build flags
+if (ANDROID_NDK_RELEASE STRLESS "r7")
+ # libGLESv2.so in NDK's prior to r7 refers to missing external symbols.
+ # So this flag option is required for all projects using OpenGL from native.
+ __INIT_VARIABLE( ANDROID_SO_UNDEFINED VALUES ON )
+else()
+ __INIT_VARIABLE( ANDROID_SO_UNDEFINED VALUES OFF )
+endif()
+__INIT_VARIABLE( ANDROID_NO_UNDEFINED OBSOLETE_NO_UNDEFINED VALUES ON )
+__INIT_VARIABLE( ANDROID_FUNCTION_LEVEL_LINKING VALUES ON )
+__INIT_VARIABLE( ANDROID_GOLD_LINKER VALUES ON )
+__INIT_VARIABLE( ANDROID_NOEXECSTACK VALUES ON )
+__INIT_VARIABLE( ANDROID_RELRO VALUES ON )
+
+# BUGFIX(review): the cache docstrings for ANDROID_FUNCTION_LEVEL_LINKING and
+# ANDROID_NOEXECSTACK were copy-pasted from ANDROID_SO_UNDEFINED ("Allows or
+# disallows undefined symbols in shared libraries"); corrected to describe the
+# actual options. Also fixed the "avaialble" typo in the gold linker docstring.
+set( ANDROID_NO_UNDEFINED ${ANDROID_NO_UNDEFINED} CACHE BOOL "Show all undefined symbols as linker errors" )
+set( ANDROID_SO_UNDEFINED ${ANDROID_SO_UNDEFINED} CACHE BOOL "Allows or disallows undefined symbols in shared libraries" )
+set( ANDROID_FUNCTION_LEVEL_LINKING ${ANDROID_FUNCTION_LEVEL_LINKING} CACHE BOOL "Put each function in a separate section and let the linker drop unused sections (-ffunction-sections -fdata-sections -Wl,--gc-sections)" )
+set( ANDROID_GOLD_LINKER ${ANDROID_GOLD_LINKER} CACHE BOOL "Enables gold linker (only available for NDK r8b for ARM and x86 architectures on linux-86 and darwin-x86 hosts)" )
+set( ANDROID_NOEXECSTACK ${ANDROID_NOEXECSTACK} CACHE BOOL "Marks the binaries as not requiring an executable stack" )
+set( ANDROID_RELRO ${ANDROID_RELRO} CACHE BOOL "Enables RELRO - a memory corruption mitigation technique" )
+mark_as_advanced( ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_FUNCTION_LEVEL_LINKING ANDROID_GOLD_LINKER ANDROID_NOEXECSTACK ANDROID_RELRO )
+
+# linker flags
+# Build up ANDROID_LINKER_FLAGS from the option variables configured above.
+set( ANDROID_LINKER_FLAGS "" )
+
+if( ARMEABI_V7A )
+ # this is *required* to use the following linker flags that routes around
+ # a CPU bug in some Cortex-A8 implementations:
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--fix-cortex-a8" )
+endif()
+
+if( ANDROID_NO_UNDEFINED )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" )
+endif()
+
+if( ANDROID_SO_UNDEFINED )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-allow-shlib-undefined" )
+endif()
+
+if( ANDROID_FUNCTION_LEVEL_LINKING )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fdata-sections -ffunction-sections" )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--gc-sections" )
+endif()
+
+# linker selection only applies to the GCC 4.6 toolchains: prefer gold where
+# available, fall back to bfd, and warn about the known-broken r8b arm bfd
+if( ANDROID_COMPILER_VERSION VERSION_EQUAL "4.6" )
+ if( ANDROID_GOLD_LINKER AND (CMAKE_HOST_UNIX OR ANDROID_NDK_RELEASE STRGREATER "r8b") AND (ARMEABI OR ARMEABI_V7A OR X86) )
+  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=gold" )
+ elseif( ANDROID_NDK_RELEASE STRGREATER "r8b")
+  set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=bfd" )
+ elseif( ANDROID_NDK_RELEASE STREQUAL "r8b" AND ARMEABI AND NOT _CMAKE_IN_TRY_COMPILE )
+  message( WARNING "The default bfd linker from arm GCC 4.6 toolchain can fail with 'unresolvable R_ARM_THM_CALL relocation' error message. See https://code.google.com/p/android/issues/detail?id=35342
+ On Linux and OS X host platform you can workaround this problem using gold linker (default).
+ Rerun cmake with -DANDROID_GOLD_LINKER=ON option in case of problems.
+" )
+ endif()
+endif() # version 4.6
+
+if( ANDROID_NOEXECSTACK )
+ if( ANDROID_COMPILER_IS_CLANG )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -Xclang -mnoexecstack" )
+ else()
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -Wa,--noexecstack" )
+ endif()
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,noexecstack" )
+endif()
+
+if( ANDROID_RELRO )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,relro -Wl,-z,now" )
+endif()
+
+if( ANDROID_COMPILER_IS_CLANG )
+ set( ANDROID_CXX_FLAGS "-Qunused-arguments ${ANDROID_CXX_FLAGS}" )
+ if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD )
+ set( ANDROID_CXX_FLAGS_RELEASE "-target thumbv7-none-linux-androideabi ${ANDROID_CXX_FLAGS_RELEASE}" )
+ set( ANDROID_CXX_FLAGS_DEBUG "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS_DEBUG}" )
+ else()
+ set( ANDROID_CXX_FLAGS "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS}" )
+ endif()
+ if( BUILD_WITH_ANDROID_NDK )
+ set( ANDROID_CXX_FLAGS "-gcc-toolchain ${ANDROID_TOOLCHAIN_ROOT} ${ANDROID_CXX_FLAGS}" )
+ endif()
+endif()
+
+# cache flags
+set( CMAKE_CXX_FLAGS "" CACHE STRING "c++ flags" )
+set( CMAKE_C_FLAGS "" CACHE STRING "c flags" )
+set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "c++ Release flags" )
+set( CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "c Release flags" )
+set( CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c++ Debug flags" )
+set( CMAKE_C_FLAGS_DEBUG "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c Debug flags" )
+set( CMAKE_SHARED_LINKER_FLAGS "" CACHE STRING "shared linker flags" )
+set( CMAKE_MODULE_LINKER_FLAGS "" CACHE STRING "module linker flags" )
+set( CMAKE_EXE_LINKER_FLAGS "-Wl,-z,nocopyreloc" CACHE STRING "executable linker flags" )
+
+# put flags to cache (for debug purpose only)
+set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS}" CACHE INTERNAL "Android specific c/c++ flags" )
+set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE}" CACHE INTERNAL "Android specific c/c++ Release flags" )
+set( ANDROID_CXX_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG}" CACHE INTERNAL "Android specific c/c++ Debug flags" )
+set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}" CACHE INTERNAL "Android specific c/c++ linker flags" )
+
+# finish flags
+set( CMAKE_CXX_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_CXX_FLAGS}" )
+set( CMAKE_C_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_C_FLAGS}" )
+set( CMAKE_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_RELEASE}" )
+set( CMAKE_C_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_C_FLAGS_RELEASE}" )
+set( CMAKE_CXX_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_CXX_FLAGS_DEBUG}" )
+set( CMAKE_C_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_C_FLAGS_DEBUG}" )
+set( CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}" )
+set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}" )
+set( CMAKE_EXE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" )
+
+if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" )
+ set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" )
+ set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" )
+ set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" )
+endif()
+
+# configure rtti
+if( DEFINED ANDROID_RTTI AND ANDROID_STL_FORCE_FEATURES )
+ if( ANDROID_RTTI )
+ set( CMAKE_CXX_FLAGS "-frtti ${CMAKE_CXX_FLAGS}" )
+ else()
+ set( CMAKE_CXX_FLAGS "-fno-rtti ${CMAKE_CXX_FLAGS}" )
+ endif()
+endif()
+
+# configure exceptios
+if( DEFINED ANDROID_EXCEPTIONS AND ANDROID_STL_FORCE_FEATURES )
+ if( ANDROID_EXCEPTIONS )
+ set( CMAKE_CXX_FLAGS "-fexceptions ${CMAKE_CXX_FLAGS}" )
+ set( CMAKE_C_FLAGS "-fexceptions ${CMAKE_C_FLAGS}" )
+ else()
+ set( CMAKE_CXX_FLAGS "-fno-exceptions ${CMAKE_CXX_FLAGS}" )
+ set( CMAKE_C_FLAGS "-fno-exceptions ${CMAKE_C_FLAGS}" )
+ endif()
+endif()
+
+# global includes and link directories
+include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} )
+link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
+
+# setup output directories
+set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" )
+set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" )
+
+if(NOT _CMAKE_IN_TRY_COMPILE)
+ if( EXISTS "${CMAKE_SOURCE_DIR}/jni/CMakeLists.txt" )
+ set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin/${ANDROID_NDK_ABI_NAME}" CACHE PATH "Output directory for applications" )
+ else()
+ set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin" CACHE PATH "Output directory for applications" )
+ endif()
+ set( LIBRARY_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}" CACHE PATH "path for android libs" )
+endif()
+
+# set these global flags for cmake client scripts to change behavior
+set( ANDROID True )
+set( BUILD_ANDROID True )
+
+# where is the target environment
+set( CMAKE_FIND_ROOT_PATH "${ANDROID_TOOLCHAIN_ROOT}/bin" "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" "${ANDROID_SYSROOT}" "${CMAKE_INSTALL_PREFIX}" "${CMAKE_INSTALL_PREFIX}/share" )
+
+# only search for libraries and includes in the ndk toolchain
+set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+
+
+# macro to find packages on the host OS
+macro( find_host_package )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER )
+ if( CMAKE_HOST_WIN32 )
+ SET( WIN32 1 )
+ SET( UNIX )
+ elseif( CMAKE_HOST_APPLE )
+ SET( APPLE 1 )
+ SET( UNIX )
+ endif()
+ find_package( ${ARGN} )
+ SET( WIN32 )
+ SET( APPLE )
+ SET( UNIX 1 )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+endmacro()
+
+
+# macro to find programs on the host OS
+macro( find_host_program )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER )
+ if( CMAKE_HOST_WIN32 )
+ SET( WIN32 1 )
+ SET( UNIX )
+ elseif( CMAKE_HOST_APPLE )
+ SET( APPLE 1 )
+ SET( UNIX )
+ endif()
+ find_program( ${ARGN} )
+ SET( WIN32 )
+ SET( APPLE )
+ SET( UNIX 1 )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+endmacro()
+
+
+macro( ANDROID_GET_ABI_RAWNAME TOOLCHAIN_FLAG VAR )
+ if( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI" )
+ set( ${VAR} "armeabi" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI_V7A" )
+ set( ${VAR} "armeabi-v7a" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "X86" )
+ set( ${VAR} "x86" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "MIPS" )
+ set( ${VAR} "mips" )
+ else()
+ set( ${VAR} "unknown" )
+ endif()
+endmacro()
+
+
+# export toolchain settings for the try_compile() command
+if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" )
+ set( __toolchain_config "")
+ foreach( __var NDK_CCACHE LIBRARY_OUTPUT_PATH_ROOT ANDROID_FORBID_SYGWIN ANDROID_SET_OBSOLETE_VARIABLES
+ ANDROID_NDK_HOST_X64
+ ANDROID_NDK
+ ANDROID_STANDALONE_TOOLCHAIN
+ ANDROID_TOOLCHAIN_NAME
+ ANDROID_ABI
+ ANDROID_NATIVE_API_LEVEL
+ ANDROID_STL
+ ANDROID_STL_FORCE_FEATURES
+ ANDROID_FORCE_ARM_BUILD
+ ANDROID_NO_UNDEFINED
+ ANDROID_SO_UNDEFINED
+ ANDROID_FUNCTION_LEVEL_LINKING
+ ANDROID_GOLD_LINKER
+ ANDROID_NOEXECSTACK
+ ANDROID_RELRO
+ )
+ if( DEFINED ${__var} )
+ if( "${__var}" MATCHES " ")
+ set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" CACHE INTERNAL \"\" )\n" )
+ else()
+ set( __toolchain_config "${__toolchain_config}set( ${__var} ${${__var}} CACHE INTERNAL \"\" )\n" )
+ endif()
+ endif()
+ endforeach()
+ file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/android.toolchain.config.cmake" "${__toolchain_config}" )
+ unset( __toolchain_config )
+endif()
+
+
+# set some obsolete variables for backward compatibility
+set( ANDROID_SET_OBSOLETE_VARIABLES ON CACHE BOOL "Define obsolete Andrid-specific cmake variables" )
+mark_as_advanced( ANDROID_SET_OBSOLETE_VARIABLES )
+if( ANDROID_SET_OBSOLETE_VARIABLES )
+ set( ANDROID_API_LEVEL ${ANDROID_NATIVE_API_LEVEL} )
+ set( ARM_TARGET "${ANDROID_ABI}" )
+ set( ARMEABI_NDK_NAME "${ANDROID_NDK_ABI_NAME}" )
+endif()
+
+
+# Variables controlling behavior or set by cmake toolchain:
+# ANDROID_ABI : "armeabi-v7a" (default), "armeabi", "armeabi-v7a with NEON", "armeabi-v7a with VFPV3", "armeabi-v6 with VFP", "x86", "mips"
+# ANDROID_NATIVE_API_LEVEL : 3,4,5,8,9,14 (depends on NDK version)
+# ANDROID_STL : gnustl_static/gnustl_shared/stlport_static/stlport_shared/gabi++_static/gabi++_shared/system_re/system/none
+# ANDROID_FORBID_SYGWIN : ON/OFF
+# ANDROID_NO_UNDEFINED : ON/OFF
+# ANDROID_SO_UNDEFINED : OFF/ON (default depends on NDK version)
+# ANDROID_FUNCTION_LEVEL_LINKING : ON/OFF
+# ANDROID_GOLD_LINKER : ON/OFF
+# ANDROID_NOEXECSTACK : ON/OFF
+# ANDROID_RELRO : ON/OFF
+# ANDROID_FORCE_ARM_BUILD : ON/OFF
+# ANDROID_STL_FORCE_FEATURES : ON/OFF
+# ANDROID_SET_OBSOLETE_VARIABLES : ON/OFF
+# Can be set only at the first run:
+# ANDROID_NDK
+# ANDROID_STANDALONE_TOOLCHAIN
+# ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain
+# ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems)
+# LIBRARY_OUTPUT_PATH_ROOT : <any valid path>
+# NDK_CCACHE : <path to your ccache executable>
+# Obsolete:
+# ANDROID_API_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL
+# ARM_TARGET : superseded by ANDROID_ABI
+# ARM_TARGETS : superseded by ANDROID_ABI (can be set only)
+# ANDROID_NDK_TOOLCHAIN_ROOT : superseded by ANDROID_STANDALONE_TOOLCHAIN (can be set only)
+# ANDROID_USE_STLPORT : superseded by ANDROID_STL=stlport_static
+# ANDROID_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL (completely removed)
+#
+# Primary read-only variables:
+# ANDROID : always TRUE
+# ARMEABI : TRUE for arm v6 and older devices
+# ARMEABI_V6 : TRUE for arm v6
+# ARMEABI_V7A : TRUE for arm v7a
+# NEON : TRUE if NEON unit is enabled
+# VFPV3 : TRUE if VFP version 3 is enabled
+# X86 : TRUE if configured for x86
+# MIPS : TRUE if configured for mips
+# BUILD_ANDROID : always TRUE
+# BUILD_WITH_ANDROID_NDK : TRUE if NDK is used
+# BUILD_WITH_STANDALONE_TOOLCHAIN : TRUE if standalone toolchain is used
+# ANDROID_NDK_HOST_SYSTEM_NAME : "windows", "linux-x86" or "darwin-x86" depending on host platform
+# ANDROID_NDK_ABI_NAME : "armeabi", "armeabi-v7a", "x86" or "mips" depending on ANDROID_ABI
+# ANDROID_NDK_RELEASE : one of r5, r5b, r5c, r6, r6b, r7, r7b, r7c, r8, r8b, r8c, r8d, r8e; set only for NDK
+# ANDROID_ARCH_NAME : "arm" or "x86" or "mips" depending on ANDROID_ABI
+# ANDROID_SYSROOT : path to the compiler sysroot
+# TOOL_OS_SUFFIX : "" or ".exe" depending on host platform
+# ANDROID_COMPILER_IS_CLANG : TRUE if clang compiler is used
+# Obsolete:
+# ARMEABI_NDK_NAME : superseded by ANDROID_NDK_ABI_NAME
+#
+# Secondary (less stable) read-only variables:
+# ANDROID_COMPILER_VERSION : GCC version used
+# ANDROID_CXX_FLAGS : C/C++ compiler flags required by Android platform
+# ANDROID_SUPPORTED_ABIS : list of currently allowed values for ANDROID_ABI
+# ANDROID_TOOLCHAIN_MACHINE_NAME : "arm-linux-androideabi", "arm-eabi" or "i686-android-linux"
+# ANDROID_TOOLCHAIN_ROOT : path to the top level of toolchain (standalone or placed inside NDK)
+# ANDROID_CLANG_TOOLCHAIN_ROOT : path to clang tools
+# ANDROID_SUPPORTED_NATIVE_API_LEVELS : list of native API levels found inside NDK
+# ANDROID_STL_INCLUDE_DIRS : stl include paths
+# ANDROID_RTTI : if rtti is enabled by the runtime
+# ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime
+# ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used
+# ANDROID_CLANG_VERSION : version of clang compiler if clang is used
+#
+# Defaults:
+# ANDROID_DEFAULT_NDK_API_LEVEL
+# ANDROID_DEFAULT_NDK_API_LEVEL_${ARCH}
+# ANDROID_NDK_SEARCH_PATHS
+# ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH
+# ANDROID_SUPPORTED_ABIS_${ARCH}
+# ANDROID_SUPPORTED_NDK_VERSIONS
message(WARNING "Can not automatically determine the value for ANDROID_PLATFORM_VERSION_CODE")
endif()
-configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/platforms/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY)
link_directories("${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib" "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib" "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}")
add_executable(opencv_test_engine ${engine_test_files} jni/Tests/gtest/gtest-all.cpp)
target_link_libraries(opencv_test_engine z binder log utils android_runtime ${engine} ${engine}_jni)
-
EXPECT_EQ(info1, info2);
}
#endif
-
// string path = pm.GetPackagePathByVersion("240", PLATFORM_TEGRA2, 0);
// EXPECT_STREQ("/data/data/org.opencv.lib_v24_tegra2/lib", path.c_str());
// }
-
-
os.system("adb %s shell mkdir -p \"%s\"" % (DEVICE_STR, DEVICE_LOG_PATH))
RunTestApp("OpenCVEngineTestApp")
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_hardfp
-cd build_hardfp
-
-cmake -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../..
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_softfp
-cd build_softfp
-
-cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../..
-
architecture = 'armeabi'
-excludedHeaders = set(['hdf5.h', 'cap_ios.h',
- 'eigen.hpp', 'cxeigen.hpp' #TOREMOVE
- ])
+excludedHeaders = set(['hdf5.h', 'cap_ios.h', 'eigen.hpp', 'cxeigen.hpp']) #TOREMOVE
systemIncludes = ['sources/cxx-stl/gnu-libstdc++/4.6/include', \
'/opt/android-ndk-r8c/platforms/android-8/arch-arm', # TODO: check if this one could be passed as command line arg
'sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include']
if f == m:
moduleHeaders += GetHeaderFiles(os.path.join(cppHeadersFolder, f))
if m == 'flann':
- flann = os.path.join(cppHeadersFolder, f, 'flann.hpp')
+ flann = os.path.join(cppHeadersFolder, f, 'flann.hpp')
moduleHeaders.remove(flann)
moduleHeaders.insert(0, flann)
cppHeaders += moduleHeaders
os.chdir(BuildDir)
BuildLog = os.path.join(BuildDir, "build.log")
- CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../../ > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog)
+ CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../.. > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog)
MakeCmdLine = "make %s >> \"%s\" 2>&1" % (MakeTarget, BuildLog);
#print(CmakeCmdLine)
os.system(CmakeCmdLine)
#!/bin/sh
cd `dirname $0`/..
-mkdir -p build
-cd build
-
-cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
+mkdir -p build_android_arm
+cd build_android_arm
+cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../..
--- /dev/null
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_android_mips
+cd build_android_mips
+
+cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../..
--- /dev/null
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_android_service
+cd build_android_service
+
+cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../..
--- /dev/null
+#!/bin/sh
+
+cd `dirname $0`/..
+
+mkdir -p build_android_x86
+cd build_android_x86
+
+cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../..
--- /dev/null
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_linux_arm_hardfp
+cd build_linux_arm_hardfp
+
+cmake -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../..
--- /dev/null
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_linux_arm_softfp
+cd build_linux_arm_softfp
+
+cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../..
-//This sample is inherited from facedetect.cpp in smaple/c
-
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
using namespace std;
using namespace cv;
+#define LOOP_NUM 10
+
+const static Scalar colors[] = { CV_RGB(0,0,255),
+ CV_RGB(0,128,255),
+ CV_RGB(0,255,255),
+ CV_RGB(0,255,0),
+ CV_RGB(255,128,0),
+ CV_RGB(255,255,0),
+ CV_RGB(255,0,0),
+ CV_RGB(255,0,255)} ;
-static void help()
+int64 work_begin = 0;
+int64 work_end = 0;
+
+static void workBegin()
+{
+ work_begin = getTickCount();
+}
+static void workEnd()
{
- cout << "\nThis program demonstrates the cascade recognizer.\n"
- "This classifier can recognize many ~rigid objects, it's most known use is for faces.\n"
- "Usage:\n"
- "./facedetect [--cascade=<cascade_path> this is the primary trained classifier such as frontal face]\n"
- " [--scale=<image scale greater or equal to 1, try 1.3 for example>\n"
- " [filename|camera_index]\n\n"
- "see facedetect.cmd for one call:\n"
- "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --scale=1.3 \n"
- "Hit any key to quit.\n"
- "Using OpenCV version " << CV_VERSION << "\n" << endl;
+ work_end += (getTickCount() - work_begin);
}
-struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
-void detectAndDraw( Mat& img,
- cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier& nestedCascade,
- double scale);
+static double getTime(){
+ return work_end /((double)cvGetTickFrequency() * 1000.);
+}
+
+void detect( Mat& img, vector<Rect>& faces,
+ cv::ocl::OclCascadeClassifierBuf& cascade,
+ double scale, bool calTime);
-String cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
+void detectCPU( Mat& img, vector<Rect>& faces,
+ CascadeClassifier& cascade,
+ double scale, bool calTime);
+
+void Draw(Mat& img, vector<Rect>& faces, double scale);
+
+// This function test if gpu_rst matches cpu_rst.
+// If the two vectors are not equal, it will return the difference in vector size
+// Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
+double checkRectSimilarity(Size sz, std::vector<Rect>& cpu_rst, std::vector<Rect>& gpu_rst);
int main( int argc, const char** argv )
{
- CvCapture* capture = 0;
- Mat frame, frameCopy, image;
- const String scaleOpt = "--scale=";
- size_t scaleOptLen = scaleOpt.length();
- const String cascadeOpt = "--cascade=";
- size_t cascadeOptLen = cascadeOpt.length();
- String inputName;
-
- help();
- cv::ocl::OclCascadeClassifier cascade;
- CascadeClassifier nestedCascade;
- double scale = 1;
-
- for( int i = 1; i < argc; i++ )
+ const char* keys =
+ "{ h | help | false | print help message }"
+ "{ i | input | | specify input image }"
+ "{ t | template | ../../../data/haarcascades/haarcascade_frontalface_alt.xml | specify template file }"
+ "{ c | scale | 1.0 | scale image }"
+ "{ s | use_cpu | false | use cpu or gpu to process the image }";
+
+ CommandLineParser cmd(argc, argv, keys);
+ if (cmd.get<bool>("help"))
{
- cout << "Processing " << i << " " << argv[i] << endl;
- if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 )
- {
- cascadeName.assign( argv[i] + cascadeOptLen );
- cout << " from which we have cascadeName= " << cascadeName << endl;
- }
- else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 )
- {
- if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 )
- scale = 1;
- cout << " from which we read scale = " << scale << endl;
- }
- else if( argv[i][0] == '-' )
- {
- cerr << "WARNING: Unknown option %s" << argv[i] << endl;
- }
- else
- inputName.assign( argv[i] );
+ cout << "Avaible options:" << endl;
+ cmd.printParams();
+ return 0;
}
+ CvCapture* capture = 0;
+ Mat frame, frameCopy, image;
- if( !cascade.load( cascadeName ) )
+ bool useCPU = cmd.get<bool>("s");
+ string inputName = cmd.get<string>("i");
+ string cascadeName = cmd.get<string>("t");
+ double scale = cmd.get<double>("c");
+ cv::ocl::OclCascadeClassifierBuf cascade;
+ CascadeClassifier cpu_cascade;
+
+ if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
{
cerr << "ERROR: Could not load classifier cascade" << endl;
- cerr << "Usage: facedetect [--cascade=<cascade_path>]\n"
- " [--scale[=<image scale>\n"
- " [filename|camera_index]\n" << endl ;
return -1;
}
- if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
+ if( inputName.empty() )
{
- capture = cvCaptureFromCAM( inputName.empty() ? 0 : inputName.c_str()[0] - '0' );
- int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0' ;
- if(!capture) cout << "Capture from CAM " << c << " didn't work" << endl;
+ capture = cvCaptureFromCAM(0);
+ if(!capture)
+ cout << "Capture from CAM 0 didn't work" << endl;
}
else if( inputName.size() )
{
if( image.empty() )
{
capture = cvCaptureFromAVI( inputName.c_str() );
- if(!capture) cout << "Capture from AVI didn't work" << endl;
+ if(!capture)
+ cout << "Capture from AVI didn't work" << endl;
+ return -1;
}
}
else
{
image = imread( "lena.jpg", 1 );
- if(image.empty()) cout << "Couldn't read lena.jpg" << endl;
+ if(image.empty())
+ cout << "Couldn't read lena.jpg" << endl;
+ return -1;
}
cvNamedWindow( "result", 1 );
std::vector<cv::ocl::Info> oclinfo;
int devnums = cv::ocl::getDevice(oclinfo);
- if(devnums<1)
+ if( devnums < 1 )
{
std::cout << "no device found\n";
return -1;
}
//if you want to use undefault device, set it here
//setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
+ ocl::setBinpath("./");
if( capture )
{
cout << "In capture ..." << endl;
{
IplImage* iplImg = cvQueryFrame( capture );
frame = iplImg;
+ vector<Rect> faces;
if( frame.empty() )
break;
if( iplImg->origin == IPL_ORIGIN_TL )
frame.copyTo( frameCopy );
else
flip( frame, frameCopy, 0 );
-
- detectAndDraw( frameCopy, cascade, nestedCascade, scale );
-
+ if(useCPU){
+ detectCPU(frameCopy, faces, cpu_cascade, scale, false);
+ }
+ else{
+ detect(frameCopy, faces, cascade, scale, false);
+ }
+ Draw(frameCopy, faces, scale);
if( waitKey( 10 ) >= 0 )
goto _cleanup_;
}
else
{
cout << "In image read" << endl;
- if( !image.empty() )
- {
- detectAndDraw( image, cascade, nestedCascade, scale );
- waitKey(0);
- }
- else if( !inputName.empty() )
+ vector<Rect> faces;
+ vector<Rect> ref_rst;
+ double accuracy = 0.;
+ for(int i = 0; i <= LOOP_NUM;i ++)
{
- /* assume it is a text file containing the
- list of the image filenames to be processed - one per line */
- FILE* f = fopen( inputName.c_str(), "rt" );
- if( f )
+ cout << "loop" << i << endl;
+ if(useCPU){
+ detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);
+ }
+ else{
+ detect(image, faces, cascade, scale, i==0?false:true);
+ if(i == 0){
+ detectCPU(image, ref_rst, cpu_cascade, scale, false);
+ accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
+ }
+ }
+ if (i == LOOP_NUM)
{
- char buf[1000+1];
- while( fgets( buf, 1000, f ) )
- {
- int len = (int)strlen(buf), c;
- while( len > 0 && isspace(buf[len-1]) )
- len--;
- buf[len] = '\0';
- cout << "file " << buf << endl;
- image = imread( buf, 1 );
- if( !image.empty() )
- {
- detectAndDraw( image, cascade, nestedCascade, scale );
- c = waitKey(0);
- if( c == 27 || c == 'q' || c == 'Q' )
- break;
- }
- else
- {
- cerr << "Aw snap, couldn't read image " << buf << endl;
- }
- }
- fclose(f);
+ if (useCPU)
+ cout << "average CPU time (noCamera) : ";
+ else
+ cout << "average GPU time (noCamera) : ";
+ cout << getTime() / LOOP_NUM << " ms" << endl;
+ cout << "accuracy value: " << accuracy <<endl;
}
}
+ Draw(image, faces, scale);
+ waitKey(0);
}
cvDestroyWindow("result");
return 0;
}
-void detectAndDraw( Mat& img,
- cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier&,
- double scale)
+void detect( Mat& img, vector<Rect>& faces,
+ cv::ocl::OclCascadeClassifierBuf& cascade,
+ double scale, bool calTime)
{
- int i = 0;
- double t = 0;
- vector<Rect> faces;
- const static Scalar colors[] = { CV_RGB(0,0,255),
- CV_RGB(0,128,255),
- CV_RGB(0,255,255),
- CV_RGB(0,255,0),
- CV_RGB(255,128,0),
- CV_RGB(255,255,0),
- CV_RGB(255,0,0),
- CV_RGB(255,0,255)} ;
cv::ocl::oclMat image(img);
cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
-
+ if(calTime) workBegin();
cv::ocl::cvtColor( image, gray, CV_BGR2GRAY );
cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
cv::ocl::equalizeHist( smallImg, smallImg );
- CvSeq* _objects;
- MemStorage storage(cvCreateMemStorage(0));
- t = (double)cvGetTickCount();
- _objects = cascade.oclHaarDetectObjects( smallImg, storage, 1.1,
+ cascade.detectMultiScale( smallImg, faces, 1.1,
3, 0
|CV_HAAR_SCALE_IMAGE
, Size(30,30), Size(0, 0) );
- vector<CvAvgComp> vecAvgComp;
- Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
- faces.resize(vecAvgComp.size());
- std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
- t = (double)cvGetTickCount() - t;
- printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
+ if(calTime) workEnd();
+}
+
+void detectCPU( Mat& img, vector<Rect>& faces,
+ CascadeClassifier& cascade,
+ double scale, bool calTime)
+{
+ if(calTime) workBegin();
+ Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+ cvtColor(img, cpu_gray, CV_BGR2GRAY);
+ resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR);
+ equalizeHist(cpu_smallImg, cpu_smallImg);
+ cascade.detectMultiScale(cpu_smallImg, faces, 1.1,
+ 3, 0 | CV_HAAR_SCALE_IMAGE,
+ Size(30, 30), Size(0, 0));
+ if(calTime) workEnd();
+}
+
+void Draw(Mat& img, vector<Rect>& faces, double scale)
+{
+ int i = 0;
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
{
- Mat smallImgROI;
Point center;
Scalar color = colors[i%8];
int radius;
}
cv::imshow( "result", img );
}
+
+double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
+{
+ double final_test_result = 0.0;
+ size_t sz1 = ob1.size();
+ size_t sz2 = ob2.size();
+
+ if(sz1 != sz2)
+ return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+ else
+ {
+ cv::Mat cpu_result(sz, CV_8UC1);
+ cpu_result.setTo(0);
+
+ for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
+ {
+ cv::Mat cpu_result_roi(cpu_result, *r);
+ cpu_result_roi.setTo(1);
+ cpu_result.copyTo(cpu_result);
+ }
+ int cpu_area = cv::countNonZero(cpu_result > 0);
+
+ cv::Mat gpu_result(sz, CV_8UC1);
+ gpu_result.setTo(0);
+ for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
+ {
+ cv::Mat gpu_result_roi(gpu_result, *r2);
+ gpu_result_roi.setTo(1);
+ gpu_result.copyTo(gpu_result);
+ }
+
+ cv::Mat result_;
+ multiply(cpu_result, gpu_result, result_);
+ int result = cv::countNonZero(result_ > 0);
+
+ final_test_result = 1.0 - (double)result/(double)cpu_area;
+ }
+ return final_test_result;
+}
bool gamma_corr;
};
-
class App
{
public:
string message() const;
+// This function test if gpu_rst matches cpu_rst.
+// If the two vectors are not equal, it will return the difference in vector size
+// Else if will return
+// (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
+ double checkRectSimilarity(Size sz,
+ std::vector<Rect>& cpu_rst,
+ std::vector<Rect>& gpu_rst);
private:
App operator=(App&);
ocl::oclMat gpu_img;
// Iterate over all frames
+ bool verify = false;
while (running && !frame.empty())
{
workBegin();
gpu_img.upload(img);
gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
- }
+ if (!verify)
+ {
+ // verify that the GPU output matches the CPU output on the first run
+ verify = true;
+ vector<Rect> ref_rst;
+ cvtColor(img, img, CV_BGRA2BGR);
+ cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride,
+ Size(0, 0), scale, gr_threshold-2);
+ double accuracy = checkRectSimilarity(img.size(), ref_rst, found);
+ cout << "\naccuracy value: " << accuracy << endl;
+ }
+ }
else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
hogWorkEnd();
return ss.str();
}
+double App::checkRectSimilarity(Size sz,
+ std::vector<Rect>& ob1,
+ std::vector<Rect>& ob2)
+{
+ double final_test_result = 0.0;
+ size_t sz1 = ob1.size();
+ size_t sz2 = ob2.size();
+
+ if(sz1 != sz2)
+ return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+ else
+ {
+ cv::Mat cpu_result(sz, CV_8UC1);
+ cpu_result.setTo(0);
+
+ for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
+ {
+ cv::Mat cpu_result_roi(cpu_result, *r);
+ cpu_result_roi.setTo(1);
+ cpu_result.copyTo(cpu_result);
+ }
+ int cpu_area = cv::countNonZero(cpu_result > 0);
+
+ cv::Mat gpu_result(sz, CV_8UC1);
+ gpu_result.setTo(0);
+ for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
+ {
+ cv::Mat gpu_result_roi(gpu_result, *r2);
+ gpu_result_roi.setTo(1);
+ gpu_result.copyTo(gpu_result);
+ }
+
+ cv::Mat result_;
+ multiply(cpu_result, gpu_result, result_);
+ int result = cv::countNonZero(result_ > 0);
+
+ final_test_result = 1.0 - (double)result/(double)cpu_area;
+ }
+ return final_test_result;
+
+}
+
--- /dev/null
+#include <iostream>
+#include <vector>
+#include <iomanip>
+
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/video/video.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+typedef unsigned char uchar;
+#define LOOP_NUM 10
+int64 work_begin = 0;
+int64 work_end = 0;
+
+static void workBegin()
+{
+ work_begin = getTickCount();
+}
+static void workEnd()
+{
+ work_end += (getTickCount() - work_begin);
+}
+static double getTime(){
+ return work_end * 1000. / getTickFrequency();
+}
+
+static void download(const oclMat& d_mat, vector<Point2f>& vec)
+{
+ vec.resize(d_mat.cols);
+ Mat mat(1, d_mat.cols, CV_32FC2, (void*)&vec[0]);
+ d_mat.download(mat);
+}
+
+static void download(const oclMat& d_mat, vector<uchar>& vec)
+{
+ vec.resize(d_mat.cols);
+ Mat mat(1, d_mat.cols, CV_8UC1, (void*)&vec[0]);
+ d_mat.download(mat);
+}
+
+static void drawArrows(Mat& frame, const vector<Point2f>& prevPts, const vector<Point2f>& nextPts, const vector<uchar>& status, Scalar line_color = Scalar(0, 0, 255))
+{
+ for (size_t i = 0; i < prevPts.size(); ++i)
+ {
+ if (status[i])
+ {
+ int line_thickness = 1;
+
+ Point p = prevPts[i];
+ Point q = nextPts[i];
+
+ double angle = atan2((double) p.y - q.y, (double) p.x - q.x);
+
+ double hypotenuse = sqrt( (double)(p.y - q.y)*(p.y - q.y) + (double)(p.x - q.x)*(p.x - q.x) );
+
+ if (hypotenuse < 1.0)
+ continue;
+
+ // Here we lengthen the arrow by a factor of three.
+ q.x = (int) (p.x - 3 * hypotenuse * cos(angle));
+ q.y = (int) (p.y - 3 * hypotenuse * sin(angle));
+
+ // Now we draw the main line of the arrow.
+ line(frame, p, q, line_color, line_thickness);
+
+ // Now draw the tips of the arrow. I do some scaling so that the
+ // tips look proportional to the main line of the arrow.
+
+ p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4));
+ p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4));
+ line(frame, p, q, line_color, line_thickness);
+
+ p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4));
+ p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4));
+ line(frame, p, q, line_color, line_thickness);
+ }
+ }
+}
+
+
+int main(int argc, const char* argv[])
+{
+ static std::vector<Info> ocl_info;
+ ocl::getDevice(ocl_info);
+ //if you want to use a non-default device, set it here
+ setDevice(ocl_info[0]);
+
+ //set this to cache compiled kernels and save compile time on subsequent runs
+ ocl::setBinpath("./");
+ const char* keys =
+ "{ h | help | false | print help message }"
+ "{ l | left | | specify left image }"
+ "{ r | right | | specify right image }"
+ "{ c | camera | 0 | enable camera capturing }"
+ "{ s | use_cpu | false | use cpu or gpu to process the image }"
+ "{ v | video | | use video as input }"
+ "{ points | points | 1000 | specify points count [GoodFeatureToTrack] }"
+ "{ min_dist | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }";
+
+ CommandLineParser cmd(argc, argv, keys);
+
+ if (cmd.get<bool>("help"))
+ {
+ cout << "Usage: pyrlk_optical_flow [options]" << endl;
+ cout << "Avaible options:" << endl;
+ cmd.printParams();
+ return 0;
+ }
+
+ bool defaultPicturesFail = false;
+ string fname0 = cmd.get<string>("left");
+ string fname1 = cmd.get<string>("right");
+ string vdofile = cmd.get<string>("video");
+ int points = cmd.get<int>("points");
+ double minDist = cmd.get<double>("min_dist");
+ bool useCPU = cmd.get<bool>("s");
+ bool useCamera = cmd.get<bool>("c");
+ int inputName = cmd.get<int>("c");
+ oclMat d_nextPts, d_status;
+
+ Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
+ Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
+ PyrLKOpticalFlow d_pyrLK;
+ vector<cv::Point2f> pts;
+ vector<cv::Point2f> nextPts;
+ vector<unsigned char> status;
+ vector<float> err;
+
+ if (frame0.empty() || frame1.empty())
+ {
+ useCamera = true;
+ defaultPicturesFail = true;
+ CvCapture* capture = 0;
+ capture = cvCaptureFromCAM( inputName );
+ if (!capture)
+ {
+ cout << "Can't load input images" << endl;
+ return -1;
+ }
+ }
+
+ cout << "Points count : " << points << endl << endl;
+
+ if (useCamera)
+ {
+ CvCapture* capture = 0;
+ Mat frame, frameCopy;
+ Mat frame0Gray, frame1Gray;
+ Mat ptr0, ptr1;
+
+ if(vdofile == "")
+ capture = cvCaptureFromCAM( inputName );
+ else
+ capture = cvCreateFileCapture(vdofile.c_str());
+
+ int c = inputName ;
+ if(!capture)
+ {
+ if(vdofile == "")
+ cout << "Capture from CAM " << c << " didn't work" << endl;
+ else
+ cout << "Capture from file " << vdofile << " failed" <<endl;
+ if (defaultPicturesFail)
+ {
+ return -1;
+ }
+ goto nocamera;
+ }
+
+ cout << "In capture ..." << endl;
+ for(int i = 0;; i++)
+ {
+ frame = cvQueryFrame( capture );
+ if( frame.empty() )
+ break;
+
+ if (i == 0)
+ {
+ frame.copyTo( frame0 );
+ cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+ }
+ else
+ {
+ if (i%2 == 1)
+ {
+ frame.copyTo(frame1);
+ cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+ ptr0 = frame0Gray;
+ ptr1 = frame1Gray;
+ }
+ else
+ {
+ frame.copyTo(frame0);
+ cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+ ptr0 = frame1Gray;
+ ptr1 = frame0Gray;
+ }
+
+ pts.clear();
+
+ cv::goodFeaturesToTrack(ptr0, pts, points, 0.01, 0.0);
+
+ if (pts.size() == 0)
+ {
+ continue;
+ }
+
+ if (useCPU)
+ {
+ cv::calcOpticalFlowPyrLK(ptr0, ptr1, pts, nextPts, status, err);
+ }
+ else
+ {
+ oclMat d_prevPts(1, points, CV_32FC2, (void*)&pts[0]);
+
+ d_pyrLK.sparse(oclMat(ptr0), oclMat(ptr1), d_prevPts, d_nextPts, d_status);
+
+ download(d_prevPts, pts);
+ download(d_nextPts, nextPts);
+ download(d_status, status);
+
+ }
+ if (i%2 == 1)
+ frame1.copyTo(frameCopy);
+ else
+ frame0.copyTo(frameCopy);
+ drawArrows(frameCopy, pts, nextPts, status, Scalar(255, 0, 0));
+ imshow("PyrLK [Sparse]", frameCopy);
+ }
+
+ if( waitKey( 10 ) >= 0 )
+ goto _cleanup_;
+ }
+
+ waitKey(0);
+
+_cleanup_:
+ cvReleaseCapture( &capture );
+ }
+ else
+ {
+nocamera:
+ for(int i = 0; i <= LOOP_NUM;i ++)
+ {
+ cout << "loop" << i << endl;
+ if (i > 0) workBegin();
+
+ cv::goodFeaturesToTrack(frame0, pts, points, 0.01, minDist);
+
+ if (useCPU)
+ {
+ cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
+ }
+ else
+ {
+ oclMat d_prevPts(1, points, CV_32FC2, (void*)&pts[0]);
+
+ d_pyrLK.sparse(oclMat(frame0), oclMat(frame1), d_prevPts, d_nextPts, d_status);
+
+ download(d_prevPts, pts);
+ download(d_nextPts, nextPts);
+ download(d_status, status);
+ }
+
+ if (i > 0 && i <= LOOP_NUM)
+ workEnd();
+
+ if (i == LOOP_NUM)
+ {
+ if (useCPU)
+ cout << "average CPU time (noCamera) : ";
+ else
+ cout << "average GPU time (noCamera) : ";
+
+ cout << getTime() / LOOP_NUM << " ms" << endl;
+
+ drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
+
+ imshow("PyrLK [Sparse]", frame0);
+ }
+ }
+ }
+
+ waitKey();
+
+ return 0;
+}
--- /dev/null
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <stdexcept>
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+using namespace cv;
+using namespace std;
+using namespace ocl;
+
+bool help_showed = false;
+
+struct Params
+{
+ Params();
+ static Params read(int argc, char** argv);
+
+ string left;
+ string right;
+
+ string method_str() const
+ {
+ switch (method)
+ {
+ case BM: return "BM";
+ case BP: return "BP";
+ case CSBP: return "CSBP";
+ }
+ return "";
+ }
+ enum {BM, BP, CSBP} method;
+ int ndisp; // Max disparity + 1
+ enum {GPU, CPU} type;
+};
+
+
+struct App
+{
+ App(const Params& p);
+ void run();
+ void handleKey(char key);
+ void printParams() const;
+
+ void workBegin() { work_begin = getTickCount(); }
+ void workEnd()
+ {
+ int64 d = getTickCount() - work_begin;
+ double f = getTickFrequency();
+ work_fps = f / d;
+ }
+
+ string text() const
+ {
+ stringstream ss;
+ ss << "(" << p.method_str() << ") FPS: " << setiosflags(ios::left)
+ << setprecision(4) << work_fps;
+ return ss.str();
+ }
+private:
+ Params p;
+ bool running;
+
+ Mat left_src, right_src;
+ Mat left, right;
+ oclMat d_left, d_right;
+
+ StereoBM_OCL bm;
+ StereoBeliefPropagation bp;
+ StereoConstantSpaceBP csbp;
+
+ int64 work_begin;
+ double work_fps;
+};
+
+static void printHelp()
+{
+ cout << "Usage: stereo_match_gpu\n"
+ << "\t--left <left_view> --right <right_view> # must be rectified\n"
+ << "\t--method <stereo_match_method> # BM | BP | CSBP\n"
+ << "\t--ndisp <number> # number of disparity levels\n"
+ << "\t--type <device_type> # cpu | CPU | gpu | GPU\n";
+ help_showed = true;
+}
+
+int main(int argc, char** argv)
+{
+ try
+ {
+ if (argc < 2)
+ {
+ printHelp();
+ return 1;
+ }
+
+ Params args = Params::read(argc, argv);
+ if (help_showed)
+ return -1;
+
+ int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU };
+ vector<Info> info;
+
+ if(getDevice(info, flags[args.type]) == 0)
+ {
+ throw runtime_error("Error: Did not find a valid OpenCL device!");
+ }
+ cout << "Device name:" << info[0].DeviceName[0] << endl;
+
+ App app(args);
+ app.run();
+ }
+ catch (const exception& e)
+ {
+ cout << "error: " << e.what() << endl;
+ }
+ return 0;
+}
+
+
+Params::Params()
+{
+ method = BM;
+ ndisp = 64;
+ type = GPU;
+}
+
+
+Params Params::read(int argc, char** argv)
+{
+ Params p;
+
+ for (int i = 1; i < argc; i++)
+ {
+ if (string(argv[i]) == "--left") p.left = argv[++i];
+ else if (string(argv[i]) == "--right") p.right = argv[++i];
+ else if (string(argv[i]) == "--method")
+ {
+ if (string(argv[i + 1]) == "BM") p.method = BM;
+ else if (string(argv[i + 1]) == "BP") p.method = BP;
+ else if (string(argv[i + 1]) == "CSBP") p.method = CSBP;
+ else throw runtime_error("unknown stereo match method: " + string(argv[i + 1]));
+ i++;
+ }
+ else if (string(argv[i]) == "--ndisp") p.ndisp = atoi(argv[++i]);
+ else if (string(argv[i]) == "--type")
+ {
+ string t(argv[++i]);
+ if (t == "cpu" || t == "CPU")
+ {
+ p.type = CPU;
+ }
+ else if (t == "gpu" || t == "GPU")
+ {
+ p.type = GPU;
+ }
+ else throw runtime_error("unknown device type: " + t);
+ }
+ else if (string(argv[i]) == "--help") printHelp();
+ else throw runtime_error("unknown key: " + string(argv[i]));
+ }
+
+ return p;
+}
+
+
+App::App(const Params& params)
+ : p(params), running(false)
+{
+ cout << "stereo_match_ocl sample\n";
+ cout << "\nControls:\n"
+ << "\tesc - exit\n"
+ << "\tp - print current parameters\n"
+ << "\tg - convert source images into gray\n"
+ << "\tm - change stereo match method\n"
+ << "\ts - change Sobel prefiltering flag (for BM only)\n"
+ << "\t1/q - increase/decrease maximum disparity\n"
+ << "\t2/w - increase/decrease window size (for BM only)\n"
+ << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
+ << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
+}
+
+
+void App::run()
+{
+ // Load images
+ left_src = imread(p.left);
+ right_src = imread(p.right);
+ if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\"");
+ if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\"");
+
+ cvtColor(left_src, left, CV_BGR2GRAY);
+ cvtColor(right_src, right, CV_BGR2GRAY);
+
+ d_left.upload(left);
+ d_right.upload(right);
+
+ imshow("left", left);
+ imshow("right", right);
+
+ // Set common parameters
+ bm.ndisp = p.ndisp;
+ bp.ndisp = p.ndisp;
+ csbp.ndisp = p.ndisp;
+
+ cout << endl;
+ printParams();
+
+ running = true;
+ while (running)
+ {
+
+ // Prepare disparity map of specified type
+ Mat disp;
+ oclMat d_disp;
+ workBegin();
+ switch (p.method)
+ {
+ case Params::BM:
+ if (d_left.channels() > 1 || d_right.channels() > 1)
+ {
+ cout << "BM doesn't support color images\n";
+ cvtColor(left_src, left, CV_BGR2GRAY);
+ cvtColor(right_src, right, CV_BGR2GRAY);
+ cout << "image_channels: " << left.channels() << endl;
+ d_left.upload(left);
+ d_right.upload(right);
+ imshow("left", left);
+ imshow("right", right);
+ }
+ bm(d_left, d_right, d_disp);
+ break;
+ case Params::BP:
+ bp(d_left, d_right, d_disp);
+ break;
+ case Params::CSBP:
+ csbp(d_left, d_right, d_disp);
+ break;
+ }
+ ocl::finish();
+ workEnd();
+
+ // Show results
+ d_disp.download(disp);
+ if (p.method != Params::BM)
+ {
+ disp.convertTo(disp, 0);
+ }
+ putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
+ imshow("disparity", disp);
+
+ handleKey((char)waitKey(3));
+ }
+}
+
+
+void App::printParams() const
+{
+ cout << "--- Parameters ---\n";
+ cout << "image_size: (" << left.cols << ", " << left.rows << ")\n";
+ cout << "image_channels: " << left.channels() << endl;
+ cout << "method: " << p.method_str() << endl
+ << "ndisp: " << p.ndisp << endl;
+ switch (p.method)
+ {
+ case Params::BM:
+ cout << "win_size: " << bm.winSize << endl;
+ cout << "prefilter_sobel: " << bm.preset << endl;
+ break;
+ case Params::BP:
+ cout << "iter_count: " << bp.iters << endl;
+ cout << "level_count: " << bp.levels << endl;
+ break;
+ case Params::CSBP:
+ cout << "iter_count: " << csbp.iters << endl;
+ cout << "level_count: " << csbp.levels << endl;
+ break;
+ }
+ cout << endl;
+}
+
+
+void App::handleKey(char key)
+{
+ switch (key)
+ {
+ case 27:
+ running = false;
+ break;
+ case 'p': case 'P':
+ printParams();
+ break;
+ case 'g': case 'G':
+ if (left.channels() == 1 && p.method != Params::BM)
+ {
+ left = left_src;
+ right = right_src;
+ }
+ else
+ {
+ cvtColor(left_src, left, CV_BGR2GRAY);
+ cvtColor(right_src, right, CV_BGR2GRAY);
+ }
+ d_left.upload(left);
+ d_right.upload(right);
+ cout << "image_channels: " << left.channels() << endl;
+ imshow("left", left);
+ imshow("right", right);
+ break;
+ case 'm': case 'M':
+ switch (p.method)
+ {
+ case Params::BM:
+ p.method = Params::BP;
+ break;
+ case Params::BP:
+ p.method = Params::CSBP;
+ break;
+ case Params::CSBP:
+ p.method = Params::BM;
+ break;
+ }
+ cout << "method: " << p.method_str() << endl;
+ break;
+ case 's': case 'S':
+ if (p.method == Params::BM)
+ {
+ switch (bm.preset)
+ {
+ case StereoBM_OCL::BASIC_PRESET:
+ bm.preset = StereoBM_OCL::PREFILTER_XSOBEL;
+ break;
+ case StereoBM_OCL::PREFILTER_XSOBEL:
+ bm.preset = StereoBM_OCL::BASIC_PRESET;
+ break;
+ }
+ cout << "prefilter_sobel: " << bm.preset << endl;
+ }
+ break;
+ case '1':
+ p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8;
+ cout << "ndisp: " << p.ndisp << endl;
+ bm.ndisp = p.ndisp;
+ bp.ndisp = p.ndisp;
+ csbp.ndisp = p.ndisp;
+ break;
+ case 'q': case 'Q':
+ p.ndisp = max(p.ndisp - 8, 1);
+ cout << "ndisp: " << p.ndisp << endl;
+ bm.ndisp = p.ndisp;
+ bp.ndisp = p.ndisp;
+ csbp.ndisp = p.ndisp;
+ break;
+ case '2':
+ if (p.method == Params::BM)
+ {
+ bm.winSize = min(bm.winSize + 1, 51);
+ cout << "win_size: " << bm.winSize << endl;
+ }
+ break;
+ case 'w': case 'W':
+ if (p.method == Params::BM)
+ {
+ bm.winSize = max(bm.winSize - 1, 2);
+ cout << "win_size: " << bm.winSize << endl;
+ }
+ break;
+ case '3':
+ if (p.method == Params::BP)
+ {
+ bp.iters += 1;
+ cout << "iter_count: " << bp.iters << endl;
+ }
+ else if (p.method == Params::CSBP)
+ {
+ csbp.iters += 1;
+ cout << "iter_count: " << csbp.iters << endl;
+ }
+ break;
+ case 'e': case 'E':
+ if (p.method == Params::BP)
+ {
+ bp.iters = max(bp.iters - 1, 1);
+ cout << "iter_count: " << bp.iters << endl;
+ }
+ else if (p.method == Params::CSBP)
+ {
+ csbp.iters = max(csbp.iters - 1, 1);
+ cout << "iter_count: " << csbp.iters << endl;
+ }
+ break;
+ case '4':
+ if (p.method == Params::BP)
+ {
+ bp.levels += 1;
+ cout << "level_count: " << bp.levels << endl;
+ }
+ else if (p.method == Params::CSBP)
+ {
+ csbp.levels += 1;
+ cout << "level_count: " << csbp.levels << endl;
+ }
+ break;
+ case 'r': case 'R':
+ if (p.method == Params::BP)
+ {
+ bp.levels = max(bp.levels - 1, 1);
+ cout << "level_count: " << bp.levels << endl;
+ }
+ else if (p.method == Params::CSBP)
+ {
+ csbp.levels = max(csbp.levels - 1, 1);
+ cout << "level_count: " << csbp.levels << endl;
+ }
+ break;
+ }
+}
+
+
#include <iostream>
#include <stdio.h>
#include "opencv2/core/core.hpp"
-#include "opencv2/features2d/features2d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/ocl/ocl.hpp"
-#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/nonfree/ocl.hpp"
#include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/nonfree/nonfree.hpp"
-using namespace std;
using namespace cv;
using namespace cv::ocl;
-//#define USE_CPU_DESCRIPTOR // use cpu descriptor extractor until ocl descriptor extractor is fixed
-//#define USE_CPU_BFMATCHER
+const int LOOP_NUM = 10;
+const int GOOD_PTS_MAX = 50;
+const float GOOD_PORTION = 0.15f;
+
+namespace
+{
void help();
void help()
{
- cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << endl;
- cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2>" << endl;
+ std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl;
+ std::cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2> [-c]" << std::endl;
+ std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl;
}
+int64 work_begin = 0;
+int64 work_end = 0;
-////////////////////////////////////////////////////
-// This program demonstrates the usage of SURF_OCL.
-// use cpu findHomography interface to calculate the transformation matrix
-int main(int argc, char* argv[])
+void workBegin()
+{
+ work_begin = getTickCount();
+}
+void workEnd()
{
- if (argc != 5 && argc != 1)
- {
- help();
- return -1;
- }
- vector<cv::ocl::Info> info;
- if(!cv::ocl::getDevice(info))
- {
- cout << "Error: Did not find a valid OpenCL device!" << endl;
- return -1;
- }
- Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
- oclMat img1, img2;
- if(argc != 5)
+ work_end = getTickCount() - work_begin;
+}
+double getTime(){
+ return work_end /((double)cvGetTickFrequency() * 1000.);
+}
+
+template<class KPDetector>
+struct SURFDetector
+{
+ KPDetector surf;
+ SURFDetector(double hessian = 800.0)
+ :surf(hessian)
{
- cpu_img1 = imread("o.png");
- cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
- img1 = cpu_img1_grey;
- CV_Assert(!img1.empty());
-
- cpu_img2 = imread("r2.png");
- cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
- img2 = cpu_img2_grey;
}
- else
+ template<class T>
+ void operator()(const T& in, const T& mask, vector<cv::KeyPoint>& pts, T& descriptors, bool useProvided = false)
{
- for (int i = 1; i < argc; ++i)
- {
- if (string(argv[i]) == "--left")
- {
- cpu_img1 = imread(argv[++i]);
- cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
- img1 = cpu_img1_grey;
- CV_Assert(!img1.empty());
- }
- else if (string(argv[i]) == "--right")
- {
- cpu_img2 = imread(argv[++i]);
- cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
- img2 = cpu_img2_grey;
- }
- else if (string(argv[i]) == "--help")
- {
- help();
- return -1;
- }
- }
+ surf(in, mask, pts, descriptors, useProvided);
}
+};
- SURF_OCL surf;
- //surf.hessianThreshold = 400.f;
- //surf.extended = false;
-
- // detecting keypoints & computing descriptors
- oclMat keypoints1GPU, keypoints2GPU;
- oclMat descriptors1GPU, descriptors2GPU;
-
- // downloading results
- vector<KeyPoint> keypoints1, keypoints2;
- vector<DMatch> matches;
-
-
-#ifndef USE_CPU_DESCRIPTOR
- surf(img1, oclMat(), keypoints1GPU, descriptors1GPU);
- surf(img2, oclMat(), keypoints2GPU, descriptors2GPU);
-
- surf.downloadKeypoints(keypoints1GPU, keypoints1);
- surf.downloadKeypoints(keypoints2GPU, keypoints2);
-
-
-#ifdef USE_CPU_BFMATCHER
- //BFMatcher
- BFMatcher matcher(cv::NORM_L2);
- matcher.match(Mat(descriptors1GPU), Mat(descriptors2GPU), matches);
-#else
- BruteForceMatcher_OCL_base matcher(BruteForceMatcher_OCL_base::L2Dist);
- matcher.match(descriptors1GPU, descriptors2GPU, matches);
-#endif
-
-#else
- surf(img1, oclMat(), keypoints1GPU);
- surf(img2, oclMat(), keypoints2GPU);
- surf.downloadKeypoints(keypoints1GPU, keypoints1);
- surf.downloadKeypoints(keypoints2GPU, keypoints2);
-
- // use SURF_OCL to detect keypoints and use SURF to extract descriptors
- SURF surf_cpu;
- Mat descriptors1, descriptors2;
- surf_cpu(cpu_img1, Mat(), keypoints1, descriptors1, true);
- surf_cpu(cpu_img2, Mat(), keypoints2, descriptors2, true);
- matcher.match(descriptors1, descriptors2, matches);
-#endif
- cout << "OCL: FOUND " << keypoints1GPU.cols << " keypoints on first image" << endl;
- cout << "OCL: FOUND " << keypoints2GPU.cols << " keypoints on second image" << endl;
-
- double max_dist = 0; double min_dist = 100;
- //-- Quick calculation of max and min distances between keypoints
- for( size_t i = 0; i < keypoints1.size(); i++ )
+template<class KPMatcher>
+struct SURFMatcher
+{
+ KPMatcher matcher;
+ template<class T>
+ void match(const T& in1, const T& in2, vector<cv::DMatch>& matches)
{
- double dist = matches[i].distance;
- if( dist < min_dist ) min_dist = dist;
- if( dist > max_dist ) max_dist = dist;
+ matcher.match(in1, in2, matches);
}
+};
- printf("-- Max dist : %f \n", max_dist );
- printf("-- Min dist : %f \n", min_dist );
-
- //-- Draw only "good" matches (i.e. whose distance is less than 2.5*min_dist )
+Mat drawGoodMatches(
+ const Mat& cpu_img1,
+ const Mat& cpu_img2,
+ const vector<KeyPoint>& keypoints1,
+ const vector<KeyPoint>& keypoints2,
+ vector<DMatch>& matches,
+ vector<Point2f>& scene_corners_
+ )
+{
+ //-- Sort matches and preserve top 10% matches
+ std::sort(matches.begin(), matches.end());
std::vector< DMatch > good_matches;
+ double minDist = matches.front().distance,
+ maxDist = matches.back().distance;
- for( size_t i = 0; i < keypoints1.size(); i++ )
+ const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION));
+ for( int i = 0; i < ptsPairs; i++ )
{
- if( matches[i].distance < 3*min_dist )
- {
- good_matches.push_back( matches[i]);
- }
+ good_matches.push_back( matches[i] );
}
+ std::cout << "\nMax distance: " << maxDist << std::endl;
+ std::cout << "Min distance: " << minDist << std::endl;
+
+ std::cout << "Calculating homography using " << ptsPairs << " point pairs." << std::endl;
// drawing the results
Mat img_matches;
drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2,
good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
- vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
+ vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
//-- Localize the object
std::vector<Point2f> obj;
obj.push_back( keypoints1[ good_matches[i].queryIdx ].pt );
scene.push_back( keypoints2[ good_matches[i].trainIdx ].pt );
}
- Mat H = findHomography( obj, scene, CV_RANSAC );
-
//-- Get the corners from the image_1 ( the object to be "detected" )
std::vector<Point2f> obj_corners(4);
obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( cpu_img1.cols, 0 );
obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = cvPoint( 0, cpu_img1.rows );
std::vector<Point2f> scene_corners(4);
-
+
+ Mat H = findHomography( obj, scene, CV_RANSAC );
perspectiveTransform( obj_corners, scene_corners, H);
+ scene_corners_ = scene_corners;
+
//-- Draw lines between the corners (the mapped object in the scene - image_2 )
- line( img_matches, scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
- line( img_matches, scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
- line( img_matches, scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
- line( img_matches, scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
+ line( img_matches,
+ scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
+ Scalar( 0, 255, 0), 2, CV_AA );
+ line( img_matches,
+ scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
+ Scalar( 0, 255, 0), 2, CV_AA );
+ line( img_matches,
+ scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
+ Scalar( 0, 255, 0), 2, CV_AA );
+ line( img_matches,
+ scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
+ Scalar( 0, 255, 0), 2, CV_AA );
+ return img_matches;
+}
+
+}
+////////////////////////////////////////////////////
+// This program demonstrates the usage of SURF_OCL.
+// use cpu findHomography interface to calculate the transformation matrix
+int main(int argc, char* argv[])
+{
+ vector<cv::ocl::Info> info;
+ if(cv::ocl::getDevice(info) == 0)
+ {
+ std::cout << "Error: Did not find a valid OpenCL device!" << std::endl;
+ return -1;
+ }
+ ocl::setDevice(info[0]);
+
+ Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
+ oclMat img1, img2;
+ bool useCPU = false;
+ bool useGPU = false;
+ bool useALL = false;
+
+ for (int i = 1; i < argc; ++i)
+ {
+ if (string(argv[i]) == "--left")
+ {
+ cpu_img1 = imread(argv[++i]);
+ CV_Assert(!cpu_img1.empty());
+ cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
+ img1 = cpu_img1_grey;
+ }
+ else if (string(argv[i]) == "--right")
+ {
+ cpu_img2 = imread(argv[++i]);
+ CV_Assert(!cpu_img2.empty());
+ cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
+ img2 = cpu_img2_grey;
+ }
+ else if (string(argv[i]) == "-c")
+ {
+ useCPU = true;
+ useGPU = false;
+ useALL = false;
+ }else if(string(argv[i]) == "-g")
+ {
+ useGPU = true;
+ useCPU = false;
+ useALL = false;
+ }else if(string(argv[i]) == "-a")
+ {
+ useALL = true;
+ useCPU = false;
+ useGPU = false;
+ }
+ else if (string(argv[i]) == "--help")
+ {
+ help();
+ return -1;
+ }
+ }
+ if(!useCPU)
+ {
+ std::cout
+ << "Device name:"
+ << info[0].DeviceName[0]
+ << std::endl;
+ }
+ double surf_time = 0.;
+
+ //declare input/output
+ vector<KeyPoint> keypoints1, keypoints2;
+ vector<DMatch> matches;
+
+ vector<KeyPoint> gpu_keypoints1;
+ vector<KeyPoint> gpu_keypoints2;
+ vector<DMatch> gpu_matches;
+
+ Mat descriptors1CPU, descriptors2CPU;
+
+ oclMat keypoints1GPU, keypoints2GPU;
+ oclMat descriptors1GPU, descriptors2GPU;
+
+ //instantiate detectors/matchers
+ SURFDetector<SURF> cpp_surf;
+ SURFDetector<SURF_OCL> ocl_surf;
+
+ SURFMatcher<BFMatcher> cpp_matcher;
+ SURFMatcher<BFMatcher_OCL> ocl_matcher;
+
+ //-- start of timing section
+ if (useCPU)
+ {
+ for (int i = 0; i <= LOOP_NUM; i++)
+ {
+ if(i == 1) workBegin();
+ cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU);
+ cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU);
+ cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches);
+ }
+ workEnd();
+ std::cout << "CPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+ std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+ surf_time = getTime();
+ std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+ }
+ else if(useGPU)
+ {
+ for (int i = 0; i <= LOOP_NUM; i++)
+ {
+ if(i == 1) workBegin();
+ ocl_surf(img1, oclMat(), keypoints1, descriptors1GPU);
+ ocl_surf(img2, oclMat(), keypoints2, descriptors2GPU);
+ ocl_matcher.match(descriptors1GPU, descriptors2GPU, matches);
+ }
+ workEnd();
+ std::cout << "OCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+ std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+ surf_time = getTime();
+ std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+ }else
+ {
+ //cpu runs
+ for (int i = 0; i <= LOOP_NUM; i++)
+ {
+ if(i == 1) workBegin();
+ cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU);
+ cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU);
+ cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches);
+ }
+ workEnd();
+ std::cout << "\nCPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+ std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+ surf_time = getTime();
+ std::cout << "(CPP)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl;
+
+ //gpu runs
+ for (int i = 0; i <= LOOP_NUM; i++)
+ {
+ if(i == 1) workBegin();
+ ocl_surf(img1, oclMat(), gpu_keypoints1, descriptors1GPU);
+ ocl_surf(img2, oclMat(), gpu_keypoints2, descriptors2GPU);
+ ocl_matcher.match(descriptors1GPU, descriptors2GPU, gpu_matches);
+ }
+ workEnd();
+ std::cout << "\nOCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+ std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+ surf_time = getTime();
+ std::cout << "(OCL)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+
+ }
+
+ //--------------------------------------------------------------------------
+ std::vector<Point2f> cpu_corner;
+ Mat img_matches = drawGoodMatches(cpu_img1, cpu_img2, keypoints1, keypoints2, matches, cpu_corner);
+
+ std::vector<Point2f> gpu_corner;
+ Mat ocl_img_matches;
+ if(useALL || (!useCPU&&!useGPU))
+ {
+ ocl_img_matches = drawGoodMatches(cpu_img1, cpu_img2, gpu_keypoints1, gpu_keypoints2, gpu_matches, gpu_corner);
+
+ //check accuracy
+ std::cout<<"\nCheck accuracy:\n";
+
+ if(cpu_corner.size()!=gpu_corner.size())
+ std::cout<<"Failed\n";
+ else
+ {
+ bool result = false;
+ for(size_t i = 0; i < cpu_corner.size(); i++)
+ {
+ if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10)
+ ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
+ {
+ std::cout<<"Failed\n";
+ result = false;
+ break;
+ }
+ result = true;
+ }
+ if(result)
+ std::cout<<"Passed\n";
+ }
+ }
//-- Show detected matches
- namedWindow("ocl surf matches", 0);
- imshow("ocl surf matches", img_matches);
- waitKey(0);
+ if (useCPU)
+ {
+ namedWindow("cpu surf matches", 0);
+ imshow("cpu surf matches", img_matches);
+ }
+ else if(useGPU)
+ {
+ namedWindow("ocl surf matches", 0);
+ imshow("ocl surf matches", img_matches);
+ }else
+ {
+ namedWindow("cpu surf matches", 0);
+ imshow("cpu surf matches", img_matches);
+ namedWindow("ocl surf matches", 0);
+ imshow("ocl surf matches", ocl_img_matches);
+ }
+ waitKey(0);
return 0;
}
--- /dev/null
+#!/usr/bin/env python
+'''
+===============================================================================
+Interactive Image Segmentation using GrabCut algorithm.
+
+This sample shows interactive image segmentation using the GrabCut algorithm.
+
+USAGE :
+ python grabcut.py <filename>
+
+README FIRST:
+ Two windows will show up, one for input and one for output.
+
+ At first, in input window, draw a rectangle around the object using
+mouse right button. Then press 'n' to segment the object (once or a few times).
+For any finer touch-ups, you can press any of the keys below and draw lines on
+the areas you want. Then again press 'n' to update the output.
+
+Key '0' - To select areas of sure background
+Key '1' - To select areas of sure foreground
+Key '2' - To select areas of probable background
+Key '3' - To select areas of probable foreground
+
+Key 'n' - To update the segmentation
+Key 'r' - To reset the setup
+Key 's' - To save the results
+===============================================================================
+'''
+
+import numpy as np
+import cv2
+import sys
+
+BLUE = [255,0,0] # rectangle color
+RED = [0,0,255] # PR BG
+GREEN = [0,255,0] # PR FG
+BLACK = [0,0,0] # sure BG
+WHITE = [255,255,255] # sure FG
+
+DRAW_BG = {'color' : BLACK, 'val' : 0}
+DRAW_FG = {'color' : WHITE, 'val' : 1}
+DRAW_PR_FG = {'color' : GREEN, 'val' : 3}
+DRAW_PR_BG = {'color' : RED, 'val' : 2}
+
+# setting up flags
+rect = (0,0,1,1)
+drawing = False # flag for drawing curves
+rectangle = False # flag for drawing rect
+rect_over = False # flag to check if rect drawn
+rect_or_mask = 100 # flag for selecting rect or mask mode
+value = DRAW_FG # drawing initialized to FG
+thickness = 3 # brush thickness
+
+def onmouse(event,x,y,flags,param):
+ global img,img2,drawing,value,mask,rectangle,rect,rect_or_mask,ix,iy,rect_over
+
+ # Draw Rectangle
+ if event == cv2.EVENT_RBUTTONDOWN:
+ rectangle = True
+ ix,iy = x,y
+
+ elif event == cv2.EVENT_MOUSEMOVE:
+ if rectangle == True:
+ img = img2.copy()
+ cv2.rectangle(img,(ix,iy),(x,y),BLUE,2)
+ rect = (ix,iy,abs(ix-x),abs(iy-y))
+ rect_or_mask = 0
+
+ elif event == cv2.EVENT_RBUTTONUP:
+ rectangle = False
+ rect_over = True
+ cv2.rectangle(img,(ix,iy),(x,y),BLUE,2)
+ rect = (ix,iy,abs(ix-x),abs(iy-y))
+ rect_or_mask = 0
+ print " Now press the key 'n' a few times until no further change \n"
+
+ # draw touchup curves
+
+ if event == cv2.EVENT_LBUTTONDOWN:
+ if rect_over == False:
+ print "first draw rectangle \n"
+ else:
+ drawing = True
+ cv2.circle(img,(x,y),thickness,value['color'],-1)
+ cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+ elif event == cv2.EVENT_MOUSEMOVE:
+ if drawing == True:
+ cv2.circle(img,(x,y),thickness,value['color'],-1)
+ cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+ elif event == cv2.EVENT_LBUTTONUP:
+ if drawing == True:
+ drawing = False
+ cv2.circle(img,(x,y),thickness,value['color'],-1)
+ cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+# print documentation
+print __doc__
+
+# Loading images
+if len(sys.argv) == 2:
+ filename = sys.argv[1] # for drawing purposes
+else:
+ print "No input image given, so loading default image, lena.jpg \n"
+ print "Correct Usage : python grabcut.py <filename> \n"
+ filename = '../cpp/lena.jpg'
+
+img = cv2.imread(filename)
+img2 = img.copy() # a copy of original image
+mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG
+output = np.zeros(img.shape,np.uint8) # output image to be shown
+
+# input and output windows
+cv2.namedWindow('output')
+cv2.namedWindow('input')
+cv2.setMouseCallback('input',onmouse)
+cv2.moveWindow('input',img.shape[1]+10,90)
+
+print " Instructions : \n"
+print " Draw a rectangle around the object using right mouse button \n"
+
+while(1):
+
+ cv2.imshow('output',output)
+ cv2.imshow('input',img)
+ k = 0xFF & cv2.waitKey(1)
+
+ # key bindings
+ if k == 27: # esc to exit
+ break
+ elif k == ord('0'): # BG drawing
+ print " mark background regions with left mouse button \n"
+ value = DRAW_BG
+ elif k == ord('1'): # FG drawing
+ print " mark foreground regions with left mouse button \n"
+ value = DRAW_FG
+ elif k == ord('2'): # PR_BG drawing
+ value = DRAW_PR_BG
+ elif k == ord('3'): # PR_FG drawing
+ value = DRAW_PR_FG
+ elif k == ord('s'): # save image
+ bar = np.zeros((img.shape[0],5,3),np.uint8)
+ res = np.hstack((img2,bar,img,bar,output))
+ cv2.imwrite('grabcut_output.png',res)
+ print " Result saved as image \n"
+ elif k == ord('r'): # reset everything
+ print "resetting \n"
+ rect = (0,0,1,1)
+ drawing = False
+ rectangle = False
+ rect_or_mask = 100
+ rect_over = False
+ value = DRAW_FG
+ img = img2.copy()
+ mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG
+ output = np.zeros(img.shape,np.uint8) # output image to be shown
+ elif k == ord('n'): # segment the image
+ print """ For finer touchups, mark foreground and background after pressing keys 0-3
+ and again press 'n' \n"""
+ if (rect_or_mask == 0): # grabcut with rect
+ bgdmodel = np.zeros((1,65),np.float64)
+ fgdmodel = np.zeros((1,65),np.float64)
+ cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_RECT)
+ rect_or_mask = 1
+ elif rect_or_mask == 1: # grabcut with mask
+ bgdmodel = np.zeros((1,65),np.float64)
+ fgdmodel = np.zeros((1,65),np.float64)
+ cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_MASK)
+
+ mask2 = np.where((mask==1) + (mask==3),255,0).astype('uint8')
+ output = cv2.bitwise_and(img2,img2,mask=mask2)
+
+cv2.destroyAllWindows()