From: Maksim Shabunin Date: Tue, 15 Dec 2015 12:55:43 +0000 (+0300) Subject: HAL moved back to core X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~2073^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=84f37d352f61f7a53a5eeacd781b3d1228411e64;p=platform%2Fupstream%2Fopencv.git HAL moved back to core --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 51e5ae9..16619fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -590,10 +590,30 @@ include(cmake/OpenCVFindMatlab.cmake) include(cmake/OpenCVDetectVTK.cmake) +# -- Custom HAL replacement -- +set(_includes "") +# assuming OPENCV_HAL_HEADERS and OPENCV_HAL_LIBS are lists of files: +# option example: -DOPENCV_HAL_HEADERS="/header1.h;/header2.h" if (OPENCV_HAL_HEADERS AND OPENCV_HAL_LIBS) - get_filename_component(OPENCV_HAL_HEADERS "${OPENCV_HAL_HEADERS}" ABSOLUTE) - get_filename_component(OPENCV_HAL_LIBS "${OPENCV_HAL_LIBS}" ABSOLUTE) + foreach (h ${OPENCV_HAL_HEADERS}) + get_filename_component(h "${h}" ABSOLUTE) + set(_includes "${_includes}\n#include \"${h}\"") + endforeach() + foreach (l ${OPENCV_HAL_LIBS}) + get_filename_component(l "${l}" ABSOLUTE) + set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${l}) + # TODO: install? 
+ # ocv_install_target(${l} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + endforeach() +else() + set(_includes "// using default HAL") + unset(OPENCV_HAL_HEADERS CACHE) + unset(OPENCV_HAL_LIBS CACHE) endif() +set(OPENCV_HAL_HEADERS "${OPENCV_HAL_HEADERS}" CACHE STRING "Headers with custom HAL implementation") +set(OPENCV_HAL_LIBS "${OPENCV_HAL_LIBS}" CACHE STRING "Libraries with custom HAL implementation") +configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/custom_hal.hpp.in" "${CMAKE_BINARY_DIR}/custom_hal.hpp" @ONLY) +unset(_includes) # ---------------------------------------------------------------------------- # Add CUDA libraries (needed for apps/tools, samples) @@ -1091,6 +1111,7 @@ endif(DEFINED WITH_VA_INTEL) status(" Use Eigen:" HAVE_EIGEN THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO) status(" Use Cuda:" HAVE_CUDA THEN "YES (ver ${CUDA_VERSION_STRING})" ELSE NO) status(" Use OpenCL:" HAVE_OPENCL THEN YES ELSE NO) +status(" Use custom HAL:" OPENCV_HAL_HEADERS AND OPENCV_HAL_LIBS THEN "YES (${OPENCV_HAL_HEADERS}; ${OPENCV_HAL_LIBS})" ELSE "NO") if(HAVE_CUDA) status("") diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index d6cdd04..5573ed4 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -655,6 +655,8 @@ macro(ocv_glob_module_sources) "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/*.h" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/hal/*.hpp" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/hal/*.h" ) file(GLOB lib_hdrs_detail "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/detail/*.hpp" diff --git a/cmake/templates/custom_hal.hpp.in b/cmake/templates/custom_hal.hpp.in index b298a03..f1c6515 100644 --- a/cmake/templates/custom_hal.hpp.in +++ b/cmake/templates/custom_hal.hpp.in @@ -1,6 +1,6 @@ #ifndef 
_CUSTOM_HAL_INCLUDED_ #define _CUSTOM_HAL_INCLUDED_ -@OPENCV_HAL_HEADERS_INCLUDES@ +@_includes@ #endif diff --git a/cmake/templates/opencv_abi.xml.in b/cmake/templates/opencv_abi.xml.in index 292d9b4..43a53c3 100644 --- a/cmake/templates/opencv_abi.xml.in +++ b/cmake/templates/opencv_abi.xml.in @@ -21,7 +21,7 @@ - opencv2/hal/intrin* + opencv2/core/hal/intrin* opencv2/core/cuda* opencv2/core/private* opencv/cxeigen.hpp diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 192081e..c4da726 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -243,6 +243,7 @@ PREDEFINED = __cplusplus=1 \ CV_NORETURN= \ CV_DEFAULT(x)=" = x" \ CV_NEON=1 \ + CV_SSE2=1 \ FLANN_DEPRECATED= EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES diff --git a/modules/calib3d/src/stereosgbm.cpp b/modules/calib3d/src/stereosgbm.cpp index 15b35fc..1c085f9 100644 --- a/modules/calib3d/src/stereosgbm.cpp +++ b/modules/calib3d/src/stereosgbm.cpp @@ -52,7 +52,7 @@ #include "precomp.hpp" #include -#include "opencv2/hal/intrin.hpp" +#include "opencv2/core/hal/intrin.hpp" namespace cv { diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 1ac4c9e..44085d4 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -1,6 +1,5 @@ set(the_description "The Core Functionality") ocv_add_module(core - opencv_hal PRIVATE_REQUIRED ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" "${VA_LIBRARIES}" OPTIONAL opencv_cudev WRAP java python) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 26f6205..c41868b 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -72,6 +72,7 @@ @defgroup core_cluster Clustering @defgroup core_utils Utility and system functions and macros @{ + @defgroup core_utils_sse SSE utilities @defgroup core_utils_neon NEON utilities @} @defgroup core_opengl OpenGL interoperability @@ -80,6 +81,16 @@ @defgroup core_directx DirectX interoperability @defgroup core_eigen Eigen support 
@defgroup core_opencl OpenCL support + @defgroup core_va_intel Intel VA-API/OpenCL (CL-VA) interoperability + @defgroup core_hal Hardware Acceleration Layer + @{ + @defgroup core_hal_functions Functions + @defgroup core_hal_interface Interface + @defgroup core_hal_intrin Universal intrinsics + @{ + @defgroup core_hal_intrin_impl Private implementation helpers + @} + @} @} */ diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index a8a0b23..ed633f5 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -50,10 +50,10 @@ #endif #include +#include #include "opencv2/core/cvdef.h" #include "opencv2/core/cvstd.hpp" -#include "opencv2/hal.hpp" namespace cv { @@ -679,8 +679,11 @@ CV_EXPORTS void setUseIPP(bool flag); //! @} core_utils + + + } // cv -#include "opencv2/hal/neon_utils.hpp" +#include "opencv2/core/neon_utils.hpp" #endif //__OPENCV_CORE_BASE_HPP__ diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 9947bdf..af2abfb 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -45,6 +45,9 @@ #ifndef __OPENCV_CORE_CVDEF_H__ #define __OPENCV_CORE_CVDEF_H__ +//! @addtogroup core_utils +//! 
@{ + #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 # define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ #endif @@ -56,7 +59,265 @@ #undef abs #undef Complex -#include "opencv2/hal/defs.h" +#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 +# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ +#endif + +#include +#include "opencv2/core/hal/interface.h" + +#if defined __ICL +# define CV_ICC __ICL +#elif defined __ICC +# define CV_ICC __ICC +#elif defined __ECL +# define CV_ICC __ECL +#elif defined __ECC +# define CV_ICC __ECC +#elif defined __INTEL_COMPILER +# define CV_ICC __INTEL_COMPILER +#endif + +#ifndef CV_INLINE +# if defined __cplusplus +# define CV_INLINE static inline +# elif defined _MSC_VER +# define CV_INLINE __inline +# else +# define CV_INLINE static +# endif +#endif + +#if defined CV_ICC && !defined CV_ENABLE_UNROLLED +# define CV_ENABLE_UNROLLED 0 +#else +# define CV_ENABLE_UNROLLED 1 +#endif + +#ifdef __GNUC__ +# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x))) +#elif defined _MSC_VER +# define CV_DECL_ALIGNED(x) __declspec(align(x)) +#else +# define CV_DECL_ALIGNED(x) +#endif + +/* CPU features and intrinsics support */ +#define CV_CPU_NONE 0 +#define CV_CPU_MMX 1 +#define CV_CPU_SSE 2 +#define CV_CPU_SSE2 3 +#define CV_CPU_SSE3 4 +#define CV_CPU_SSSE3 5 +#define CV_CPU_SSE4_1 6 +#define CV_CPU_SSE4_2 7 +#define CV_CPU_POPCNT 8 + +#define CV_CPU_AVX 10 +#define CV_CPU_AVX2 11 +#define CV_CPU_FMA3 12 + +#define CV_CPU_AVX_512F 13 +#define CV_CPU_AVX_512BW 14 +#define CV_CPU_AVX_512CD 15 +#define CV_CPU_AVX_512DQ 16 +#define CV_CPU_AVX_512ER 17 +#define CV_CPU_AVX_512IFMA512 18 +#define CV_CPU_AVX_512PF 19 +#define CV_CPU_AVX_512VBMI 20 +#define CV_CPU_AVX_512VL 21 + +#define CV_CPU_NEON 100 + +// when adding to this list remember to update the following enum +#define CV_HARDWARE_MAX_FEATURE 255 + +/** @brief Available CPU 
features. +*/ +enum CpuFeatures { + CPU_MMX = 1, + CPU_SSE = 2, + CPU_SSE2 = 3, + CPU_SSE3 = 4, + CPU_SSSE3 = 5, + CPU_SSE4_1 = 6, + CPU_SSE4_2 = 7, + CPU_POPCNT = 8, + + CPU_AVX = 10, + CPU_AVX2 = 11, + CPU_FMA3 = 12, + + CPU_AVX_512F = 13, + CPU_AVX_512BW = 14, + CPU_AVX_512CD = 15, + CPU_AVX_512DQ = 16, + CPU_AVX_512ER = 17, + CPU_AVX_512IFMA512 = 18, + CPU_AVX_512PF = 19, + CPU_AVX_512VBMI = 20, + CPU_AVX_512VL = 21, + + CPU_NEON = 100 +}; + +// do not include SSE/AVX/NEON headers for NVCC compiler +#ifndef __CUDACC__ + +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) +# include +# define CV_MMX 1 +# define CV_SSE 1 +# define CV_SSE2 1 +# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) +# include +# define CV_SSE3 1 +# endif +# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) +# include +# define CV_SSSE3 1 +# endif +# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500) +# include +# define CV_SSE4_1 1 +# endif +# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) +# include +# define CV_SSE4_2 1 +# endif +# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500) +# ifdef _MSC_VER +# include +# else +# include +# endif +# define CV_POPCNT 1 +# endif +# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0) +// MS Visual Studio 2010 (2012?) 
has no macro pre-defined to identify the use of /arch:AVX +// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 +# include +# define CV_AVX 1 +# if defined(_XCR_XFEATURE_ENABLED_MASK) +# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) +# else +# define __xgetbv() 0 +# endif +# endif +# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0) +# include +# define CV_AVX2 1 +# if defined __FMA__ +# define CV_FMA3 1 +# endif +# endif +#endif + +#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) +# include +# include "arm_neon.h" +# define CV_NEON 1 +# define CPU_HAS_NEON_FEATURE (true) +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +# include +# define CV_NEON 1 +#endif + +#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ +# define CV_VFP 1 +#endif + +#endif // __CUDACC__ + +#ifndef CV_POPCNT +#define CV_POPCNT 0 +#endif +#ifndef CV_MMX +# define CV_MMX 0 +#endif +#ifndef CV_SSE +# define CV_SSE 0 +#endif +#ifndef CV_SSE2 +# define CV_SSE2 0 +#endif +#ifndef CV_SSE3 +# define CV_SSE3 0 +#endif +#ifndef CV_SSSE3 +# define CV_SSSE3 0 +#endif +#ifndef CV_SSE4_1 +# define CV_SSE4_1 0 +#endif +#ifndef CV_SSE4_2 +# define CV_SSE4_2 0 +#endif +#ifndef CV_AVX +# define CV_AVX 0 +#endif +#ifndef CV_AVX2 +# define CV_AVX2 0 +#endif +#ifndef CV_FMA3 +# define CV_FMA3 0 +#endif +#ifndef CV_AVX_512F +# define CV_AVX_512F 0 +#endif +#ifndef CV_AVX_512BW +# define CV_AVX_512BW 0 +#endif +#ifndef CV_AVX_512CD +# define CV_AVX_512CD 0 +#endif +#ifndef CV_AVX_512DQ +# define CV_AVX_512DQ 0 +#endif +#ifndef CV_AVX_512ER +# define CV_AVX_512ER 0 +#endif +#ifndef CV_AVX_512IFMA512 +# define CV_AVX_512IFMA512 0 +#endif +#ifndef CV_AVX_512PF +# define CV_AVX_512PF 0 +#endif +#ifndef CV_AVX_512VBMI +# define CV_AVX_512VBMI 0 +#endif 
+#ifndef CV_AVX_512VL +# define CV_AVX_512VL 0 +#endif + +#ifndef CV_NEON +# define CV_NEON 0 +#endif + +#ifndef CV_VFP +# define CV_VFP 0 +#endif + +/* fundamental constants */ +#define CV_PI 3.1415926535897932384626433832795 +#define CV_2PI 6.283185307179586476925286766559 +#define CV_LOG2 0.69314718055994530941723212145818 + +typedef union Cv32suf +{ + int i; + unsigned u; + float f; +} +Cv32suf; + +typedef union Cv64suf +{ + int64 i; + uint64 u; + double f; +} +Cv64suf; #define OPENCV_ABI_COMPATIBILITY 300 @@ -169,12 +430,12 @@ #define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT) #define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG) -/* Size of each channel item, +/** Size of each channel item, 0x124489 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */ #define CV_ELEM_SIZE1(type) \ ((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15) -/* 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */ +/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */ #define CV_ELEM_SIZE(type) \ (CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3)) @@ -249,4 +510,6 @@ # endif #endif +//! @} + #endif // __OPENCV_CORE_CVDEF_H__ diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp new file mode 100644 index 0000000..b8b241b --- /dev/null +++ b/modules/core/include/opencv2/core/fast_math.hpp @@ -0,0 +1,302 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_CORE_FAST_MATH_HPP__ +#define __OPENCV_CORE_FAST_MATH_HPP__ + +#include "opencv2/core/cvdef.h" + +//! @addtogroup core_utils +//! 
@{ + +/****************************************************************************************\ +* fast math * +\****************************************************************************************/ + +#if defined __BORLANDC__ +# include +#elif defined __cplusplus +# include +#else +# include +#endif + +#ifdef HAVE_TEGRA_OPTIMIZATION +# include "tegra_round.hpp" +#endif + +#if CV_VFP + // 1. general scheme + #define ARM_ROUND(_value, _asm_string) \ + int res; \ + float temp; \ + asm(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \ + return res + // 2. version for double + #ifdef __clang__ + #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]") + #else + #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]") + #endif + // 3. version for float + #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") +#endif // CV_VFP + +/** @brief Rounds floating-point number to the nearest integer + + @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined. 
+ */ +CV_INLINE int +cvRound( double value ) +{ +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) + __m128d t = _mm_set_sd( value ); + return _mm_cvtsd_si32(t); +#elif defined _MSC_VER && defined _M_IX86 + int t; + __asm + { + fld value; + fistp t; + } + return t; +#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ + defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION + TEGRA_ROUND_DBL(value); +#elif defined CV_ICC || defined __GNUC__ +# if CV_VFP + ARM_ROUND_DBL(value); +# else + return (int)lrint(value); +# endif +#else + /* it's ok if round does not comply with IEEE754 standard; + the tests should allow +/-1 difference when the tested functions use round */ + return (int)(value + (value >= 0 ? 0.5 : -0.5)); +#endif +} + + +/** @brief Rounds floating-point number to the nearest integer not larger than the original. + + The function computes an integer i such that: + \f[i \le \texttt{value} < i+1\f] + @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined. + */ +CV_INLINE int cvFloor( double value ) +{ +#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) + __m128d t = _mm_set_sd( value ); + int i = _mm_cvtsd_si32(t); + return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i))); +#elif defined __GNUC__ + int i = (int)value; + return i - (i > value); +#else + int i = cvRound(value); + float diff = (float)(value - i); + return i - (diff < 0); +#endif +} + +/** @brief Rounds floating-point number to the nearest integer not smaller than the original. + + The function computes an integer i such that: + \f[i-1 < \texttt{value} \le i\f] + @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined.
+ */ +CV_INLINE int cvCeil( double value ) +{ +#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) + __m128d t = _mm_set_sd( value ); + int i = _mm_cvtsd_si32(t); + return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t)); +#elif defined __GNUC__ + int i = (int)value; + return i + (i < value); +#else + int i = cvRound(value); + float diff = (float)(i - value); + return i + (diff < 0); +#endif +} + +/** @brief Determines if the argument is Not A Number. + + @param value The input floating-point value + + The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0 + otherwise. */ +CV_INLINE int cvIsNaN( double value ) +{ + Cv64suf ieee754; + ieee754.f = value; + return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) + + ((unsigned)ieee754.u != 0) > 0x7ff00000; +} + +/** @brief Determines if the argument is Infinity. + + @param value The input floating-point value + + The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard) + and 0 otherwise. 
*/ +CV_INLINE int cvIsInf( double value ) +{ + Cv64suf ieee754; + ieee754.f = value; + return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 && + (unsigned)ieee754.u == 0; +} + +#ifdef __cplusplus + +/** @overload */ +CV_INLINE int cvRound(float value) +{ +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \ + defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) + __m128 t = _mm_set_ss( value ); + return _mm_cvtss_si32(t); +#elif defined _MSC_VER && defined _M_IX86 + int t; + __asm + { + fld value; + fistp t; + } + return t; +#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ + defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION + TEGRA_ROUND_FLT(value); +#elif defined CV_ICC || defined __GNUC__ +# if CV_VFP + ARM_ROUND_FLT(value); +# else + return (int)lrintf(value); +# endif +#else + /* it's ok if round does not comply with IEEE754 standard; + the tests should allow +/-1 difference when the tested functions use round */ + return (int)(value + (value >= 0 ? 
0.5f : -0.5f)); +#endif +} + +/** @overload */ +CV_INLINE int cvRound( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvFloor( float value ) +{ +#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) + __m128 t = _mm_set_ss( value ); + int i = _mm_cvtss_si32(t); + return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i))); +#elif defined __GNUC__ + int i = (int)value; + return i - (i > value); +#else + int i = cvRound(value); + float diff = (float)(value - i); + return i - (diff < 0); +#endif +} + +/** @overload */ +CV_INLINE int cvFloor( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvCeil( float value ) +{ +#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) + __m128 t = _mm_set_ss( value ); + int i = _mm_cvtss_si32(t); + return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t)); +#elif defined __GNUC__ + int i = (int)value; + return i + (i < value); +#else + int i = cvRound(value); + float diff = (float)(i - value); + return i + (diff < 0); +#endif +} + +/** @overload */ +CV_INLINE int cvCeil( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvIsNaN( float value ) +{ + Cv32suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff) > 0x7f800000; +} + +/** @overload */ +CV_INLINE int cvIsInf( float value ) +{ + Cv32suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff) == 0x7f800000; +} + +#endif // __cplusplus + +//! 
@} core_utils + +#endif diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp new file mode 100644 index 0000000..118913e --- /dev/null +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -0,0 +1,218 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_HAL_HPP__ +#define __OPENCV_HAL_HPP__ + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/hal/interface.h" + +//! @cond IGNORED +#define CALL_HAL(name, fun, ...) \ + int res = fun(__VA_ARGS__); \ + if (res == CV_HAL_ERROR_OK) \ + return; \ + else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \ + CV_Error_(cv::Error::StsInternal, \ + ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res)); +//! @endcond + + +namespace cv { namespace hal { + +//! @addtogroup core_hal_functions +//! 
@{ + +CV_EXPORTS int normHamming(const uchar* a, int n); +CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n); + +CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize); +CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize); + +CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); + +CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n); +CV_EXPORTS float normL1_(const float* a, const float* b, int n); +CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n); + +CV_EXPORTS void exp32f(const float* src, float* dst, int n); +CV_EXPORTS void exp64f(const double* src, double* dst, int n); +CV_EXPORTS void log32f(const float* src, float* dst, int n); +CV_EXPORTS void log64f(const double* src, double* dst, int n); + +CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees); +CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n); +CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n); +CV_EXPORTS void sqrt32f(const float* src, float* dst, int len); +CV_EXPORTS void sqrt64f(const double* src, double* dst, int len); +CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len); +CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len); + +CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn ); +CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn ); +CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn ); +CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn ); + +CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, 
int cn ); +CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn ); +CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn ); +CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn ); + +CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, 
void* ); +CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min32s( const int* src1, size_t step1, 
const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t 
step, int width, int height, void* ); + +CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); + +CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, 
int height, void* scale); +CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, 
int height, void* scale); +CV_EXPORTS void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars ); +CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); + +//! @} core_hal + +//============================================================================= +// for binary compatibility with 3.0 + +//! 
@cond IGNORED + +CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); + +CV_EXPORTS void exp(const float* src, float* dst, int n); +CV_EXPORTS void exp(const double* src, double* dst, int n); +CV_EXPORTS void log(const float* src, float* dst, int n); +CV_EXPORTS void log(const double* src, double* dst, int n); + +CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n); +CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n); +CV_EXPORTS void sqrt(const float* src, float* dst, int len); +CV_EXPORTS void sqrt(const double* src, double* dst, int len); +CV_EXPORTS void invSqrt(const float* src, float* dst, int len); +CV_EXPORTS void invSqrt(const double* src, double* dst, int len); + +//! @endcond + +}} //cv::hal + +#endif //__OPENCV_HAL_HPP__ diff --git a/modules/hal/include/opencv2/hal/interface.hpp b/modules/core/include/opencv2/core/hal/interface.h similarity index 83% rename from modules/hal/include/opencv2/hal/interface.hpp rename to modules/core/include/opencv2/core/hal/interface.h index 2a5bff0..51f7606 100644 --- a/modules/hal/include/opencv2/hal/interface.hpp +++ b/modules/core/include/opencv2/core/hal/interface.h @@ -1,8 +1,11 @@ #ifndef _HAL_INTERFACE_HPP_INCLUDED_ #define _HAL_INTERFACE_HPP_INCLUDED_ +//! @addtogroup core_hal_interface +//! 
@{ + #define CV_HAL_ERROR_OK 0 -#define CV_HAL_ERROR_NI 1 +#define CV_HAL_ERROR_NOT_IMPLEMENTED 1 #define CV_HAL_ERROR_UNKNOWN -1 #define CV_HAL_CMP_EQ 0 @@ -13,33 +16,6 @@ #define CV_HAL_CMP_NE 5 #ifdef __cplusplus -namespace cv { namespace hal { - -namespace Error { - -enum -{ - Ok = 0, - NotImplemented = 1, - Unknown = -1 -}; - -} - -enum -{ - CMP_EQ = 0, - CMP_GT = 1, - CMP_GE = 2, - CMP_LT = 3, - CMP_LE = 4, - CMP_NE = 5 -}; - -}} -#endif - -#ifdef __cplusplus #include #else #include @@ -88,4 +64,6 @@ typedef signed char schar; # define CV_BIG_UINT(n) n##ULL #endif +//! @} + #endif diff --git a/modules/hal/include/opencv2/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp similarity index 97% rename from modules/hal/include/opencv2/hal/intrin.hpp rename to modules/core/include/opencv2/core/hal/intrin.hpp index c8d59c9..33e14b4 100644 --- a/modules/hal/include/opencv2/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -48,7 +48,7 @@ #include #include #include -#include "opencv2/hal/defs.h" +#include "opencv2/core/cvdef.h" #define OPENCV_HAL_ADD(a, b) ((a) + (b)) #define OPENCV_HAL_AND(a, b) ((a) & (b)) @@ -60,7 +60,7 @@ // access from within opencv code more accessible namespace cv { -//! @addtogroup hal_intrin +//! @addtogroup core_hal_intrin //! @{ //! @cond IGNORED @@ -290,19 +290,19 @@ template struct V_SIMD128Traits #if CV_SSE2 -#include "opencv2/hal/intrin_sse.hpp" +#include "opencv2/core/hal/intrin_sse.hpp" #elif CV_NEON -#include "opencv2/hal/intrin_neon.hpp" +#include "opencv2/core/hal/intrin_neon.hpp" #else -#include "opencv2/hal/intrin_cpp.hpp" +#include "opencv2/core/hal/intrin_cpp.hpp" #endif -//! @addtogroup hal_intrin +//! @addtogroup core_hal_intrin //! 
@{ #ifndef CV_SIMD128 diff --git a/modules/hal/include/opencv2/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp similarity index 98% rename from modules/hal/include/opencv2/hal/intrin_cpp.hpp rename to modules/core/include/opencv2/core/hal/intrin_cpp.hpp index e1b1044..3929e0d 100644 --- a/modules/hal/include/opencv2/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -47,11 +47,13 @@ #include #include +#include +#include "opencv2/core/saturate.hpp" namespace cv { -/** @addtogroup hal_intrin +/** @addtogroup core_hal_intrin "Universal intrinsics" is a types and functions set intended to simplify vectorization of code on different platforms. Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86 @@ -370,7 +372,7 @@ typedef v_reg v_uint64x2; typedef v_reg v_int64x2; //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_BIN_OP(bin_op) \ template inline v_reg<_Tp, n> \ operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ @@ -409,7 +411,7 @@ For floating types only. */ OPENCV_HAL_IMPL_BIN_OP(/) //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_BIT_OP(bit_op) \ template inline v_reg<_Tp, n> operator bit_op \ (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ @@ -458,7 +460,7 @@ template inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, } //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ { \ @@ -507,7 +509,7 @@ Only for floating point types.*/ OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int) //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! 
@ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ { \ @@ -518,7 +520,7 @@ template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, } //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ template inline _Tp func(const v_reg<_Tp, n>& a) \ { \ @@ -584,7 +586,7 @@ inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, //! @endcond //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \ template \ inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ @@ -627,7 +629,7 @@ For all types except 64-bit integer values. */ OPENCV_HAL_IMPL_CMP_OP(!=) //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \ template \ inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ @@ -821,7 +823,7 @@ template inline void v_hsum(const v_reg<_Tp, n>& a, //! @endcond //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \ template inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \ { \ @@ -1465,7 +1467,7 @@ inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, } //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \ inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } @@ -1485,7 +1487,7 @@ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! 
@ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } @@ -1505,7 +1507,7 @@ OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ template inline _Tpvec \ v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ @@ -1527,7 +1529,7 @@ OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ template inline _Tpvec v_shl(const _Tpvec& a) \ { return a << n; } @@ -1544,7 +1546,7 @@ OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ template inline _Tpvec v_shr(const _Tpvec& a) \ { return a >> n; } @@ -1561,7 +1563,7 @@ OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ template inline _Tpvec v_rshr(const _Tpvec& a) \ { \ @@ -1583,7 +1585,7 @@ OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix) \ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ { \ @@ -1616,7 +1618,7 @@ OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! 
@ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ { \ @@ -1649,7 +1651,7 @@ OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ { \ @@ -1677,7 +1679,7 @@ OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u) //! @} //! @brief Helper macro -//! @ingroup hal_intrin_impl +//! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ { \ diff --git a/modules/hal/include/opencv2/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp similarity index 99% rename from modules/hal/include/opencv2/hal/intrin_neon.hpp rename to modules/core/include/opencv2/core/hal/intrin_neon.hpp index d53971f..f3e47ca 100644 --- a/modules/hal/include/opencv2/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -45,6 +45,8 @@ #ifndef __OPENCV_HAL_INTRIN_NEON_HPP__ #define __OPENCV_HAL_INTRIN_NEON_HPP__ +#include + namespace cv { diff --git a/modules/hal/include/opencv2/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp similarity index 99% rename from modules/hal/include/opencv2/hal/intrin_sse.hpp rename to modules/core/include/opencv2/core/hal/intrin_sse.hpp index e237ccd..1840e03 100644 --- a/modules/hal/include/opencv2/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -45,6 +45,8 @@ #ifndef __OPENCV_HAL_SSE_HPP__ #define __OPENCV_HAL_SSE_HPP__ +#include + #define CV_SIMD128 1 #define CV_SIMD128_64F 1 diff --git 
a/modules/core/include/opencv2/core/matx.hpp b/modules/core/include/opencv2/core/matx.hpp index 9ef81e7..ca5f261 100644 --- a/modules/core/include/opencv2/core/matx.hpp +++ b/modules/core/include/opencv2/core/matx.hpp @@ -51,6 +51,7 @@ #include "opencv2/core/cvdef.h" #include "opencv2/core/base.hpp" #include "opencv2/core/traits.hpp" +#include "opencv2/core/saturate.hpp" namespace cv { diff --git a/modules/hal/include/opencv2/hal/neon_utils.hpp b/modules/core/include/opencv2/core/neon_utils.hpp similarity index 98% rename from modules/hal/include/opencv2/hal/neon_utils.hpp rename to modules/core/include/opencv2/core/neon_utils.hpp index 6026777..adb750f 100644 --- a/modules/hal/include/opencv2/hal/neon_utils.hpp +++ b/modules/core/include/opencv2/core/neon_utils.hpp @@ -42,9 +42,10 @@ #ifndef __OPENCV_HAL_NEON_UTILS_HPP__ #define __OPENCV_HAL_NEON_UTILS_HPP__ -#include "opencv2/hal/defs.h" +#include "opencv2/core/cvdef.h" -namespace cv { +//! @addtogroup core_utils_neon +//! @{ #if CV_NEON @@ -122,6 +123,6 @@ inline float32x2_t cv_vsqrt_f32(float32x2_t val) #endif -} +//! @} #endif // __OPENCV_HAL_NEON_UTILS_HPP__ diff --git a/modules/core/include/opencv2/core/saturate.hpp b/modules/core/include/opencv2/core/saturate.hpp new file mode 100644 index 0000000..1442eab --- /dev/null +++ b/modules/core/include/opencv2/core/saturate.hpp @@ -0,0 +1,150 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2014, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_CORE_SATURATE_HPP__ +#define __OPENCV_CORE_SATURATE_HPP__ + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/fast_math.hpp" + +namespace cv +{ + +//! @addtogroup core_utils +//! 
@{ + +/////////////// saturate_cast (used in image & signal processing) /////////////////// + +/** @brief Template function for accurate conversion from one primitive type to another. + + The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\<T\>() + and others. They perform an efficient and accurate conversion from one primitive type to another + (see the introduction chapter). saturate in the name means that when the input value v is out of the + range of the target type, the result is not formed just by taking low bits of the input, but instead + the value is clipped. For example: + @code + uchar a = saturate_cast<uchar>(-100); // a = 0 (UCHAR_MIN) + short b = saturate_cast<short>(33333.33333); // b = 32767 (SHRT_MAX) + @endcode + Such clipping is done when the target type is unsigned char, signed char, unsigned short or + signed short. For 32-bit integers, no clipping is done. + + When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit), + the floating-point value is first rounded to the nearest integer and then clipped if needed (when + the target type is 8- or 16-bit). + + This operation is used in the simplest or most complex image processing functions in OpenCV. + + @param v Function parameter. 
+ @sa add, subtract, multiply, divide, Mat::convertTo + */ +template static inline _Tp saturate_cast(uchar v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(schar v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(ushort v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(short v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(unsigned v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(int v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(float v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(double v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(int64 v) { return _Tp(v); } +/** @overload */ +template static inline _Tp saturate_cast(uint64 v) { return _Tp(v); } + +template<> inline uchar saturate_cast(schar v) { return (uchar)std::max((int)v, 0); } +template<> inline uchar saturate_cast(ushort v) { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); } +template<> inline uchar saturate_cast(int v) { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } +template<> inline uchar saturate_cast(short v) { return saturate_cast((int)v); } +template<> inline uchar saturate_cast(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); } +template<> inline uchar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline uchar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline uchar saturate_cast(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? 
UCHAR_MAX : 0); } +template<> inline uchar saturate_cast(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); } + +template<> inline schar saturate_cast(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); } +template<> inline schar saturate_cast(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); } +template<> inline schar saturate_cast(int v) { return (schar)((unsigned)(v-SCHAR_MIN) <= (unsigned)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); } +template<> inline schar saturate_cast(short v) { return saturate_cast((int)v); } +template<> inline schar saturate_cast(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); } +template<> inline schar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline schar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline schar saturate_cast(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); } +template<> inline schar saturate_cast(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); } + +template<> inline ushort saturate_cast(schar v) { return (ushort)std::max((int)v, 0); } +template<> inline ushort saturate_cast(short v) { return (ushort)std::max((int)v, 0); } +template<> inline ushort saturate_cast(int v) { return (ushort)((unsigned)v <= (unsigned)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } +template<> inline ushort saturate_cast(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); } +template<> inline ushort saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline ushort saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline ushort saturate_cast(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? 
USHRT_MAX : 0); } +template<> inline ushort saturate_cast(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); } + +template<> inline short saturate_cast(ushort v) { return (short)std::min((int)v, SHRT_MAX); } +template<> inline short saturate_cast(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } +template<> inline short saturate_cast(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); } +template<> inline short saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline short saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline short saturate_cast(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } +template<> inline short saturate_cast(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); } + +template<> inline int saturate_cast(float v) { return cvRound(v); } +template<> inline int saturate_cast(double v) { return cvRound(v); } + +// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. +template<> inline unsigned saturate_cast(float v) { return cvRound(v); } +template<> inline unsigned saturate_cast(double v) { return cvRound(v); } + +//! @} + +} // cv + +#endif // __OPENCV_CORE_SATURATE_HPP__ diff --git a/modules/hal/include/opencv2/hal/sse_utils.hpp b/modules/core/include/opencv2/core/sse_utils.hpp similarity index 99% rename from modules/hal/include/opencv2/hal/sse_utils.hpp rename to modules/core/include/opencv2/core/sse_utils.hpp index 9ce4098..c87b029 100644 --- a/modules/hal/include/opencv2/hal/sse_utils.hpp +++ b/modules/core/include/opencv2/core/sse_utils.hpp @@ -46,7 +46,10 @@ # error sse_utils.hpp header must be compiled as C++ #endif -#include "opencv2/hal/defs.h" +#include "opencv2/core/cvdef.h" + +//! @addtogroup core_utils_sse +//! 
@{ #if CV_SSE2 @@ -644,4 +647,6 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m12 #endif // CV_SSE2 +//! @} + #endif //__OPENCV_CORE_SSE_UTILS_HPP__ diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 06cd791..c3acca0 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -2258,4 +2258,1110 @@ cvMaxS( const void* srcarr1, double value, void* dstarr ) cv::max( src1, value, dst ); } + + +namespace cv { namespace hal { + +//======================================= + +#if (ARITHM_USE_IPP == 1) +static inline void fixSteps(int width, int height, size_t elemSize, size_t& step1, size_t& step2, size_t& step) +{ + if( height == 1 ) + step1 = step2 = step = width*elemSize; +} +#define CALL_IPP_BIN_E_12(fun) \ + CV_IPP_CHECK() \ + { \ + fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ + if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } + +#define CALL_IPP_BIN_E_21(fun) \ + CV_IPP_CHECK() \ + { \ + fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ + if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } + +#define CALL_IPP_BIN_12(fun) \ + CV_IPP_CHECK() \ + { \ + fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ + if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height))) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } + +#define CALL_IPP_BIN_21(fun) \ + CV_IPP_CHECK() \ + { \ + fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ + if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height))) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } + +#else +#define 
CALL_IPP_BIN_E_12(fun) +#define CALL_IPP_BIN_E_21(fun) +#define CALL_IPP_BIN_12(fun) +#define CALL_IPP_BIN_21(fun) +#endif + + +//======================================= +// Add +//======================================= + +void add8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(add8u, cv_hal_add8u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_E_12(ippiAdd_8u_C1RSfs) + (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void add8s( const schar* src1, size_t step1, + const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(add8s, cv_hal_add8s, src1, step1, src2, step2, dst, step, width, height) + vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height); +} + +void add16u( const ushort* src1, size_t step1, + const ushort* src2, size_t step2, + ushort* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(add16u, cv_hal_add16u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_E_12(ippiAdd_16u_C1RSfs) + (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void add16s( const short* src1, size_t step1, + const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(add16s, cv_hal_add16s, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_E_12(ippiAdd_16s_C1RSfs) + (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void add32s( const int* src1, size_t step1, + const int* src2, size_t step2, + int* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(add32s, cv_hal_add32s, src1, step1, src2, step2, dst, step, width, height) + vBinOp32, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height); +} + +void add32f( const float* src1, size_t step1, + const float* src2, size_t step2, + float* dst, 
size_t step, int width, int height, void* ) +{ + CALL_HAL(add32f, cv_hal_add32f, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_12(ippiAdd_32f_C1R) + (vBinOp32, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void add64f( const double* src1, size_t step1, + const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(add64f, cv_hal_add64f, src1, step1, src2, step2, dst, step, width, height) + vBinOp64, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height); +} + +//======================================= +// Subtract +//======================================= + +void sub8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(sub8u, cv_hal_sub8u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_E_21(ippiSub_8u_C1RSfs) + (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void sub8s( const schar* src1, size_t step1, + const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(sub8s, cv_hal_sub8s, src1, step1, src2, step2, dst, step, width, height) + vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height); +} + +void sub16u( const ushort* src1, size_t step1, + const ushort* src2, size_t step2, + ushort* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(sub16u, cv_hal_sub16u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_E_21(ippiSub_16u_C1RSfs) + (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void sub16s( const short* src1, size_t step1, + const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(sub16s, cv_hal_sub16s, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_E_21(ippiSub_16s_C1RSfs) + (vBinOp, IF_SIMD(VSub)>(src1, step1, 
src2, step2, dst, step, width, height)); +} + +void sub32s( const int* src1, size_t step1, + const int* src2, size_t step2, + int* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(sub32s, cv_hal_sub32s, src1, step1, src2, step2, dst, step, width, height) + vBinOp32, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height); +} + +void sub32f( const float* src1, size_t step1, + const float* src2, size_t step2, + float* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(sub32f, cv_hal_sub32f, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_21(ippiSub_32f_C1R) + (vBinOp32, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void sub64f( const double* src1, size_t step1, + const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(sub64f, cv_hal_sub64f, src1, step1, src2, step2, dst, step, width, height) + vBinOp64, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height); +} + +//======================================= + +#if (ARITHM_USE_IPP == 1) +#define CALL_IPP_MIN_MAX(fun, type) \ + CV_IPP_CHECK() \ + { \ + type* s1 = (type*)src1; \ + type* s2 = (type*)src2; \ + type* d = dst; \ + fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ + int i = 0; \ + for(; i < height; i++) \ + { \ + if (0 > fun(s1, s2, d, width)) \ + break; \ + s1 = (type*)((uchar*)s1 + step1); \ + s2 = (type*)((uchar*)s2 + step2); \ + d = (type*)((uchar*)d + step); \ + } \ + if (i == height) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } +#else +#define CALL_IPP_MIN_MAX(fun, type) +#endif + +//======================================= +// Max +//======================================= + +void max8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(max8u, cv_hal_max8u, src1, step1, src2, step2, dst, step, width, height) + 
CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar) + vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); +} + +void max8s( const schar* src1, size_t step1, + const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(max8s, cv_hal_max8s, src1, step1, src2, step2, dst, step, width, height) + vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); +} + +void max16u( const ushort* src1, size_t step1, + const ushort* src2, size_t step2, + ushort* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(max16u, cv_hal_max16u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort) + vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); +} + +void max16s( const short* src1, size_t step1, + const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(max16s, cv_hal_max16s, src1, step1, src2, step2, dst, step, width, height) + vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); +} + +void max32s( const int* src1, size_t step1, + const int* src2, size_t step2, + int* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(max32s, cv_hal_max32s, src1, step1, src2, step2, dst, step, width, height) + vBinOp32, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); +} + +void max32f( const float* src1, size_t step1, + const float* src2, size_t step2, + float* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(max32f, cv_hal_max32f, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float) + vBinOp32, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); +} + +void max64f( const double* src1, size_t step1, + const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(max64f, cv_hal_max64f, src1, step1, src2, step2, dst, step, width, 
height) + CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double) + vBinOp64, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); +} + +//======================================= +// Min +//======================================= + +void min8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(min8u, cv_hal_min8u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar) + vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); +} + +void min8s( const schar* src1, size_t step1, + const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(min8s, cv_hal_min8s, src1, step1, src2, step2, dst, step, width, height) + vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); +} + +void min16u( const ushort* src1, size_t step1, + const ushort* src2, size_t step2, + ushort* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(min16u, cv_hal_min16u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort) + vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); +} + +void min16s( const short* src1, size_t step1, + const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(min16s, cv_hal_min16s, src1, step1, src2, step2, dst, step, width, height) + vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); +} + +void min32s( const int* src1, size_t step1, + const int* src2, size_t step2, + int* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(min32s, cv_hal_min32s, src1, step1, src2, step2, dst, step, width, height) + vBinOp32, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); +} + +void min32f( const float* src1, size_t step1, + const float* src2, size_t step2, + float* dst, size_t step, int width, int 
height, void* ) +{ + CALL_HAL(min32f, cv_hal_min32f, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_MIN_MAX(ippsMinEvery_32f, float) + vBinOp32, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); +} + +void min64f( const double* src1, size_t step1, + const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(min64f, cv_hal_min64f, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_MIN_MAX(ippsMinEvery_64f, double) + vBinOp64, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); +} + +//======================================= +// AbsDiff +//======================================= + +void absdiff8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(absdiff8u, cv_hal_absdiff8u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R) + (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void absdiff8s( const schar* src1, size_t step1, + const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(absdiff8s, cv_hal_absdiff8s, src1, step1, src2, step2, dst, step, width, height) + vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height); +} + +void absdiff16u( const ushort* src1, size_t step1, + const ushort* src2, size_t step2, + ushort* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(absdiff16u, cv_hal_absdiff16u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R) + (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void absdiff16s( const short* src1, size_t step1, + const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(absdiff16s, cv_hal_absdiff16s, src1, step1, src2, step2, dst, step, width, height) 
+ vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height); +} + +void absdiff32s( const int* src1, size_t step1, + const int* src2, size_t step2, + int* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(absdiff32s, cv_hal_absdiff32s, src1, step1, src2, step2, dst, step, width, height) + vBinOp32, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height); +} + +void absdiff32f( const float* src1, size_t step1, + const float* src2, size_t step2, + float* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(absdiff32f, cv_hal_absdiff32f, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R) + (vBinOp32, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void absdiff64f( const double* src1, size_t step1, + const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(absdiff64f, cv_hal_absdiff64f, src1, step1, src2, step2, dst, step, width, height) + vBinOp64, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height); +} + +//======================================= +// Logical +//======================================= + +#if (ARITHM_USE_IPP == 1) +#define CALL_IPP_UN(fun) \ + CV_IPP_CHECK() \ + { \ + fixSteps(width, height, sizeof(dst[0]), step1, step2, step); (void)src2; \ + if (0 <= fun(src1, (int)step1, dst, (int)step, ippiSize(width, height))) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } +#else +#define CALL_IPP_UN(fun) +#endif + +void and8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(and8u, cv_hal_and8u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_12(ippiAnd_8u_C1R) + (vBinOp, IF_SIMD(VAnd)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void or8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, 
+ uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(or8u, cv_hal_or8u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_12(ippiOr_8u_C1R) + (vBinOp, IF_SIMD(VOr)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void xor8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(xor8u, cv_hal_xor8u, src1, step1, src2, step2, dst, step, width, height) + CALL_IPP_BIN_12(ippiXor_8u_C1R) + (vBinOp, IF_SIMD(VXor)>(src1, step1, src2, step2, dst, step, width, height)); +} + +void not8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) +{ + CALL_HAL(not8u, cv_hal_not8u, src1, step1, dst, step, width, height) + CALL_IPP_UN(ippiNot_8u_C1R) + (vBinOp, IF_SIMD(VNot)>(src1, step1, src2, step2, dst, step, width, height)); +} + +//======================================= + +#if ARITHM_USE_IPP +inline static IppCmpOp convert_cmp(int _cmpop) +{ + return _cmpop == CMP_EQ ? ippCmpEq : + _cmpop == CMP_GT ? ippCmpGreater : + _cmpop == CMP_GE ? ippCmpGreaterEq : + _cmpop == CMP_LT ? ippCmpLess : + _cmpop == CMP_LE ? 
ippCmpLessEq : + (IppCmpOp)-1; +} +#define CALL_IPP_CMP(fun) \ + CV_IPP_CHECK() \ + { \ + IppCmpOp op = convert_cmp(*(int *)_cmpop); \ + if( op >= 0 ) \ + { \ + fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ + if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } \ + } +#else +#define CALL_IPP_CMP(fun) +#endif + +//======================================= +// Compare +//======================================= + +void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* _cmpop) +{ + CALL_HAL(cmp8u, cv_hal_cmp8u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) + CALL_IPP_CMP(ippiCompare_8u_C1R) + //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); + int code = *(int*)_cmpop; + step1 /= sizeof(src1[0]); + step2 /= sizeof(src2[0]); + if( code == CMP_GE || code == CMP_LT ) + { + std::swap(src1, src2); + std::swap(step1, step2); + code = code == CMP_GE ? CMP_LE : CMP_GT; + } + + if( code == CMP_GT || code == CMP_LE ) + { + int m = code == CMP_GT ? 0 : 255; + for( ; height--; src1 += step1, src2 += step2, dst += step ) + { + int x =0; + #if CV_SSE2 + if( USE_SSE2 ) + { + __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1); + __m128i c128 = _mm_set1_epi8 (-128); + for( ; x <= width - 16; x += 16 ) + { + __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); + __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); + // no simd for 8u comparison, that's why we need the trick + r00 = _mm_sub_epi8(r00,c128); + r10 = _mm_sub_epi8(r10,c128); + + r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128); + _mm_storeu_si128((__m128i*)(dst + x),r00); + + } + } + #elif CV_NEON + uint8x16_t mask = code == CMP_GT ? 
vdupq_n_u8(0) : vdupq_n_u8(255); + + for( ; x <= width - 16; x += 16 ) + { + vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask)); + } + + #endif + + for( ; x < width; x++ ){ + dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m); + } + } + } + else if( code == CMP_EQ || code == CMP_NE ) + { + int m = code == CMP_EQ ? 0 : 255; + for( ; height--; src1 += step1, src2 += step2, dst += step ) + { + int x = 0; + #if CV_SSE2 + if( USE_SSE2 ) + { + __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1); + for( ; x <= width - 16; x += 16 ) + { + __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); + __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); + r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128); + _mm_storeu_si128((__m128i*)(dst + x), r00); + } + } + #elif CV_NEON + uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255); + + for( ; x <= width - 16; x += 16 ) + { + vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask)); + } + #endif + for( ; x < width; x++ ) + dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m); + } + } +} + +void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* _cmpop) +{ + CALL_HAL(cmp8s, cv_hal_cmp8s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) + cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); +} + +void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* _cmpop) +{ + CALL_HAL(cmp16u, cv_hal_cmp16u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) + CALL_IPP_CMP(ippiCompare_16u_C1R) + cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); +} + +void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* _cmpop) +{ + CALL_HAL(cmp16s, cv_hal_cmp16s, src1, step1, 
src2, step2, dst, step, width, height, *(int*)_cmpop) + CALL_IPP_CMP(ippiCompare_16s_C1R) + //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); + + int code = *(int*)_cmpop; + step1 /= sizeof(src1[0]); + step2 /= sizeof(src2[0]); + if( code == CMP_GE || code == CMP_LT ) + { + std::swap(src1, src2); + std::swap(step1, step2); + code = code == CMP_GE ? CMP_LE : CMP_GT; + } + + if( code == CMP_GT || code == CMP_LE ) + { + int m = code == CMP_GT ? 0 : 255; + for( ; height--; src1 += step1, src2 += step2, dst += step ) + { + int x =0; + #if CV_SSE2 + if( USE_SSE2) + { + __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1); + for( ; x <= width - 16; x += 16 ) + { + __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); + __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); + r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128); + __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8)); + __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8)); + r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128); + r11 = _mm_packs_epi16(r00, r01); + _mm_storeu_si128((__m128i*)(dst + x), r11); + } + if( x <= width-8) + { + __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); + __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); + r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128); + r10 = _mm_packs_epi16(r00, r00); + _mm_storel_epi64((__m128i*)(dst + x), r10); + + x += 8; + } + } + #elif CV_NEON + uint8x16_t mask = code == CMP_GT ? 
vdupq_n_u8(0) : vdupq_n_u8(255); + + for( ; x <= width - 16; x += 16 ) + { + int16x8_t in1 = vld1q_s16(src1 + x); + int16x8_t in2 = vld1q_s16(src2 + x); + uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2)); + + in1 = vld1q_s16(src1 + x + 8); + in2 = vld1q_s16(src2 + x + 8); + uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2)); + + vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask)); + } + #endif + + for( ; x < width; x++ ){ + dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m); + } + } + } + else if( code == CMP_EQ || code == CMP_NE ) + { + int m = code == CMP_EQ ? 0 : 255; + for( ; height--; src1 += step1, src2 += step2, dst += step ) + { + int x = 0; + #if CV_SSE2 + if( USE_SSE2 ) + { + __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1); + for( ; x <= width - 16; x += 16 ) + { + __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); + __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); + r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128); + __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8)); + __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8)); + r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128); + r11 = _mm_packs_epi16(r00, r01); + _mm_storeu_si128((__m128i*)(dst + x), r11); + } + if( x <= width - 8) + { + __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); + __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); + r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128); + r10 = _mm_packs_epi16(r00, r00); + _mm_storel_epi64((__m128i*)(dst + x), r10); + + x += 8; + } + } + #elif CV_NEON + uint8x16_t mask = code == CMP_EQ ? 
vdupq_n_u8(0) : vdupq_n_u8(255); + + for( ; x <= width - 16; x += 16 ) + { + int16x8_t in1 = vld1q_s16(src1 + x); + int16x8_t in2 = vld1q_s16(src2 + x); + uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2)); + + in1 = vld1q_s16(src1 + x + 8); + in2 = vld1q_s16(src2 + x + 8); + uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2)); + + vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask)); + } + #endif + for( ; x < width; x++ ) + dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m); + } + } +} + +void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* _cmpop) +{ + CALL_HAL(cmp32s, cv_hal_cmp32s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) + cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); +} + +void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* _cmpop) +{ + CALL_HAL(cmp32f, cv_hal_cmp32f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) + CALL_IPP_CMP(ippiCompare_32f_C1R) + cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); +} + +void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* _cmpop) +{ + CALL_HAL(cmp64f, cv_hal_cmp64f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) + cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); +} + +//======================================= + +#if defined HAVE_IPP +#define CALL_IPP_MUL(fun) \ + CV_IPP_CHECK() \ + { \ + if (std::fabs(fscale - 1) <= FLT_EPSILON) \ + { \ + if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } \ + } + +#define CALL_IPP_MUL_2(fun) \ + CV_IPP_CHECK() \ + { \ + if (std::fabs(fscale - 1) <= FLT_EPSILON) \ + { \ + if (fun(src1, (int)step1, src2, 
(int)step2, dst, (int)step, ippiSize(width, height)) >= 0) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return; \ + } \ + setIppErrorStatus(); \ + } \ + } + +#else +#define CALL_IPP_MUL(fun) +#define CALL_IPP_MUL_2(fun) +#endif + +//======================================= +// Multiply +//======================================= + +void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(mul8u, cv_hal_mul8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + float fscale = (float)*(const double*)scale; + CALL_IPP_MUL(ippiMul_8u_C1RSfs) + mul_(src1, step1, src2, step2, dst, step, width, height, fscale); +} + +void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(mul8s, cv_hal_mul8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale); +} + +void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, + ushort* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(mul16u, cv_hal_mul16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + float fscale = (float)*(const double*)scale; + CALL_IPP_MUL(ippiMul_16u_C1RSfs) + mul_(src1, step1, src2, step2, dst, step, width, height, fscale); +} + +void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(mul16s, cv_hal_mul16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + float fscale = (float)*(const double*)scale; + CALL_IPP_MUL(ippiMul_16s_C1RSfs) + mul_(src1, step1, src2, step2, dst, step, width, height, fscale); +} + +void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, + int* dst, size_t 
step, int width, int height, void* scale) +{ + CALL_HAL(mul32s, cv_hal_mul32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, + float* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(mul32f, cv_hal_mul32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + float fscale = (float)*(const double*)scale; + CALL_IPP_MUL_2(ippiMul_32f_C1R) + mul_(src1, step1, src2, step2, dst, step, width, height, fscale); +} + +void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(mul64f, cv_hal_mul64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +//======================================= +// Divide +//======================================= + +void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(div8u, cv_hal_div8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + if( src1 ) + div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + else + recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(div8s, cv_hal_div8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, + ushort* dst, size_t step, int 
width, int height, void* scale) +{ + CALL_HAL(div16u, cv_hal_div16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void div16s( const short* src1, size_t step1, const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(div16s, cv_hal_div16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void div32s( const int* src1, size_t step1, const int* src2, size_t step2, + int* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(div32s, cv_hal_div32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void div32f( const float* src1, size_t step1, const float* src2, size_t step2, + float* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(div32f, cv_hal_div32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void div64f( const double* src1, size_t step1, const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(div64f, cv_hal_div64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +//======================================= +// Reciprocal +//======================================= + +void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(recip8u, cv_hal_recip8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src1, step1, src2, 
step2, dst, step, width, height, *(const double*)scale); +} + +void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(recip8s, cv_hal_recip8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, + ushort* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(recip16u, cv_hal_recip16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(recip16s, cv_hal_recip16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, + int* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(recip32s, cv_hal_recip32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, + float* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(recip32f, cv_hal_recip32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* scale) +{ + CALL_HAL(recip64f, 
cv_hal_recip64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) + recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); +} + +//======================================= +// Add weighted +//======================================= + +void +addWeighted8u( const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, + void* scalars ) +{ + CALL_HAL(addWeighted8u, cv_hal_addWeighted8u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) + const double* scalars_ = (const double*)scalars; + float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2]; + + for( ; height--; src1 += step1, src2 += step2, dst += step ) + { + int x = 0; + +#if CV_SSE2 + if( USE_SSE2 ) + { + __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma); + __m128i z = _mm_setzero_si128(); + + for( ; x <= width - 8; x += 8 ) + { + __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z); + __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z); + + __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z)); + __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z)); + __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z)); + __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z)); + + u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4)); + u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4)); + u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4); + + u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1)); + u = _mm_packus_epi16(u, u); + + _mm_storel_epi64((__m128i*)(dst + x), u); + } + } +#elif CV_NEON + float32x4_t g = vdupq_n_f32 (gamma); + + for( ; x <= width - 8; x += 8 ) + { + uint8x8_t in1 = vld1_u8(src1+x); + uint16x8_t in1_16 = vmovl_u8(in1); + float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16))); + float32x4_t in1_f_h = 
vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16))); + + uint8x8_t in2 = vld1_u8(src2+x); + uint16x8_t in2_16 = vmovl_u8(in2); + float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16))); + float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16))); + + float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta)); + float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta)); + out_f_l = vaddq_f32(out_f_l, g); + out_f_h = vaddq_f32(out_f_h, g); + + uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l)); + uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h)); + + uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h); + uint8x8_t out = vqmovn_u16(out_16); + + vst1_u8(dst+x, out); + } +#endif + #if CV_ENABLE_UNROLLED + for( ; x <= width - 4; x += 4 ) + { + float t0, t1; + t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma; + t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma; + + dst[x] = saturate_cast(t0); + dst[x+1] = saturate_cast(t1); + + t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma; + t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma; + + dst[x+2] = saturate_cast(t0); + dst[x+3] = saturate_cast(t1); + } + #endif + + for( ; x < width; x++ ) + { + float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma; + dst[x] = saturate_cast(t0); + } + } +} + +void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, + schar* dst, size_t step, int width, int height, void* scalars ) +{ + CALL_HAL(addWeighted8s, cv_hal_addWeighted8s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) + addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); +} + +void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, + ushort* dst, size_t step, int width, int height, void* scalars ) +{ + CALL_HAL(addWeighted16u, 
cv_hal_addWeighted16u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) + addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); +} + +void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, + short* dst, size_t step, int width, int height, void* scalars ) +{ + CALL_HAL(addWeighted16s, cv_hal_addWeighted16s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) + addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); +} + +void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, + int* dst, size_t step, int width, int height, void* scalars ) +{ + CALL_HAL(addWeighted32s, cv_hal_addWeighted32s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) + addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); +} + +void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, + float* dst, size_t step, int width, int height, void* scalars ) +{ + CALL_HAL(addWeighted32f, cv_hal_addWeighted32f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) + addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); +} + +void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, + double* dst, size_t step, int width, int height, void* scalars ) +{ + CALL_HAL(addWeighted64f, cv_hal_addWeighted64f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) + addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); +} + +}} // cv::hal:: + /* End of file. 
*/ diff --git a/modules/hal/src/arithm_core.hpp b/modules/core/src/arithm_core.hpp similarity index 67% rename from modules/hal/src/arithm_core.hpp rename to modules/core/src/arithm_core.hpp index a65e74c..4790586 100644 --- a/modules/hal/src/arithm_core.hpp +++ b/modules/core/src/arithm_core.hpp @@ -42,144 +42,94 @@ // //M*/ -#ifndef __OPENCV_HAL_ARITHM_CORE_HPP__ -#define __OPENCV_HAL_ARITHM_CORE_HPP__ +#ifndef __OPENCV_ARITHM_CORE_HPP__ +#define __OPENCV_ARITHM_CORE_HPP__ #include "arithm_simd.hpp" -const uchar g_Saturate8u[] = +namespace cv { + +template struct OpAdd { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 
142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255 + typedef T1 type1; + typedef T2 type2; + typedef T3 rtype; + T3 operator ()(const T1 a, const T2 
b) const { return saturate_cast(a + b); } }; - -#define CV_FAST_CAST_8U(t) (assert(-256 <= (t) && (t) <= 512), g_Saturate8u[(t)+256]) -#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b))) -#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a))) - -const float g_8x32fTab[] = +template struct OpSub { - -128.f, -127.f, -126.f, -125.f, -124.f, -123.f, -122.f, -121.f, - -120.f, -119.f, -118.f, -117.f, -116.f, -115.f, -114.f, -113.f, - -112.f, -111.f, -110.f, -109.f, -108.f, -107.f, -106.f, -105.f, - -104.f, -103.f, -102.f, -101.f, -100.f, -99.f, -98.f, -97.f, - -96.f, -95.f, -94.f, -93.f, -92.f, -91.f, -90.f, -89.f, - -88.f, -87.f, -86.f, -85.f, -84.f, -83.f, -82.f, -81.f, - -80.f, -79.f, -78.f, -77.f, -76.f, -75.f, -74.f, -73.f, - -72.f, -71.f, -70.f, -69.f, -68.f, -67.f, -66.f, -65.f, - -64.f, -63.f, -62.f, -61.f, -60.f, -59.f, -58.f, -57.f, - -56.f, -55.f, -54.f, -53.f, -52.f, -51.f, -50.f, -49.f, - -48.f, -47.f, -46.f, -45.f, -44.f, -43.f, -42.f, -41.f, - -40.f, -39.f, -38.f, -37.f, -36.f, -35.f, -34.f, -33.f, - -32.f, -31.f, -30.f, -29.f, -28.f, -27.f, -26.f, -25.f, - -24.f, -23.f, -22.f, -21.f, -20.f, -19.f, -18.f, -17.f, - -16.f, -15.f, -14.f, -13.f, -12.f, -11.f, -10.f, -9.f, - -8.f, -7.f, -6.f, -5.f, -4.f, -3.f, -2.f, -1.f, - 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, - 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, - 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, - 24.f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f, - 32.f, 33.f, 34.f, 35.f, 36.f, 37.f, 38.f, 39.f, - 40.f, 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f, - 48.f, 49.f, 50.f, 51.f, 52.f, 53.f, 54.f, 55.f, - 56.f, 57.f, 58.f, 59.f, 60.f, 61.f, 62.f, 63.f, - 64.f, 65.f, 66.f, 67.f, 68.f, 69.f, 70.f, 71.f, - 72.f, 73.f, 74.f, 75.f, 76.f, 77.f, 78.f, 79.f, - 80.f, 81.f, 82.f, 83.f, 84.f, 85.f, 86.f, 87.f, - 88.f, 89.f, 90.f, 91.f, 92.f, 93.f, 94.f, 95.f, - 96.f, 97.f, 98.f, 99.f, 100.f, 101.f, 102.f, 103.f, - 104.f, 105.f, 106.f, 107.f, 108.f, 109.f, 110.f, 111.f, - 112.f, 113.f, 114.f, 115.f, 116.f, 
117.f, 118.f, 119.f, - 120.f, 121.f, 122.f, 123.f, 124.f, 125.f, 126.f, 127.f, - 128.f, 129.f, 130.f, 131.f, 132.f, 133.f, 134.f, 135.f, - 136.f, 137.f, 138.f, 139.f, 140.f, 141.f, 142.f, 143.f, - 144.f, 145.f, 146.f, 147.f, 148.f, 149.f, 150.f, 151.f, - 152.f, 153.f, 154.f, 155.f, 156.f, 157.f, 158.f, 159.f, - 160.f, 161.f, 162.f, 163.f, 164.f, 165.f, 166.f, 167.f, - 168.f, 169.f, 170.f, 171.f, 172.f, 173.f, 174.f, 175.f, - 176.f, 177.f, 178.f, 179.f, 180.f, 181.f, 182.f, 183.f, - 184.f, 185.f, 186.f, 187.f, 188.f, 189.f, 190.f, 191.f, - 192.f, 193.f, 194.f, 195.f, 196.f, 197.f, 198.f, 199.f, - 200.f, 201.f, 202.f, 203.f, 204.f, 205.f, 206.f, 207.f, - 208.f, 209.f, 210.f, 211.f, 212.f, 213.f, 214.f, 215.f, - 216.f, 217.f, 218.f, 219.f, 220.f, 221.f, 222.f, 223.f, - 224.f, 225.f, 226.f, 227.f, 228.f, 229.f, 230.f, 231.f, - 232.f, 233.f, 234.f, 235.f, 236.f, 237.f, 238.f, 239.f, - 240.f, 241.f, 242.f, 243.f, 244.f, 245.f, 246.f, 247.f, - 248.f, 249.f, 250.f, 251.f, 252.f, 253.f, 254.f, 255.f + typedef T1 type1; + typedef T2 type2; + typedef T3 rtype; + T3 operator ()(const T1 a, const T2 b) const { return saturate_cast(a - b); } }; -#define CV_8TO32F(x) g_8x32fTab[(x)+128] - -namespace cv { +template struct OpRSub +{ + typedef T1 type1; + typedef T2 type2; + typedef T3 rtype; + T3 operator ()(const T1 a, const T2 b) const { return saturate_cast(b - a); } +}; -template<> inline uchar OpAdd::operator ()(uchar a, uchar b) const -{ return CV_FAST_CAST_8U(a + b); } +template struct OpMin +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator ()(const T a, const T b) const { return std::min(a, b); } +}; -template<> inline uchar OpSub::operator ()(uchar a, uchar b) const -{ return CV_FAST_CAST_8U(a - b); } +template struct OpMax +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator ()(const T a, const T b) const { return std::max(a, b); } +}; -template<> inline short OpAbsDiff::operator ()(short a, short b) const -{ return 
saturate_cast(std::abs(a - b)); } +template struct OpAbsDiff +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator()(T a, T b) const { return a > b ? a - b : b - a; } +}; -template<> inline schar OpAbsDiff::operator ()(schar a, schar b) const -{ return saturate_cast(std::abs(a - b)); } +template struct OpAnd +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator()( T a, T b ) const { return a & b; } +}; -template<> inline uchar OpMin::operator ()(uchar a, uchar b) const { return CV_MIN_8U(a, b); } +template struct OpOr +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator()( T a, T b ) const { return a | b; } +}; -template<> inline uchar OpMax::operator ()(uchar a, uchar b) const { return CV_MAX_8U(a, b); } +template struct OpXor +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator()( T a, T b ) const { return a ^ b; } +}; -} +template struct OpNot +{ + typedef T type1; + typedef T type2; + typedef T rtype; + T operator()( T a, T ) const { return ~a; } +}; -namespace cv { namespace hal { +//============================================================================= template void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, int width, int height) @@ -651,7 +601,7 @@ addWeighted_( const T* src1, size_t step1, const T* src2, size_t step2, } } -}} // cv::hal:: +} // cv:: -#endif // __OPENCV_HAL_ARITHM_CORE_HPP__ +#endif // __OPENCV_ARITHM_CORE_HPP__ diff --git a/modules/hal/src/arithm_simd.hpp b/modules/core/src/arithm_simd.hpp similarity index 99% rename from modules/hal/src/arithm_simd.hpp rename to modules/core/src/arithm_simd.hpp index 4e40298..b6a549e 100644 --- a/modules/hal/src/arithm_simd.hpp +++ b/modules/core/src/arithm_simd.hpp @@ -42,10 +42,10 @@ // //M*/ -#ifndef __OPENCV_HAL_ARITHM_SIMD_HPP__ -#define __OPENCV_HAL_ARITHM_SIMD_HPP__ +#ifndef __OPENCV_ARITHM_SIMD_HPP__ +#define __OPENCV_ARITHM_SIMD_HPP__ -namespace cv { namespace hal { 
+namespace cv { struct NOP {}; @@ -2020,6 +2020,6 @@ struct AddWeighted_SIMD #endif -}} +} -#endif // __OPENCV_HAL_ARITHM_SIMD_HPP__ +#endif // __OPENCV_ARITHM_SIMD_HPP__ diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp new file mode 100644 index 0000000..65866f8 --- /dev/null +++ b/modules/core/src/hal_replacement.hpp @@ -0,0 +1,228 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_CORE_HAL_REPLACEMENT_HPP__ +#define __OPENCV_CORE_HAL_REPLACEMENT_HPP__ + +#include "opencv2/core/hal/interface.h" + +inline int hal_ni_add8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, 
int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min8s(const schar*, 
size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_and8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_or8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { 
return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_xor8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_not8u(const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +#define cv_hal_add8u hal_ni_add8u +#define cv_hal_add8s hal_ni_add8s +#define cv_hal_add16u hal_ni_add16u +#define cv_hal_add16s hal_ni_add16s +#define cv_hal_add32s hal_ni_add32s +#define cv_hal_add32f hal_ni_add32f +#define cv_hal_add64f hal_ni_add64f +#define cv_hal_sub8u hal_ni_sub8u +#define cv_hal_sub8s hal_ni_sub8s +#define cv_hal_sub16u hal_ni_sub16u +#define cv_hal_sub16s hal_ni_sub16s +#define cv_hal_sub32s hal_ni_sub32s +#define cv_hal_sub32f hal_ni_sub32f +#define cv_hal_sub64f hal_ni_sub64f +#define cv_hal_max8u hal_ni_max8u +#define cv_hal_max8s hal_ni_max8s +#define cv_hal_max16u hal_ni_max16u +#define cv_hal_max16s hal_ni_max16s +#define cv_hal_max32s hal_ni_max32s +#define cv_hal_max32f hal_ni_max32f +#define cv_hal_max64f hal_ni_max64f +#define cv_hal_min8u hal_ni_min8u +#define cv_hal_min8s hal_ni_min8s +#define cv_hal_min16u hal_ni_min16u +#define cv_hal_min16s hal_ni_min16s +#define cv_hal_min32s hal_ni_min32s +#define cv_hal_min32f hal_ni_min32f +#define cv_hal_min64f hal_ni_min64f +#define cv_hal_absdiff8u hal_ni_absdiff8u +#define cv_hal_absdiff8s hal_ni_absdiff8s +#define cv_hal_absdiff16u hal_ni_absdiff16u +#define cv_hal_absdiff16s hal_ni_absdiff16s +#define cv_hal_absdiff32s hal_ni_absdiff32s +#define cv_hal_absdiff32f hal_ni_absdiff32f +#define cv_hal_absdiff64f hal_ni_absdiff64f +#define cv_hal_and8u hal_ni_and8u +#define cv_hal_or8u hal_ni_or8u +#define cv_hal_xor8u hal_ni_xor8u +#define cv_hal_not8u hal_ni_not8u + +inline int hal_ni_cmp8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp8s(const schar*, size_t, const schar*, size_t, uchar*, size_t, int, int, 
int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp16u(const ushort*, size_t, const ushort*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp16s(const short*, size_t, const short*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp32s(const int*, size_t, const int*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp32f(const float*, size_t, const float*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp64f(const double*, size_t, const double*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +#define cv_hal_cmp8u hal_ni_cmp8u +#define cv_hal_cmp8s hal_ni_cmp8s +#define cv_hal_cmp16u hal_ni_cmp16u +#define cv_hal_cmp16s hal_ni_cmp16s +#define cv_hal_cmp32s hal_ni_cmp32s +#define cv_hal_cmp32f hal_ni_cmp32f +#define cv_hal_cmp64f hal_ni_cmp64f + +inline int hal_ni_mul8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return 
CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip64f(const double*, size_t, const double*, size_t, 
double*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +#define cv_hal_mul8u hal_ni_mul8u +#define cv_hal_mul8s hal_ni_mul8s +#define cv_hal_mul16u hal_ni_mul16u +#define cv_hal_mul16s hal_ni_mul16s +#define cv_hal_mul32s hal_ni_mul32s +#define cv_hal_mul32f hal_ni_mul32f +#define cv_hal_mul64f hal_ni_mul64f +#define cv_hal_div8u hal_ni_div8u +#define cv_hal_div8s hal_ni_div8s +#define cv_hal_div16u hal_ni_div16u +#define cv_hal_div16s hal_ni_div16s +#define cv_hal_div32s hal_ni_div32s +#define cv_hal_div32f hal_ni_div32f +#define cv_hal_div64f hal_ni_div64f +#define cv_hal_recip8u hal_ni_recip8u +#define cv_hal_recip8s hal_ni_recip8s +#define cv_hal_recip16u hal_ni_recip16u +#define cv_hal_recip16s hal_ni_recip16s +#define cv_hal_recip32s hal_ni_recip32s +#define cv_hal_recip32f hal_ni_recip32f +#define cv_hal_recip64f hal_ni_recip64f + +inline int hal_ni_addWeighted8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + 
+#define cv_hal_addWeighted8u hal_ni_addWeighted8u +#define cv_hal_addWeighted8s hal_ni_addWeighted8s +#define cv_hal_addWeighted16u hal_ni_addWeighted16u +#define cv_hal_addWeighted16s hal_ni_addWeighted16s +#define cv_hal_addWeighted32s hal_ni_addWeighted32s +#define cv_hal_addWeighted32f hal_ni_addWeighted32f +#define cv_hal_addWeighted64f hal_ni_addWeighted64f + +inline int hal_ni_split8u(const uchar*, uchar**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_split16u(const ushort*, ushort**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_split32s(const int*, int**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_split64s(const int64*, int64**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +#define cv_hal_split8u hal_ni_split8u +#define cv_hal_split16u hal_ni_split16u +#define cv_hal_split32s hal_ni_split32s +#define cv_hal_split64s hal_ni_split64s + +inline int hal_ni_merge8u(const uchar**, uchar*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_merge16u(const ushort**, ushort*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_merge32s(const int**, int*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_merge64s(const int64**, int64*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +#define cv_hal_merge8u hal_ni_merge8u +#define cv_hal_merge16u hal_ni_merge16u +#define cv_hal_merge32s hal_ni_merge32s +#define cv_hal_merge64s hal_ni_merge64s + +#include "custom_hal.hpp" + +#endif diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index b3e1b52..4fcf3c7 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -52,22 +52,22 @@ namespace cv int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n) { - return hal::LU(A, astep, m, b, bstep, n); + return hal::LU32f(A, astep, m, b, bstep, n); } int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n) { - return 
hal::LU(A, astep, m, b, bstep, n); + return hal::LU64f(A, astep, m, b, bstep, n); } bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n) { - return hal::Cholesky(A, astep, m, b, bstep, n); + return hal::Cholesky32f(A, astep, m, b, bstep, n); } bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n) { - return hal::Cholesky(A, astep, m, b, bstep, n); + return hal::Cholesky64f(A, astep, m, b, bstep, n); } template static inline _Tp hypot(_Tp a, _Tp b) @@ -740,7 +740,7 @@ double cv::determinant( InputArray _mat ) Mat a(rows, rows, CV_32F, (uchar*)buffer); mat.copyTo(a); - result = hal::LU(a.ptr(), a.step, rows, 0, 0, 0); + result = hal::LU32f(a.ptr(), a.step, rows, 0, 0, 0); if( result ) { for( int i = 0; i < rows; i++ ) @@ -764,7 +764,7 @@ double cv::determinant( InputArray _mat ) Mat a(rows, rows, CV_64F, (uchar*)buffer); mat.copyTo(a); - result = hal::LU(a.ptr(), a.step, rows, 0, 0, 0); + result = hal::LU64f(a.ptr(), a.step, rows, 0, 0, 0); if( result ) { for( int i = 0; i < rows; i++ ) @@ -1027,13 +1027,13 @@ double cv::invert( InputArray _src, OutputArray _dst, int method ) setIdentity(dst); if( method == DECOMP_LU && type == CV_32F ) - result = hal::LU(src1.ptr(), src1.step, n, dst.ptr(), dst.step, n) != 0; + result = hal::LU32f(src1.ptr(), src1.step, n, dst.ptr(), dst.step, n) != 0; else if( method == DECOMP_LU && type == CV_64F ) - result = hal::LU(src1.ptr(), src1.step, n, dst.ptr(), dst.step, n) != 0; + result = hal::LU64f(src1.ptr(), src1.step, n, dst.ptr(), dst.step, n) != 0; else if( method == DECOMP_CHOLESKY && type == CV_32F ) - result = hal::Cholesky(src1.ptr(), src1.step, n, dst.ptr(), dst.step, n); + result = hal::Cholesky32f(src1.ptr(), src1.step, n, dst.ptr(), dst.step, n); else - result = hal::Cholesky(src1.ptr(), src1.step, n, dst.ptr(), dst.step, n); + result = hal::Cholesky64f(src1.ptr(), src1.step, n, dst.ptr(), dst.step, n); if( !result ) dst = Scalar(0); @@ -1265,16 +1265,16 @@ bool cv::solve( 
InputArray _src, InputArray _src2arg, OutputArray _dst, int meth if( method == DECOMP_LU ) { if( type == CV_32F ) - result = hal::LU(a.ptr(), a.step, n, dst.ptr(), dst.step, nb) != 0; + result = hal::LU32f(a.ptr(), a.step, n, dst.ptr(), dst.step, nb) != 0; else - result = hal::LU(a.ptr(), a.step, n, dst.ptr(), dst.step, nb) != 0; + result = hal::LU64f(a.ptr(), a.step, n, dst.ptr(), dst.step, nb) != 0; } else if( method == DECOMP_CHOLESKY ) { if( type == CV_32F ) - result = hal::Cholesky(a.ptr(), a.step, n, dst.ptr(), dst.step, nb); + result = hal::Cholesky32f(a.ptr(), a.step, n, dst.ptr(), dst.step, nb); else - result = hal::Cholesky(a.ptr(), a.step, n, dst.ptr(), dst.step, nb); + result = hal::Cholesky64f(a.ptr(), a.step, n, dst.ptr(), dst.step, nb); } else { diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index b07c1ee..495711f 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -191,13 +191,13 @@ void magnitude( InputArray src1, InputArray src2, OutputArray dst ) { const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; float *mag = (float*)ptrs[2]; - hal::magnitude( x, y, mag, len ); + hal::magnitude32f( x, y, mag, len ); } else { const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; double *mag = (double*)ptrs[2]; - hal::magnitude( x, y, mag, len ); + hal::magnitude64f( x, y, mag, len ); } } } @@ -374,7 +374,7 @@ void cartToPolar( InputArray src1, InputArray src2, { const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3]; - hal::magnitude( x, y, mag, len ); + hal::magnitude32f( x, y, mag, len ); hal::fastAtan2( y, x, angle, len, angleInDegrees ); } else @@ -382,7 +382,7 @@ void cartToPolar( InputArray src1, InputArray src2, const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; double *angle = (double*)ptrs[3]; - hal::magnitude(x, y, (double*)ptrs[2], len); + hal::magnitude64f(x, y, 
(double*)ptrs[2], len); k = 0; #if CV_SSE2 @@ -760,7 +760,7 @@ static void Exp_32f_ipp(const float *x, float *y, int n) } setIppErrorStatus(); } - hal::exp(x, y, n); + hal::exp32f(x, y, n); } static void Exp_64f_ipp(const double *x, double *y, int n) @@ -774,14 +774,14 @@ static void Exp_64f_ipp(const double *x, double *y, int n) } setIppErrorStatus(); } - hal::exp(x, y, n); + hal::exp64f(x, y, n); } #define Exp_32f Exp_32f_ipp #define Exp_64f Exp_64f_ipp #else -#define Exp_32f hal::exp -#define Exp_64f hal::exp +#define Exp_32f hal::exp32f +#define Exp_64f hal::exp64f #endif @@ -828,7 +828,7 @@ static void Log_32f_ipp(const float *x, float *y, int n) } setIppErrorStatus(); } - hal::log(x, y, n); + hal::log32f(x, y, n); } static void Log_64f_ipp(const double *x, double *y, int n) @@ -842,14 +842,14 @@ static void Log_64f_ipp(const double *x, double *y, int n) } setIppErrorStatus(); } - hal::log(x, y, n); + hal::log64f(x, y, n); } #define Log_32f Log_32f_ipp #define Log_64f Log_64f_ipp #else -#define Log_32f hal::log -#define Log_64f hal::log +#define Log_32f hal::log32f +#define Log_64f hal::log64f #endif void log( InputArray _src, OutputArray _dst ) @@ -1356,10 +1356,10 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, #endif -static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt(src, dst, n); } -static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt(src, dst, n); } -static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt(src, dst, n); } -static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt(src, dst, n); } +static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt32f(src, dst, n); } +static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt64f(src, dst, n); } +static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt32f(src, dst, n); } +static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt64f(src, 
dst, n); } void pow( InputArray _src, double power, OutputArray _dst ) { diff --git a/modules/hal/src/mathfuncs.cpp b/modules/core/src/mathfuncs_core.cpp similarity index 97% rename from modules/hal/src/mathfuncs.cpp rename to modules/core/src/mathfuncs_core.cpp index 66a03e1..7b3ec31 100644 --- a/modules/hal/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs_core.cpp @@ -52,16 +52,6 @@ static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI); static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI); static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI); -#if CV_NEON -static inline float32x4_t cv_vrecpq_f32(float32x4_t val) -{ - float32x4_t reciprocal = vrecpeq_f32(val); - reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal); - return reciprocal; -} -#endif - void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees ) { int i = 0; @@ -160,7 +150,7 @@ void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angle } -void magnitude(const float* x, const float* y, float* mag, int len) +void magnitude32f(const float* x, const float* y, float* mag, int len) { #if defined HAVE_IPP CV_IPP_CHECK() @@ -196,7 +186,7 @@ void magnitude(const float* x, const float* y, float* mag, int len) } } -void magnitude(const double* x, const double* y, double* mag, int len) +void magnitude64f(const double* x, const double* y, double* mag, int len) { #if defined(HAVE_IPP) CV_IPP_CHECK() @@ -233,7 +223,7 @@ void magnitude(const double* x, const double* y, double* mag, int len) } -void invSqrt(const float* src, float* dst, int len) +void invSqrt32f(const float* src, float* dst, int len) { #if defined(HAVE_IPP) CV_IPP_CHECK() @@ -264,7 +254,7 @@ void invSqrt(const float* src, float* dst, int len) } -void invSqrt(const double* src, double* dst, int len) +void invSqrt64f(const double* src, double* dst, int len) { int i = 0; 
@@ -279,7 +269,7 @@ void invSqrt(const double* src, double* dst, int len) } -void sqrt(const float* src, float* dst, int len) +void sqrt32f(const float* src, float* dst, int len) { #if defined(HAVE_IPP) CV_IPP_CHECK() @@ -310,7 +300,7 @@ void sqrt(const float* src, float* dst, int len) } -void sqrt(const double* src, double* dst, int len) +void sqrt64f(const double* src, double* dst, int len) { #if defined(HAVE_IPP) CV_IPP_CHECK() @@ -441,7 +431,7 @@ static const double exp_prescale = 1.4426950408889634073599246810019 * (1 << EXP static const double exp_postscale = 1./(1 << EXPTAB_SCALE); static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000 -void exp( const float *_x, float *y, int n ) +void exp32f( const float *_x, float *y, int n ) { static const float A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0), @@ -640,7 +630,7 @@ void exp( const float *_x, float *y, int n ) } } -void exp( const double *_x, double *y, int n ) +void exp64f( const double *_x, double *y, int n ) { static const double A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0, @@ -1084,7 +1074,7 @@ static const double CV_DECL_ALIGNED(16) icvLogTab[] = { #define LOGTAB_TRANSLATE(x,h) (((x) - 1.)*icvLogTab[(h)+1]) static const double ln_2 = 0.69314718055994530941723212145818; -void log( const float *_x, float *y, int n ) +void log32f( const float *_x, float *y, int n ) { static const float shift[] = { 0, -1.f/512 }; static const float @@ -1228,7 +1218,7 @@ void log( const float *_x, float *y, int n ) } } -void log( const double *x, double *y, int n ) +void log64f( const double *x, double *y, int n ) { static const double shift[] = { 0, -1./512 }; static const double @@ -1413,4 +1403,58 @@ void log( const double *x, double *y, int n ) } } -}} +//============================================================================= +// for compatibility with 3.0 + +void exp(const float* src, float* dst, int n) +{ + exp32f(src, dst, n); +} + +void 
exp(const double* src, double* dst, int n) +{ + exp64f(src, dst, n); +} + +void log(const float* src, float* dst, int n) +{ + log32f(src, dst, n); +} + +void log(const double* src, double* dst, int n) +{ + log64f(src, dst, n); +} + +void magnitude(const float* x, const float* y, float* dst, int n) +{ + magnitude32f(x, y, dst, n); +} + +void magnitude(const double* x, const double* y, double* dst, int n) +{ + magnitude64f(x, y, dst, n); +} + +void sqrt(const float* src, float* dst, int len) +{ + sqrt32f(src, dst, len); +} + +void sqrt(const double* src, double* dst, int len) +{ + sqrt64f(src, dst, len); +} + +void invSqrt(const float* src, float* dst, int len) +{ + invSqrt32f(src, dst, len); +} + +void invSqrt(const double* src, double* dst, int len) +{ + invSqrt64f(src, dst, len); +} + + +}} // cv::hal:: diff --git a/modules/hal/src/matrix.cpp b/modules/core/src/matrix_decomp.cpp similarity index 90% rename from modules/hal/src/matrix.cpp rename to modules/core/src/matrix_decomp.cpp index 921b778..3fe9eca 100644 --- a/modules/hal/src/matrix.cpp +++ b/modules/core/src/matrix_decomp.cpp @@ -109,18 +109,17 @@ LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps) } -int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n) +int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n) { return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10); } -int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n) +int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n) { return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100); } - template static inline bool CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n) { @@ -195,6 +194,29 @@ CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n) } +bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n) +{ + return CholImpl(A, astep, m, b, bstep, n); +} + +bool Cholesky64f(double* A, size_t astep, int m, double* b, 
size_t bstep, int n) +{ + return CholImpl(A, astep, m, b, bstep, n); +} + +//============================================================================= +// for compatibility with 3.0 + +int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n) +{ + return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10); +} + +int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n) +{ + return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100); +} + bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n) { return CholImpl(A, astep, m, b, bstep, n); @@ -205,4 +227,5 @@ bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n) return CholImpl(A, astep, m, b, bstep, n); } + }} diff --git a/modules/hal/src/merge.cpp b/modules/core/src/merge.cpp similarity index 98% rename from modules/hal/src/merge.cpp rename to modules/core/src/merge.cpp index 982b24c..abe9f64 100644 --- a/modules/hal/src/merge.cpp +++ b/modules/core/src/merge.cpp @@ -387,21 +387,25 @@ merge_( const T** src, T* dst, int len, int cn ) void merge8u(const uchar** src, uchar* dst, int len, int cn ) { + CALL_HAL(merge8u, cv_hal_merge8u, src, dst, len, cn) merge_(src, dst, len, cn); } void merge16u(const ushort** src, ushort* dst, int len, int cn ) { + CALL_HAL(merge16u, cv_hal_merge16u, src, dst, len, cn) merge_(src, dst, len, cn); } void merge32s(const int** src, int* dst, int len, int cn ) { + CALL_HAL(merge32s, cv_hal_merge32s, src, dst, len, cn) merge_(src, dst, len, cn); } void merge64s(const int64** src, int64* dst, int len, int cn ) { + CALL_HAL(merge64s, cv_hal_merge64s, src, dst, len, cn) merge_(src, dst, len, cn); } diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index d1f2ec2..f699ede 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -58,8 +58,6 @@ #include "opencv2/core/ocl.hpp" #endif -#include "opencv2/hal.hpp" - #include #include #include @@ -69,6 +67,27 @@ #include #include +#include 
+#include +#include +#include +#include +#include +#include + +#define USE_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) /* gate SSE2 code paths on the SSE2 feature bit, not plain SSE */ +#define USE_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) +#define USE_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) +#define USE_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) + +#include "opencv2/core/hal/hal.hpp" +#include "opencv2/core/hal/intrin.hpp" +#include "opencv2/core/sse_utils.hpp" +#include "opencv2/core/neon_utils.hpp" + +#include "arithm_core.hpp" +#include "hal_replacement.hpp" + #ifdef HAVE_TEGRA_OPTIMIZATION #include "opencv2/core/core_tegra.hpp" #else @@ -78,6 +97,34 @@ namespace cv { +// -128.f ... 255.f +extern const float g_8x32fTab[]; +#define CV_8TO32F(x) cv::g_8x32fTab[(x)+128] + +extern const ushort g_8x16uSqrTab[]; +#define CV_SQR_8U(x) cv::g_8x16uSqrTab[(x)+255] + +extern const uchar g_Saturate8u[]; +#define CV_FAST_CAST_8U(t) (assert(-256 <= (t) && (t) <= 512), cv::g_Saturate8u[(t)+256]) +#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b))) +#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a))) + +template<> inline uchar OpAdd<uchar>::operator ()(uchar a, uchar b) const +{ return CV_FAST_CAST_8U(a + b); } + +template<> inline uchar OpSub<uchar>::operator ()(uchar a, uchar b) const +{ return CV_FAST_CAST_8U(a - b); } + +template<> inline short OpAbsDiff<short>::operator ()(short a, short b) const +{ return saturate_cast<short>(std::abs(a - b)); } + +template<> inline schar OpAbsDiff<schar>::operator ()(schar a, schar b) const +{ return saturate_cast<schar>(std::abs(a - b)); } + +template<> inline uchar OpMin<uchar>::operator ()(uchar a, uchar b) const { return CV_MIN_8U(a, b); } + +template<> inline uchar OpMax<uchar>::operator ()(uchar a, uchar b) const { return CV_MAX_8U(a, b); } + typedef void (*BinaryFunc)(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, @@ -100,21 +147,6 @@ BinaryFunc getCopyMaskFunc(size_t esz); /* maximal average node_count/hash_size ratio beyond which hash table is resized */ #define 
CV_SPARSE_HASH_RATIO 3 - - -// -128.f ... 255.f -extern const float g_8x32fTab[]; -#define CV_8TO32F(x) cv::g_8x32fTab[(x)+128] - -extern const ushort g_8x16uSqrTab[]; -#define CV_SQR_8U(x) cv::g_8x16uSqrTab[(x)+255] - -extern const uchar g_Saturate8u[]; -#define CV_FAST_CAST_8U(t) (assert(-256 <= (t) && (t) <= 512), cv::g_Saturate8u[(t)+256]) -#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b))) -#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a))) - - #if defined WIN32 || defined _WIN32 void deleteThreadAllocData(); #endif @@ -282,6 +314,4 @@ cv::Mutex& getInitializationMutex(); } -#include "opencv2/hal/intrin.hpp" - #endif /*_CXCORE_INTERNAL_H_*/ diff --git a/modules/hal/src/split.cpp b/modules/core/src/split.cpp similarity index 98% rename from modules/hal/src/split.cpp rename to modules/core/src/split.cpp index c31bf8c..311e97d 100644 --- a/modules/hal/src/split.cpp +++ b/modules/core/src/split.cpp @@ -403,21 +403,25 @@ split_( const T* src, T** dst, int len, int cn ) void split8u(const uchar* src, uchar** dst, int len, int cn ) { + CALL_HAL(split8u, cv_hal_split8u, src,dst, len, cn) split_(src, dst, len, cn); } void split16u(const ushort* src, ushort** dst, int len, int cn ) { + CALL_HAL(split16u, cv_hal_split16u, src,dst, len, cn) split_(src, dst, len, cn); } void split32s(const int* src, int** dst, int len, int cn ) { + CALL_HAL(split32s, cv_hal_split32s, src,dst, len, cn) split_(src, dst, len, cn); } void split64s(const int64* src, int64** dst, int len, int cn ) { + CALL_HAL(split64s, cv_hal_split64s, src,dst, len, cn) split_(src, dst, len, cn); } diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 4e60dbe..e352575 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -3996,3 +3996,266 @@ cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr ) return !maskarr ? 
cv::norm(a, b, normType) : cv::norm(a, b, normType, mask); } + +namespace cv { namespace hal { + +static const uchar popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +static const uchar popCountTable2[] = +{ + 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, + 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, + 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, + 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4 +}; + +static const uchar popCountTable4[] = +{ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +int normHamming(const uchar* a, int n) +{ + int i = 0; + int result = 0; +#if CV_NEON + { + uint32x4_t bits = vmovq_n_u32(0); + for (; i <= n - 16; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a + i); + uint8x16_t bitsSet = vcntq_u8 (A_vec); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); + } + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + } +#endif + for( ; i <= n - 4; i += 4 ) + result += popCountTable[a[i]] + popCountTable[a[i+1]] + + popCountTable[a[i+2]] + popCountTable[a[i+3]]; + for( ; i < n; i++ ) + result += popCountTable[a[i]]; + return result; +} + +int normHamming(const uchar* a, const uchar* b, int n) +{ + int i = 0; + int result = 0; +#if CV_NEON + { + uint32x4_t bits = vmovq_n_u32(0); + for (; i <= n - 16; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a + i); + uint8x16_t B_vec = vld1q_u8 (b + i); + uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); + uint8x16_t bitsSet = vcntq_u8 (AxorB); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); + } + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + } 
+#endif + for( ; i <= n - 4; i += 4 ) + result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + + popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; + for( ; i < n; i++ ) + result += popCountTable[a[i] ^ b[i]]; + return result; +} + +int normHamming(const uchar* a, int n, int cellSize) +{ + if( cellSize == 1 ) + return normHamming(a, n); + const uchar* tab = 0; + if( cellSize == 2 ) + tab = popCountTable2; + else if( cellSize == 4 ) + tab = popCountTable4; + else + return -1; + int i = 0; + int result = 0; +#if CV_ENABLE_UNROLLED + for( ; i <= n - 4; i += 4 ) + result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]]; +#endif + for( ; i < n; i++ ) + result += tab[a[i]]; + return result; +} + +int normHamming(const uchar* a, const uchar* b, int n, int cellSize) +{ + if( cellSize == 1 ) + return normHamming(a, b, n); + const uchar* tab = 0; + if( cellSize == 2 ) + tab = popCountTable2; + else if( cellSize == 4 ) + tab = popCountTable4; + else + return -1; + int i = 0; + int result = 0; + #if CV_ENABLE_UNROLLED + for( ; i <= n - 4; i += 4 ) + result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] + + tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]]; + #endif + for( ; i < n; i++ ) + result += tab[a[i] ^ b[i]]; + return result; +} + +float normL2Sqr_(const float* a, const float* b, int n) +{ + int j = 0; float d = 0.f; +#if CV_SSE + float CV_DECL_ALIGNED(16) buf[4]; + __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps(); + + for( ; j <= n - 8; j += 8 ) + { + __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j)); + __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4)); + d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0)); + d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1)); + } + _mm_store_ps(buf, _mm_add_ps(d0, d1)); + d = buf[0] + buf[1] + buf[2] + buf[3]; +#endif + { + for( ; j <= n - 4; j += 4 ) + { + float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3]; + d += t0*t0 + t1*t1 + t2*t2 + t3*t3; + } + } 
+ + for( ; j < n; j++ ) + { + float t = a[j] - b[j]; + d += t*t; + } + return d; +} + + +float normL1_(const float* a, const float* b, int n) +{ + int j = 0; float d = 0.f; +#if CV_SSE + float CV_DECL_ALIGNED(16) buf[4]; + static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; + __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps(); + __m128 absmask = _mm_load_ps((const float*)absbuf); + + for( ; j <= n - 8; j += 8 ) + { + __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j)); + __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4)); + d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask)); + d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask)); + } + _mm_store_ps(buf, _mm_add_ps(d0, d1)); + d = buf[0] + buf[1] + buf[2] + buf[3]; +#elif CV_NEON + float32x4_t v_sum = vdupq_n_f32(0.0f); + for ( ; j <= n - 4; j += 4) + v_sum = vaddq_f32(v_sum, vabdq_f32(vld1q_f32(a + j), vld1q_f32(b + j))); + + float CV_DECL_ALIGNED(16) buf[4]; + vst1q_f32(buf, v_sum); + d = buf[0] + buf[1] + buf[2] + buf[3]; +#endif + { + for( ; j <= n - 4; j += 4 ) + { + d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) + + std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]); + } + } + + for( ; j < n; j++ ) + d += std::abs(a[j] - b[j]); + return d; +} + +int normL1_(const uchar* a, const uchar* b, int n) +{ + int j = 0, d = 0; +#if CV_SSE + __m128i d0 = _mm_setzero_si128(); + + for( ; j <= n - 16; j += 16 ) + { + __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j)); + __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j)); + + d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1)); + } + + for( ; j <= n - 4; j += 4 ) + { + __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j)); + __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j)); + + d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1)); + } + d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0))); +#elif CV_NEON + uint32x4_t v_sum = vdupq_n_u32(0.0f); + for ( ; j <= n - 16; j += 16) + { + 
uint8x16_t v_dst = vabdq_u8(vld1q_u8(a + j), vld1q_u8(b + j)); + uint16x8_t v_low = vmovl_u8(vget_low_u8(v_dst)), v_high = vmovl_u8(vget_high_u8(v_dst)); + v_sum = vaddq_u32(v_sum, vaddl_u16(vget_low_u16(v_low), vget_low_u16(v_high))); + v_sum = vaddq_u32(v_sum, vaddl_u16(vget_high_u16(v_low), vget_high_u16(v_high))); + } + + uint CV_DECL_ALIGNED(16) buf[4]; + vst1q_u32(buf, v_sum); + d = buf[0] + buf[1] + buf[2] + buf[3]; +#endif + { + for( ; j <= n - 4; j += 4 ) + { + d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) + + std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]); + } + } + for( ; j < n; j++ ) + d += std::abs(a[j] - b[j]); + return d; +} + +}} //cv::hal diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index ba2c9d5..5fbb453 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -86,6 +86,45 @@ Mutex* __initialization_mutex_initializer = &getInitializationMutex(); #undef max #undef abs #include +#if defined _MSC_VER + #if _MSC_VER >= 1400 + #include + #elif defined _M_IX86 + static void __cpuid(int* cpuid_data, int) + { + __asm + { + push ebx + push edi + mov edi, cpuid_data + mov eax, 1 + cpuid + mov [edi], eax + mov [edi + 4], ebx + mov [edi + 8], ecx + mov [edi + 12], edx + pop edi + pop ebx + } + } + static void __cpuidex(int* cpuid_data, int, int) + { + __asm + { + push edi + mov edi, cpuid_data + mov eax, 7 + mov ecx, 0 + cpuid + mov [edi], eax + mov [edi + 4], ebx + mov [edi + 8], ecx + mov [edi + 12], edx + pop edi + } + } + #endif +#endif #ifdef WINRT #include @@ -198,15 +237,154 @@ void Exception::formatMessage() msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str()); } +struct HWFeatures +{ + enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE }; + + HWFeatures(void) + { + memset( have, 0, sizeof(have) ); + x86_family = 0; + } + + static HWFeatures initialize(void) + { + HWFeatures f; + int cpuid_data[4] = { 0, 0, 0, 0 }; + + #if defined _MSC_VER && (defined _M_IX86 || defined 
_M_X64) + __cpuid(cpuid_data, 1); + #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) + #ifdef __x86_64__ + asm __volatile__ + ( + "movl $1, %%eax\n\t" + "cpuid\n\t" + :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3]) + : + : "cc" + ); + #else + asm volatile + ( + "pushl %%ebx\n\t" + "movl $1,%%eax\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(cpuid_data[0]), "=c"(cpuid_data[2]), "=d"(cpuid_data[3]) + : + : "cc" + ); + #endif + #endif + + f.x86_family = (cpuid_data[0] >> 8) & 15; + if( f.x86_family >= 6 ) + { + f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0; + f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0; + f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0; + f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0; + f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0; + f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0; + f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0; + f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0; + f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0; + f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));// both bits required: the OS uses XSAVE/XRSTOR (bit 27) and the CPU supports AVX (bit 28) + + // make the second call to the cpuid command in order to get + // information about extended features like AVX2 + #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) + __cpuidex(cpuid_data, 7, 0); + #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) + #ifdef __x86_64__ + asm __volatile__ + ( + "movl $7, %%eax\n\t" + "movl $0, %%ecx\n\t" + "cpuid\n\t" + :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3]) + : + : "cc" + ); + #else + asm volatile + ( + "pushl %%ebx\n\t" + "movl $7,%%eax\n\t" + "movl $0,%%ecx\n\t" + "cpuid\n\t" + "movl %%ebx, %0\n\t" + "popl %%ebx\n\t" + : "=S"(cpuid_data[1]), "=c"(cpuid_data[2]) /* pin the copy of ebx to esi: ebx itself is restored by popl */ + : + : "cc", "eax", "edx" /* cpuid overwrites eax and edx as well */ + ); + #endif + #endif + f.have[CV_CPU_AVX2] = (cpuid_data[1] & 
(1<<5)) != 0; + + f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0; + f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0; + f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0; + f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0; + f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0; + f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0; + f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0; + f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0; + f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0; + } + + #if defined ANDROID || defined __linux__ + #ifdef __aarch64__ + f.have[CV_CPU_NEON] = true; + #else + int cpufile = open("/proc/self/auxv", O_RDONLY); + + if (cpufile >= 0) + { + Elf32_auxv_t auxv; + const size_t size_auxv_t = sizeof(auxv); + + while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t) + { + if (auxv.a_type == AT_HWCAP) + { + f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0; + break; + } + } + + close(cpufile); + } + #endif + #elif (defined __clang__ || defined __APPLE__) && (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__)) + f.have[CV_CPU_NEON] = true; + #endif + + return f; + } + + int x86_family; + bool have[MAX_FEATURE+1]; +}; + +static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures(); +static HWFeatures* currentFeatures = &featuresEnabled; + bool checkHardwareSupport(int feature) { CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE ); - return cv::hal::checkHardwareSupport(feature); + return currentFeatures->have[feature]; } + +volatile bool useOptimizedFlag = true; + void setUseOptimized( bool flag ) { - cv::hal::setUseOptimized(flag); + useOptimizedFlag = flag; + currentFeatures = flag ? 
&featuresEnabled : &featuresDisabled; ipp::setUseIPP(flag); #ifdef HAVE_OPENCL @@ -219,7 +397,7 @@ void setUseOptimized( bool flag ) bool useOptimized(void) { - return cv::hal::useOptimized(); + return useOptimizedFlag; } int64 getTickCount(void) @@ -499,12 +677,12 @@ redirectError( CvErrorCallback errCallback, void* userdata, void** prevUserdata) CV_IMPL int cvCheckHardwareSupport(int feature) { CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE ); - return cv::hal::checkHardwareSupport(feature); + return cv::currentFeatures->have[feature]; } CV_IMPL int cvUseOptimized( int flag ) { - int prevMode = cv::useOptimized(); + int prevMode = cv::useOptimizedFlag; cv::setUseOptimized( flag != 0 ); return prevMode; } diff --git a/modules/core/test/test_hal_core.cpp b/modules/core/test/test_hal_core.cpp index 3dd0a4c..dfd0867 100644 --- a/modules/core/test/test_hal_core.cpp +++ b/modules/core/test/test_hal_core.cpp @@ -40,7 +40,6 @@ //M*/ #include "test_precomp.hpp" -#include "opencv2/hal.hpp" using namespace cv; @@ -72,21 +71,21 @@ TEST(Core_HAL, mathfuncs) { case HAL_EXP: if( depth == CV_32F ) - hal::exp(src.ptr(), dst.ptr(), n); + hal::exp32f(src.ptr(), dst.ptr(), n); else - hal::exp(src.ptr(), dst.ptr(), n); + hal::exp64f(src.ptr(), dst.ptr(), n); break; case HAL_LOG: if( depth == CV_32F ) - hal::log(src.ptr(), dst.ptr(), n); + hal::log32f(src.ptr(), dst.ptr(), n); else - hal::log(src.ptr(), dst.ptr(), n); + hal::log64f(src.ptr(), dst.ptr(), n); break; case HAL_SQRT: if( depth == CV_32F ) - hal::sqrt(src.ptr(), dst.ptr(), n); + hal::sqrt32f(src.ptr(), dst.ptr(), n); else - hal::sqrt(src.ptr(), dst.ptr(), n); + hal::sqrt64f(src.ptr(), dst.ptr(), n); break; default: CV_Error(Error::StsBadArg, "unknown function"); @@ -159,15 +158,15 @@ TEST(Core_HAL, mat_decomp) { case HAL_LU: if( depth == CV_32F ) - hal::LU(a.ptr(), a.step, size, x.ptr(), x.step, 1); + hal::LU32f(a.ptr(), a.step, size, x.ptr(), x.step, 1); else - hal::LU(a.ptr(), a.step, size, x.ptr(), 
x.step, 1); + hal::LU64f(a.ptr(), a.step, size, x.ptr(), x.step, 1); break; case HAL_CHOL: if( depth == CV_32F ) - hal::Cholesky(a.ptr(), a.step, size, x.ptr(), x.step, 1); + hal::Cholesky32f(a.ptr(), a.step, size, x.ptr(), x.step, 1); else - hal::Cholesky(a.ptr(), a.step, size, x.ptr(), x.step, 1); + hal::Cholesky64f(a.ptr(), a.step, size, x.ptr(), x.step, 1); break; default: CV_Error(Error::StsBadArg, "unknown function"); diff --git a/modules/hal/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp similarity index 100% rename from modules/hal/test/test_intrin.cpp rename to modules/core/test/test_intrin.cpp diff --git a/modules/hal/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp similarity index 99% rename from modules/hal/test/test_intrin_utils.hpp rename to modules/core/test/test_intrin_utils.hpp index 47473ae..a0eab56 100644 --- a/modules/hal/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1,7 +1,7 @@ #ifndef _TEST_UTILS_HPP_ #define _TEST_UTILS_HPP_ -#include "opencv2/hal/intrin.hpp" +#include "opencv2/core/hal/intrin.hpp" #include "opencv2/ts.hpp" #include #include diff --git a/modules/core/test/test_precomp.hpp b/modules/core/test/test_precomp.hpp index d981cea..962348b 100644 --- a/modules/core/test/test_precomp.hpp +++ b/modules/core/test/test_precomp.hpp @@ -13,6 +13,9 @@ #include "opencv2/ts.hpp" #include "opencv2/core/core_c.h" +#include "opencv2/core/cvdef.h" #include "opencv2/core/private.hpp" +#include "opencv2/core/hal/hal.hpp" +#include "opencv2/core/hal/intrin.hpp" #endif diff --git a/modules/features2d/src/precomp.hpp b/modules/features2d/src/precomp.hpp index 2f77d92..c3db78b 100644 --- a/modules/features2d/src/precomp.hpp +++ b/modules/features2d/src/precomp.hpp @@ -49,6 +49,7 @@ #include "opencv2/core/utility.hpp" #include "opencv2/core/private.hpp" #include "opencv2/core/ocl.hpp" +#include "opencv2/core/hal/hal.hpp" #include diff --git a/modules/hal/CMakeLists.txt 
b/modules/hal/CMakeLists.txt deleted file mode 100644 index 982913d..0000000 --- a/modules/hal/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -set(the_description "The Hardware Acceleration Layer (HAL) module") - -set(OPENCV_MODULE_TYPE STATIC) - -if(OPENCV_HAL_HEADERS AND OPENCV_HAL_LIBS) - set(OPENCV_HAL_HEADERS_INCLUDES "#include \"${OPENCV_HAL_HEADERS}\"") - set(DEPS "${OPENCV_HAL_LIBS}") -else() - set(OPENCV_HAL_HEADERS_INCLUDES "// using default HAL") - set(DEPS "") -endif() - -configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/custom_hal.hpp.in" "${CMAKE_BINARY_DIR}/custom_hal.hpp" @ONLY) - -if(UNIX) - if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") - endif() -endif() - -ocv_define_module(hal ${DEPS}) diff --git a/modules/hal/include/opencv2/hal.hpp b/modules/hal/include/opencv2/hal.hpp deleted file mode 100644 index 125bbc8..0000000 --- a/modules/hal/include/opencv2/hal.hpp +++ /dev/null @@ -1,287 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2015, Itseez Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_HAL_HPP__ -#define __OPENCV_HAL_HPP__ - -#include "opencv2/hal/defs.h" -#include "opencv2/hal/interface.hpp" - -/** - @defgroup hal Hardware Acceleration Layer - @{ - @defgroup hal_intrin Universal intrinsics - @{ - @defgroup hal_intrin_impl Private implementation helpers - @} - @defgroup hal_utils Platform-dependent utils - @} -*/ - -namespace cv { namespace hal { - -//! @addtogroup hal -//! 
@{ - -class Failure -{ -public: - Failure(int code_ = Error::Unknown) : code(code_) {} -public: - int code; -}; - -int normHamming(const uchar* a, int n); -int normHamming(const uchar* a, const uchar* b, int n); - -int normHamming(const uchar* a, int n, int cellSize); -int normHamming(const uchar* a, const uchar* b, int n, int cellSize); - -//////////////////////////////// low-level functions //////////////////////////////// - -int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n); -int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n); -bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n); -bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); - -int normL1_(const uchar* a, const uchar* b, int n); -float normL1_(const float* a, const float* b, int n); -float normL2Sqr_(const float* a, const float* b, int n); - -void exp(const float* src, float* dst, int n); -void exp(const double* src, double* dst, int n); -void log(const float* src, float* dst, int n); -void log(const double* src, double* dst, int n); - -void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees); -void magnitude(const float* x, const float* y, float* dst, int n); -void magnitude(const double* x, const double* y, double* dst, int n); -void sqrt(const float* src, float* dst, int len); -void sqrt(const double* src, double* dst, int len); -void invSqrt(const float* src, float* dst, int len); -void invSqrt(const double* src, double* dst, int len); - -void split8u(const uchar* src, uchar** dst, int len, int cn ); -void split16u(const ushort* src, ushort** dst, int len, int cn ); -void split32s(const int* src, int** dst, int len, int cn ); -void split64s(const int64* src, int64** dst, int len, int cn ); - -void merge8u(const uchar** src, uchar* dst, int len, int cn ); -void merge16u(const ushort** src, ushort* dst, int len, int cn ); -void merge32s(const int** src, int* dst, int len, int cn ); 
-void merge64s(const int64** src, int64* dst, int len, int cn ); - -void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); -void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); -void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); -void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); -void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); -void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); -void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); - -void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); -void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); -void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); -void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); -void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); -void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); -void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); - -void max8u( 
const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); -void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); -void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); -void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); -void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); -void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); -void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); - -void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); -void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); -void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); -void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); -void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); -void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); -void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); - -void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, 
size_t step, int width, int height, void* ); -void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); -void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); -void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); -void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); -void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); -void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); - -void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); -void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); -void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); -void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); - -void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); -void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); -void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); -void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* 
_cmpop); -void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); -void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); -void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); - -void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); -void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); -void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); -void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); -void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); -void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); -void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); - -void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); -void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); -void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); -void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); 
-void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); -void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); -void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); - -void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); -void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); -void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); -void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); -void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); -void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); -void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); - -void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars ); -void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars ); -void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars ); -void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, 
size_t step, int width, int height, void* scalars ); -void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars ); -void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); -void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); -//! @} - -}} //cv::hal - -namespace cv { - -template struct OpAdd -{ - typedef T1 type1; - typedef T2 type2; - typedef T3 rtype; - T3 operator ()(const T1 a, const T2 b) const { return saturate_cast(a + b); } -}; - -template struct OpSub -{ - typedef T1 type1; - typedef T2 type2; - typedef T3 rtype; - T3 operator ()(const T1 a, const T2 b) const { return saturate_cast(a - b); } -}; - -template struct OpRSub -{ - typedef T1 type1; - typedef T2 type2; - typedef T3 rtype; - T3 operator ()(const T1 a, const T2 b) const { return saturate_cast(b - a); } -}; - -template struct OpMin -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator ()(const T a, const T b) const { return std::min(a, b); } -}; - -template struct OpMax -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator ()(const T a, const T b) const { return std::max(a, b); } -}; - -template struct OpAbsDiff -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator()(T a, T b) const { return a > b ? 
a - b : b - a; } -}; - -template struct OpAnd -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator()( T a, T b ) const { return a & b; } -}; - -template struct OpOr -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator()( T a, T b ) const { return a | b; } -}; - -template struct OpXor -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator()( T a, T b ) const { return a ^ b; } -}; - -template struct OpNot -{ - typedef T type1; - typedef T type2; - typedef T rtype; - T operator()( T a, T ) const { return ~a; } -}; - -} - -#endif //__OPENCV_HAL_HPP__ diff --git a/modules/hal/include/opencv2/hal/defs.h b/modules/hal/include/opencv2/hal/defs.h deleted file mode 100644 index c9566e3..0000000 --- a/modules/hal/include/opencv2/hal/defs.h +++ /dev/null @@ -1,675 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2015, Itseez Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DEF_H__ -#define __OPENCV_DEF_H__ - -//! @addtogroup hal_utils -//! 
@{ - -#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 -# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ -#endif - -#include -#include "opencv2/hal/interface.hpp" - -#if defined __ICL -# define CV_ICC __ICL -#elif defined __ICC -# define CV_ICC __ICC -#elif defined __ECL -# define CV_ICC __ECL -#elif defined __ECC -# define CV_ICC __ECC -#elif defined __INTEL_COMPILER -# define CV_ICC __INTEL_COMPILER -#endif - -#ifndef CV_INLINE -# if defined __cplusplus -# define CV_INLINE static inline -# elif defined _MSC_VER -# define CV_INLINE __inline -# else -# define CV_INLINE static -# endif -#endif - -#if defined CV_ICC && !defined CV_ENABLE_UNROLLED -# define CV_ENABLE_UNROLLED 0 -#else -# define CV_ENABLE_UNROLLED 1 -#endif - -#ifdef __GNUC__ -# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x))) -#elif defined _MSC_VER -# define CV_DECL_ALIGNED(x) __declspec(align(x)) -#else -# define CV_DECL_ALIGNED(x) -#endif - -/* CPU features and intrinsics support */ -#define CV_CPU_NONE 0 -#define CV_CPU_MMX 1 -#define CV_CPU_SSE 2 -#define CV_CPU_SSE2 3 -#define CV_CPU_SSE3 4 -#define CV_CPU_SSSE3 5 -#define CV_CPU_SSE4_1 6 -#define CV_CPU_SSE4_2 7 -#define CV_CPU_POPCNT 8 - -#define CV_CPU_AVX 10 -#define CV_CPU_AVX2 11 -#define CV_CPU_FMA3 12 - -#define CV_CPU_AVX_512F 13 -#define CV_CPU_AVX_512BW 14 -#define CV_CPU_AVX_512CD 15 -#define CV_CPU_AVX_512DQ 16 -#define CV_CPU_AVX_512ER 17 -#define CV_CPU_AVX_512IFMA512 18 -#define CV_CPU_AVX_512PF 19 -#define CV_CPU_AVX_512VBMI 20 -#define CV_CPU_AVX_512VL 21 - -#define CV_CPU_NEON 100 - -// when adding to this list remember to update the following enum -#define CV_HARDWARE_MAX_FEATURE 255 - -/** @brief Available CPU features. 
-*/ -enum CpuFeatures { - CPU_MMX = 1, - CPU_SSE = 2, - CPU_SSE2 = 3, - CPU_SSE3 = 4, - CPU_SSSE3 = 5, - CPU_SSE4_1 = 6, - CPU_SSE4_2 = 7, - CPU_POPCNT = 8, - - CPU_AVX = 10, - CPU_AVX2 = 11, - CPU_FMA3 = 12, - - CPU_AVX_512F = 13, - CPU_AVX_512BW = 14, - CPU_AVX_512CD = 15, - CPU_AVX_512DQ = 16, - CPU_AVX_512ER = 17, - CPU_AVX_512IFMA512 = 18, - CPU_AVX_512PF = 19, - CPU_AVX_512VBMI = 20, - CPU_AVX_512VL = 21, - - CPU_NEON = 100 -}; - -// do not include SSE/AVX/NEON headers for NVCC compiler -#ifndef __CUDACC__ - -#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) -# include -# define CV_MMX 1 -# define CV_SSE 1 -# define CV_SSE2 1 -# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE3 1 -# endif -# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSSE3 1 -# endif -# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_1 1 -# endif -# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_2 1 -# endif -# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500) -# ifdef _MSC_VER -# include -# else -# include -# endif -# define CV_POPCNT 1 -# endif -# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0) -// MS Visual Studio 2010 (2012?) 
has no macro pre-defined to identify the use of /arch:AVX -// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 -# include -# define CV_AVX 1 -# if defined(_XCR_XFEATURE_ENABLED_MASK) -# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) -# else -# define __xgetbv() 0 -# endif -# endif -# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0) -# include -# define CV_AVX2 1 -# if defined __FMA__ -# define CV_FMA3 1 -# endif -# endif -#endif - -#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) -# include -# include "arm_neon.h" -# define CV_NEON 1 -# define CPU_HAS_NEON_FEATURE (true) -#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) -# include -# define CV_NEON 1 -#endif - -#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ -# define CV_VFP 1 -#endif - -#endif // __CUDACC__ - -#ifndef CV_POPCNT -#define CV_POPCNT 0 -#endif -#ifndef CV_MMX -# define CV_MMX 0 -#endif -#ifndef CV_SSE -# define CV_SSE 0 -#endif -#ifndef CV_SSE2 -# define CV_SSE2 0 -#endif -#ifndef CV_SSE3 -# define CV_SSE3 0 -#endif -#ifndef CV_SSSE3 -# define CV_SSSE3 0 -#endif -#ifndef CV_SSE4_1 -# define CV_SSE4_1 0 -#endif -#ifndef CV_SSE4_2 -# define CV_SSE4_2 0 -#endif -#ifndef CV_AVX -# define CV_AVX 0 -#endif -#ifndef CV_AVX2 -# define CV_AVX2 0 -#endif -#ifndef CV_FMA3 -# define CV_FMA3 0 -#endif -#ifndef CV_AVX_512F -# define CV_AVX_512F 0 -#endif -#ifndef CV_AVX_512BW -# define CV_AVX_512BW 0 -#endif -#ifndef CV_AVX_512CD -# define CV_AVX_512CD 0 -#endif -#ifndef CV_AVX_512DQ -# define CV_AVX_512DQ 0 -#endif -#ifndef CV_AVX_512ER -# define CV_AVX_512ER 0 -#endif -#ifndef CV_AVX_512IFMA512 -# define CV_AVX_512IFMA512 0 -#endif -#ifndef CV_AVX_512PF -# define CV_AVX_512PF 0 -#endif -#ifndef CV_AVX_512VBMI -# define CV_AVX_512VBMI 0 -#endif 
-#ifndef CV_AVX_512VL -# define CV_AVX_512VL 0 -#endif - -#ifndef CV_NEON -# define CV_NEON 0 -#endif - -#ifndef CV_VFP -# define CV_VFP 0 -#endif - -/* fundamental constants */ -#define CV_PI 3.1415926535897932384626433832795 -#define CV_2PI 6.283185307179586476925286766559 -#define CV_LOG2 0.69314718055994530941723212145818 - -typedef union Cv32suf -{ - int i; - unsigned u; - float f; -} -Cv32suf; - -typedef union Cv64suf -{ - int64 i; - uint64 u; - double f; -} -Cv64suf; - -namespace cv { namespace hal { - -bool checkHardwareSupport(int feature); -void setUseOptimized(bool onoff); -bool useOptimized(); - -}} - -#define USE_SSE2 (cv::hal::checkHardwareSupport(CV_CPU_SSE)) -#define USE_SSE4_2 (cv::hal::checkHardwareSupport(CV_CPU_SSE4_2)) -#define USE_AVX (cv::hal::checkHardwareSupport(CV_CPU_AVX)) -#define USE_AVX2 (cv::hal::checkHardwareSupport(CV_CPU_AVX2)) - - -/****************************************************************************************\ -* fast math * -\****************************************************************************************/ - -#if defined __BORLANDC__ -# include -#elif defined __cplusplus -# include -#else -# include -#endif - -#ifdef HAVE_TEGRA_OPTIMIZATION -# include "tegra_round.hpp" -#endif - -#if CV_VFP - // 1. general scheme - #define ARM_ROUND(_value, _asm_string) \ - int res; \ - float temp; \ - asm(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \ - return res - // 2. version for double - #ifdef __clang__ - #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]") - #else - #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]") - #endif - // 3. version for float - #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") -#endif // CV_VFP - -/** @brief Rounds floating-point number to the nearest integer - - @param value floating-point number. 
If the value is outside of INT_MIN ... INT_MAX range, the - result is not defined. - */ -CV_INLINE int -cvRound( double value ) -{ -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ - && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) - __m128d t = _mm_set_sd( value ); - return _mm_cvtsd_si32(t); -#elif defined _MSC_VER && defined _M_IX86 - int t; - __asm - { - fld value; - fistp t; - } - return t; -#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ - defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION - TEGRA_ROUND_DBL(value); -#elif defined CV_ICC || defined __GNUC__ -# if CV_VFP - ARM_ROUND_DBL(value); -# else - return (int)lrint(value); -# endif -#else - /* it's ok if round does not comply with IEEE754 standard; - the tests should allow +/-1 difference when the tested functions use round */ - return (int)(value + (value >= 0 ? 0.5 : -0.5)); -#endif -} - - -/** @brief Rounds floating-point number to the nearest integer not larger than the original. - - The function computes an integer i such that: - \f[i \le \texttt{value} < i+1\f] - @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the - result is not defined. - */ -CV_INLINE int cvFloor( double value ) -{ -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) - __m128d t = _mm_set_sd( value ); - int i = _mm_cvtsd_si32(t); - return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i))); -#elif defined __GNUC__ - int i = (int)value; - return i - (i > value); -#else - int i = cvRound(value); - float diff = (float)(value - i); - return i - (diff < 0); -#endif -} - -/** @brief Rounds floating-point number to the nearest integer not smaller than the original. - - The function computes an integer i such that: - \f[i \le \texttt{value} < i+1\f] - @param value floating-point number. If the value is outside of INT_MIN ... 
INT_MAX range, the - result is not defined. - */ -CV_INLINE int cvCeil( double value ) -{ -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) - __m128d t = _mm_set_sd( value ); - int i = _mm_cvtsd_si32(t); - return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t)); -#elif defined __GNUC__ - int i = (int)value; - return i + (i < value); -#else - int i = cvRound(value); - float diff = (float)(i - value); - return i + (diff < 0); -#endif -} - -/** @brief Determines if the argument is Not A Number. - - @param value The input floating-point value - - The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0 - otherwise. */ -CV_INLINE int cvIsNaN( double value ) -{ - Cv64suf ieee754; - ieee754.f = value; - return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) + - ((unsigned)ieee754.u != 0) > 0x7ff00000; -} - -/** @brief Determines if the argument is Infinity. - - @param value The input floating-point value - - The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard) - and 0 otherwise. 
*/ -CV_INLINE int cvIsInf( double value ) -{ - Cv64suf ieee754; - ieee754.f = value; - return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 && - (unsigned)ieee754.u == 0; -} - -#ifdef __cplusplus - -/** @overload */ -CV_INLINE int cvRound(float value) -{ -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \ - defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) - __m128 t = _mm_set_ss( value ); - return _mm_cvtss_si32(t); -#elif defined _MSC_VER && defined _M_IX86 - int t; - __asm - { - fld value; - fistp t; - } - return t; -#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ - defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION - TEGRA_ROUND_FLT(value); -#elif defined CV_ICC || defined __GNUC__ -# if CV_VFP - ARM_ROUND_FLT(value); -# else - return (int)lrintf(value); -# endif -#else - /* it's ok if round does not comply with IEEE754 standard; - the tests should allow +/-1 difference when the tested functions use round */ - return (int)(value + (value >= 0 ? 
0.5f : -0.5f)); -#endif -} - -/** @overload */ -CV_INLINE int cvRound( int value ) -{ - return value; -} - -/** @overload */ -CV_INLINE int cvFloor( float value ) -{ -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) - __m128 t = _mm_set_ss( value ); - int i = _mm_cvtss_si32(t); - return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i))); -#elif defined __GNUC__ - int i = (int)value; - return i - (i > value); -#else - int i = cvRound(value); - float diff = (float)(value - i); - return i - (diff < 0); -#endif -} - -/** @overload */ -CV_INLINE int cvFloor( int value ) -{ - return value; -} - -/** @overload */ -CV_INLINE int cvCeil( float value ) -{ -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) - __m128 t = _mm_set_ss( value ); - int i = _mm_cvtss_si32(t); - return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t)); -#elif defined __GNUC__ - int i = (int)value; - return i + (i < value); -#else - int i = cvRound(value); - float diff = (float)(i - value); - return i + (diff < 0); -#endif -} - -/** @overload */ -CV_INLINE int cvCeil( int value ) -{ - return value; -} - -/** @overload */ -CV_INLINE int cvIsNaN( float value ) -{ - Cv32suf ieee754; - ieee754.f = value; - return (ieee754.u & 0x7fffffff) > 0x7f800000; -} - -/** @overload */ -CV_INLINE int cvIsInf( float value ) -{ - Cv32suf ieee754; - ieee754.f = value; - return (ieee754.u & 0x7fffffff) == 0x7f800000; -} - -//! @} - -#include - -namespace cv -{ - -//! @addtogroup hal_utils -//! @{ - -/////////////// saturate_cast (used in image & signal processing) /////////////////// - -/** @brief Template function for accurate conversion from one primitive type to another. - - The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\() - and others. 
They perform an efficient and accurate conversion from one primitive type to another - (see the introduction chapter). saturate in the name means that when the input value v is out of the - range of the target type, the result is not formed just by taking low bits of the input, but instead - the value is clipped. For example: - @code - uchar a = saturate_cast(-100); // a = 0 (UCHAR_MIN) - short b = saturate_cast(33333.33333); // b = 32767 (SHRT_MAX) - @endcode - Such clipping is done when the target type is unsigned char , signed char , unsigned short or - signed short . For 32-bit integers, no clipping is done. - - When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit), - the floating-point value is first rounded to the nearest integer and then clipped if needed (when - the target type is 8- or 16-bit). - - This operation is used in the simplest or most complex image processing functions in OpenCV. - - @param v Function parameter. - @sa add, subtract, multiply, divide, Mat::convertTo - */ -template static inline _Tp saturate_cast(uchar v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(schar v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(ushort v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(short v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(unsigned v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(int v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(float v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(double v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(int64 v) { return _Tp(v); } -/** @overload */ -template static inline _Tp saturate_cast(uint64 v) { return _Tp(v); } - -template<> inline uchar saturate_cast(schar v) { return (uchar)std::max((int)v, 
0); } -template<> inline uchar saturate_cast(ushort v) { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); } -template<> inline uchar saturate_cast(int v) { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } -template<> inline uchar saturate_cast(short v) { return saturate_cast((int)v); } -template<> inline uchar saturate_cast(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); } -template<> inline uchar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } -template<> inline uchar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } -template<> inline uchar saturate_cast(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } -template<> inline uchar saturate_cast(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); } - -template<> inline schar saturate_cast(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); } -template<> inline schar saturate_cast(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); } -template<> inline schar saturate_cast(int v) { return (schar)((unsigned)(v-SCHAR_MIN) <= (unsigned)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); } -template<> inline schar saturate_cast(short v) { return saturate_cast((int)v); } -template<> inline schar saturate_cast(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); } -template<> inline schar saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } -template<> inline schar saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } -template<> inline schar saturate_cast(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? 
SCHAR_MAX : SCHAR_MIN); } -template<> inline schar saturate_cast(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); } - -template<> inline ushort saturate_cast(schar v) { return (ushort)std::max((int)v, 0); } -template<> inline ushort saturate_cast(short v) { return (ushort)std::max((int)v, 0); } -template<> inline ushort saturate_cast(int v) { return (ushort)((unsigned)v <= (unsigned)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } -template<> inline ushort saturate_cast(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); } -template<> inline ushort saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } -template<> inline ushort saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } -template<> inline ushort saturate_cast(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } -template<> inline ushort saturate_cast(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); } - -template<> inline short saturate_cast(ushort v) { return (short)std::min((int)v, SHRT_MAX); } -template<> inline short saturate_cast(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } -template<> inline short saturate_cast(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); } -template<> inline short saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } -template<> inline short saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } -template<> inline short saturate_cast(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? 
SHRT_MAX : SHRT_MIN); } -template<> inline short saturate_cast(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); } - -template<> inline int saturate_cast(float v) { return cvRound(v); } -template<> inline int saturate_cast(double v) { return cvRound(v); } - -// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. -template<> inline unsigned saturate_cast(float v) { return cvRound(v); } -template<> inline unsigned saturate_cast(double v) { return cvRound(v); } - -//! @} - -} - -#endif // __cplusplus - -#endif //__OPENCV_HAL_H__ diff --git a/modules/hal/src/arithm.cpp b/modules/hal/src/arithm.cpp deleted file mode 100644 index e30cd7d..0000000 --- a/modules/hal/src/arithm.cpp +++ /dev/null @@ -1,1131 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2015, Itseez Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" -#include "arithm_simd.hpp" -#include "arithm_core.hpp" -#include "replacement.hpp" - -namespace cv { namespace hal { - -//======================================= - -#undef CALL_HAL -#define CALL_HAL(fun) \ - int res = fun(src1, step1, src2, step2, dst, step, width, height); \ - if (res == Error::Ok) \ - return; \ - else if (res != Error::NotImplemented) \ - throw Failure(res); - -#if (ARITHM_USE_IPP == 1) -static inline void fixSteps(width, height, size_t elemSize, size_t& step1, size_t& step2, size_t& step) -{ - if( height == 1 ) - step1 = step2 = step = width*elemSize; -} -#define CALL_IPP_BIN_12(fun) \ - CV_IPP_CHECK() \ - { \ - fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ - if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return; \ - } \ - setIppErrorStatus(); \ - } -#else -#define CALL_IPP_BIN_12(fun) -#endif - -//======================================= -// Add -//======================================= - -void add8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_add8u) - CALL_IPP_BIN_12(ippiAdd_8u_C1RSfs) - (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void add8s( const schar* src1, size_t step1, - const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_add8s) - vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height); -} - -void add16u( const ushort* src1, size_t step1, - const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_add16u) - CALL_IPP_BIN_12(ippiAdd_16u_C1RSfs) - (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void add16s( const short* src1, size_t step1, - const short* src2, size_t step2, - short* dst, size_t step, 
int width, int height, void* ) -{ - CALL_HAL(hal_add16s) - CALL_IPP_BIN_12(ippiAdd_16s_C1RSfs) - (vBinOp, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void add32s( const int* src1, size_t step1, - const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_add32s) - vBinOp32, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height); -} - -void add32f( const float* src1, size_t step1, - const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_add32f) - CALL_IPP_BIN_12(ippiAdd_32f_C1R) - (vBinOp32, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void add64f( const double* src1, size_t step1, - const double* src2, size_t step2, - double* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_add64f) - vBinOp64, IF_SIMD(VAdd)>(src1, step1, src2, step2, dst, step, width, height); -} - -//======================================= - -#if (ARITHM_USE_IPP == 1) -#define CALL_IPP_BIN_21(fun) \ - CV_IPP_CHECK() \ - { \ - fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ - if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return; \ - } \ - setIppErrorStatus(); \ - } -#else -#define CALL_IPP_BIN_21(fun) -#endif - -//======================================= -// Subtract -//======================================= - -void sub8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_sub8u) - CALL_IPP_BIN_21(ippiSub_8u_C1RSfs) - (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void sub8s( const schar* src1, size_t step1, - const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_sub8s) - vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, 
step, width, height); -} - -void sub16u( const ushort* src1, size_t step1, - const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_sub16u) - CALL_IPP_BIN_21(ippiSub_16u_C1RSfs) - (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void sub16s( const short* src1, size_t step1, - const short* src2, size_t step2, - short* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_sub16s) - CALL_IPP_BIN_21(ippiSub_16s_C1RSfs) - (vBinOp, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void sub32s( const int* src1, size_t step1, - const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_sub32s) - vBinOp32, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height); -} - -void sub32f( const float* src1, size_t step1, - const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_sub32f) - CALL_IPP_BIN_21(ippiSub_32f_C1R) - (vBinOp32, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void sub64f( const double* src1, size_t step1, - const double* src2, size_t step2, - double* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_sub64f) - vBinOp64, IF_SIMD(VSub)>(src1, step1, src2, step2, dst, step, width, height); -} - -//======================================= - -#if (ARITHM_USE_IPP == 1) -#define CALL_IPP_MIN_MAX(fun, type) \ - CV_IPP_CHECK() \ - { \ - type* s1 = (type*)src1; \ - type* s2 = (type*)src2; \ - type* d = dst; \ - fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ - int i = 0; \ - for(; i < height; i++) \ - { \ - if (0 > fun(s1, s2, d, width)) \ - break; \ - s1 = (type*)((uchar*)s1 + step1); \ - s2 = (type*)((uchar*)s2 + step2); \ - d = (type*)((uchar*)d + step); \ - } \ - if (i == height) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return; \ - } \ - setIppErrorStatus(); \ - } -#else 
-#define CALL_IPP_MIN_MAX(fun, type) -#endif - -//======================================= -// Max -//======================================= - -void max8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_max8u) - CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar) - vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); -} - -void max8s( const schar* src1, size_t step1, - const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_max8s) - vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); -} - -void max16u( const ushort* src1, size_t step1, - const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_max16u) - CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort) - vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); -} - -void max16s( const short* src1, size_t step1, - const short* src2, size_t step2, - short* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_max16s) - vBinOp, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); -} - -void max32s( const int* src1, size_t step1, - const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_max32s) - vBinOp32, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); -} - -void max32f( const float* src1, size_t step1, - const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_max32f) - CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float) - vBinOp32, IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); -} - -void max64f( const double* src1, size_t step1, - const double* src2, size_t step2, - double* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_max64f) - CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double) - vBinOp64, 
IF_SIMD(VMax)>(src1, step1, src2, step2, dst, step, width, height); -} - -//======================================= -// Min -//======================================= - -void min8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_min8u) - CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar) - vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); -} - -void min8s( const schar* src1, size_t step1, - const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_min8s) - vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); -} - -void min16u( const ushort* src1, size_t step1, - const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_min16u) - CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort) - vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); -} - -void min16s( const short* src1, size_t step1, - const short* src2, size_t step2, - short* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_min16s) - vBinOp, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); -} - -void min32s( const int* src1, size_t step1, - const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_min32s) - vBinOp32, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); -} - -void min32f( const float* src1, size_t step1, - const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_min32f) - CALL_IPP_MIN_MAX(ippsMinEvery_32f, float) - vBinOp32, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); -} - -void min64f( const double* src1, size_t step1, - const double* src2, size_t step2, - double* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_min64f) - CALL_IPP_MIN_MAX(ippsMinEvery_64f, 
double) - vBinOp64, IF_SIMD(VMin)>(src1, step1, src2, step2, dst, step, width, height); -} - -//======================================= -// AbsDiff -//======================================= - -void absdiff8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_absdiff8u) - CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R) - (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void absdiff8s( const schar* src1, size_t step1, - const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_absdiff8s) - vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height); -} - -void absdiff16u( const ushort* src1, size_t step1, - const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_absdiff16u) - CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R) - (vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void absdiff16s( const short* src1, size_t step1, - const short* src2, size_t step2, - short* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_absdiff16s) - vBinOp, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height); -} - -void absdiff32s( const int* src1, size_t step1, - const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_absdiff32s) - vBinOp32, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height); -} - -void absdiff32f( const float* src1, size_t step1, - const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_absdiff32f) - CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R) - (vBinOp32, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void absdiff64f( const double* src1, size_t step1, - const double* src2, size_t step2, - double* dst, size_t step, int 
width, int height, void* ) -{ - CALL_HAL(hal_absdiff64f) - vBinOp64, IF_SIMD(VAbsDiff)>(src1, step1, src2, step2, dst, step, width, height); -} - -//======================================= -// Logical -//======================================= - -void and8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_and8u) - CALL_IPP_BIN_12(ippiAnd_8u_C1R) - (vBinOp, IF_SIMD(VAnd)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void or8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_or8u) - CALL_IPP_BIN_12(ippiOr_8u_C1R) - (vBinOp, IF_SIMD(VOr)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void xor8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_xor8u) - CALL_IPP_BIN_12(ippiXor_8u_C1R) - (vBinOp, IF_SIMD(VXor)>(src1, step1, src2, step2, dst, step, width, height)); -} - -void not8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) -{ - CALL_HAL(hal_not8u) - CALL_IPP_BIN_12(ippiNot_8u_C1R) - (vBinOp, IF_SIMD(VNot)>(src1, step1, src2, step2, dst, step, width, height)); -} - -//======================================= - -#undef CALL_HAL -#define CALL_HAL(fun) \ - int res = fun(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); \ - if (res == Error::Ok) \ - return; \ - else if (res != Error::NotImplemented) \ - throw Failure(res); - -#if ARITHM_USE_IPP -inline static IppCmpOp convert_cmp(int _cmpop) -{ - return _cmpop == CMP_EQ ? ippCmpEq : - _cmpop == CMP_GT ? ippCmpGreater : - _cmpop == CMP_GE ? ippCmpGreaterEq : - _cmpop == CMP_LT ? ippCmpLess : - _cmpop == CMP_LE ? 
ippCmpLessEq : - (IppCmpOp)-1; -} -#define CALL_IPP_CMP(fun) \ - CV_IPP_CHECK() \ - { \ - IppCmpOp op = convert_cmp(*(int *)_cmpop); \ - if( op >= 0 ) \ - { \ - fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ - if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return; \ - } \ - setIppErrorStatus(); \ - } \ - } -#else -#define CALL_IPP_CMP(fun) -#endif - -//======================================= -// Compare -//======================================= - -void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* _cmpop) -{ - CALL_HAL(hal_cmp8u) - CALL_IPP_CMP(ippiCompare_8u_C1R) - //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); - int code = *(int*)_cmpop; - step1 /= sizeof(src1[0]); - step2 /= sizeof(src2[0]); - if( code == CMP_GE || code == CMP_LT ) - { - std::swap(src1, src2); - std::swap(step1, step2); - code = code == CMP_GE ? CMP_LE : CMP_GT; - } - - if( code == CMP_GT || code == CMP_LE ) - { - int m = code == CMP_GT ? 0 : 255; - for( ; height--; src1 += step1, src2 += step2, dst += step ) - { - int x =0; - #if CV_SSE2 - if( USE_SSE2 ) - { - __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1); - __m128i c128 = _mm_set1_epi8 (-128); - for( ; x <= width - 16; x += 16 ) - { - __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); - __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); - // no simd for 8u comparison, that's why we need the trick - r00 = _mm_sub_epi8(r00,c128); - r10 = _mm_sub_epi8(r10,c128); - - r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128); - _mm_storeu_si128((__m128i*)(dst + x),r00); - - } - } - #elif CV_NEON - uint8x16_t mask = code == CMP_GT ? 
vdupq_n_u8(0) : vdupq_n_u8(255); - - for( ; x <= width - 16; x += 16 ) - { - vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask)); - } - - #endif - - for( ; x < width; x++ ){ - dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m); - } - } - } - else if( code == CMP_EQ || code == CMP_NE ) - { - int m = code == CMP_EQ ? 0 : 255; - for( ; height--; src1 += step1, src2 += step2, dst += step ) - { - int x = 0; - #if CV_SSE2 - if( USE_SSE2 ) - { - __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1); - for( ; x <= width - 16; x += 16 ) - { - __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); - __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); - r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128); - _mm_storeu_si128((__m128i*)(dst + x), r00); - } - } - #elif CV_NEON - uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255); - - for( ; x <= width - 16; x += 16 ) - { - vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask)); - } - #endif - for( ; x < width; x++ ) - dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m); - } - } -} - -void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* _cmpop) -{ - CALL_HAL(hal_cmp8s) - cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); -} - -void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* _cmpop) -{ - CALL_HAL(hal_cmp16u) - CALL_IPP_CMP(ippiCompare_16u_C1R) - cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); -} - -void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* _cmpop) -{ - CALL_HAL(hal_cmp16s) - CALL_IPP_CMP(ippiCompare_16s_C1R) - //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); - - int code = *(int*)_cmpop; - step1 /= 
sizeof(src1[0]); - step2 /= sizeof(src2[0]); - if( code == CMP_GE || code == CMP_LT ) - { - std::swap(src1, src2); - std::swap(step1, step2); - code = code == CMP_GE ? CMP_LE : CMP_GT; - } - - if( code == CMP_GT || code == CMP_LE ) - { - int m = code == CMP_GT ? 0 : 255; - for( ; height--; src1 += step1, src2 += step2, dst += step ) - { - int x =0; - #if CV_SSE2 - if( USE_SSE2) - { - __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1); - for( ; x <= width - 16; x += 16 ) - { - __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); - __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); - r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128); - __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8)); - __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8)); - r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128); - r11 = _mm_packs_epi16(r00, r01); - _mm_storeu_si128((__m128i*)(dst + x), r11); - } - if( x <= width-8) - { - __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); - __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); - r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128); - r10 = _mm_packs_epi16(r00, r00); - _mm_storel_epi64((__m128i*)(dst + x), r10); - - x += 8; - } - } - #elif CV_NEON - uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255); - - for( ; x <= width - 16; x += 16 ) - { - int16x8_t in1 = vld1q_s16(src1 + x); - int16x8_t in2 = vld1q_s16(src2 + x); - uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2)); - - in1 = vld1q_s16(src1 + x + 8); - in2 = vld1q_s16(src2 + x + 8); - uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2)); - - vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask)); - } - #endif - - for( ; x < width; x++ ){ - dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m); - } - } - } - else if( code == CMP_EQ || code == CMP_NE ) - { - int m = code == CMP_EQ ? 
0 : 255; - for( ; height--; src1 += step1, src2 += step2, dst += step ) - { - int x = 0; - #if CV_SSE2 - if( USE_SSE2 ) - { - __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1); - for( ; x <= width - 16; x += 16 ) - { - __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); - __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); - r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128); - __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8)); - __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8)); - r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128); - r11 = _mm_packs_epi16(r00, r01); - _mm_storeu_si128((__m128i*)(dst + x), r11); - } - if( x <= width - 8) - { - __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); - __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); - r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128); - r10 = _mm_packs_epi16(r00, r00); - _mm_storel_epi64((__m128i*)(dst + x), r10); - - x += 8; - } - } - #elif CV_NEON - uint8x16_t mask = code == CMP_EQ ? 
vdupq_n_u8(0) : vdupq_n_u8(255); - - for( ; x <= width - 16; x += 16 ) - { - int16x8_t in1 = vld1q_s16(src1 + x); - int16x8_t in2 = vld1q_s16(src2 + x); - uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2)); - - in1 = vld1q_s16(src1 + x + 8); - in2 = vld1q_s16(src2 + x + 8); - uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2)); - - vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask)); - } - #endif - for( ; x < width; x++ ) - dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m); - } - } -} - -void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* _cmpop) -{ - CALL_HAL(hal_cmp32s) - cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); -} - -void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* _cmpop) -{ - CALL_HAL(hal_cmp32f) - CALL_IPP_CMP(ippiCompare_32f_C1R) - cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); -} - -void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* _cmpop) -{ - CALL_HAL(hal_cmp64f) - cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); -} - -//======================================= - -#undef CALL_HAL -#define CALL_HAL(fun) \ - int res = fun(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); \ - if (res == Error::Ok) \ - return; \ - else if (res != Error::NotImplemented) \ - throw Failure(res); - -#if defined HAVE_IPP -#define CALL_IPP_MUL(fun) \ - CV_IPP_CHECK() \ - { \ - if (std::fabs(fscale - 1) <= FLT_EPSILON) \ - { \ - if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return; \ - } \ - setIppErrorStatus(); \ - } \ - } -#else -#define CALL_IPP_MUL(fun) -#endif - -//======================================= -// Multilpy 
-//======================================= - -void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_mul8u) - float fscale = (float)*(const double*)scale; - CALL_IPP_MUL(ippiMul_8u_C1RSfs) - mul_(src1, step1, src2, step2, dst, step, width, height, fscale); -} - -void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_mul8s) - mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale); -} - -void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_mul16u) - float fscale = (float)*(const double*)scale; - CALL_IPP_MUL(ippiMul_16u_C1RSfs) - mul_(src1, step1, src2, step2, dst, step, width, height, fscale); -} - -void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, - short* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_mul16s) - float fscale = (float)*(const double*)scale; - CALL_IPP_MUL(ippiMul_16s_C1RSfs) - mul_(src1, step1, src2, step2, dst, step, width, height, fscale); -} - -void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_mul32s) - mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_mul32f) - float fscale = (float)*(const double*)scale; - CALL_IPP_MUL(ippiMul_32f_C1R) - mul_(src1, step1, src2, step2, dst, step, width, height, fscale); -} - -void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, - double* dst, size_t step, int width, int height, void* scale) -{ 
- CALL_HAL(hal_mul64f) - mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -//======================================= -// Divide -//======================================= - -void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_div8u) - if( src1 ) - div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); - else - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_div8s) - div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_div16u) - div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void div16s( const short* src1, size_t step1, const short* src2, size_t step2, - short* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_div16s) - div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void div32s( const int* src1, size_t step1, const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_div32s) - div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void div32f( const float* src1, size_t step1, const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_div32f) - div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void div64f( const double* src1, size_t step1, const double* src2, size_t step2, - double* dst, size_t step, int width, int height, void* 
scale) -{ - CALL_HAL(hal_div64f) - div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -//======================================= -// Reciprocial -//======================================= - -void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_recip8u) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_recip8s) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_recip16u) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, - short* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_recip16s) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_recip32s) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_recip32f) - recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, - double* dst, size_t step, int width, int height, void* scale) -{ - CALL_HAL(hal_recip64f) - recip_f(src1, 
step1, src2, step2, dst, step, width, height, *(const double*)scale); -} - -//======================================= - -#undef CALL_HAL -#define CALL_HAL(fun) \ - int res = fun(src1, step1, src2, step2, dst, step, width, height, scalars); \ - if (res == Error::Ok) \ - return; \ - else if (res != Error::NotImplemented) \ - throw Failure(res); - -//======================================= -// Add weighted -//======================================= - -void -addWeighted8u( const uchar* src1, size_t step1, - const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, - void* scalars ) -{ - CALL_HAL(hal_addWeighted8u) - const double* scalars_ = (const double*)scalars; - float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2]; - - for( ; height--; src1 += step1, src2 += step2, dst += step ) - { - int x = 0; - -#if CV_SSE2 - if( USE_SSE2 ) - { - __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma); - __m128i z = _mm_setzero_si128(); - - for( ; x <= width - 8; x += 8 ) - { - __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z); - __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z); - - __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z)); - __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z)); - __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z)); - __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z)); - - u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4)); - u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4)); - u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4); - - u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1)); - u = _mm_packus_epi16(u, u); - - _mm_storel_epi64((__m128i*)(dst + x), u); - } - } -#elif CV_NEON - float32x4_t g = vdupq_n_f32 (gamma); - - for( ; x <= width - 8; x += 8 ) - { - uint8x8_t in1 = vld1_u8(src1+x); - uint16x8_t in1_16 = vmovl_u8(in1); - float32x4_t in1_f_l = 
vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16))); - float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16))); - - uint8x8_t in2 = vld1_u8(src2+x); - uint16x8_t in2_16 = vmovl_u8(in2); - float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16))); - float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16))); - - float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta)); - float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta)); - out_f_l = vaddq_f32(out_f_l, g); - out_f_h = vaddq_f32(out_f_h, g); - - uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l)); - uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h)); - - uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h); - uint8x8_t out = vqmovn_u16(out_16); - - vst1_u8(dst+x, out); - } -#endif - #if CV_ENABLE_UNROLLED - for( ; x <= width - 4; x += 4 ) - { - float t0, t1; - t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma; - t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma; - - dst[x] = saturate_cast(t0); - dst[x+1] = saturate_cast(t1); - - t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma; - t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma; - - dst[x+2] = saturate_cast(t0); - dst[x+3] = saturate_cast(t1); - } - #endif - - for( ; x < width; x++ ) - { - float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma; - dst[x] = saturate_cast(t0); - } - } -} - -void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, - schar* dst, size_t step, int width, int height, void* scalars ) -{ - CALL_HAL(hal_addWeighted8s) - addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); -} - -void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, - ushort* dst, size_t step, int width, int height, void* scalars ) -{ - CALL_HAL(hal_addWeighted16u) - addWeighted_(src1, step1, 
src2, step2, dst, step, width, height, scalars); -} - -void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, - short* dst, size_t step, int width, int height, void* scalars ) -{ - CALL_HAL(hal_addWeighted16s) - addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); -} - -void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, - int* dst, size_t step, int width, int height, void* scalars ) -{ - CALL_HAL(hal_addWeighted32s) - addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); -} - -void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, - float* dst, size_t step, int width, int height, void* scalars ) -{ - CALL_HAL(hal_addWeighted32f) - addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); -} - -void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, - double* dst, size_t step, int width, int height, void* scalars ) -{ - CALL_HAL(hal_addWeighted64f) - addWeighted_(src1, step1, src2, step2, dst, step, width, height, scalars); -} - -}} // cv::hal:: diff --git a/modules/hal/src/color.cpp b/modules/hal/src/color.cpp deleted file mode 100644 index a3f69fa..0000000 --- a/modules/hal/src/color.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -namespace cv { namespace hal { - -}} diff --git a/modules/hal/src/filter.cpp b/modules/hal/src/filter.cpp deleted file mode 100644 index a3f69fa..0000000 --- a/modules/hal/src/filter.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. 
-// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" - -namespace cv { namespace hal { - -}} diff --git a/modules/hal/src/hardware.cpp b/modules/hal/src/hardware.cpp deleted file mode 100644 index 6a08b9f..0000000 --- a/modules/hal/src/hardware.cpp +++ /dev/null @@ -1,221 +0,0 @@ -#include "precomp.hpp" - -#if defined WIN32 || defined _WIN32 || defined WINCE -#include -#if defined _MSC_VER - #if _MSC_VER >= 1400 - #include - #elif defined _M_IX86 - static void __cpuid(int* cpuid_data, int) - { - __asm - { - push ebx - push edi - mov edi, cpuid_data - mov eax, 1 - cpuid - mov [edi], eax - mov [edi + 4], ebx - mov [edi + 8], ecx - mov [edi + 12], edx - pop edi - pop ebx - } - } - static void __cpuidex(int* cpuid_data, int, int) - { - __asm - { - push edi - mov edi, cpuid_data - mov eax, 7 - mov ecx, 0 - cpuid - mov [edi], eax - mov [edi + 4], ebx - mov [edi + 8], ecx - mov [edi + 12], edx - pop edi - } - } - #endif -#endif -#endif - -#if defined ANDROID || defined __linux__ -# include -# include -# include -# include -#endif - -#if defined __linux__ || defined __APPLE__ || defined __EMSCRIPTEN__ -#include -#include -#include -#if defined ANDROID -#include -#endif -#endif - -#ifdef ANDROID -# include -#endif - -struct HWFeatures -{ - enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE }; - - HWFeatures(void) - { - memset( have, 0, sizeof(have) ); - x86_family = 0; - } - - static HWFeatures initialize(void) - { - HWFeatures f; - int cpuid_data[4] = { 0, 0, 0, 0 }; - - #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) - __cpuid(cpuid_data, 1); - #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) - #ifdef __x86_64__ - asm __volatile__ - ( - "movl $1, %%eax\n\t" - "cpuid\n\t" - :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3]) - : - : "cc" - ); - #else - asm volatile - ( - "pushl %%ebx\n\t" - "movl $1,%%eax\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=a"(cpuid_data[0]), "=c"(cpuid_data[2]), "=d"(cpuid_data[3]) - : - : 
"cc" - ); - #endif - #endif - - f.x86_family = (cpuid_data[0] >> 8) & 15; - if( f.x86_family >= 6 ) - { - f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0; - f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0; - f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0; - f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0; - f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0; - f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0; - f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0; - f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0; - f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0; - f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX - - // make the second call to the cpuid command in order to get - // information about extended features like AVX2 - #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) - __cpuidex(cpuid_data, 7, 0); - #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) - #ifdef __x86_64__ - asm __volatile__ - ( - "movl $7, %%eax\n\t" - "movl $0, %%ecx\n\t" - "cpuid\n\t" - :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3]) - : - : "cc" - ); - #else - asm volatile - ( - "pushl %%ebx\n\t" - "movl $7,%%eax\n\t" - "movl $0,%%ecx\n\t" - "cpuid\n\t" - "movl %%ebx, %0\n\t" - "popl %%ebx\n\t" - : "=r"(cpuid_data[1]), "=c"(cpuid_data[2]) - : - : "cc" - ); - #endif - #endif - f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0; - - f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0; - f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0; - f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0; - f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0; - f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0; - f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0; - f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0; - f.have[CV_CPU_AVX_512VL] = 
(cpuid_data[1] & (1<<31)) != 0; - f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0; - } - - #if defined ANDROID || defined __linux__ - #ifdef __aarch64__ - f.have[CV_CPU_NEON] = true; - #else - int cpufile = open("/proc/self/auxv", O_RDONLY); - - if (cpufile >= 0) - { - Elf32_auxv_t auxv; - const size_t size_auxv_t = sizeof(auxv); - - while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t) - { - if (auxv.a_type == AT_HWCAP) - { - f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0; - break; - } - } - - close(cpufile); - } - #endif - #elif (defined __clang__ || defined __APPLE__) && (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__)) - f.have[CV_CPU_NEON] = true; - #endif - - return f; - } - - int x86_family; - bool have[MAX_FEATURE+1]; -}; - -static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures(); -static HWFeatures* currentFeatures = &featuresEnabled; -volatile bool useOptimizedFlag = true; - -namespace cv { namespace hal { - -bool checkHardwareSupport(int feature) -{ -// CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE ); - return currentFeatures->have[feature]; -} - -void setUseOptimized( bool flag ) -{ - useOptimizedFlag = flag; - currentFeatures = flag ? &featuresEnabled : &featuresDisabled; -} - -bool useOptimized(void) -{ - return useOptimizedFlag; -} - -}} diff --git a/modules/hal/src/precomp.hpp b/modules/hal/src/precomp.hpp deleted file mode 100644 index 1658636..0000000 --- a/modules/hal/src/precomp.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. 
-// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "opencv2/hal.hpp" -#include "opencv2/hal/intrin.hpp" -#include -#include -#include -#include -#include -#include -#include - -#include "opencv2/hal/sse_utils.hpp" -#include "opencv2/hal/neon_utils.hpp" - -#if defined HAVE_IPP && (IPP_VERSION_X100 >= 700) -#define ARITHM_USE_IPP 1 -#else -#define ARITHM_USE_IPP 0 -#endif diff --git a/modules/hal/src/replacement.hpp b/modules/hal/src/replacement.hpp deleted file mode 100644 index c8cc192..0000000 --- a/modules/hal/src/replacement.hpp +++ /dev/null @@ -1,208 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2015, Itseez Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_HAL_REPLACEMENT_HPP__ -#define __OPENCV_HAL_REPLACEMENT_HPP__ - -#include "opencv2/hal.hpp" - -inline int hal_t_add8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_add8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_add16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_add16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_add32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_add32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_add64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_sub8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return 
cv::hal::Error::NotImplemented; } -inline int hal_t_sub8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_sub16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_sub16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_sub32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_sub32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_sub64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_max8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_max8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_max16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_max16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_max32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_max32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_max64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_min8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_min8s(const schar*, 
size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_min16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_min16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_min32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_min32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_min64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_absdiff8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_absdiff8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_absdiff16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_absdiff16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_absdiff32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_absdiff32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_absdiff64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_and8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_or8u(const uchar*, size_t, const uchar*, size_t, uchar*, 
size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_xor8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_not8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; } - -#define hal_add8u hal_t_add8u -#define hal_add8s hal_t_add8s -#define hal_add16u hal_t_add16u -#define hal_add16s hal_t_add16s -#define hal_add32s hal_t_add32s -#define hal_add32f hal_t_add32f -#define hal_add64f hal_t_add64f -#define hal_sub8u hal_t_sub8u -#define hal_sub8s hal_t_sub8s -#define hal_sub16u hal_t_sub16u -#define hal_sub16s hal_t_sub16s -#define hal_sub32s hal_t_sub32s -#define hal_sub32f hal_t_sub32f -#define hal_sub64f hal_t_sub64f -#define hal_max8u hal_t_max8u -#define hal_max8s hal_t_max8s -#define hal_max16u hal_t_max16u -#define hal_max16s hal_t_max16s -#define hal_max32s hal_t_max32s -#define hal_max32f hal_t_max32f -#define hal_max64f hal_t_max64f -#define hal_min8u hal_t_min8u -#define hal_min8s hal_t_min8s -#define hal_min16u hal_t_min16u -#define hal_min16s hal_t_min16s -#define hal_min32s hal_t_min32s -#define hal_min32f hal_t_min32f -#define hal_min64f hal_t_min64f -#define hal_absdiff8u hal_t_absdiff8u -#define hal_absdiff8s hal_t_absdiff8s -#define hal_absdiff16u hal_t_absdiff16u -#define hal_absdiff16s hal_t_absdiff16s -#define hal_absdiff32s hal_t_absdiff32s -#define hal_absdiff32f hal_t_absdiff32f -#define hal_absdiff64f hal_t_absdiff64f -#define hal_and8u hal_t_and8u -#define hal_or8u hal_t_or8u -#define hal_xor8u hal_t_xor8u -#define hal_not8u hal_t_not8u - -inline int hal_t_cmp8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_cmp8s(const schar*, size_t, const schar*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_cmp16u(const ushort*, size_t, const ushort*, 
size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_cmp16s(const short*, size_t, const short*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_cmp32s(const int*, size_t, const int*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_cmp32f(const float*, size_t, const float*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; } -inline int hal_t_cmp64f(const double*, size_t, const double*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; } - -#define hal_cmp8u hal_t_cmp8u -#define hal_cmp8s hal_t_cmp8s -#define hal_cmp16u hal_t_cmp16u -#define hal_cmp16s hal_t_cmp16s -#define hal_cmp32s hal_t_cmp32s -#define hal_cmp32f hal_t_cmp32f -#define hal_cmp64f hal_t_cmp64f - -inline int hal_t_mul8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_mul8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_mul16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_mul16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_mul32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_mul32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_mul64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_div8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, 
double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_div8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_div16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_div16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_div32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_div32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_div64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_recip8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_recip8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_recip16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_recip16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_recip32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_recip32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } -inline int hal_t_recip64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; } - -#define hal_mul8u hal_t_mul8u 
-#define hal_mul8s hal_t_mul8s -#define hal_mul16u hal_t_mul16u -#define hal_mul16s hal_t_mul16s -#define hal_mul32s hal_t_mul32s -#define hal_mul32f hal_t_mul32f -#define hal_mul64f hal_t_mul64f -#define hal_div8u hal_t_div8u -#define hal_div8s hal_t_div8s -#define hal_div16u hal_t_div16u -#define hal_div16s hal_t_div16s -#define hal_div32s hal_t_div32s -#define hal_div32f hal_t_div32f -#define hal_div64f hal_t_div64f -#define hal_recip8u hal_t_recip8u -#define hal_recip8s hal_t_recip8s -#define hal_recip16u hal_t_recip16u -#define hal_recip16s hal_t_recip16s -#define hal_recip32s hal_t_recip32s -#define hal_recip32f hal_t_recip32f -#define hal_recip64f hal_t_recip64f - -inline int hal_t_addWeighted8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; } -inline int hal_t_addWeighted8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; } -inline int hal_t_addWeighted16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; } -inline int hal_t_addWeighted16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; } -inline int hal_t_addWeighted32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; } -inline int hal_t_addWeighted32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; } -inline int hal_t_addWeighted64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; } - -#define hal_addWeighted8u hal_t_addWeighted8u -#define hal_addWeighted8s hal_t_addWeighted8s -#define hal_addWeighted16u hal_t_addWeighted16u -#define hal_addWeighted16s hal_t_addWeighted16s -#define hal_addWeighted32s hal_t_addWeighted32s 
-#define hal_addWeighted32f hal_t_addWeighted32f -#define hal_addWeighted64f hal_t_addWeighted64f - -#include "custom_hal.hpp" - -#endif diff --git a/modules/hal/src/resize.cpp b/modules/hal/src/resize.cpp deleted file mode 100644 index a3f69fa..0000000 --- a/modules/hal/src/resize.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -namespace cv { namespace hal { - -}} diff --git a/modules/hal/src/stat.cpp b/modules/hal/src/stat.cpp deleted file mode 100644 index ec3b8db..0000000 --- a/modules/hal/src/stat.cpp +++ /dev/null @@ -1,306 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -namespace cv { namespace hal { - -static const uchar popCountTable[] = -{ - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - -static const uchar popCountTable2[] = -{ - 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 
3, 3, 3, - 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, - 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, - 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4 -}; - -static const uchar popCountTable4[] = -{ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -}; - -int normHamming(const uchar* a, int n) -{ - int i = 0; - int result = 0; -#if CV_NEON - { - uint32x4_t bits = vmovq_n_u32(0); - for (; i <= n - 16; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t bitsSet = vcntq_u8 (A_vec); - uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); - uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); - bits = vaddq_u32(bits, bitSet4); - } - uint64x2_t bitSet2 = vpaddlq_u32 (bits); - result = vgetq_lane_s32 
(vreinterpretq_s32_u64(bitSet2),0); - result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); - } -#endif - for( ; i <= n - 4; i += 4 ) - result += popCountTable[a[i]] + popCountTable[a[i+1]] + - popCountTable[a[i+2]] + popCountTable[a[i+3]]; - for( ; i < n; i++ ) - result += popCountTable[a[i]]; - return result; -} - -int normHamming(const uchar* a, const uchar* b, int n) -{ - int i = 0; - int result = 0; -#if CV_NEON - { - uint32x4_t bits = vmovq_n_u32(0); - for (; i <= n - 16; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t B_vec = vld1q_u8 (b + i); - uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); - uint8x16_t bitsSet = vcntq_u8 (AxorB); - uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); - uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); - bits = vaddq_u32(bits, bitSet4); - } - uint64x2_t bitSet2 = vpaddlq_u32 (bits); - result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); - result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); - } -#endif - for( ; i <= n - 4; i += 4 ) - result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + - popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; - for( ; i < n; i++ ) - result += popCountTable[a[i] ^ b[i]]; - return result; -} - -int normHamming(const uchar* a, int n, int cellSize) -{ - if( cellSize == 1 ) - return normHamming(a, n); - const uchar* tab = 0; - if( cellSize == 2 ) - tab = popCountTable2; - else if( cellSize == 4 ) - tab = popCountTable4; - else - return -1; - int i = 0; - int result = 0; -#if CV_ENABLE_UNROLLED - for( ; i <= n - 4; i += 4 ) - result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]]; -#endif - for( ; i < n; i++ ) - result += tab[a[i]]; - return result; -} - -int normHamming(const uchar* a, const uchar* b, int n, int cellSize) -{ - if( cellSize == 1 ) - return normHamming(a, b, n); - const uchar* tab = 0; - if( cellSize == 2 ) - tab = popCountTable2; - else if( cellSize == 4 ) - tab = popCountTable4; - else - return -1; - int i = 0; - int result = 
0; - #if CV_ENABLE_UNROLLED - for( ; i <= n - 4; i += 4 ) - result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] + - tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]]; - #endif - for( ; i < n; i++ ) - result += tab[a[i] ^ b[i]]; - return result; -} - -float normL2Sqr_(const float* a, const float* b, int n) -{ - int j = 0; float d = 0.f; -#if CV_SSE - float CV_DECL_ALIGNED(16) buf[4]; - __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps(); - - for( ; j <= n - 8; j += 8 ) - { - __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j)); - __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4)); - d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0)); - d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1)); - } - _mm_store_ps(buf, _mm_add_ps(d0, d1)); - d = buf[0] + buf[1] + buf[2] + buf[3]; -#endif - { - for( ; j <= n - 4; j += 4 ) - { - float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3]; - d += t0*t0 + t1*t1 + t2*t2 + t3*t3; - } - } - - for( ; j < n; j++ ) - { - float t = a[j] - b[j]; - d += t*t; - } - return d; -} - - -float normL1_(const float* a, const float* b, int n) -{ - int j = 0; float d = 0.f; -#if CV_SSE - float CV_DECL_ALIGNED(16) buf[4]; - static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; - __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps(); - __m128 absmask = _mm_load_ps((const float*)absbuf); - - for( ; j <= n - 8; j += 8 ) - { - __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j)); - __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4)); - d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask)); - d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask)); - } - _mm_store_ps(buf, _mm_add_ps(d0, d1)); - d = buf[0] + buf[1] + buf[2] + buf[3]; -#elif CV_NEON - float32x4_t v_sum = vdupq_n_f32(0.0f); - for ( ; j <= n - 4; j += 4) - v_sum = vaddq_f32(v_sum, vabdq_f32(vld1q_f32(a + j), vld1q_f32(b + j))); - - float CV_DECL_ALIGNED(16) buf[4]; - vst1q_f32(buf, v_sum); - d = buf[0] 
+ buf[1] + buf[2] + buf[3]; -#endif - { - for( ; j <= n - 4; j += 4 ) - { - d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) + - std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]); - } - } - - for( ; j < n; j++ ) - d += std::abs(a[j] - b[j]); - return d; -} - -int normL1_(const uchar* a, const uchar* b, int n) -{ - int j = 0, d = 0; -#if CV_SSE - __m128i d0 = _mm_setzero_si128(); - - for( ; j <= n - 16; j += 16 ) - { - __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j)); - __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j)); - - d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1)); - } - - for( ; j <= n - 4; j += 4 ) - { - __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j)); - __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j)); - - d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1)); - } - d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0))); -#elif CV_NEON - uint32x4_t v_sum = vdupq_n_u32(0.0f); - for ( ; j <= n - 16; j += 16) - { - uint8x16_t v_dst = vabdq_u8(vld1q_u8(a + j), vld1q_u8(b + j)); - uint16x8_t v_low = vmovl_u8(vget_low_u8(v_dst)), v_high = vmovl_u8(vget_high_u8(v_dst)); - v_sum = vaddq_u32(v_sum, vaddl_u16(vget_low_u16(v_low), vget_low_u16(v_high))); - v_sum = vaddq_u32(v_sum, vaddl_u16(vget_high_u16(v_low), vget_high_u16(v_high))); - } - - uint CV_DECL_ALIGNED(16) buf[4]; - vst1q_u32(buf, v_sum); - d = buf[0] + buf[1] + buf[2] + buf[3]; -#endif - { - for( ; j <= n - 4; j += 4 ) - { - d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) + - std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]); - } - } - for( ; j < n; j++ ) - d += std::abs(a[j] - b[j]); - return d; -} - -}} //cv::hal diff --git a/modules/hal/src/warp.cpp b/modules/hal/src/warp.cpp deleted file mode 100644 index a3f69fa..0000000 --- a/modules/hal/src/warp.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
-// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" - -namespace cv { namespace hal { - -}} diff --git a/modules/hal/test/test_main.cpp b/modules/hal/test/test_main.cpp deleted file mode 100644 index d337a5b..0000000 --- a/modules/hal/test/test_main.cpp +++ /dev/null @@ -1,3 +0,0 @@ -#include "opencv2/ts.hpp" - -CV_TEST_MAIN("cv") diff --git a/modules/hal/test/test_precomp.hpp b/modules/hal/test/test_precomp.hpp deleted file mode 100644 index 387b7ba..0000000 --- a/modules/hal/test/test_precomp.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __OPENCV_HAL_TEST_PRECOMP_HPP__ -#define __OPENCV_HAL_TEST_PRECOMP_HPP__ - -#include -#include -#include "opencv2/ts.hpp" -#include "opencv2/hal.hpp" -#include "opencv2/hal/defs.h" -#include "opencv2/hal/intrin.hpp" - -#endif diff --git a/modules/imgproc/src/precomp.hpp b/modules/imgproc/src/precomp.hpp index 3bb8d8e..ed27eea 100644 --- a/modules/imgproc/src/precomp.hpp +++ b/modules/imgproc/src/precomp.hpp @@ -49,7 +49,7 @@ #include "opencv2/imgproc/imgproc_c.h" #include "opencv2/core/private.hpp" #include "opencv2/core/ocl.hpp" -#include "opencv2/hal.hpp" +#include "opencv2/core/hal/hal.hpp" #include #include @@ -94,6 +94,6 @@ extern const float icv8x32fSqrTab[]; #include "_geom.h" #include "filterengine.hpp" -#include "opencv2/hal/sse_utils.hpp" +#include "opencv2/core/sse_utils.hpp" #endif /*__OPENCV_CV_INTERNAL_H_*/ diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index b4dc032..411c299 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -41,7 +41,7 @@ //M*/ #include "precomp.hpp" -#include "opencv2/hal/intrin.hpp" +#include "opencv2/core/hal/intrin.hpp" #include namespace cv diff --git a/modules/python/common.cmake b/modules/python/common.cmake index e335811..29b8816 100644 --- a/modules/python/common.cmake +++ b/modules/python/common.cmake @@ -28,6 +28,7 @@ endforeach(m) ocv_list_filterout(opencv_hdrs ".h$") ocv_list_filterout(opencv_hdrs 
"cuda") ocv_list_filterout(opencv_hdrs "cudev") +ocv_list_filterout(opencv_hdrs "/hal/") ocv_list_filterout(opencv_hdrs "detection_based_tracker.hpp") # Conditional compilation set(cv2_generated_hdrs diff --git a/modules/stitching/src/autocalib.cpp b/modules/stitching/src/autocalib.cpp index 91244bd..2414524 100644 --- a/modules/stitching/src/autocalib.cpp +++ b/modules/stitching/src/autocalib.cpp @@ -41,19 +41,19 @@ //M*/ #include "precomp.hpp" +#include "opencv2/core/hal/hal.hpp" using namespace cv; namespace { -template static inline bool -decomposeCholesky(_Tp* A, size_t astep, int m) +static inline bool decomposeCholesky(double* A, size_t astep, int m) { - if (!hal::Cholesky(A, astep, m, 0, 0, 0)) + if (!hal::Cholesky64f(A, astep, m, 0, 0, 0)) return false; astep /= sizeof(A[0]); for (int i = 0; i < m; ++i) - A[i*astep + i] = (_Tp)(1./A[i*astep + i]); + A[i*astep + i] = (double)(1./A[i*astep + i]); return true; } diff --git a/samples/hal/broken_hal/CMakeLists.txt b/samples/hal/broken_hal/CMakeLists.txt new file mode 100644 index 0000000..dd83edc --- /dev/null +++ b/samples/hal/broken_hal/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR) + +if(UNIX) + if(CMAKE_COMPILER_IS_GNUC OR CV_ICC) + set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + endif() +endif() + +add_library(broken_hal broken.c) +set(OPENCV_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..") +target_include_directories(broken_hal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${OPENCV_SRC_DIR}/modules/core/include) diff --git a/samples/hal/broken_hal/broken.c b/samples/hal/broken_hal/broken.c new file mode 100644 index 0000000..2993f37 --- /dev/null +++ b/samples/hal/broken_hal/broken.c @@ -0,0 +1,371 @@ +#include "broken.h" + +int broken_add8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_add8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int 
w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_add16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_add16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_add32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_add32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_add64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_sub8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_sub8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_sub16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_sub16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_sub32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_sub32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_sub64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_max8u(const uchar* src1, size_t sz1, const uchar* src2, 
size_t sz2, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_max8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_max16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_max16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_max32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_max32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_max64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_min8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_min8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_min16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_min16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_min32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_min32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_min64f(const double* 
src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_absdiff8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_absdiff8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_absdiff16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_absdiff16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_absdiff32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_absdiff32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_absdiff64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_and8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_or8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_xor8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_not8u(const uchar* src1, size_t sz1, uchar* dst, size_t sz, int w, int h) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_cmp8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op) +{ + return 
CV_HAL_ERROR_UNKNOWN; +} + +int broken_cmp8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_cmp16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_cmp16s(const short* src1, size_t sz1, const short* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_cmp32s(const int* src1, size_t sz1, const int* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_cmp32f(const float* src1, size_t sz1, const float* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_cmp64f(const double* src1, size_t sz1, const double* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_mul8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_mul8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_mul16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_mul16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_mul32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_mul32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale) 
+{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_mul64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_div8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_div8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_div16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_div16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_div32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_div32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_div64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_recip8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_recip8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_recip16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_recip16s(const short* src1, size_t sz1, const short* src2, 
size_t sz2, short* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_recip32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_recip32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_recip64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_addWeighted8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, const double* scales) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_addWeighted8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, const double* scales) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_addWeighted16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, const double* scales) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_addWeighted16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, const double* scales) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_addWeighted32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, const double* scales) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_addWeighted32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, const double* scales) +{ + return CV_HAL_ERROR_UNKNOWN; +} + +int broken_addWeighted64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, const double* scales) +{ + return CV_HAL_ERROR_UNKNOWN; +} diff --git a/samples/hal/broken_hal/broken.h 
b/samples/hal/broken_hal/broken.h new file mode 100644 index 0000000..6ba479a --- /dev/null +++ b/samples/hal/broken_hal/broken.h @@ -0,0 +1,245 @@ +#ifndef _BROKEN_H_INCLUDED_ +#define _BROKEN_H_INCLUDED_ + +#include "opencv2/core/hal/interface.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +int broken_add8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h); +int broken_add8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h); +int broken_add16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h); +int broken_add16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h); +int broken_add32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h); +int broken_add32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h); +int broken_add64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h); +int broken_sub8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h); +int broken_sub8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h); +int broken_sub16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h); +int broken_sub16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h); +int broken_sub32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h); +int broken_sub32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h); +int broken_sub64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h); +int broken_max8u(const uchar* 
src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h); +int broken_max8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h); +int broken_max16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h); +int broken_max16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h); +int broken_max32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h); +int broken_max32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h); +int broken_max64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h); +int broken_min8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h); +int broken_min8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h); +int broken_min16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h); +int broken_min16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h); +int broken_min32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h); +int broken_min32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h); +int broken_min64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h); +int broken_absdiff8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h); +int broken_absdiff8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h); +int broken_absdiff16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int 
h); +int broken_absdiff16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h); +int broken_absdiff32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h); +int broken_absdiff32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h); +int broken_absdiff64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h); +int broken_and8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h); +int broken_or8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h); +int broken_xor8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h); +int broken_not8u(const uchar* src1, size_t sz1, uchar* dst, size_t sz, int w, int h); + +#undef cv_hal_add8u +#define cv_hal_add8u broken_add8u +#undef cv_hal_add8s +#define cv_hal_add8s broken_add8s +#undef cv_hal_add16u +#define cv_hal_add16u broken_add16u +#undef cv_hal_add16s +#define cv_hal_add16s broken_add16s +#undef cv_hal_add32s +#define cv_hal_add32s broken_add32s +#undef cv_hal_add32f +#define cv_hal_add32f broken_add32f +#undef cv_hal_add64f +#define cv_hal_add64f broken_add64f +#undef cv_hal_sub8u +#define cv_hal_sub8u broken_sub8u +#undef cv_hal_sub8s +#define cv_hal_sub8s broken_sub8s +#undef cv_hal_sub16u +#define cv_hal_sub16u broken_sub16u +#undef cv_hal_sub16s +#define cv_hal_sub16s broken_sub16s +#undef cv_hal_sub32s +#define cv_hal_sub32s broken_sub32s +#undef cv_hal_sub32f +#define cv_hal_sub32f broken_sub32f +#undef cv_hal_sub64f +#define cv_hal_sub64f broken_sub64f +#undef cv_hal_max8u +#define cv_hal_max8u broken_max8u +#undef cv_hal_max8s +#define cv_hal_max8s broken_max8s +#undef cv_hal_max16u +#define cv_hal_max16u broken_max16u +#undef cv_hal_max16s +#define cv_hal_max16s broken_max16s +#undef 
cv_hal_max32s +#define cv_hal_max32s broken_max32s +#undef cv_hal_max32f +#define cv_hal_max32f broken_max32f +#undef cv_hal_max64f +#define cv_hal_max64f broken_max64f +#undef cv_hal_min8u +#define cv_hal_min8u broken_min8u +#undef cv_hal_min8s +#define cv_hal_min8s broken_min8s +#undef cv_hal_min16u +#define cv_hal_min16u broken_min16u +#undef cv_hal_min16s +#define cv_hal_min16s broken_min16s +#undef cv_hal_min32s +#define cv_hal_min32s broken_min32s +#undef cv_hal_min32f +#define cv_hal_min32f broken_min32f +#undef cv_hal_min64f +#define cv_hal_min64f broken_min64f +#undef cv_hal_absdiff8u +#define cv_hal_absdiff8u broken_absdiff8u +#undef cv_hal_absdiff8s +#define cv_hal_absdiff8s broken_absdiff8s +#undef cv_hal_absdiff16u +#define cv_hal_absdiff16u broken_absdiff16u +#undef cv_hal_absdiff16s +#define cv_hal_absdiff16s broken_absdiff16s +#undef cv_hal_absdiff32s +#define cv_hal_absdiff32s broken_absdiff32s +#undef cv_hal_absdiff32f +#define cv_hal_absdiff32f broken_absdiff32f +#undef cv_hal_absdiff64f +#define cv_hal_absdiff64f broken_absdiff64f +#undef cv_hal_and8u +#define cv_hal_and8u broken_and8u +#undef cv_hal_or8u +#define cv_hal_or8u broken_or8u +#undef cv_hal_xor8u +#define cv_hal_xor8u broken_xor8u +#undef cv_hal_not8u +#define cv_hal_not8u broken_not8u + +int broken_cmp8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op); +int broken_cmp8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op); +int broken_cmp16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op); +int broken_cmp16s(const short* src1, size_t sz1, const short* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op); +int broken_cmp32s(const int* src1, size_t sz1, const int* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op); +int broken_cmp32f(const float* src1, size_t sz1, const float* src2, size_t 
sz2, uchar* dst, size_t sz, int w, int h, int op); +int broken_cmp64f(const double* src1, size_t sz1, const double* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op); + +#undef cv_hal_cmp8u +#define cv_hal_cmp8u broken_cmp8u +#undef cv_hal_cmp8s +#define cv_hal_cmp8s broken_cmp8s +#undef cv_hal_cmp16u +#define cv_hal_cmp16u broken_cmp16u +#undef cv_hal_cmp16s +#define cv_hal_cmp16s broken_cmp16s +#undef cv_hal_cmp32s +#define cv_hal_cmp32s broken_cmp32s +#undef cv_hal_cmp32f +#define cv_hal_cmp32f broken_cmp32f +#undef cv_hal_cmp64f +#define cv_hal_cmp64f broken_cmp64f + +int broken_mul8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale); +int broken_mul8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale); +int broken_mul16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale); +int broken_mul16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale); +int broken_mul32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale); +int broken_mul32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale); +int broken_mul64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale); +int broken_div8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale); +int broken_div8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale); +int broken_div16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale); +int broken_div16s(const short* src1, size_t sz1, const short* src2, size_t 
sz2, short* dst, size_t sz, int w, int h, double scale); +int broken_div32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale); +int broken_div32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale); +int broken_div64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale); +int broken_recip8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale); +int broken_recip8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale); +int broken_recip16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale); +int broken_recip16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale); +int broken_recip32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale); +int broken_recip32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale); +int broken_recip64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale); + +#undef cv_hal_mul8u +#define cv_hal_mul8u broken_mul8u +#undef cv_hal_mul8s +#define cv_hal_mul8s broken_mul8s +#undef cv_hal_mul16u +#define cv_hal_mul16u broken_mul16u +#undef cv_hal_mul16s +#define cv_hal_mul16s broken_mul16s +#undef cv_hal_mul32s +#define cv_hal_mul32s broken_mul32s +#undef cv_hal_mul32f +#define cv_hal_mul32f broken_mul32f +#undef cv_hal_mul64f +#define cv_hal_mul64f broken_mul64f +#undef cv_hal_div8u +#define cv_hal_div8u broken_div8u +#undef cv_hal_div8s +#define cv_hal_div8s broken_div8s +#undef cv_hal_div16u +#define cv_hal_div16u broken_div16u +#undef cv_hal_div16s 
+#define cv_hal_div16s broken_div16s +#undef cv_hal_div32s +#define cv_hal_div32s broken_div32s +#undef cv_hal_div32f +#define cv_hal_div32f broken_div32f +#undef cv_hal_div64f +#define cv_hal_div64f broken_div64f +#undef cv_hal_recip8u +#define cv_hal_recip8u broken_recip8u +#undef cv_hal_recip8s +#define cv_hal_recip8s broken_recip8s +#undef cv_hal_recip16u +#define cv_hal_recip16u broken_recip16u +#undef cv_hal_recip16s +#define cv_hal_recip16s broken_recip16s +#undef cv_hal_recip32s +#define cv_hal_recip32s broken_recip32s +#undef cv_hal_recip32f +#define cv_hal_recip32f broken_recip32f +#undef cv_hal_recip64f +#define cv_hal_recip64f broken_recip64f + +int broken_addWeighted8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, const double* scales); +int broken_addWeighted8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, const double* scales); +int broken_addWeighted16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, const double* scales); +int broken_addWeighted16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, const double* scales); +int broken_addWeighted32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, const double* scales); +int broken_addWeighted32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, const double* scales); +int broken_addWeighted64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, const double* scales); + +#undef cv_hal_addWeighted8u +#define cv_hal_addWeighted8u broken_addWeighted8u +#undef cv_hal_addWeighted8s +#define cv_hal_addWeighted8s broken_addWeighted8s +#undef cv_hal_addWeighted16u +#define cv_hal_addWeighted16u broken_addWeighted16u +#undef cv_hal_addWeighted16s +#define 
cv_hal_addWeighted16s broken_addWeighted16s +#undef cv_hal_addWeighted32s +#define cv_hal_addWeighted32s broken_addWeighted32s +#undef cv_hal_addWeighted32f +#define cv_hal_addWeighted32f broken_addWeighted32f +#undef cv_hal_addWeighted64f +#define cv_hal_addWeighted64f broken_addWeighted64f + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/modules/hal/samples/simple_hal/CMakeLists.txt b/samples/hal/simple_hal/CMakeLists.txt similarity index 71% rename from modules/hal/samples/simple_hal/CMakeLists.txt rename to samples/hal/simple_hal/CMakeLists.txt index dd0be70..4a2e015 100644 --- a/modules/hal/samples/simple_hal/CMakeLists.txt +++ b/samples/hal/simple_hal/CMakeLists.txt @@ -7,5 +7,5 @@ if(UNIX) endif() add_library(simple_hal simple.cpp) -set(OPENCV_HAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..") -target_include_directories(simple_hal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${OPENCV_HAL_DIR}/include) +set(OPENCV_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..") +target_include_directories(simple_hal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${OPENCV_SRC_DIR}/modules/core/include) diff --git a/modules/hal/samples/simple_hal/simple.cpp b/samples/hal/simple_hal/simple.cpp similarity index 90% rename from modules/hal/samples/simple_hal/simple.cpp rename to samples/hal/simple_hal/simple.cpp index 564a611..fae5e1b 100644 --- a/modules/hal/samples/simple_hal/simple.cpp +++ b/samples/hal/simple_hal/simple.cpp @@ -5,7 +5,7 @@ int slow_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step) for(int x = 0 ; x < width; x++ ) dst[x] = src1[x] & src2[x]; - return cv::hal::Error::Ok; + return CV_HAL_ERROR_OK; } int slow_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height) @@ -13,7 +13,7 @@ int slow_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, for(; height--; src1 = src1 + step1, src2 = src2 + step2, 
dst = dst + step) for(int x = 0 ; x < width; x++ ) dst[x] = src1[x] | src2[x]; - return cv::hal::Error::Ok; + return CV_HAL_ERROR_OK; } int slow_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height) @@ -21,7 +21,7 @@ int slow_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step) for(int x = 0 ; x < width; x++ ) dst[x] = src1[x] ^ src2[x]; - return cv::hal::Error::Ok; + return CV_HAL_ERROR_OK; } int slow_not8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height) @@ -29,5 +29,5 @@ int slow_not8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step) for(int x = 0 ; x < width; x++ ) dst[x] = ~src1[x]; - return cv::hal::Error::Ok; + return CV_HAL_ERROR_OK; } diff --git a/modules/hal/samples/simple_hal/simple.hpp b/samples/hal/simple_hal/simple.hpp similarity index 95% rename from modules/hal/samples/simple_hal/simple.hpp rename to samples/hal/simple_hal/simple.hpp index 85a1653..8e4a30f 100644 --- a/modules/hal/samples/simple_hal/simple.hpp +++ b/samples/hal/simple_hal/simple.hpp @@ -1,7 +1,7 @@ #ifndef _SIMPLE_HPP_INCLUDED_ #define _SIMPLE_HPP_INCLUDED_ -#include "opencv2/hal/interface.hpp" +#include "opencv2/core/hal/interface.h" int slow_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height); int slow_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height);