include(cmake/OpenCVDetectVTK.cmake)
+# -- Custom HAL replacement --
+set(_includes "")
+# assuming OPENCV_HAL_HEADERS and OPENCV_HAL_LIBS are lists of files:
+# option example: -DOPENCV_HAL_HEADERS="<some-path>/header1.h;<some-path>/header2.h"
if (OPENCV_HAL_HEADERS AND OPENCV_HAL_LIBS)
- get_filename_component(OPENCV_HAL_HEADERS "${OPENCV_HAL_HEADERS}" ABSOLUTE)
- get_filename_component(OPENCV_HAL_LIBS "${OPENCV_HAL_LIBS}" ABSOLUTE)
+ foreach (h ${OPENCV_HAL_HEADERS})
+ get_filename_component(h "${h}" ABSOLUTE)
+ set(_includes "${_includes}\n#include \"${h}\"")
+ endforeach()
+ foreach (l ${OPENCV_HAL_LIBS})
+ get_filename_component(l "${l}" ABSOLUTE)
+ set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${l})
+ # TODO: decide whether custom HAL libraries should also be installed/exported with OpenCVModules
+ # ocv_install_target(${l} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)
+ endforeach()
+else()
+ set(_includes "// using default HAL")
+ unset(OPENCV_HAL_HEADERS CACHE)
+ unset(OPENCV_HAL_LIBS CACHE)
endif()
+set(OPENCV_HAL_HEADERS "${OPENCV_HAL_HEADERS}" CACHE STRING "Headers with custom HAL implementation")
+set(OPENCV_HAL_LIBS "${OPENCV_HAL_LIBS}" CACHE STRING "Libraries with custom HAL implementation")
+configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/custom_hal.hpp.in" "${CMAKE_BINARY_DIR}/custom_hal.hpp" @ONLY)
+unset(_includes)
# ----------------------------------------------------------------------------
# Add CUDA libraries (needed for apps/tools, samples)
status(" Use Eigen:" HAVE_EIGEN THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO)
status(" Use Cuda:" HAVE_CUDA THEN "YES (ver ${CUDA_VERSION_STRING})" ELSE NO)
status(" Use OpenCL:" HAVE_OPENCL THEN YES ELSE NO)
+status(" Use custom HAL:" OPENCV_HAL_HEADERS AND OPENCV_HAL_LIBS THEN "YES (${OPENCV_HAL_HEADERS}; ${OPENCV_HAL_LIBS})" ELSE "NO")
if(HAVE_CUDA)
status("")
"${CMAKE_CURRENT_LIST_DIR}/include/opencv2/*.hpp"
"${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/*.hpp"
"${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/*.h"
+ "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/hal/*.hpp"
+ "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/hal/*.h"
)
file(GLOB lib_hdrs_detail
"${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/detail/*.hpp"
#ifndef _CUSTOM_HAL_INCLUDED_
#define _CUSTOM_HAL_INCLUDED_
-@OPENCV_HAL_HEADERS_INCLUDES@
+@_includes@
#endif
</libs>
<skip_headers>
- opencv2/hal/intrin*
+ opencv2/core/hal/intrin*
opencv2/core/cuda*
opencv2/core/private*
opencv/cxeigen.hpp
CV_NORETURN= \
CV_DEFAULT(x)=" = x" \
CV_NEON=1 \
+ CV_SSE2=1 \
FLANN_DEPRECATED=
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#include "precomp.hpp"
#include <limits.h>
-#include "opencv2/hal/intrin.hpp"
+#include "opencv2/core/hal/intrin.hpp"
namespace cv
{
set(the_description "The Core Functionality")
ocv_add_module(core
- opencv_hal
PRIVATE_REQUIRED ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" "${VA_LIBRARIES}"
OPTIONAL opencv_cudev
WRAP java python)
@defgroup core_cluster Clustering
@defgroup core_utils Utility and system functions and macros
@{
+ @defgroup core_utils_sse SSE utilities
@defgroup core_utils_neon NEON utilities
@}
@defgroup core_opengl OpenGL interoperability
@defgroup core_directx DirectX interoperability
@defgroup core_eigen Eigen support
@defgroup core_opencl OpenCL support
+ @defgroup core_va_intel Intel VA-API/OpenCL (CL-VA) interoperability
+ @defgroup core_hal Hardware Acceleration Layer
+ @{
+ @defgroup core_hal_functions Functions
+ @defgroup core_hal_interface Interface
+ @defgroup core_hal_intrin Universal intrinsics
+ @{
+ @defgroup core_hal_intrin_impl Private implementation helpers
+ @}
+ @}
@}
*/
#endif
#include <climits>
+#include <algorithm>
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
-#include "opencv2/hal.hpp"
namespace cv
{
//! @} core_utils
+
+
+
} // cv
-#include "opencv2/hal/neon_utils.hpp"
+#include "opencv2/core/neon_utils.hpp"
#endif //__OPENCV_CORE_BASE_HPP__
#ifndef __OPENCV_CORE_CVDEF_H__
#define __OPENCV_CORE_CVDEF_H__
+//! @addtogroup core_utils
+//! @{
+
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
#endif
#undef abs
#undef Complex
-#include "opencv2/hal/defs.h"
+#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
+# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
+#endif
+
+#include <limits.h>
+#include "opencv2/core/hal/interface.h"
+
+#if defined __ICL
+# define CV_ICC __ICL
+#elif defined __ICC
+# define CV_ICC __ICC
+#elif defined __ECL
+# define CV_ICC __ECL
+#elif defined __ECC
+# define CV_ICC __ECC
+#elif defined __INTEL_COMPILER
+# define CV_ICC __INTEL_COMPILER
+#endif
+
+#ifndef CV_INLINE
+# if defined __cplusplus
+# define CV_INLINE static inline
+# elif defined _MSC_VER
+# define CV_INLINE __inline
+# else
+# define CV_INLINE static
+# endif
+#endif
+
+#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
+# define CV_ENABLE_UNROLLED 0
+#else
+# define CV_ENABLE_UNROLLED 1
+#endif
+
+#ifdef __GNUC__
+# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
+#elif defined _MSC_VER
+# define CV_DECL_ALIGNED(x) __declspec(align(x))
+#else
+# define CV_DECL_ALIGNED(x)
+#endif
+
+/* CPU features and intrinsics support */
+#define CV_CPU_NONE 0
+#define CV_CPU_MMX 1
+#define CV_CPU_SSE 2
+#define CV_CPU_SSE2 3
+#define CV_CPU_SSE3 4
+#define CV_CPU_SSSE3 5
+#define CV_CPU_SSE4_1 6
+#define CV_CPU_SSE4_2 7
+#define CV_CPU_POPCNT 8
+
+#define CV_CPU_AVX 10
+#define CV_CPU_AVX2 11
+#define CV_CPU_FMA3 12
+
+#define CV_CPU_AVX_512F 13
+#define CV_CPU_AVX_512BW 14
+#define CV_CPU_AVX_512CD 15
+#define CV_CPU_AVX_512DQ 16
+#define CV_CPU_AVX_512ER 17
+#define CV_CPU_AVX_512IFMA512 18
+#define CV_CPU_AVX_512PF 19
+#define CV_CPU_AVX_512VBMI 20
+#define CV_CPU_AVX_512VL 21
+
+#define CV_CPU_NEON 100
+
+// when adding to this list remember to update the following enum
+#define CV_HARDWARE_MAX_FEATURE 255
+
+/** @brief Available CPU features.
+*/
+enum CpuFeatures {
+ CPU_MMX = 1,
+ CPU_SSE = 2,
+ CPU_SSE2 = 3,
+ CPU_SSE3 = 4,
+ CPU_SSSE3 = 5,
+ CPU_SSE4_1 = 6,
+ CPU_SSE4_2 = 7,
+ CPU_POPCNT = 8,
+
+ CPU_AVX = 10,
+ CPU_AVX2 = 11,
+ CPU_FMA3 = 12,
+
+ CPU_AVX_512F = 13,
+ CPU_AVX_512BW = 14,
+ CPU_AVX_512CD = 15,
+ CPU_AVX_512DQ = 16,
+ CPU_AVX_512ER = 17,
+ CPU_AVX_512IFMA512 = 18,
+ CPU_AVX_512PF = 19,
+ CPU_AVX_512VBMI = 20,
+ CPU_AVX_512VL = 21,
+
+ CPU_NEON = 100
+};
+
+// do not include SSE/AVX/NEON headers for NVCC compiler
+#ifndef __CUDACC__
+
+#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
+# include <emmintrin.h>
+# define CV_MMX 1
+# define CV_SSE 1
+# define CV_SSE2 1
+# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
+# include <pmmintrin.h>
+# define CV_SSE3 1
+# endif
+# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
+# include <tmmintrin.h>
+# define CV_SSSE3 1
+# endif
+# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
+# include <smmintrin.h>
+# define CV_SSE4_1 1
+# endif
+# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
+# include <nmmintrin.h>
+# define CV_SSE4_2 1
+# endif
+# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
+# ifdef _MSC_VER
+# include <nmmintrin.h>
+# else
+# include <popcntintrin.h>
+# endif
+# define CV_POPCNT 1
+# endif
+# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
+// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
+// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
+# include <immintrin.h>
+# define CV_AVX 1
+# if defined(_XCR_XFEATURE_ENABLED_MASK)
+# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
+# else
+# define __xgetbv() 0
+# endif
+# endif
+# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
+# include <immintrin.h>
+# define CV_AVX2 1
+# if defined __FMA__
+# define CV_FMA3 1
+# endif
+# endif
+#endif
+
+#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
+# include <Intrin.h>
+# include "arm_neon.h"
+# define CV_NEON 1
+# define CPU_HAS_NEON_FEATURE (true)
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+# include <arm_neon.h>
+# define CV_NEON 1
+#endif
+
+#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
+# define CV_VFP 1
+#endif
+
+#endif // __CUDACC__
+
+#ifndef CV_POPCNT
+#define CV_POPCNT 0
+#endif
+#ifndef CV_MMX
+# define CV_MMX 0
+#endif
+#ifndef CV_SSE
+# define CV_SSE 0
+#endif
+#ifndef CV_SSE2
+# define CV_SSE2 0
+#endif
+#ifndef CV_SSE3
+# define CV_SSE3 0
+#endif
+#ifndef CV_SSSE3
+# define CV_SSSE3 0
+#endif
+#ifndef CV_SSE4_1
+# define CV_SSE4_1 0
+#endif
+#ifndef CV_SSE4_2
+# define CV_SSE4_2 0
+#endif
+#ifndef CV_AVX
+# define CV_AVX 0
+#endif
+#ifndef CV_AVX2
+# define CV_AVX2 0
+#endif
+#ifndef CV_FMA3
+# define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+# define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+# define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+# define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+# define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+# define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA512
+# define CV_AVX_512IFMA512 0
+#endif
+#ifndef CV_AVX_512PF
+# define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+# define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+# define CV_AVX_512VL 0
+#endif
+
+#ifndef CV_NEON
+# define CV_NEON 0
+#endif
+
+#ifndef CV_VFP
+# define CV_VFP 0
+#endif
+
+/* fundamental constants */
+#define CV_PI 3.1415926535897932384626433832795
+#define CV_2PI 6.283185307179586476925286766559
+#define CV_LOG2 0.69314718055994530941723212145818
+
+typedef union Cv32suf
+{
+ int i;
+ unsigned u;
+ float f;
+}
+Cv32suf;
+
+typedef union Cv64suf
+{
+ int64 i;
+ uint64 u;
+ double f;
+}
+Cv64suf;
#define OPENCV_ABI_COMPATIBILITY 300
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
#define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG)
-/* Size of each channel item,
+/** Size of each channel item,
0x124489 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
#define CV_ELEM_SIZE1(type) \
((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
-/* 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
+/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
#define CV_ELEM_SIZE(type) \
(CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
# endif
#endif
+//! @}
+
#endif // __OPENCV_CORE_CVDEF_H__
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_FAST_MATH_HPP__
+#define __OPENCV_CORE_FAST_MATH_HPP__
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils
+//! @{
+
+/****************************************************************************************\
+* fast math *
+\****************************************************************************************/
+
+#if defined __BORLANDC__
+# include <fastmath.h>
+#elif defined __cplusplus
+# include <cmath>
+#else
+# include <math.h>
+#endif
+
+#ifdef HAVE_TEGRA_OPTIMIZATION
+# include "tegra_round.hpp"
+#endif
+
+#if CV_VFP
+ // 1. general scheme
+ #define ARM_ROUND(_value, _asm_string) \
+ int res; \
+ float temp; \
+ asm(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
+ return res
+ // 2. version for double
+ #ifdef __clang__
+ #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
+ #else
+ #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
+ #endif
+ // 3. version for float
+ #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
+#endif // CV_VFP
+
+/** @brief Rounds floating-point number to the nearest integer
+
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int
+cvRound( double value )
+{
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+ __m128d t = _mm_set_sd( value );
+ return _mm_cvtsd_si32(t);
+#elif defined _MSC_VER && defined _M_IX86
+ int t;
+ __asm
+ {
+ fld value;
+ fistp t;
+ }
+ return t;
+#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
+ defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
+ TEGRA_ROUND_DBL(value);
+#elif defined CV_ICC || defined __GNUC__
+# if CV_VFP
+ ARM_ROUND_DBL(value);
+# else
+ return (int)lrint(value);
+# endif
+#else
+ /* it's ok if round does not comply with IEEE754 standard;
+ the tests should allow +/-1 difference when the tested functions use round */
+ return (int)(value + (value >= 0 ? 0.5 : -0.5));
+#endif
+}
+
+
+/** @brief Rounds floating-point number to the nearest integer not larger than the original.
+
+ The function computes an integer i such that:
+ \f[i \le \texttt{value} < i+1\f]
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int cvFloor( double value )
+{
+#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+ __m128d t = _mm_set_sd( value );
+ int i = _mm_cvtsd_si32(t);
+ return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
+#elif defined __GNUC__
+ int i = (int)value;
+ return i - (i > value);
+#else
+ int i = cvRound(value);
+ float diff = (float)(value - i);
+ return i - (diff < 0);
+#endif
+}
+
+/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
+
+ The function computes an integer i such that:
+ \f[i \ge \texttt{value} > i-1\f]
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int cvCeil( double value )
+{
+#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
+ __m128d t = _mm_set_sd( value );
+ int i = _mm_cvtsd_si32(t);
+ return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
+#elif defined __GNUC__
+ int i = (int)value;
+ return i + (i < value);
+#else
+ int i = cvRound(value);
+ float diff = (float)(i - value);
+ return i + (diff < 0);
+#endif
+}
+
+/** @brief Determines if the argument is Not A Number.
+
+ @param value The input floating-point value
+
+ The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
+ otherwise. */
+CV_INLINE int cvIsNaN( double value )
+{
+ Cv64suf ieee754;
+ ieee754.f = value;
+ return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
+ ((unsigned)ieee754.u != 0) > 0x7ff00000;
+}
+
+/** @brief Determines if the argument is Infinity.
+
+ @param value The input floating-point value
+
+ The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
+ and 0 otherwise. */
+CV_INLINE int cvIsInf( double value )
+{
+ Cv64suf ieee754;
+ ieee754.f = value;
+ return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
+ (unsigned)ieee754.u == 0;
+}
+
+#ifdef __cplusplus
+
+/** @overload */
+CV_INLINE int cvRound(float value)
+{
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
+ defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+ __m128 t = _mm_set_ss( value );
+ return _mm_cvtss_si32(t);
+#elif defined _MSC_VER && defined _M_IX86
+ int t;
+ __asm
+ {
+ fld value;
+ fistp t;
+ }
+ return t;
+#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
+ defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
+ TEGRA_ROUND_FLT(value);
+#elif defined CV_ICC || defined __GNUC__
+# if CV_VFP
+ ARM_ROUND_FLT(value);
+# else
+ return (int)lrintf(value);
+# endif
+#else
+ /* it's ok if round does not comply with IEEE754 standard;
+ the tests should allow +/-1 difference when the tested functions use round */
+ return (int)(value + (value >= 0 ? 0.5f : -0.5f));
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvRound( int value )
+{
+ return value;
+}
+
+/** @overload */
+CV_INLINE int cvFloor( float value )
+{
+#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+ __m128 t = _mm_set_ss( value );
+ int i = _mm_cvtss_si32(t);
+ return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
+#elif defined __GNUC__
+ int i = (int)value;
+ return i - (i > value);
+#else
+ int i = cvRound(value);
+ float diff = (float)(value - i);
+ return i - (diff < 0);
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvFloor( int value )
+{
+ return value;
+}
+
+/** @overload */
+CV_INLINE int cvCeil( float value )
+{
+#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
+ __m128 t = _mm_set_ss( value );
+ int i = _mm_cvtss_si32(t);
+ return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
+#elif defined __GNUC__
+ int i = (int)value;
+ return i + (i < value);
+#else
+ int i = cvRound(value);
+ float diff = (float)(i - value);
+ return i + (diff < 0);
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvCeil( int value )
+{
+ return value;
+}
+
+/** @overload */
+CV_INLINE int cvIsNaN( float value )
+{
+ Cv32suf ieee754;
+ ieee754.f = value;
+ return (ieee754.u & 0x7fffffff) > 0x7f800000;
+}
+
+/** @overload */
+CV_INLINE int cvIsInf( float value )
+{
+ Cv32suf ieee754;
+ ieee754.f = value;
+ return (ieee754.u & 0x7fffffff) == 0x7f800000;
+}
+
+#endif // __cplusplus
+
+//! @} core_utils
+
+#endif
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_HAL_HPP__
+#define __OPENCV_HAL_HPP__
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/hal/interface.h"
+
+//! @cond IGNORED
+#define CALL_HAL(name, fun, ...) \
+ int res = fun(__VA_ARGS__); \
+ if (res == CV_HAL_ERROR_OK) \
+ return; \
+ else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+ CV_Error_(cv::Error::StsInternal, \
+ ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+//! @endcond
+
+
+namespace cv { namespace hal {
+
+//! @addtogroup core_hal_functions
+//! @{
+
+CV_EXPORTS int normHamming(const uchar* a, int n);
+CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
+
+CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
+CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
+
+CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+
+CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
+CV_EXPORTS float normL1_(const float* a, const float* b, int n);
+CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
+
+CV_EXPORTS void exp32f(const float* src, float* dst, int n);
+CV_EXPORTS void exp64f(const double* src, double* dst, int n);
+CV_EXPORTS void log32f(const float* src, float* dst, int n);
+CV_EXPORTS void log64f(const double* src, double* dst, int n);
+
+CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
+CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
+CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
+CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
+CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
+CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
+
+CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
+CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
+CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
+CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
+
+CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
+CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
+CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
+CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
+
+CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+// Low-level arithmetic kernels operating on a 2D ROI (width x height).
+// step1/step2/step are row strides in bytes; the trailing void* carries an
+// optional per-call parameter (comparison code, scale factor, weights) and
+// is unused where left unnamed.
+CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+// Bitwise logical operations on 8-bit data.
+CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+
+// Per-element comparison; _cmpop selects the comparison (see the CMP codes in the interface header).
+CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+
+// Per-element multiplication with an optional scale parameter.
+CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+// Per-element division with an optional scale parameter.
+CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+// Per-element reciprocal (same signature family as div*, with a scale parameter).
+CV_EXPORTS void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+// Weighted sum; 'scalars' points at the weights (parameter renamed from
+// '_scalars' in the 8u overload for consistency with the sibling declarations).
+CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
+
+//! @} core_hal
+
+//=============================================================================
+// for binary compatibility with 3.0
+
+//! @cond IGNORED
+
+// Legacy math entry points retained so binaries built against OpenCV 3.0
+// keep linking; hidden from the generated documentation by @cond above.
+CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+
+CV_EXPORTS void exp(const float* src, float* dst, int n);
+CV_EXPORTS void exp(const double* src, double* dst, int n);
+CV_EXPORTS void log(const float* src, float* dst, int n);
+CV_EXPORTS void log(const double* src, double* dst, int n);
+
+CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
+CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
+CV_EXPORTS void sqrt(const float* src, float* dst, int len);
+CV_EXPORTS void sqrt(const double* src, double* dst, int len);
+CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
+CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
+
+//! @endcond
+
+}} //cv::hal
+
+#endif //__OPENCV_HAL_HPP__
#ifndef _HAL_INTERFACE_HPP_INCLUDED_
#define _HAL_INTERFACE_HPP_INCLUDED_
+//! @addtogroup core_hal_interface
+//! @{
+
+// Status and comparison codes as plain #defines: the former C++-only enums
+// are removed below so this header can also be consumed from C translation
+// units (see the __cplusplus branch further down).
#define CV_HAL_ERROR_OK 0
-#define CV_HAL_ERROR_NI 1
+#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
#define CV_HAL_ERROR_UNKNOWN -1
#define CV_HAL_CMP_EQ 0
#define CV_HAL_CMP_NE 5
#ifdef __cplusplus
-namespace cv { namespace hal {
-
-namespace Error {
-
-enum
-{
- Ok = 0,
- NotImplemented = 1,
- Unknown = -1
-};
-
-}
-
-enum
-{
- CMP_EQ = 0,
- CMP_GT = 1,
- CMP_GE = 2,
- CMP_LT = 3,
- CMP_LE = 4,
- CMP_NE = 5
-};
-
-}}
-#endif
-
-#ifdef __cplusplus
#include <cstddef>
#else
#include <stddef.h>
# define CV_BIG_UINT(n) n##ULL
#endif
+//! @}
+
#endif
#include <cmath>
#include <float.h>
#include <stdlib.h>
-#include "opencv2/hal/defs.h"
+#include "opencv2/core/cvdef.h"
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
// access from within opencv code more accessible
namespace cv {
-//! @addtogroup hal_intrin
+//! @addtogroup core_hal_intrin
//! @{
//! @cond IGNORED
+// Select the universal-intrinsics backend: SSE on x86, NEON on ARM,
+// otherwise the portable C++ emulation (intrin_cpp).
#if CV_SSE2
-#include "opencv2/hal/intrin_sse.hpp"
+#include "opencv2/core/hal/intrin_sse.hpp"
#elif CV_NEON
-#include "opencv2/hal/intrin_neon.hpp"
+#include "opencv2/core/hal/intrin_neon.hpp"
#else
-#include "opencv2/hal/intrin_cpp.hpp"
+#include "opencv2/core/hal/intrin_cpp.hpp"
#endif
-//! @addtogroup hal_intrin
+//! @addtogroup core_hal_intrin
//! @{
#ifndef CV_SIMD128
#include <limits>
#include <cstring>
+#include <algorithm>
+#include "opencv2/core/saturate.hpp"
namespace cv
{
-/** @addtogroup hal_intrin
+/** @addtogroup core_hal_intrin
"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on
different platforms. Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86
typedef v_reg<int64, 2> v_int64x2;
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \
template<typename _Tp, int n> inline v_reg<_Tp, n> \
operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
OPENCV_HAL_IMPL_BIN_OP(/)
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \
template<typename _Tp, int n> inline v_reg<_Tp, n> operator bit_op \
(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
{ \
OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int)
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
{ \
//! @endcond
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
template<typename _Tp, int n> \
inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
OPENCV_HAL_IMPL_CMP_OP(!=)
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \
template<typename _Tp, int n> \
inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
//! @endcond
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
{ \
}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \
inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); }
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \
inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \
template<typename _Tp0, int n0> inline _Tpvec \
v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \
template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
{ return a << n; }
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \
template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
{ return a >> n; }
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \
template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
{ \
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix) \
inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
{ \
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
{ \
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
{ \
//! @}
//! @brief Helper macro
-//! @ingroup hal_intrin_impl
+//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
{ \
#ifndef __OPENCV_HAL_INTRIN_NEON_HPP__
#define __OPENCV_HAL_INTRIN_NEON_HPP__
+#include <algorithm>
+
namespace cv
{
#ifndef __OPENCV_HAL_SSE_HPP__
#define __OPENCV_HAL_SSE_HPP__
+#include <algorithm>
+
#define CV_SIMD128 1
#define CV_SIMD128_64F 1
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
#include "opencv2/core/traits.hpp"
+#include "opencv2/core/saturate.hpp"
namespace cv
{
#ifndef __OPENCV_HAL_NEON_UTILS_HPP__
#define __OPENCV_HAL_NEON_UTILS_HPP__
-#include "opencv2/hal/defs.h"
+#include "opencv2/core/cvdef.h"
+// NOTE(review): the cv namespace wrapper is dropped below — confirm the NEON
+// utilities are intentionally moved to global scope.
-namespace cv {
+//! @addtogroup core_utils_neon
+//! @{
#if CV_NEON
#endif
-}
+//! @}
#endif // __OPENCV_HAL_NEON_UTILS_HPP__
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2014, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_SATURATE_HPP__
+#define __OPENCV_CORE_SATURATE_HPP__
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/fast_math.hpp"
+#include <climits>   // UCHAR_MAX, SCHAR_MIN/MAX, SHRT_MIN/MAX, USHRT_MAX used below
+#include <algorithm> // std::min / std::max used by the specializations below
+
+namespace cv
+{
+
+//! @addtogroup core_utils
+//! @{
+
+/////////////// saturate_cast (used in image & signal processing) ///////////////////
+
+/** @brief Template function for accurate conversion from one primitive type to another.
+
+ The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\<T\>()
+ and others. They perform an efficient and accurate conversion from one primitive type to another
+ (see the introduction chapter). saturate in the name means that when the input value v is out of the
+ range of the target type, the result is not formed just by taking low bits of the input, but instead
+ the value is clipped. For example:
+ @code
+ uchar a = saturate_cast<uchar>(-100); // a = 0 (UCHAR_MIN)
+ short b = saturate_cast<short>(33333.33333); // b = 32767 (SHRT_MAX)
+ @endcode
+ Such clipping is done when the target type is unsigned char , signed char , unsigned short or
+ signed short . For 32-bit integers, no clipping is done.
+
+ When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit),
+ the floating-point value is first rounded to the nearest integer and then clipped if needed (when
+ the target type is 8- or 16-bit).
+
+ This operation is used in the simplest or most complex image processing functions in OpenCV.
+
+ @param v Function parameter.
+ @sa add, subtract, multiply, divide, Mat::convertTo
+ */
+template<typename _Tp> static inline _Tp saturate_cast(uchar v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(schar v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(ushort v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(short v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(unsigned v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int64 v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(uint64 v) { return _Tp(v); }
+
+// Exact-clipping specializations for the narrow integer targets; floating-point
+// inputs are rounded with cvRound() before clipping.
+template<> inline uchar saturate_cast<uchar>(schar v) { return (uchar)std::max((int)v, 0); }
+template<> inline uchar saturate_cast<uchar>(ushort v) { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); }
+template<> inline uchar saturate_cast<uchar>(int v) { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(short v) { return saturate_cast<uchar>((int)v); }
+template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
+template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); }
+
+template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(int v) { return (schar)((unsigned)(v-SCHAR_MIN) <= (unsigned)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(short v) { return saturate_cast<schar>((int)v); }
+template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); }
+
+template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); }
+template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); }
+template<> inline ushort saturate_cast<ushort>(int v) { return (ushort)((unsigned)v <= (unsigned)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
+template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
+template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
+template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
+template<> inline ushort saturate_cast<ushort>(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
+template<> inline ushort saturate_cast<ushort>(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); }
+
+template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); }
+template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
+template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); }
+template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
+template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
+template<> inline short saturate_cast<short>(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
+template<> inline short saturate_cast<short>(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); }
+
+template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
+template<> inline int saturate_cast<int>(double v) { return cvRound(v); }
+
+// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
+template<> inline unsigned saturate_cast<unsigned>(float v) { return cvRound(v); }
+template<> inline unsigned saturate_cast<unsigned>(double v) { return cvRound(v); }
+
+//! @}
+
+} // cv
+
+#endif // __OPENCV_CORE_SATURATE_HPP__
# error sse_utils.hpp header must be compiled as C++
#endif
-#include "opencv2/hal/defs.h"
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils_sse
+//! @{
+// The helpers in this group require SSE2 and are compiled out otherwise.
#if CV_SSE2
#endif // CV_SSE2
+//! @}
+
#endif //__OPENCV_CORE_SSE_UTILS_HPP__
cv::max( src1, value, dst );
}
+
+
+namespace cv { namespace hal {
+
+//=======================================
+
+#if (ARITHM_USE_IPP == 1)
+// For a single-row ROI the data is contiguous, so present IPP with one long
+// row (step == width*elemSize) regardless of the callers' strides.
+static inline void fixSteps(int width, int height, size_t elemSize, size_t& step1, size_t& step2, size_t& step)
+{
+ if( height == 1 )
+ step1 = step2 = step = width*elemSize;
+}
+// Try an IPP binary op; on success record the IPP hit and return, otherwise
+// reset the IPP error status and fall through to the C++ implementation.
+// _E_ variants call the scaled (_Sfs) functions passing scale factor 0;
+// _21 variants pass the operands in swapped order (src2 first).
+#define CALL_IPP_BIN_E_12(fun) \
+ CV_IPP_CHECK() \
+ { \
+ fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
+ if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ }
+
+#define CALL_IPP_BIN_E_21(fun) \
+ CV_IPP_CHECK() \
+ { \
+ fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
+ if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ }
+
+#define CALL_IPP_BIN_12(fun) \
+ CV_IPP_CHECK() \
+ { \
+ fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
+ if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height))) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ }
+
+#define CALL_IPP_BIN_21(fun) \
+ CV_IPP_CHECK() \
+ { \
+ fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
+ if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height))) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ }
+
+#else
+// Without IPP the macros expand to nothing and the fallback path always runs.
+#define CALL_IPP_BIN_E_12(fun)
+#define CALL_IPP_BIN_E_21(fun)
+#define CALL_IPP_BIN_12(fun)
+#define CALL_IPP_BIN_21(fun)
+#endif
+
+
+//=======================================
+// Add
+//=======================================
+
+// Saturating per-element addition. Each kernel first consults a custom HAL
+// replacement (CALL_HAL), then IPP when enabled, and finally the vectorized
+// C++ fallback (vBinOp*).
+void add8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(add8u, cv_hal_add8u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_E_12(ippiAdd_8u_C1RSfs)
+ (vBinOp<uchar, cv::OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+// No IPP call here: there is no IPP counterpart for this type, so the
+// kernel goes straight from the HAL hook to the fallback.
+void add8s( const schar* src1, size_t step1,
+ const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(add8s, cv_hal_add8s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp<schar, cv::OpAdd<schar>, IF_SIMD(VAdd<schar>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void add16u( const ushort* src1, size_t step1,
+ const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(add16u, cv_hal_add16u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_E_12(ippiAdd_16u_C1RSfs)
+ (vBinOp<ushort, cv::OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void add16s( const short* src1, size_t step1,
+ const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(add16s, cv_hal_add16s, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_E_12(ippiAdd_16s_C1RSfs)
+ (vBinOp<short, cv::OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void add32s( const int* src1, size_t step1,
+ const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(add32s, cv_hal_add32s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp32<int, cv::OpAdd<int>, IF_SIMD(VAdd<int>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void add32f( const float* src1, size_t step1,
+ const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(add32f, cv_hal_add32f, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_12(ippiAdd_32f_C1R)
+ (vBinOp32<float, cv::OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void add64f( const double* src1, size_t step1,
+ const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(add64f, cv_hal_add64f, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp64<double, cv::OpAdd<double>, IF_SIMD(VAdd<double>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+//=======================================
+// Subtract
+//=======================================
+
+// Saturating per-element subtraction: HAL hook, then IPP, then fallback.
+// NOTE(review): the IPP calls use the _21 macros, which pass (src2, src1) —
+// presumably to match IPP's ippiSub operand order; confirm against IPP docs.
+void sub8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(sub8u, cv_hal_sub8u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_E_21(ippiSub_8u_C1RSfs)
+ (vBinOp<uchar, cv::OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void sub8s( const schar* src1, size_t step1,
+ const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(sub8s, cv_hal_sub8s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp<schar, cv::OpSub<schar>, IF_SIMD(VSub<schar>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void sub16u( const ushort* src1, size_t step1,
+ const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(sub16u, cv_hal_sub16u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_E_21(ippiSub_16u_C1RSfs)
+ (vBinOp<ushort, cv::OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void sub16s( const short* src1, size_t step1,
+ const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(sub16s, cv_hal_sub16s, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_E_21(ippiSub_16s_C1RSfs)
+ (vBinOp<short, cv::OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void sub32s( const int* src1, size_t step1,
+ const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(sub32s, cv_hal_sub32s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp32<int, cv::OpSub<int>, IF_SIMD(VSub<int>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void sub32f( const float* src1, size_t step1,
+ const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(sub32f, cv_hal_sub32f, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_21(ippiSub_32f_C1R)
+ (vBinOp32<float, cv::OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void sub64f( const double* src1, size_t step1,
+ const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(sub64f, cv_hal_sub64f, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp64<double, cv::OpSub<double>, IF_SIMD(VSub<double>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+//=======================================
+
+#if (ARITHM_USE_IPP == 1)
+// Apply an IPP ipps{Min,Max}Every row by row over the ROI (fixSteps first
+// collapses a single-row ROI). If any row fails, reset the IPP status and
+// fall through to the scalar/vector fallback; only a fully processed ROI
+// counts as an IPP hit.
+#define CALL_IPP_MIN_MAX(fun, type) \
+ CV_IPP_CHECK() \
+ { \
+ type* s1 = (type*)src1; \
+ type* s2 = (type*)src2; \
+ type* d = dst; \
+ fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
+ int i = 0; \
+ for(; i < height; i++) \
+ { \
+ if (0 > fun(s1, s2, d, width)) \
+ break; \
+ s1 = (type*)((uchar*)s1 + step1); \
+ s2 = (type*)((uchar*)s2 + step2); \
+ d = (type*)((uchar*)d + step); \
+ } \
+ if (i == height) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ }
+#else
+#define CALL_IPP_MIN_MAX(fun, type)
+#endif
+
+//=======================================
+// Max
+//=======================================
+
+// Per-element maximum: HAL hook, then IPP where a counterpart exists
+// (8u/16u/32f/64f), then the vectorized fallback.
+void max8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(max8u, cv_hal_max8u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar)
+ vBinOp<uchar, cv::OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void max8s( const schar* src1, size_t step1,
+ const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(max8s, cv_hal_max8s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp<schar, cv::OpMax<schar>, IF_SIMD(VMax<schar>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void max16u( const ushort* src1, size_t step1,
+ const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(max16u, cv_hal_max16u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort)
+ vBinOp<ushort, cv::OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void max16s( const short* src1, size_t step1,
+ const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(max16s, cv_hal_max16s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp<short, cv::OpMax<short>, IF_SIMD(VMax<short>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void max32s( const int* src1, size_t step1,
+ const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(max32s, cv_hal_max32s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp32<int, cv::OpMax<int>, IF_SIMD(VMax<int>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void max32f( const float* src1, size_t step1,
+ const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(max32f, cv_hal_max32f, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float)
+ vBinOp32<float, cv::OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void max64f( const double* src1, size_t step1,
+ const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(max64f, cv_hal_max64f, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double)
+ vBinOp64<double, cv::OpMax<double>, IF_SIMD(VMax<double>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+//=======================================
+// Min
+//=======================================
+
+void min8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(min8u, cv_hal_min8u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar)
+ vBinOp<uchar, cv::OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void min8s( const schar* src1, size_t step1,
+ const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(min8s, cv_hal_min8s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp<schar, cv::OpMin<schar>, IF_SIMD(VMin<schar>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void min16u( const ushort* src1, size_t step1,
+ const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(min16u, cv_hal_min16u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort)
+ vBinOp<ushort, cv::OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void min16s( const short* src1, size_t step1,
+ const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(min16s, cv_hal_min16s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp<short, cv::OpMin<short>, IF_SIMD(VMin<short>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void min32s( const int* src1, size_t step1,
+ const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(min32s, cv_hal_min32s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp32<int, cv::OpMin<int>, IF_SIMD(VMin<int>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void min32f( const float* src1, size_t step1,
+ const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(min32f, cv_hal_min32f, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_MIN_MAX(ippsMinEvery_32f, float)
+ vBinOp32<float, cv::OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void min64f( const double* src1, size_t step1,
+ const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(min64f, cv_hal_min64f, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_MIN_MAX(ippsMinEvery_64f, double)
+ vBinOp64<double, cv::OpMin<double>, IF_SIMD(VMin<double>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+//=======================================
+// AbsDiff
+//=======================================
+
+void absdiff8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(absdiff8u, cv_hal_absdiff8u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R)
+ (vBinOp<uchar, cv::OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void absdiff8s( const schar* src1, size_t step1,
+ const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(absdiff8s, cv_hal_absdiff8s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp<schar, cv::OpAbsDiff<schar>, IF_SIMD(VAbsDiff<schar>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void absdiff16u( const ushort* src1, size_t step1,
+ const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(absdiff16u, cv_hal_absdiff16u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R)
+ (vBinOp<ushort, cv::OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void absdiff16s( const short* src1, size_t step1,
+ const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(absdiff16s, cv_hal_absdiff16s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp<short, cv::OpAbsDiff<short>, IF_SIMD(VAbsDiff<short>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void absdiff32s( const int* src1, size_t step1,
+ const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(absdiff32s, cv_hal_absdiff32s, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp32<int, cv::OpAbsDiff<int>, IF_SIMD(VAbsDiff<int>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+void absdiff32f( const float* src1, size_t step1,
+ const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(absdiff32f, cv_hal_absdiff32f, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R)
+ (vBinOp32<float, cv::OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void absdiff64f( const double* src1, size_t step1,
+ const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(absdiff64f, cv_hal_absdiff64f, src1, step1, src2, step2, dst, step, width, height)
+ vBinOp64<double, cv::OpAbsDiff<double>, IF_SIMD(VAbsDiff<double>)>(src1, step1, src2, step2, dst, step, width, height);
+}
+
+//=======================================
+// Logical
+//=======================================
+
+#if (ARITHM_USE_IPP == 1)
+#define CALL_IPP_UN(fun) \
+ CV_IPP_CHECK() \
+ { \
+ fixSteps(width, height, sizeof(dst[0]), step1, step2, step); (void)src2; \
+ if (0 <= fun(src1, (int)step1, dst, (int)step, ippiSize(width, height))) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ }
+#else
+#define CALL_IPP_UN(fun)
+#endif
+
+void and8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(and8u, cv_hal_and8u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_12(ippiAnd_8u_C1R)
+ (vBinOp<uchar, cv::OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void or8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(or8u, cv_hal_or8u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_12(ippiOr_8u_C1R)
+ (vBinOp<uchar, cv::OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void xor8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(xor8u, cv_hal_xor8u, src1, step1, src2, step2, dst, step, width, height)
+ CALL_IPP_BIN_12(ippiXor_8u_C1R)
+ (vBinOp<uchar, cv::OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+void not8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* )
+{
+ CALL_HAL(not8u, cv_hal_not8u, src1, step1, dst, step, width, height)
+ CALL_IPP_UN(ippiNot_8u_C1R)
+ (vBinOp<uchar, cv::OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
+}
+
+//=======================================
+
+#if ARITHM_USE_IPP
+inline static IppCmpOp convert_cmp(int _cmpop)
+{
+ return _cmpop == CMP_EQ ? ippCmpEq :
+ _cmpop == CMP_GT ? ippCmpGreater :
+ _cmpop == CMP_GE ? ippCmpGreaterEq :
+ _cmpop == CMP_LT ? ippCmpLess :
+ _cmpop == CMP_LE ? ippCmpLessEq :
+ (IppCmpOp)-1;
+}
+#define CALL_IPP_CMP(fun) \
+ CV_IPP_CHECK() \
+ { \
+ IppCmpOp op = convert_cmp(*(int *)_cmpop); \
+ if( op >= 0 ) \
+ { \
+ fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
+ if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ } \
+ }
+#else
+#define CALL_IPP_CMP(fun)
+#endif
+
+//=======================================
+// Compare
+//=======================================
+
+void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* _cmpop)
+{
+ CALL_HAL(cmp8u, cv_hal_cmp8u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
+ CALL_IPP_CMP(ippiCompare_8u_C1R)
+ //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
+ int code = *(int*)_cmpop;
+ step1 /= sizeof(src1[0]);
+ step2 /= sizeof(src2[0]);
+ if( code == CMP_GE || code == CMP_LT )
+ {
+ std::swap(src1, src2);
+ std::swap(step1, step2);
+ code = code == CMP_GE ? CMP_LE : CMP_GT;
+ }
+
+ if( code == CMP_GT || code == CMP_LE )
+ {
+ int m = code == CMP_GT ? 0 : 255;
+ for( ; height--; src1 += step1, src2 += step2, dst += step )
+ {
+ int x =0;
+ #if CV_SSE2
+ if( USE_SSE2 )
+ {
+ __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
+ __m128i c128 = _mm_set1_epi8 (-128);
+ for( ; x <= width - 16; x += 16 )
+ {
+ __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
+ __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
+ // no simd for 8u comparison, that's why we need the trick
+ r00 = _mm_sub_epi8(r00,c128);
+ r10 = _mm_sub_epi8(r10,c128);
+
+ r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128);
+ _mm_storeu_si128((__m128i*)(dst + x),r00);
+
+ }
+ }
+ #elif CV_NEON
+ uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
+
+ for( ; x <= width - 16; x += 16 )
+ {
+ vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
+ }
+
+ #endif
+
+ for( ; x < width; x++ ){
+ dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
+ }
+ }
+ }
+ else if( code == CMP_EQ || code == CMP_NE )
+ {
+ int m = code == CMP_EQ ? 0 : 255;
+ for( ; height--; src1 += step1, src2 += step2, dst += step )
+ {
+ int x = 0;
+ #if CV_SSE2
+ if( USE_SSE2 )
+ {
+ __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
+ for( ; x <= width - 16; x += 16 )
+ {
+ __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
+ __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
+ r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128);
+ _mm_storeu_si128((__m128i*)(dst + x), r00);
+ }
+ }
+ #elif CV_NEON
+ uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
+
+ for( ; x <= width - 16; x += 16 )
+ {
+ vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
+ }
+ #endif
+ for( ; x < width; x++ )
+ dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
+ }
+ }
+}
+
+void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* _cmpop)
+{
+ CALL_HAL(cmp8s, cv_hal_cmp8s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
+ cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
+}
+
+void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* _cmpop)
+{
+ CALL_HAL(cmp16u, cv_hal_cmp16u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
+ CALL_IPP_CMP(ippiCompare_16u_C1R)
+ cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
+}
+
+void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* _cmpop)
+{
+ CALL_HAL(cmp16s, cv_hal_cmp16s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
+ CALL_IPP_CMP(ippiCompare_16s_C1R)
+ //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
+
+ int code = *(int*)_cmpop;
+ step1 /= sizeof(src1[0]);
+ step2 /= sizeof(src2[0]);
+ if( code == CMP_GE || code == CMP_LT )
+ {
+ std::swap(src1, src2);
+ std::swap(step1, step2);
+ code = code == CMP_GE ? CMP_LE : CMP_GT;
+ }
+
+ if( code == CMP_GT || code == CMP_LE )
+ {
+ int m = code == CMP_GT ? 0 : 255;
+ for( ; height--; src1 += step1, src2 += step2, dst += step )
+ {
+ int x =0;
+ #if CV_SSE2
+ if( USE_SSE2)
+ {
+ __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
+ for( ; x <= width - 16; x += 16 )
+ {
+ __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
+ __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
+ r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
+ __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
+ __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
+ r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128);
+ r11 = _mm_packs_epi16(r00, r01);
+ _mm_storeu_si128((__m128i*)(dst + x), r11);
+ }
+ if( x <= width-8)
+ {
+ __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
+ __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
+ r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
+ r10 = _mm_packs_epi16(r00, r00);
+ _mm_storel_epi64((__m128i*)(dst + x), r10);
+
+ x += 8;
+ }
+ }
+ #elif CV_NEON
+ uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
+
+ for( ; x <= width - 16; x += 16 )
+ {
+ int16x8_t in1 = vld1q_s16(src1 + x);
+ int16x8_t in2 = vld1q_s16(src2 + x);
+ uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2));
+
+ in1 = vld1q_s16(src1 + x + 8);
+ in2 = vld1q_s16(src2 + x + 8);
+ uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2));
+
+ vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
+ }
+ #endif
+
+ for( ; x < width; x++ ){
+ dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
+ }
+ }
+ }
+ else if( code == CMP_EQ || code == CMP_NE )
+ {
+ int m = code == CMP_EQ ? 0 : 255;
+ for( ; height--; src1 += step1, src2 += step2, dst += step )
+ {
+ int x = 0;
+ #if CV_SSE2
+ if( USE_SSE2 )
+ {
+ __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
+ for( ; x <= width - 16; x += 16 )
+ {
+ __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
+ __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
+ r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
+ __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
+ __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
+ r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128);
+ r11 = _mm_packs_epi16(r00, r01);
+ _mm_storeu_si128((__m128i*)(dst + x), r11);
+ }
+ if( x <= width - 8)
+ {
+ __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
+ __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
+ r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
+ r10 = _mm_packs_epi16(r00, r00);
+ _mm_storel_epi64((__m128i*)(dst + x), r10);
+
+ x += 8;
+ }
+ }
+ #elif CV_NEON
+ uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
+
+ for( ; x <= width - 16; x += 16 )
+ {
+ int16x8_t in1 = vld1q_s16(src1 + x);
+ int16x8_t in2 = vld1q_s16(src2 + x);
+ uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2));
+
+ in1 = vld1q_s16(src1 + x + 8);
+ in2 = vld1q_s16(src2 + x + 8);
+ uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2));
+
+ vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
+ }
+ #endif
+ for( ; x < width; x++ )
+ dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
+ }
+ }
+}
+
+void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* _cmpop)
+{
+ CALL_HAL(cmp32s, cv_hal_cmp32s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
+ cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
+}
+
+void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* _cmpop)
+{
+ CALL_HAL(cmp32f, cv_hal_cmp32f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
+ CALL_IPP_CMP(ippiCompare_32f_C1R)
+ cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
+}
+
+void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* _cmpop)
+{
+ CALL_HAL(cmp64f, cv_hal_cmp64f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
+ cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
+}
+
+//=======================================
+
+#if defined HAVE_IPP
+#define CALL_IPP_MUL(fun) \
+ CV_IPP_CHECK() \
+ { \
+ if (std::fabs(fscale - 1) <= FLT_EPSILON) \
+ { \
+ if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ } \
+ }
+
+#define CALL_IPP_MUL_2(fun) \
+ CV_IPP_CHECK() \
+ { \
+ if (std::fabs(fscale - 1) <= FLT_EPSILON) \
+ { \
+ if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height)) >= 0) \
+ { \
+ CV_IMPL_ADD(CV_IMPL_IPP); \
+ return; \
+ } \
+ setIppErrorStatus(); \
+ } \
+ }
+
+#else
+#define CALL_IPP_MUL(fun)
+#define CALL_IPP_MUL_2(fun)
+#endif
+
+//=======================================
+// Multiply
+//=======================================
+
+void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(mul8u, cv_hal_mul8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ float fscale = (float)*(const double*)scale;
+ CALL_IPP_MUL(ippiMul_8u_C1RSfs)
+ mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
+}
+
+void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(mul8s, cv_hal_mul8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale);
+}
+
+void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(mul16u, cv_hal_mul16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ float fscale = (float)*(const double*)scale;
+ CALL_IPP_MUL(ippiMul_16u_C1RSfs)
+ mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
+}
+
+void mul16s( const short* src1, size_t step1, const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(mul16s, cv_hal_mul16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ float fscale = (float)*(const double*)scale;
+ CALL_IPP_MUL(ippiMul_16s_C1RSfs)
+ mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
+}
+
+void mul32s( const int* src1, size_t step1, const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(mul32s, cv_hal_mul32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void mul32f( const float* src1, size_t step1, const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(mul32f, cv_hal_mul32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ float fscale = (float)*(const double*)scale;
+ CALL_IPP_MUL_2(ippiMul_32f_C1R)
+ mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
+}
+
+void mul64f( const double* src1, size_t step1, const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(mul64f, cv_hal_mul64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+//=======================================
+// Divide
+//=======================================
+
+void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(div8u, cv_hal_div8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ if( src1 )
+ div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+ else
+ recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(div8s, cv_hal_div8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(div16u, cv_hal_div16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void div16s( const short* src1, size_t step1, const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(div16s, cv_hal_div16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void div32s( const int* src1, size_t step1, const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(div32s, cv_hal_div32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void div32f( const float* src1, size_t step1, const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(div32f, cv_hal_div32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void div64f( const double* src1, size_t step1, const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(div64f, cv_hal_div64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+//=======================================
+// Reciprocal
+//=======================================
+
+void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(recip8u, cv_hal_recip8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(recip8s, cv_hal_recip8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(recip16u, cv_hal_recip16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void recip16s( const short* src1, size_t step1, const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(recip16s, cv_hal_recip16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void recip32s( const int* src1, size_t step1, const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(recip32s, cv_hal_recip32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void recip32f( const float* src1, size_t step1, const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(recip32f, cv_hal_recip32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+void recip64f( const double* src1, size_t step1, const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* scale)
+{
+ CALL_HAL(recip64f, cv_hal_recip64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
+ recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
+}
+
+//=======================================
+// Add weighted
+//=======================================
+
+void
+addWeighted8u( const uchar* src1, size_t step1,
+ const uchar* src2, size_t step2,
+ uchar* dst, size_t step, int width, int height,
+ void* scalars )
+{
+ CALL_HAL(addWeighted8u, cv_hal_addWeighted8u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
+ const double* scalars_ = (const double*)scalars;
+ float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2];
+
+ for( ; height--; src1 += step1, src2 += step2, dst += step )
+ {
+ int x = 0;
+
+#if CV_SSE2
+ if( USE_SSE2 )
+ {
+ __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
+ __m128i z = _mm_setzero_si128();
+
+ for( ; x <= width - 8; x += 8 )
+ {
+ __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
+ __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);
+
+ __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
+ __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
+ __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
+ __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));
+
+ u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
+ u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
+ u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4);
+
+ u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
+ u = _mm_packus_epi16(u, u);
+
+ _mm_storel_epi64((__m128i*)(dst + x), u);
+ }
+ }
+#elif CV_NEON
+ float32x4_t g = vdupq_n_f32 (gamma);
+
+ for( ; x <= width - 8; x += 8 )
+ {
+ uint8x8_t in1 = vld1_u8(src1+x);
+ uint16x8_t in1_16 = vmovl_u8(in1);
+ float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
+ float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));
+
+ uint8x8_t in2 = vld1_u8(src2+x);
+ uint16x8_t in2_16 = vmovl_u8(in2);
+ float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
+ float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));
+
+ float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
+ float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
+ out_f_l = vaddq_f32(out_f_l, g);
+ out_f_h = vaddq_f32(out_f_h, g);
+
+ uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l));
+ uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h));
+
+ uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
+ uint8x8_t out = vqmovn_u16(out_16);
+
+ vst1_u8(dst+x, out);
+ }
+#endif
+ #if CV_ENABLE_UNROLLED
+ for( ; x <= width - 4; x += 4 )
+ {
+ float t0, t1;
+ t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
+ t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma;
+
+ dst[x] = saturate_cast<uchar>(t0);
+ dst[x+1] = saturate_cast<uchar>(t1);
+
+ t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma;
+ t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma;
+
+ dst[x+2] = saturate_cast<uchar>(t0);
+ dst[x+3] = saturate_cast<uchar>(t1);
+ }
+ #endif
+
+ for( ; x < width; x++ )
+ {
+ float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
+ dst[x] = saturate_cast<uchar>(t0);
+ }
+ }
+}
+
+void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
+ schar* dst, size_t step, int width, int height, void* scalars )
+{
+ CALL_HAL(addWeighted8s, cv_hal_addWeighted8s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
+ addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
+}
+
+void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
+ ushort* dst, size_t step, int width, int height, void* scalars )
+{
+ CALL_HAL(addWeighted16u, cv_hal_addWeighted16u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
+ addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
+}
+
+void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2,
+ short* dst, size_t step, int width, int height, void* scalars )
+{
+ CALL_HAL(addWeighted16s, cv_hal_addWeighted16s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
+ addWeighted_<short, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
+}
+
+void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2,
+ int* dst, size_t step, int width, int height, void* scalars )
+{
+ CALL_HAL(addWeighted32s, cv_hal_addWeighted32s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
+ addWeighted_<int, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
+}
+
+void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2,
+ float* dst, size_t step, int width, int height, void* scalars )
+{
+ CALL_HAL(addWeighted32f, cv_hal_addWeighted32f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
+ addWeighted_<float, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
+}
+
+void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2,
+ double* dst, size_t step, int width, int height, void* scalars )
+{
+ CALL_HAL(addWeighted64f, cv_hal_addWeighted64f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
+ addWeighted_<double, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
+}
+
+}} // cv::hal::
+
/* End of file. */
//
//M*/
-#ifndef __OPENCV_HAL_ARITHM_CORE_HPP__
-#define __OPENCV_HAL_ARITHM_CORE_HPP__
+#ifndef __OPENCV_ARITHM_CORE_HPP__
+#define __OPENCV_ARITHM_CORE_HPP__
#include "arithm_simd.hpp"
-const uchar g_Saturate8u[] =
+namespace cv {
+
+template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
- 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
- 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
- 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
- 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
- 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
- 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
- 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
- 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
- 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
- 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255
+ typedef T1 type1;
+ typedef T2 type2;
+ typedef T3 rtype;
+ T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
};
-
-#define CV_FAST_CAST_8U(t) (assert(-256 <= (t) && (t) <= 512), g_Saturate8u[(t)+256])
-#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b)))
-#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a)))
-
-const float g_8x32fTab[] =
+template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
{
- -128.f, -127.f, -126.f, -125.f, -124.f, -123.f, -122.f, -121.f,
- -120.f, -119.f, -118.f, -117.f, -116.f, -115.f, -114.f, -113.f,
- -112.f, -111.f, -110.f, -109.f, -108.f, -107.f, -106.f, -105.f,
- -104.f, -103.f, -102.f, -101.f, -100.f, -99.f, -98.f, -97.f,
- -96.f, -95.f, -94.f, -93.f, -92.f, -91.f, -90.f, -89.f,
- -88.f, -87.f, -86.f, -85.f, -84.f, -83.f, -82.f, -81.f,
- -80.f, -79.f, -78.f, -77.f, -76.f, -75.f, -74.f, -73.f,
- -72.f, -71.f, -70.f, -69.f, -68.f, -67.f, -66.f, -65.f,
- -64.f, -63.f, -62.f, -61.f, -60.f, -59.f, -58.f, -57.f,
- -56.f, -55.f, -54.f, -53.f, -52.f, -51.f, -50.f, -49.f,
- -48.f, -47.f, -46.f, -45.f, -44.f, -43.f, -42.f, -41.f,
- -40.f, -39.f, -38.f, -37.f, -36.f, -35.f, -34.f, -33.f,
- -32.f, -31.f, -30.f, -29.f, -28.f, -27.f, -26.f, -25.f,
- -24.f, -23.f, -22.f, -21.f, -20.f, -19.f, -18.f, -17.f,
- -16.f, -15.f, -14.f, -13.f, -12.f, -11.f, -10.f, -9.f,
- -8.f, -7.f, -6.f, -5.f, -4.f, -3.f, -2.f, -1.f,
- 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f,
- 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f,
- 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f,
- 24.f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f,
- 32.f, 33.f, 34.f, 35.f, 36.f, 37.f, 38.f, 39.f,
- 40.f, 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f,
- 48.f, 49.f, 50.f, 51.f, 52.f, 53.f, 54.f, 55.f,
- 56.f, 57.f, 58.f, 59.f, 60.f, 61.f, 62.f, 63.f,
- 64.f, 65.f, 66.f, 67.f, 68.f, 69.f, 70.f, 71.f,
- 72.f, 73.f, 74.f, 75.f, 76.f, 77.f, 78.f, 79.f,
- 80.f, 81.f, 82.f, 83.f, 84.f, 85.f, 86.f, 87.f,
- 88.f, 89.f, 90.f, 91.f, 92.f, 93.f, 94.f, 95.f,
- 96.f, 97.f, 98.f, 99.f, 100.f, 101.f, 102.f, 103.f,
- 104.f, 105.f, 106.f, 107.f, 108.f, 109.f, 110.f, 111.f,
- 112.f, 113.f, 114.f, 115.f, 116.f, 117.f, 118.f, 119.f,
- 120.f, 121.f, 122.f, 123.f, 124.f, 125.f, 126.f, 127.f,
- 128.f, 129.f, 130.f, 131.f, 132.f, 133.f, 134.f, 135.f,
- 136.f, 137.f, 138.f, 139.f, 140.f, 141.f, 142.f, 143.f,
- 144.f, 145.f, 146.f, 147.f, 148.f, 149.f, 150.f, 151.f,
- 152.f, 153.f, 154.f, 155.f, 156.f, 157.f, 158.f, 159.f,
- 160.f, 161.f, 162.f, 163.f, 164.f, 165.f, 166.f, 167.f,
- 168.f, 169.f, 170.f, 171.f, 172.f, 173.f, 174.f, 175.f,
- 176.f, 177.f, 178.f, 179.f, 180.f, 181.f, 182.f, 183.f,
- 184.f, 185.f, 186.f, 187.f, 188.f, 189.f, 190.f, 191.f,
- 192.f, 193.f, 194.f, 195.f, 196.f, 197.f, 198.f, 199.f,
- 200.f, 201.f, 202.f, 203.f, 204.f, 205.f, 206.f, 207.f,
- 208.f, 209.f, 210.f, 211.f, 212.f, 213.f, 214.f, 215.f,
- 216.f, 217.f, 218.f, 219.f, 220.f, 221.f, 222.f, 223.f,
- 224.f, 225.f, 226.f, 227.f, 228.f, 229.f, 230.f, 231.f,
- 232.f, 233.f, 234.f, 235.f, 236.f, 237.f, 238.f, 239.f,
- 240.f, 241.f, 242.f, 243.f, 244.f, 245.f, 246.f, 247.f,
- 248.f, 249.f, 250.f, 251.f, 252.f, 253.f, 254.f, 255.f
+ typedef T1 type1;
+ typedef T2 type2;
+ typedef T3 rtype;
+ T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
};
-#define CV_8TO32F(x) g_8x32fTab[(x)+128]
-
-namespace cv {
+template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub
+{
+ typedef T1 type1;
+ typedef T2 type2;
+ typedef T3 rtype;
+ T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); }
+};
-template<> inline uchar OpAdd<uchar>::operator ()(uchar a, uchar b) const
-{ return CV_FAST_CAST_8U(a + b); }
+template<typename T> struct OpMin
+{
+ typedef T type1;
+ typedef T type2;
+ typedef T rtype;
+ T operator ()(const T a, const T b) const { return std::min(a, b); }
+};
-template<> inline uchar OpSub<uchar>::operator ()(uchar a, uchar b) const
-{ return CV_FAST_CAST_8U(a - b); }
+template<typename T> struct OpMax
+{
+ typedef T type1;
+ typedef T type2;
+ typedef T rtype;
+ T operator ()(const T a, const T b) const { return std::max(a, b); }
+};
-template<> inline short OpAbsDiff<short>::operator ()(short a, short b) const
-{ return saturate_cast<short>(std::abs(a - b)); }
+template<typename T> struct OpAbsDiff
+{
+ typedef T type1;
+ typedef T type2;
+ typedef T rtype;
+ T operator()(T a, T b) const { return a > b ? a - b : b - a; }
+};
-template<> inline schar OpAbsDiff<schar>::operator ()(schar a, schar b) const
-{ return saturate_cast<schar>(std::abs(a - b)); }
+template<typename T> struct OpAnd
+{
+ typedef T type1;
+ typedef T type2;
+ typedef T rtype;
+ T operator()( T a, T b ) const { return a & b; }
+};
-template<> inline uchar OpMin<uchar>::operator ()(uchar a, uchar b) const { return CV_MIN_8U(a, b); }
+template<typename T> struct OpOr
+{
+ typedef T type1;
+ typedef T type2;
+ typedef T rtype;
+ T operator()( T a, T b ) const { return a | b; }
+};
-template<> inline uchar OpMax<uchar>::operator ()(uchar a, uchar b) const { return CV_MAX_8U(a, b); }
+template<typename T> struct OpXor
+{
+ typedef T type1;
+ typedef T type2;
+ typedef T rtype;
+ T operator()( T a, T b ) const { return a ^ b; }
+};
-}
+template<typename T> struct OpNot
+{
+ typedef T type1;
+ typedef T type2;
+ typedef T rtype;
+ T operator()( T a, T ) const { return ~a; }
+};
-namespace cv { namespace hal {
+//=============================================================================
template<typename T, class Op, class VOp>
void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, int width, int height)
}
}
-}} // cv::hal::
+} // cv::
-#endif // __OPENCV_HAL_ARITHM_CORE_HPP__
+#endif // __OPENCV_ARITHM_CORE_HPP__
//
//M*/
-#ifndef __OPENCV_HAL_ARITHM_SIMD_HPP__
-#define __OPENCV_HAL_ARITHM_SIMD_HPP__
+#ifndef __OPENCV_ARITHM_SIMD_HPP__
+#define __OPENCV_ARITHM_SIMD_HPP__
-namespace cv { namespace hal {
+namespace cv {
struct NOP {};
#endif
-}}
+} // cv::
-#endif // __OPENCV_HAL_ARITHM_SIMD_HPP__
+#endif // __OPENCV_ARITHM_SIMD_HPP__
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_HAL_REPLACEMENT_HPP__
+#define __OPENCV_CORE_HAL_REPLACEMENT_HPP__
+
+#include "opencv2/core/hal/interface.h"
+
+inline int hal_ni_add8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_add8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_add16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_add16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_add32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_add32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_add64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_sub8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_sub8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_sub16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_sub16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_sub32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_sub32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_sub64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_max8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_max8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_max16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_max16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_max32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_max32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_max64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_min8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_min8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_min16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_min16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_min32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_min32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_min64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_absdiff8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_absdiff8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_absdiff16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_absdiff16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_absdiff32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_absdiff32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_absdiff64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_and8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_or8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_xor8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_not8u(const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+#define cv_hal_add8u hal_ni_add8u
+#define cv_hal_add8s hal_ni_add8s
+#define cv_hal_add16u hal_ni_add16u
+#define cv_hal_add16s hal_ni_add16s
+#define cv_hal_add32s hal_ni_add32s
+#define cv_hal_add32f hal_ni_add32f
+#define cv_hal_add64f hal_ni_add64f
+#define cv_hal_sub8u hal_ni_sub8u
+#define cv_hal_sub8s hal_ni_sub8s
+#define cv_hal_sub16u hal_ni_sub16u
+#define cv_hal_sub16s hal_ni_sub16s
+#define cv_hal_sub32s hal_ni_sub32s
+#define cv_hal_sub32f hal_ni_sub32f
+#define cv_hal_sub64f hal_ni_sub64f
+#define cv_hal_max8u hal_ni_max8u
+#define cv_hal_max8s hal_ni_max8s
+#define cv_hal_max16u hal_ni_max16u
+#define cv_hal_max16s hal_ni_max16s
+#define cv_hal_max32s hal_ni_max32s
+#define cv_hal_max32f hal_ni_max32f
+#define cv_hal_max64f hal_ni_max64f
+#define cv_hal_min8u hal_ni_min8u
+#define cv_hal_min8s hal_ni_min8s
+#define cv_hal_min16u hal_ni_min16u
+#define cv_hal_min16s hal_ni_min16s
+#define cv_hal_min32s hal_ni_min32s
+#define cv_hal_min32f hal_ni_min32f
+#define cv_hal_min64f hal_ni_min64f
+#define cv_hal_absdiff8u hal_ni_absdiff8u
+#define cv_hal_absdiff8s hal_ni_absdiff8s
+#define cv_hal_absdiff16u hal_ni_absdiff16u
+#define cv_hal_absdiff16s hal_ni_absdiff16s
+#define cv_hal_absdiff32s hal_ni_absdiff32s
+#define cv_hal_absdiff32f hal_ni_absdiff32f
+#define cv_hal_absdiff64f hal_ni_absdiff64f
+#define cv_hal_and8u hal_ni_and8u
+#define cv_hal_or8u hal_ni_or8u
+#define cv_hal_xor8u hal_ni_xor8u
+#define cv_hal_not8u hal_ni_not8u
+
+inline int hal_ni_cmp8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_cmp8s(const schar*, size_t, const schar*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_cmp16u(const ushort*, size_t, const ushort*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_cmp16s(const short*, size_t, const short*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_cmp32s(const int*, size_t, const int*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_cmp32f(const float*, size_t, const float*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_cmp64f(const double*, size_t, const double*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+#define cv_hal_cmp8u hal_ni_cmp8u
+#define cv_hal_cmp8s hal_ni_cmp8s
+#define cv_hal_cmp16u hal_ni_cmp16u
+#define cv_hal_cmp16s hal_ni_cmp16s
+#define cv_hal_cmp32s hal_ni_cmp32s
+#define cv_hal_cmp32f hal_ni_cmp32f
+#define cv_hal_cmp64f hal_ni_cmp64f
+
+inline int hal_ni_mul8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_mul8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_mul16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_mul16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_mul32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_mul32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_mul64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_div8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_div8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_div16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_div16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_div32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_div32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_div64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_recip8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_recip8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_recip16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_recip16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_recip32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_recip32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_recip64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+#define cv_hal_mul8u hal_ni_mul8u
+#define cv_hal_mul8s hal_ni_mul8s
+#define cv_hal_mul16u hal_ni_mul16u
+#define cv_hal_mul16s hal_ni_mul16s
+#define cv_hal_mul32s hal_ni_mul32s
+#define cv_hal_mul32f hal_ni_mul32f
+#define cv_hal_mul64f hal_ni_mul64f
+#define cv_hal_div8u hal_ni_div8u
+#define cv_hal_div8s hal_ni_div8s
+#define cv_hal_div16u hal_ni_div16u
+#define cv_hal_div16s hal_ni_div16s
+#define cv_hal_div32s hal_ni_div32s
+#define cv_hal_div32f hal_ni_div32f
+#define cv_hal_div64f hal_ni_div64f
+#define cv_hal_recip8u hal_ni_recip8u
+#define cv_hal_recip8s hal_ni_recip8s
+#define cv_hal_recip16u hal_ni_recip16u
+#define cv_hal_recip16s hal_ni_recip16s
+#define cv_hal_recip32s hal_ni_recip32s
+#define cv_hal_recip32f hal_ni_recip32f
+#define cv_hal_recip64f hal_ni_recip64f
+
+inline int hal_ni_addWeighted8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_addWeighted8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_addWeighted16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_addWeighted16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_addWeighted32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_addWeighted32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_addWeighted64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+#define cv_hal_addWeighted8u hal_ni_addWeighted8u
+#define cv_hal_addWeighted8s hal_ni_addWeighted8s
+#define cv_hal_addWeighted16u hal_ni_addWeighted16u
+#define cv_hal_addWeighted16s hal_ni_addWeighted16s
+#define cv_hal_addWeighted32s hal_ni_addWeighted32s
+#define cv_hal_addWeighted32f hal_ni_addWeighted32f
+#define cv_hal_addWeighted64f hal_ni_addWeighted64f
+
+inline int hal_ni_split8u(const uchar*, uchar**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_split16u(const ushort*, ushort**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_split32s(const int*, int**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_split64s(const int64*, int64**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+#define cv_hal_split8u hal_ni_split8u
+#define cv_hal_split16u hal_ni_split16u
+#define cv_hal_split32s hal_ni_split32s
+#define cv_hal_split64s hal_ni_split64s
+
+inline int hal_ni_merge8u(const uchar**, uchar*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_merge16u(const ushort**, ushort*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_merge32s(const int**, int*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_merge64s(const int64**, int64*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+#define cv_hal_merge8u hal_ni_merge8u
+#define cv_hal_merge16u hal_ni_merge16u
+#define cv_hal_merge32s hal_ni_merge32s
+#define cv_hal_merge64s hal_ni_merge64s
+
+#include "custom_hal.hpp"
+
+#endif // __OPENCV_CORE_HAL_REPLACEMENT_HPP__
int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
- return hal::LU(A, astep, m, b, bstep, n);
+ return hal::LU32f(A, astep, m, b, bstep, n);
}
int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
{
- return hal::LU(A, astep, m, b, bstep, n);
+ return hal::LU64f(A, astep, m, b, bstep, n);
}
bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
- return hal::Cholesky(A, astep, m, b, bstep, n);
+ return hal::Cholesky32f(A, astep, m, b, bstep, n);
}
bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
{
- return hal::Cholesky(A, astep, m, b, bstep, n);
+ return hal::Cholesky64f(A, astep, m, b, bstep, n);
}
template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
Mat a(rows, rows, CV_32F, (uchar*)buffer);
mat.copyTo(a);
- result = hal::LU(a.ptr<float>(), a.step, rows, 0, 0, 0);
+ result = hal::LU32f(a.ptr<float>(), a.step, rows, 0, 0, 0);
if( result )
{
for( int i = 0; i < rows; i++ )
Mat a(rows, rows, CV_64F, (uchar*)buffer);
mat.copyTo(a);
- result = hal::LU(a.ptr<double>(), a.step, rows, 0, 0, 0);
+ result = hal::LU64f(a.ptr<double>(), a.step, rows, 0, 0, 0);
if( result )
{
for( int i = 0; i < rows; i++ )
setIdentity(dst);
if( method == DECOMP_LU && type == CV_32F )
- result = hal::LU(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
+ result = hal::LU32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
else if( method == DECOMP_LU && type == CV_64F )
- result = hal::LU(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
+ result = hal::LU64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
else if( method == DECOMP_CHOLESKY && type == CV_32F )
- result = hal::Cholesky(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
+ result = hal::Cholesky32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
else
- result = hal::Cholesky(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
+ result = hal::Cholesky64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
if( !result )
dst = Scalar(0);
if( method == DECOMP_LU )
{
if( type == CV_32F )
- result = hal::LU(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
+ result = hal::LU32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
else
- result = hal::LU(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
+ result = hal::LU64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
}
else if( method == DECOMP_CHOLESKY )
{
if( type == CV_32F )
- result = hal::Cholesky(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
+ result = hal::Cholesky32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
else
- result = hal::Cholesky(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
+ result = hal::Cholesky64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
}
else
{
{
const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
float *mag = (float*)ptrs[2];
- hal::magnitude( x, y, mag, len );
+ hal::magnitude32f( x, y, mag, len );
}
else
{
const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
double *mag = (double*)ptrs[2];
- hal::magnitude( x, y, mag, len );
+ hal::magnitude64f( x, y, mag, len );
}
}
}
{
const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
- hal::magnitude( x, y, mag, len );
+ hal::magnitude32f( x, y, mag, len );
hal::fastAtan2( y, x, angle, len, angleInDegrees );
}
else
const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
double *angle = (double*)ptrs[3];
- hal::magnitude(x, y, (double*)ptrs[2], len);
+ hal::magnitude64f(x, y, (double*)ptrs[2], len);
k = 0;
#if CV_SSE2
}
setIppErrorStatus();
}
- hal::exp(x, y, n);
+ hal::exp32f(x, y, n);
}
static void Exp_64f_ipp(const double *x, double *y, int n)
}
setIppErrorStatus();
}
- hal::exp(x, y, n);
+ hal::exp64f(x, y, n);
}
#define Exp_32f Exp_32f_ipp
#define Exp_64f Exp_64f_ipp
#else
-#define Exp_32f hal::exp
-#define Exp_64f hal::exp
+#define Exp_32f hal::exp32f
+#define Exp_64f hal::exp64f
#endif
}
setIppErrorStatus();
}
- hal::log(x, y, n);
+ hal::log32f(x, y, n);
}
static void Log_64f_ipp(const double *x, double *y, int n)
}
setIppErrorStatus();
}
- hal::log(x, y, n);
+ hal::log64f(x, y, n);
}
#define Log_32f Log_32f_ipp
#define Log_64f Log_64f_ipp
#else
-#define Log_32f hal::log
-#define Log_64f hal::log
+#define Log_32f hal::log32f
+#define Log_64f hal::log64f
#endif
void log( InputArray _src, OutputArray _dst )
#endif
-static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt(src, dst, n); }
-static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt(src, dst, n); }
-static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt(src, dst, n); }
-static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt(src, dst, n); }
+static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt32f(src, dst, n); }
+static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt64f(src, dst, n); }
+static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt32f(src, dst, n); }
+static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt64f(src, dst, n); }
void pow( InputArray _src, double power, OutputArray _dst )
{
static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
-#if CV_NEON
-static inline float32x4_t cv_vrecpq_f32(float32x4_t val)
-{
- float32x4_t reciprocal = vrecpeq_f32(val);
- reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
- reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
- return reciprocal;
-}
-#endif
-
void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
{
int i = 0;
}
-void magnitude(const float* x, const float* y, float* mag, int len)
+void magnitude32f(const float* x, const float* y, float* mag, int len)
{
#if defined HAVE_IPP
CV_IPP_CHECK()
}
}
-void magnitude(const double* x, const double* y, double* mag, int len)
+void magnitude64f(const double* x, const double* y, double* mag, int len)
{
#if defined(HAVE_IPP)
CV_IPP_CHECK()
}
-void invSqrt(const float* src, float* dst, int len)
+void invSqrt32f(const float* src, float* dst, int len)
{
#if defined(HAVE_IPP)
CV_IPP_CHECK()
}
-void invSqrt(const double* src, double* dst, int len)
+void invSqrt64f(const double* src, double* dst, int len)
{
int i = 0;
}
-void sqrt(const float* src, float* dst, int len)
+void sqrt32f(const float* src, float* dst, int len)
{
#if defined(HAVE_IPP)
CV_IPP_CHECK()
}
-void sqrt(const double* src, double* dst, int len)
+void sqrt64f(const double* src, double* dst, int len)
{
#if defined(HAVE_IPP)
CV_IPP_CHECK()
static const double exp_postscale = 1./(1 << EXPTAB_SCALE);
static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000
-void exp( const float *_x, float *y, int n )
+void exp32f( const float *_x, float *y, int n )
{
static const float
A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
}
}
-void exp( const double *_x, double *y, int n )
+void exp64f( const double *_x, double *y, int n )
{
static const double
A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
#define LOGTAB_TRANSLATE(x,h) (((x) - 1.)*icvLogTab[(h)+1])
static const double ln_2 = 0.69314718055994530941723212145818;
-void log( const float *_x, float *y, int n )
+void log32f( const float *_x, float *y, int n )
{
static const float shift[] = { 0, -1.f/512 };
static const float
}
}
-void log( const double *x, double *y, int n )
+void log64f( const double *x, double *y, int n )
{
static const double shift[] = { 0, -1./512 };
static const double
}
}
-}}
+//=============================================================================
+// for compatibility with 3.0
+//
+// The HAL math primitives now carry explicit element-type suffixes
+// (exp32f/exp64f, log32f/..., magnitude32f/..., sqrt32f/..., invSqrt32f/...).
+// The overloads below keep the old untyped 3.0-era names working by
+// forwarding to the new entry points, so external callers need not change.
+
+void exp(const float* src, float* dst, int n)
+{
+ exp32f(src, dst, n);
+}
+
+void exp(const double* src, double* dst, int n)
+{
+ exp64f(src, dst, n);
+}
+
+void log(const float* src, float* dst, int n)
+{
+ log32f(src, dst, n);
+}
+
+void log(const double* src, double* dst, int n)
+{
+ log64f(src, dst, n);
+}
+
+void magnitude(const float* x, const float* y, float* dst, int n)
+{
+ magnitude32f(x, y, dst, n);
+}
+
+void magnitude(const double* x, const double* y, double* dst, int n)
+{
+ magnitude64f(x, y, dst, n);
+}
+
+void sqrt(const float* src, float* dst, int len)
+{
+ sqrt32f(src, dst, len);
+}
+
+void sqrt(const double* src, double* dst, int len)
+{
+ sqrt64f(src, dst, len);
+}
+
+void invSqrt(const float* src, float* dst, int len)
+{
+ invSqrt32f(src, dst, len);
+}
+
+void invSqrt(const double* src, double* dst, int len)
+{
+ invSqrt64f(src, dst, len);
+}
+
+
+}} // cv::hal::
}
-int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
+int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
}
-int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
+int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
{
return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
}
-
template<typename _Tp> static inline bool
CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
{
}
+// Explicitly-typed Cholesky entry points; both simply forward to the shared
+// CholImpl template above and return whatever status flag it reports.
+bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
+{
+ return CholImpl(A, astep, m, b, bstep, n);
+}
+
+bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
+{
+ return CholImpl(A, astep, m, b, bstep, n);
+}
+
+//=============================================================================
+// for compatibility with 3.0
+
+// Old untyped LU names kept for source compatibility. They use the same
+// epsilon pivot thresholds (FLT_EPSILON*10 / DBL_EPSILON*100) as the new
+// LU32f/LU64f entry points above.
+int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
+{
+ return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
+}
+
+int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
+{
+ return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
+}
+
bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
return CholImpl(A, astep, m, b, bstep, n);
return CholImpl(A, astep, m, b, bstep, n);
}
+
}}
void merge8u(const uchar** src, uchar* dst, int len, int cn )
{
+ // Give a custom HAL implementation first shot; presumably CALL_HAL returns
+ // early on success (see hal_replacement.hpp) and falls through to the
+ // generic template otherwise — TODO confirm macro semantics.
+ CALL_HAL(merge8u, cv_hal_merge8u, src, dst, len, cn)
merge_(src, dst, len, cn);
}
void merge16u(const ushort** src, ushort* dst, int len, int cn )
{
+ CALL_HAL(merge16u, cv_hal_merge16u, src, dst, len, cn)
merge_(src, dst, len, cn);
}
void merge32s(const int** src, int* dst, int len, int cn )
{
+ CALL_HAL(merge32s, cv_hal_merge32s, src, dst, len, cn)
merge_(src, dst, len, cn);
}
void merge64s(const int64** src, int64* dst, int len, int cn )
{
+ CALL_HAL(merge64s, cv_hal_merge64s, src, dst, len, cn)
merge_(src, dst, len, cn);
}
#include "opencv2/core/ocl.hpp"
#endif
-#include "opencv2/hal.hpp"
-
#include <assert.h>
#include <ctype.h>
#include <float.h>
#include <stdlib.h>
#include <string.h>
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <limits>
+#include <float.h>
+#include <cstring>
+#include <cassert>
+
+#define USE_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE))
+#define USE_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
+#define USE_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
+#define USE_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
+
+#include "opencv2/core/hal/hal.hpp"
+#include "opencv2/core/hal/intrin.hpp"
+#include "opencv2/core/sse_utils.hpp"
+#include "opencv2/core/neon_utils.hpp"
+
+#include "arithm_core.hpp"
+#include "hal_replacement.hpp"
+
#ifdef HAVE_TEGRA_OPTIMIZATION
#include "opencv2/core/core_tegra.hpp"
#else
namespace cv
{
+// Lookup tables (defined elsewhere) and fast-cast helpers, moved here from
+// the old location further down in this header.
+// -128.f ... 255.f
+extern const float g_8x32fTab[];
+#define CV_8TO32F(x) cv::g_8x32fTab[(x)+128]
+
+// g_8x16uSqrTab indexed by a value in [-255, 255] shifted by +255.
+extern const ushort g_8x16uSqrTab[];
+#define CV_SQR_8U(x) cv::g_8x16uSqrTab[(x)+255]
+
+// Saturation table valid for arguments in [-256, 512] (asserted below).
+extern const uchar g_Saturate8u[];
+#define CV_FAST_CAST_8U(t) (assert(-256 <= (t) && (t) <= 512), cv::g_Saturate8u[(t)+256])
+#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b)))
+#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a)))
+
+// uchar specializations of the arithm_core.hpp functors: saturating
+// add/sub/min/max via the saturation table instead of branches.
+template<> inline uchar OpAdd<uchar>::operator ()(uchar a, uchar b) const
+{ return CV_FAST_CAST_8U(a + b); }
+
+template<> inline uchar OpSub<uchar>::operator ()(uchar a, uchar b) const
+{ return CV_FAST_CAST_8U(a - b); }
+
+template<> inline short OpAbsDiff<short>::operator ()(short a, short b) const
+{ return saturate_cast<short>(std::abs(a - b)); }
+
+template<> inline schar OpAbsDiff<schar>::operator ()(schar a, schar b) const
+{ return saturate_cast<schar>(std::abs(a - b)); }
+
+template<> inline uchar OpMin<uchar>::operator ()(uchar a, uchar b) const { return CV_MIN_8U(a, b); }
+
+template<> inline uchar OpMax<uchar>::operator ()(uchar a, uchar b) const { return CV_MAX_8U(a, b); }
+
typedef void (*BinaryFunc)(const uchar* src1, size_t step1,
const uchar* src2, size_t step2,
uchar* dst, size_t step, Size sz,
/* maximal average node_count/hash_size ratio beyond which hash table is resized */
#define CV_SPARSE_HASH_RATIO 3
-
-
-// -128.f ... 255.f
-extern const float g_8x32fTab[];
-#define CV_8TO32F(x) cv::g_8x32fTab[(x)+128]
-
-extern const ushort g_8x16uSqrTab[];
-#define CV_SQR_8U(x) cv::g_8x16uSqrTab[(x)+255]
-
-extern const uchar g_Saturate8u[];
-#define CV_FAST_CAST_8U(t) (assert(-256 <= (t) && (t) <= 512), cv::g_Saturate8u[(t)+256])
-#define CV_MIN_8U(a,b) ((a) - CV_FAST_CAST_8U((a) - (b)))
-#define CV_MAX_8U(a,b) ((a) + CV_FAST_CAST_8U((b) - (a)))
-
-
#if defined WIN32 || defined _WIN32
void deleteThreadAllocData();
#endif
}
-#include "opencv2/hal/intrin.hpp"
-
#endif /*_CXCORE_INTERNAL_H_*/
void split8u(const uchar* src, uchar** dst, int len, int cn )
{
+ // Custom HAL hook mirrors merge8u/...: try cv_hal_split8u first, then the
+ // generic template fallback.
+ CALL_HAL(split8u, cv_hal_split8u, src,dst, len, cn)
split_(src, dst, len, cn);
}
void split16u(const ushort* src, ushort** dst, int len, int cn )
{
+ CALL_HAL(split16u, cv_hal_split16u, src,dst, len, cn)
split_(src, dst, len, cn);
}
void split32s(const int* src, int** dst, int len, int cn )
{
+ CALL_HAL(split32s, cv_hal_split32s, src,dst, len, cn)
split_(src, dst, len, cn);
}
void split64s(const int64* src, int64** dst, int len, int cn )
{
+ CALL_HAL(split64s, cv_hal_split64s, src,dst, len, cn)
split_(src, dst, len, cn);
}
return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);
}
+
+namespace cv { namespace hal {
+
+// popCountTable[i] == number of set bits in the byte i (standard popcount).
+static const uchar popCountTable[] =
+{
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+// popCountTable2[i] == number of non-zero 2-bit pairs in the byte i
+// (used by normHamming with cellSize == 2).
+static const uchar popCountTable2[] =
+{
+ 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
+ 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
+ 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+ 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+ 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+ 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+ 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
+ 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
+};
+
+// popCountTable4[i] == number of non-zero 4-bit nibbles in the byte i
+// (used by normHamming with cellSize == 4).
+static const uchar popCountTable4[] =
+{
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+// Hamming norm of a single bit vector: total number of set bits in the n
+// bytes at 'a'. The NEON path consumes 16 bytes per iteration with vcntq_u8
+// and pairwise-add widening; the scalar tail (and non-NEON builds) use the
+// byte popcount table, unrolled 4x.
+int normHamming(const uchar* a, int n)
+{
+ int i = 0;
+ int result = 0;
+#if CV_NEON
+ {
+ uint32x4_t bits = vmovq_n_u32(0);
+ for (; i <= n - 16; i += 16) {
+ uint8x16_t A_vec = vld1q_u8 (a + i);
+ uint8x16_t bitsSet = vcntq_u8 (A_vec);
+ uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
+ uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
+ bits = vaddq_u32(bits, bitSet4);
+ }
+ // Fold the four 32-bit lanes: widen to 2x64, then take the low 32 bits
+ // of each 64-bit half (s32 lanes 0 and 2).
+ uint64x2_t bitSet2 = vpaddlq_u32 (bits);
+ result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
+ result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
+ }
+#endif
+ for( ; i <= n - 4; i += 4 )
+ result += popCountTable[a[i]] + popCountTable[a[i+1]] +
+ popCountTable[a[i+2]] + popCountTable[a[i+3]];
+ for( ; i < n; i++ )
+ result += popCountTable[a[i]];
+ return result;
+}
+
+// Hamming distance between two bit vectors: popcount of (a XOR b) over n
+// bytes. Same structure as the single-argument overload above, with an
+// extra XOR in both the NEON and scalar paths.
+int normHamming(const uchar* a, const uchar* b, int n)
+{
+ int i = 0;
+ int result = 0;
+#if CV_NEON
+ {
+ uint32x4_t bits = vmovq_n_u32(0);
+ for (; i <= n - 16; i += 16) {
+ uint8x16_t A_vec = vld1q_u8 (a + i);
+ uint8x16_t B_vec = vld1q_u8 (b + i);
+ uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
+ uint8x16_t bitsSet = vcntq_u8 (AxorB);
+ uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
+ uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
+ bits = vaddq_u32(bits, bitSet4);
+ }
+ uint64x2_t bitSet2 = vpaddlq_u32 (bits);
+ result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
+ result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
+ }
+#endif
+ for( ; i <= n - 4; i += 4 )
+ result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
+ popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
+ for( ; i < n; i++ )
+ result += popCountTable[a[i] ^ b[i]];
+ return result;
+}
+
+// Cell-based Hamming norm: cellSize selects granularity — 1 counts set bits
+// (delegates to the plain overload), 2 counts non-zero 2-bit pairs, 4 counts
+// non-zero nibbles. Returns -1 for any other cellSize.
+int normHamming(const uchar* a, int n, int cellSize)
+{
+ if( cellSize == 1 )
+ return normHamming(a, n);
+ const uchar* tab = 0;
+ if( cellSize == 2 )
+ tab = popCountTable2;
+ else if( cellSize == 4 )
+ tab = popCountTable4;
+ else
+ return -1;
+ int i = 0;
+ int result = 0;
+#if CV_ENABLE_UNROLLED
+ for( ; i <= n - 4; i += 4 )
+ result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
+#endif
+ for( ; i < n; i++ )
+ result += tab[a[i]];
+ return result;
+}
+
+// Cell-based Hamming distance between two vectors: like the single-vector
+// cell overload above, but applied to (a XOR b). Returns -1 when cellSize
+// is not 1, 2, or 4.
+int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
+{
+ if( cellSize == 1 )
+ return normHamming(a, b, n);
+ const uchar* tab = 0;
+ if( cellSize == 2 )
+ tab = popCountTable2;
+ else if( cellSize == 4 )
+ tab = popCountTable4;
+ else
+ return -1;
+ int i = 0;
+ int result = 0;
+ #if CV_ENABLE_UNROLLED
+ for( ; i <= n - 4; i += 4 )
+ result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
+ tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
+ #endif
+ for( ; i < n; i++ )
+ result += tab[a[i] ^ b[i]];
+ return result;
+}
+
+// Squared L2 distance between two float vectors of length n.
+// SSE path processes 8 floats per iteration into two accumulators; the
+// scalar loops then continue from wherever j was left, handling the tail
+// (or the whole vector on non-SSE builds).
+float normL2Sqr_(const float* a, const float* b, int n)
+{
+ int j = 0; float d = 0.f;
+#if CV_SSE
+ float CV_DECL_ALIGNED(16) buf[4];
+ __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
+
+ for( ; j <= n - 8; j += 8 )
+ {
+ __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
+ __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
+ d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
+ d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
+ }
+ _mm_store_ps(buf, _mm_add_ps(d0, d1));
+ d = buf[0] + buf[1] + buf[2] + buf[3];
+#endif
+ {
+ for( ; j <= n - 4; j += 4 )
+ {
+ float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
+ d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
+ }
+ }
+
+ for( ; j < n; j++ )
+ {
+ float t = a[j] - b[j];
+ d += t*t;
+ }
+ return d;
+}
+
+
+// L1 (sum of absolute differences) distance between two float vectors.
+// SSE takes |x| by masking off the sign bit; NEON uses vabdq_f32.
+// The scalar loops pick up whatever tail the vector path left behind.
+float normL1_(const float* a, const float* b, int n)
+{
+ int j = 0; float d = 0.f;
+#if CV_SSE
+ float CV_DECL_ALIGNED(16) buf[4];
+ static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
+ __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
+ __m128 absmask = _mm_load_ps((const float*)absbuf);
+
+ for( ; j <= n - 8; j += 8 )
+ {
+ __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
+ __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
+ d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
+ d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
+ }
+ _mm_store_ps(buf, _mm_add_ps(d0, d1));
+ d = buf[0] + buf[1] + buf[2] + buf[3];
+#elif CV_NEON
+ float32x4_t v_sum = vdupq_n_f32(0.0f);
+ for ( ; j <= n - 4; j += 4)
+ v_sum = vaddq_f32(v_sum, vabdq_f32(vld1q_f32(a + j), vld1q_f32(b + j)));
+
+ float CV_DECL_ALIGNED(16) buf[4];
+ vst1q_f32(buf, v_sum);
+ d = buf[0] + buf[1] + buf[2] + buf[3];
+#endif
+ {
+ for( ; j <= n - 4; j += 4 )
+ {
+ d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
+ std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
+ }
+ }
+
+ for( ; j < n; j++ )
+ d += std::abs(a[j] - b[j]);
+ return d;
+}
+
+// L1 distance between two uchar vectors, returned as int.
+// SSE uses _mm_sad_epu8 (sum of absolute differences) on 16- then 4-byte
+// chunks; NEON uses vabdq_u8 with widening accumulation. Scalar loops
+// handle the remainder (or everything on plain builds).
+int normL1_(const uchar* a, const uchar* b, int n)
+{
+ int j = 0, d = 0;
+#if CV_SSE
+ __m128i d0 = _mm_setzero_si128();
+
+ for( ; j <= n - 16; j += 16 )
+ {
+ __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
+ __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
+
+ d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
+ }
+
+ for( ; j <= n - 4; j += 4 )
+ {
+ __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
+ __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
+
+ d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
+ }
+ d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
+#elif CV_NEON
+ // NOTE(review): 0.0f float literal passed to vdupq_n_u32 — converts to 0,
+ // harmless, but should be a plain 0 for clarity.
+ uint32x4_t v_sum = vdupq_n_u32(0.0f);
+ for ( ; j <= n - 16; j += 16)
+ {
+ uint8x16_t v_dst = vabdq_u8(vld1q_u8(a + j), vld1q_u8(b + j));
+ uint16x8_t v_low = vmovl_u8(vget_low_u8(v_dst)), v_high = vmovl_u8(vget_high_u8(v_dst));
+ v_sum = vaddq_u32(v_sum, vaddl_u16(vget_low_u16(v_low), vget_low_u16(v_high)));
+ v_sum = vaddq_u32(v_sum, vaddl_u16(vget_high_u16(v_low), vget_high_u16(v_high)));
+ }
+
+ uint CV_DECL_ALIGNED(16) buf[4];
+ vst1q_u32(buf, v_sum);
+ d = buf[0] + buf[1] + buf[2] + buf[3];
+#endif
+ {
+ for( ; j <= n - 4; j += 4 )
+ {
+ d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
+ std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
+ }
+ }
+ for( ; j < n; j++ )
+ d += std::abs(a[j] - b[j]);
+ return d;
+}
+
+}} //cv::hal
#undef max
#undef abs
#include <tchar.h>
+#if defined _MSC_VER
+ #if _MSC_VER >= 1400
+ #include <intrin.h>
+ #elif defined _M_IX86
+ // Fallbacks for pre-VS2005 x86 compilers that lack <intrin.h>: hand-rolled
+ // __cpuid / __cpuidex via inline assembly.
+ // NOTE(review): this __cpuid ignores its leaf argument and always queries
+ // leaf 1; __cpuidex likewise hardcodes leaf 7 / subleaf 0. That matches the
+ // only two call sites in HWFeatures::initialize, but the signatures are
+ // misleading — confirm no other callers appear.
+ static void __cpuid(int* cpuid_data, int)
+ {
+ __asm
+ {
+ push ebx
+ push edi
+ mov edi, cpuid_data
+ mov eax, 1
+ cpuid
+ mov [edi], eax
+ mov [edi + 4], ebx
+ mov [edi + 8], ecx
+ mov [edi + 12], edx
+ pop edi
+ pop ebx
+ }
+ }
+ static void __cpuidex(int* cpuid_data, int, int)
+ {
+ // NOTE(review): unlike __cpuid above, EBX is not saved/restored here even
+ // though cpuid clobbers it — verify this is safe for the target compiler.
+ __asm
+ {
+ push edi
+ mov edi, cpuid_data
+ mov eax, 7
+ mov ecx, 0
+ cpuid
+ mov [edi], eax
+ mov [edi + 4], ebx
+ mov [edi + 8], ecx
+ mov [edi + 12], edx
+ pop edi
+ }
+ }
+ #endif
+#endif
#ifdef WINRT
#include <wrl/client.h>
msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str());
}
+// CPU capability probe, moved into core (was in the removed hal module).
+// 'have' is indexed by the CV_CPU_* feature constants; featuresDisabled is a
+// default-constructed all-false instance used when optimizations are turned
+// off via setUseOptimized(false).
+struct HWFeatures
+{
+ enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
+
+ HWFeatures(void)
+ {
+ memset( have, 0, sizeof(have) );
+ x86_family = 0;
+ }
+
+ // Runs the actual detection: CPUID leaf 1 for family + SSE/AVX bits,
+ // CPUID leaf 7 for AVX2/AVX-512, and /proc/self/auxv (or compile-time
+ // macros) for NEON on ARM.
+ static HWFeatures initialize(void)
+ {
+ HWFeatures f;
+ int cpuid_data[4] = { 0, 0, 0, 0 };
+
+ #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
+ __cpuid(cpuid_data, 1);
+ #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ #ifdef __x86_64__
+ asm __volatile__
+ (
+ "movl $1, %%eax\n\t"
+ "cpuid\n\t"
+ :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
+ :
+ : "cc"
+ );
+ #else
+ // 32-bit PIC-safe variant: EBX must be preserved manually.
+ asm volatile
+ (
+ "pushl %%ebx\n\t"
+ "movl $1,%%eax\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(cpuid_data[0]), "=c"(cpuid_data[2]), "=d"(cpuid_data[3])
+ :
+ : "cc"
+ );
+ #endif
+ #endif
+
+ f.x86_family = (cpuid_data[0] >> 8) & 15;
+ if( f.x86_family >= 6 )
+ {
+ f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0;
+ f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
+ f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
+ f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
+ f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
+ f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
+ f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
+ f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
+ f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
+ f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
+
+ // make the second call to the cpuid command in order to get
+ // information about extended features like AVX2
+ #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
+ __cpuidex(cpuid_data, 7, 0);
+ #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ #ifdef __x86_64__
+ asm __volatile__
+ (
+ "movl $7, %%eax\n\t"
+ "movl $0, %%ecx\n\t"
+ "cpuid\n\t"
+ :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
+ :
+ : "cc"
+ );
+ #else
+ asm volatile
+ (
+ "pushl %%ebx\n\t"
+ "movl $7,%%eax\n\t"
+ "movl $0,%%ecx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %0\n\t"
+ "popl %%ebx\n\t"
+ : "=r"(cpuid_data[1]), "=c"(cpuid_data[2])
+ :
+ : "cc"
+ );
+ #endif
+ #endif
+ f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
+
+ f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
+ f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
+ f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
+ f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
+ f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
+ f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
+ f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
+ f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
+ f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
+ }
+
+ #if defined ANDROID || defined __linux__
+ #ifdef __aarch64__
+ f.have[CV_CPU_NEON] = true;
+ #else
+ // 32-bit ARM Linux: read AT_HWCAP from the auxiliary vector; bit 4096
+ // is presumably HWCAP_NEON — confirm against <asm/hwcap.h>.
+ int cpufile = open("/proc/self/auxv", O_RDONLY);
+
+ if (cpufile >= 0)
+ {
+ Elf32_auxv_t auxv;
+ const size_t size_auxv_t = sizeof(auxv);
+
+ while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t)
+ {
+ if (auxv.a_type == AT_HWCAP)
+ {
+ f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
+ break;
+ }
+ }
+
+ close(cpufile);
+ }
+ #endif
+ #elif (defined __clang__ || defined __APPLE__) && (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
+ f.have[CV_CPU_NEON] = true;
+ #endif
+
+ return f;
+ }
+
+ int x86_family;
+ bool have[MAX_FEATURE+1];
+};
+
+// featuresEnabled: detected at static-init time; featuresDisabled: all false.
+// currentFeatures is switched between them by setUseOptimized().
+static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures();
+static HWFeatures* currentFeatures = &featuresEnabled;
+
+
+// True when the CV_CPU_* 'feature' was detected AND optimizations are
+// enabled: setUseOptimized(false) points currentFeatures at the all-false
+// featuresDisabled table.
bool checkHardwareSupport(int feature)
{
CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
- return cv::hal::checkHardwareSupport(feature);
+ return currentFeatures->have[feature];
}
+
+volatile bool useOptimizedFlag = true;
+
void setUseOptimized( bool flag )
{
- cv::hal::setUseOptimized(flag);
+ useOptimizedFlag = flag;
+ currentFeatures = flag ? &featuresEnabled : &featuresDisabled;
ipp::setUseIPP(flag);
#ifdef HAVE_OPENCL
+// Reports the flag last set by setUseOptimized (useOptimizedFlag defaults
+// to true at startup).
bool useOptimized(void)
{
- return cv::hal::useOptimized();
+ return useOptimizedFlag;
}
int64 getTickCount(void)
+// C-API wrapper: same check as cv::checkHardwareSupport, returned as int.
CV_IMPL int cvCheckHardwareSupport(int feature)
{
CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
- return cv::hal::checkHardwareSupport(feature);
+ return cv::currentFeatures->have[feature];
}
+// C-API wrapper: sets the optimization flag and returns the previous value.
CV_IMPL int cvUseOptimized( int flag )
{
- int prevMode = cv::useOptimized();
+ int prevMode = cv::useOptimizedFlag;
cv::setUseOptimized( flag != 0 );
return prevMode;
}
//M*/
#include "test_precomp.hpp"
-#include "opencv2/hal.hpp"
using namespace cv;
{
case HAL_EXP:
if( depth == CV_32F )
- hal::exp(src.ptr<float>(), dst.ptr<float>(), n);
+ hal::exp32f(src.ptr<float>(), dst.ptr<float>(), n);
else
- hal::exp(src.ptr<double>(), dst.ptr<double>(), n);
+ hal::exp64f(src.ptr<double>(), dst.ptr<double>(), n);
break;
case HAL_LOG:
if( depth == CV_32F )
- hal::log(src.ptr<float>(), dst.ptr<float>(), n);
+ hal::log32f(src.ptr<float>(), dst.ptr<float>(), n);
else
- hal::log(src.ptr<double>(), dst.ptr<double>(), n);
+ hal::log64f(src.ptr<double>(), dst.ptr<double>(), n);
break;
case HAL_SQRT:
if( depth == CV_32F )
- hal::sqrt(src.ptr<float>(), dst.ptr<float>(), n);
+ hal::sqrt32f(src.ptr<float>(), dst.ptr<float>(), n);
else
- hal::sqrt(src.ptr<double>(), dst.ptr<double>(), n);
+ hal::sqrt64f(src.ptr<double>(), dst.ptr<double>(), n);
break;
default:
CV_Error(Error::StsBadArg, "unknown function");
{
case HAL_LU:
if( depth == CV_32F )
- hal::LU(a.ptr<float>(), a.step, size, x.ptr<float>(), x.step, 1);
+ hal::LU32f(a.ptr<float>(), a.step, size, x.ptr<float>(), x.step, 1);
else
- hal::LU(a.ptr<double>(), a.step, size, x.ptr<double>(), x.step, 1);
+ hal::LU64f(a.ptr<double>(), a.step, size, x.ptr<double>(), x.step, 1);
break;
case HAL_CHOL:
if( depth == CV_32F )
- hal::Cholesky(a.ptr<float>(), a.step, size, x.ptr<float>(), x.step, 1);
+ hal::Cholesky32f(a.ptr<float>(), a.step, size, x.ptr<float>(), x.step, 1);
else
- hal::Cholesky(a.ptr<double>(), a.step, size, x.ptr<double>(), x.step, 1);
+ hal::Cholesky64f(a.ptr<double>(), a.step, size, x.ptr<double>(), x.step, 1);
break;
default:
CV_Error(Error::StsBadArg, "unknown function");
#ifndef _TEST_UTILS_HPP_
#define _TEST_UTILS_HPP_
-#include "opencv2/hal/intrin.hpp"
+#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/ts.hpp"
#include <ostream>
#include <algorithm>
#include "opencv2/ts.hpp"
#include "opencv2/core/core_c.h"
+#include "opencv2/core/cvdef.h"
#include "opencv2/core/private.hpp"
+#include "opencv2/core/hal/hal.hpp"
+#include "opencv2/core/hal/intrin.hpp"
#endif
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
+#include "opencv2/core/hal/hal.hpp"
#include <algorithm>
+++ /dev/null
-set(the_description "The Hardware Acceleration Layer (HAL) module")
-
-set(OPENCV_MODULE_TYPE STATIC)
-
-if(OPENCV_HAL_HEADERS AND OPENCV_HAL_LIBS)
- set(OPENCV_HAL_HEADERS_INCLUDES "#include \"${OPENCV_HAL_HEADERS}\"")
- set(DEPS "${OPENCV_HAL_LIBS}")
-else()
- set(OPENCV_HAL_HEADERS_INCLUDES "// using default HAL")
- set(DEPS "")
-endif()
-
-configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/custom_hal.hpp.in" "${CMAKE_BINARY_DIR}/custom_hal.hpp" @ONLY)
-
-if(UNIX)
- if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
- endif()
-endif()
-
-ocv_define_module(hal ${DEPS})
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_HAL_HPP__
-#define __OPENCV_HAL_HPP__
-
-#include "opencv2/hal/defs.h"
-#include "opencv2/hal/interface.hpp"
-
-/**
- @defgroup hal Hardware Acceleration Layer
- @{
- @defgroup hal_intrin Universal intrinsics
- @{
- @defgroup hal_intrin_impl Private implementation helpers
- @}
- @defgroup hal_utils Platform-dependent utils
- @}
-*/
-
-namespace cv { namespace hal {
-
-//! @addtogroup hal
-//! @{
-
-class Failure
-{
-public:
- Failure(int code_ = Error::Unknown) : code(code_) {}
-public:
- int code;
-};
-
-int normHamming(const uchar* a, int n);
-int normHamming(const uchar* a, const uchar* b, int n);
-
-int normHamming(const uchar* a, int n, int cellSize);
-int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
-
-//////////////////////////////// low-level functions ////////////////////////////////
-
-int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
-int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
-bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
-bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
-
-int normL1_(const uchar* a, const uchar* b, int n);
-float normL1_(const float* a, const float* b, int n);
-float normL2Sqr_(const float* a, const float* b, int n);
-
-void exp(const float* src, float* dst, int n);
-void exp(const double* src, double* dst, int n);
-void log(const float* src, float* dst, int n);
-void log(const double* src, double* dst, int n);
-
-void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
-void magnitude(const float* x, const float* y, float* dst, int n);
-void magnitude(const double* x, const double* y, double* dst, int n);
-void sqrt(const float* src, float* dst, int len);
-void sqrt(const double* src, double* dst, int len);
-void invSqrt(const float* src, float* dst, int len);
-void invSqrt(const double* src, double* dst, int len);
-
-void split8u(const uchar* src, uchar** dst, int len, int cn );
-void split16u(const ushort* src, ushort** dst, int len, int cn );
-void split32s(const int* src, int** dst, int len, int cn );
-void split64s(const int64* src, int64** dst, int len, int cn );
-
-void merge8u(const uchar** src, uchar* dst, int len, int cn );
-void merge16u(const ushort** src, ushort* dst, int len, int cn );
-void merge32s(const int** src, int* dst, int len, int cn );
-void merge64s(const int64** src, int64* dst, int len, int cn );
-
-void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
-void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
-void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
-void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
-void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
-void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
-
-void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
-void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
-void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
-void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
-void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
-void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
-
-void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
-void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
-void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
-void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
-void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
-void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
-
-void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
-void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
-void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
-void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
-void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
-void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
-
-void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
-void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
-void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
-void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
-void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
-void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
-
-void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
-
-void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
-void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
-void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
-void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
-void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
-void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
-void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
-
-void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
-void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
-void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
-void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
-void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
-void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
-void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
-
-void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
-void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
-void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
-void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
-void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
-void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
-void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
-
-void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
-void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
-void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
-void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
-void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
-void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
-void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
-
-void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
-void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
-void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
-void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
-void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
-void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
-void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
-//! @}
-
-}} //cv::hal
-
-namespace cv {
-
-template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
-{
- typedef T1 type1;
- typedef T2 type2;
- typedef T3 rtype;
- T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
-};
-
-template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
-{
- typedef T1 type1;
- typedef T2 type2;
- typedef T3 rtype;
- T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
-};
-
-template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub
-{
- typedef T1 type1;
- typedef T2 type2;
- typedef T3 rtype;
- T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); }
-};
-
-template<typename T> struct OpMin
-{
- typedef T type1;
- typedef T type2;
- typedef T rtype;
- T operator ()(const T a, const T b) const { return std::min(a, b); }
-};
-
-template<typename T> struct OpMax
-{
- typedef T type1;
- typedef T type2;
- typedef T rtype;
- T operator ()(const T a, const T b) const { return std::max(a, b); }
-};
-
-template<typename T> struct OpAbsDiff
-{
- typedef T type1;
- typedef T type2;
- typedef T rtype;
- T operator()(T a, T b) const { return a > b ? a - b : b - a; }
-};
-
-template<typename T> struct OpAnd
-{
- typedef T type1;
- typedef T type2;
- typedef T rtype;
- T operator()( T a, T b ) const { return a & b; }
-};
-
-template<typename T> struct OpOr
-{
- typedef T type1;
- typedef T type2;
- typedef T rtype;
- T operator()( T a, T b ) const { return a | b; }
-};
-
-template<typename T> struct OpXor
-{
- typedef T type1;
- typedef T type2;
- typedef T rtype;
- T operator()( T a, T b ) const { return a ^ b; }
-};
-
-template<typename T> struct OpNot
-{
- typedef T type1;
- typedef T type2;
- typedef T rtype;
- T operator()( T a, T ) const { return ~a; }
-};
-
-}
-
-#endif //__OPENCV_HAL_HPP__
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DEF_H__
-#define __OPENCV_DEF_H__
-
-//! @addtogroup hal_utils
-//! @{
-
-#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
-# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
-#endif
-
-#include <limits.h>
-#include "opencv2/hal/interface.hpp"
-
-#if defined __ICL
-# define CV_ICC __ICL
-#elif defined __ICC
-# define CV_ICC __ICC
-#elif defined __ECL
-# define CV_ICC __ECL
-#elif defined __ECC
-# define CV_ICC __ECC
-#elif defined __INTEL_COMPILER
-# define CV_ICC __INTEL_COMPILER
-#endif
-
-#ifndef CV_INLINE
-# if defined __cplusplus
-# define CV_INLINE static inline
-# elif defined _MSC_VER
-# define CV_INLINE __inline
-# else
-# define CV_INLINE static
-# endif
-#endif
-
-#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
-# define CV_ENABLE_UNROLLED 0
-#else
-# define CV_ENABLE_UNROLLED 1
-#endif
-
-#ifdef __GNUC__
-# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
-#elif defined _MSC_VER
-# define CV_DECL_ALIGNED(x) __declspec(align(x))
-#else
-# define CV_DECL_ALIGNED(x)
-#endif
-
-/* CPU features and intrinsics support */
-#define CV_CPU_NONE 0
-#define CV_CPU_MMX 1
-#define CV_CPU_SSE 2
-#define CV_CPU_SSE2 3
-#define CV_CPU_SSE3 4
-#define CV_CPU_SSSE3 5
-#define CV_CPU_SSE4_1 6
-#define CV_CPU_SSE4_2 7
-#define CV_CPU_POPCNT 8
-
-#define CV_CPU_AVX 10
-#define CV_CPU_AVX2 11
-#define CV_CPU_FMA3 12
-
-#define CV_CPU_AVX_512F 13
-#define CV_CPU_AVX_512BW 14
-#define CV_CPU_AVX_512CD 15
-#define CV_CPU_AVX_512DQ 16
-#define CV_CPU_AVX_512ER 17
-#define CV_CPU_AVX_512IFMA512 18
-#define CV_CPU_AVX_512PF 19
-#define CV_CPU_AVX_512VBMI 20
-#define CV_CPU_AVX_512VL 21
-
-#define CV_CPU_NEON 100
-
-// when adding to this list remember to update the following enum
-#define CV_HARDWARE_MAX_FEATURE 255
-
-/** @brief Available CPU features.
-*/
-enum CpuFeatures {
- CPU_MMX = 1,
- CPU_SSE = 2,
- CPU_SSE2 = 3,
- CPU_SSE3 = 4,
- CPU_SSSE3 = 5,
- CPU_SSE4_1 = 6,
- CPU_SSE4_2 = 7,
- CPU_POPCNT = 8,
-
- CPU_AVX = 10,
- CPU_AVX2 = 11,
- CPU_FMA3 = 12,
-
- CPU_AVX_512F = 13,
- CPU_AVX_512BW = 14,
- CPU_AVX_512CD = 15,
- CPU_AVX_512DQ = 16,
- CPU_AVX_512ER = 17,
- CPU_AVX_512IFMA512 = 18,
- CPU_AVX_512PF = 19,
- CPU_AVX_512VBMI = 20,
- CPU_AVX_512VL = 21,
-
- CPU_NEON = 100
-};
-
-// do not include SSE/AVX/NEON headers for NVCC compiler
-#ifndef __CUDACC__
-
-#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
-# include <emmintrin.h>
-# define CV_MMX 1
-# define CV_SSE 1
-# define CV_SSE2 1
-# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# include <pmmintrin.h>
-# define CV_SSE3 1
-# endif
-# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# include <tmmintrin.h>
-# define CV_SSSE3 1
-# endif
-# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# include <smmintrin.h>
-# define CV_SSE4_1 1
-# endif
-# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# include <nmmintrin.h>
-# define CV_SSE4_2 1
-# endif
-# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
-# ifdef _MSC_VER
-# include <nmmintrin.h>
-# else
-# include <popcntintrin.h>
-# endif
-# define CV_POPCNT 1
-# endif
-# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
-// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
-// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
-# include <immintrin.h>
-# define CV_AVX 1
-# if defined(_XCR_XFEATURE_ENABLED_MASK)
-# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
-# else
-# define __xgetbv() 0
-# endif
-# endif
-# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
-# include <immintrin.h>
-# define CV_AVX2 1
-# if defined __FMA__
-# define CV_FMA3 1
-# endif
-# endif
-#endif
-
-#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
-# include <Intrin.h>
-# include "arm_neon.h"
-# define CV_NEON 1
-# define CPU_HAS_NEON_FEATURE (true)
-#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
-# include <arm_neon.h>
-# define CV_NEON 1
-#endif
-
-#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
-# define CV_VFP 1
-#endif
-
-#endif // __CUDACC__
-
-#ifndef CV_POPCNT
-#define CV_POPCNT 0
-#endif
-#ifndef CV_MMX
-# define CV_MMX 0
-#endif
-#ifndef CV_SSE
-# define CV_SSE 0
-#endif
-#ifndef CV_SSE2
-# define CV_SSE2 0
-#endif
-#ifndef CV_SSE3
-# define CV_SSE3 0
-#endif
-#ifndef CV_SSSE3
-# define CV_SSSE3 0
-#endif
-#ifndef CV_SSE4_1
-# define CV_SSE4_1 0
-#endif
-#ifndef CV_SSE4_2
-# define CV_SSE4_2 0
-#endif
-#ifndef CV_AVX
-# define CV_AVX 0
-#endif
-#ifndef CV_AVX2
-# define CV_AVX2 0
-#endif
-#ifndef CV_FMA3
-# define CV_FMA3 0
-#endif
-#ifndef CV_AVX_512F
-# define CV_AVX_512F 0
-#endif
-#ifndef CV_AVX_512BW
-# define CV_AVX_512BW 0
-#endif
-#ifndef CV_AVX_512CD
-# define CV_AVX_512CD 0
-#endif
-#ifndef CV_AVX_512DQ
-# define CV_AVX_512DQ 0
-#endif
-#ifndef CV_AVX_512ER
-# define CV_AVX_512ER 0
-#endif
-#ifndef CV_AVX_512IFMA512
-# define CV_AVX_512IFMA512 0
-#endif
-#ifndef CV_AVX_512PF
-# define CV_AVX_512PF 0
-#endif
-#ifndef CV_AVX_512VBMI
-# define CV_AVX_512VBMI 0
-#endif
-#ifndef CV_AVX_512VL
-# define CV_AVX_512VL 0
-#endif
-
-#ifndef CV_NEON
-# define CV_NEON 0
-#endif
-
-#ifndef CV_VFP
-# define CV_VFP 0
-#endif
-
-/* fundamental constants */
-#define CV_PI 3.1415926535897932384626433832795
-#define CV_2PI 6.283185307179586476925286766559
-#define CV_LOG2 0.69314718055994530941723212145818
-
-typedef union Cv32suf
-{
- int i;
- unsigned u;
- float f;
-}
-Cv32suf;
-
-typedef union Cv64suf
-{
- int64 i;
- uint64 u;
- double f;
-}
-Cv64suf;
-
-namespace cv { namespace hal {
-
-bool checkHardwareSupport(int feature);
-void setUseOptimized(bool onoff);
-bool useOptimized();
-
-}}
-
-#define USE_SSE2 (cv::hal::checkHardwareSupport(CV_CPU_SSE))
-#define USE_SSE4_2 (cv::hal::checkHardwareSupport(CV_CPU_SSE4_2))
-#define USE_AVX (cv::hal::checkHardwareSupport(CV_CPU_AVX))
-#define USE_AVX2 (cv::hal::checkHardwareSupport(CV_CPU_AVX2))
-
-
-/****************************************************************************************\
-* fast math *
-\****************************************************************************************/
-
-#if defined __BORLANDC__
-# include <fastmath.h>
-#elif defined __cplusplus
-# include <cmath>
-#else
-# include <math.h>
-#endif
-
-#ifdef HAVE_TEGRA_OPTIMIZATION
-# include "tegra_round.hpp"
-#endif
-
-#if CV_VFP
- // 1. general scheme
- #define ARM_ROUND(_value, _asm_string) \
- int res; \
- float temp; \
- asm(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
- return res
- // 2. version for double
- #ifdef __clang__
- #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
- #else
- #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
- #endif
- // 3. version for float
- #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
-#endif // CV_VFP
-
-/** @brief Rounds floating-point number to the nearest integer
-
- @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
- result is not defined.
- */
-CV_INLINE int
-cvRound( double value )
-{
-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
- && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- __m128d t = _mm_set_sd( value );
- return _mm_cvtsd_si32(t);
-#elif defined _MSC_VER && defined _M_IX86
- int t;
- __asm
- {
- fld value;
- fistp t;
- }
- return t;
-#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
- defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
- TEGRA_ROUND_DBL(value);
-#elif defined CV_ICC || defined __GNUC__
-# if CV_VFP
- ARM_ROUND_DBL(value);
-# else
- return (int)lrint(value);
-# endif
-#else
- /* it's ok if round does not comply with IEEE754 standard;
- the tests should allow +/-1 difference when the tested functions use round */
- return (int)(value + (value >= 0 ? 0.5 : -0.5));
-#endif
-}
-
-
-/** @brief Rounds floating-point number to the nearest integer not larger than the original.
-
- The function computes an integer i such that:
- \f[i \le \texttt{value} < i+1\f]
- @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
- result is not defined.
- */
-CV_INLINE int cvFloor( double value )
-{
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- __m128d t = _mm_set_sd( value );
- int i = _mm_cvtsd_si32(t);
- return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
-#elif defined __GNUC__
- int i = (int)value;
- return i - (i > value);
-#else
- int i = cvRound(value);
- float diff = (float)(value - i);
- return i - (diff < 0);
-#endif
-}
-
-/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
-
- The function computes an integer i such that:
- \f[i \le \texttt{value} < i+1\f]
- @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
- result is not defined.
- */
-CV_INLINE int cvCeil( double value )
-{
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
- __m128d t = _mm_set_sd( value );
- int i = _mm_cvtsd_si32(t);
- return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
-#elif defined __GNUC__
- int i = (int)value;
- return i + (i < value);
-#else
- int i = cvRound(value);
- float diff = (float)(i - value);
- return i + (diff < 0);
-#endif
-}
-
-/** @brief Determines if the argument is Not A Number.
-
- @param value The input floating-point value
-
- The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
- otherwise. */
-CV_INLINE int cvIsNaN( double value )
-{
- Cv64suf ieee754;
- ieee754.f = value;
- return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
- ((unsigned)ieee754.u != 0) > 0x7ff00000;
-}
-
-/** @brief Determines if the argument is Infinity.
-
- @param value The input floating-point value
-
- The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
- and 0 otherwise. */
-CV_INLINE int cvIsInf( double value )
-{
- Cv64suf ieee754;
- ieee754.f = value;
- return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
- (unsigned)ieee754.u == 0;
-}
-
-#ifdef __cplusplus
-
-/** @overload */
-CV_INLINE int cvRound(float value)
-{
-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
- defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- __m128 t = _mm_set_ss( value );
- return _mm_cvtss_si32(t);
-#elif defined _MSC_VER && defined _M_IX86
- int t;
- __asm
- {
- fld value;
- fistp t;
- }
- return t;
-#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
- defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
- TEGRA_ROUND_FLT(value);
-#elif defined CV_ICC || defined __GNUC__
-# if CV_VFP
- ARM_ROUND_FLT(value);
-# else
- return (int)lrintf(value);
-# endif
-#else
- /* it's ok if round does not comply with IEEE754 standard;
- the tests should allow +/-1 difference when the tested functions use round */
- return (int)(value + (value >= 0 ? 0.5f : -0.5f));
-#endif
-}
-
-/** @overload */
-CV_INLINE int cvRound( int value )
-{
- return value;
-}
-
-/** @overload */
-CV_INLINE int cvFloor( float value )
-{
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
- __m128 t = _mm_set_ss( value );
- int i = _mm_cvtss_si32(t);
- return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
-#elif defined __GNUC__
- int i = (int)value;
- return i - (i > value);
-#else
- int i = cvRound(value);
- float diff = (float)(value - i);
- return i - (diff < 0);
-#endif
-}
-
-/** @overload */
-CV_INLINE int cvFloor( int value )
-{
- return value;
-}
-
-/** @overload */
-CV_INLINE int cvCeil( float value )
-{
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
- __m128 t = _mm_set_ss( value );
- int i = _mm_cvtss_si32(t);
- return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
-#elif defined __GNUC__
- int i = (int)value;
- return i + (i < value);
-#else
- int i = cvRound(value);
- float diff = (float)(i - value);
- return i + (diff < 0);
-#endif
-}
-
-/** @overload */
-CV_INLINE int cvCeil( int value )
-{
- return value;
-}
-
-/** @overload */
-CV_INLINE int cvIsNaN( float value )
-{
- Cv32suf ieee754;
- ieee754.f = value;
- return (ieee754.u & 0x7fffffff) > 0x7f800000;
-}
-
-/** @overload */
-CV_INLINE int cvIsInf( float value )
-{
- Cv32suf ieee754;
- ieee754.f = value;
- return (ieee754.u & 0x7fffffff) == 0x7f800000;
-}
-
-//! @}
-
-#include <algorithm>
-
-namespace cv
-{
-
-//! @addtogroup hal_utils
-//! @{
-
-/////////////// saturate_cast (used in image & signal processing) ///////////////////
-
-/** @brief Template function for accurate conversion from one primitive type to another.
-
- The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\<T\>()
- and others. They perform an efficient and accurate conversion from one primitive type to another
- (see the introduction chapter). saturate in the name means that when the input value v is out of the
- range of the target type, the result is not formed just by taking low bits of the input, but instead
- the value is clipped. For example:
- @code
- uchar a = saturate_cast<uchar>(-100); // a = 0 (UCHAR_MIN)
- short b = saturate_cast<short>(33333.33333); // b = 32767 (SHRT_MAX)
- @endcode
- Such clipping is done when the target type is unsigned char , signed char , unsigned short or
- signed short . For 32-bit integers, no clipping is done.
-
- When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit),
- the floating-point value is first rounded to the nearest integer and then clipped if needed (when
- the target type is 8- or 16-bit).
-
- This operation is used in the simplest or most complex image processing functions in OpenCV.
-
- @param v Function parameter.
- @sa add, subtract, multiply, divide, Mat::convertTo
- */
-template<typename _Tp> static inline _Tp saturate_cast(uchar v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(schar v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(ushort v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(short v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(unsigned v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(int64 v) { return _Tp(v); }
-/** @overload */
-template<typename _Tp> static inline _Tp saturate_cast(uint64 v) { return _Tp(v); }
-
-template<> inline uchar saturate_cast<uchar>(schar v) { return (uchar)std::max((int)v, 0); }
-template<> inline uchar saturate_cast<uchar>(ushort v) { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); }
-template<> inline uchar saturate_cast<uchar>(int v) { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
-template<> inline uchar saturate_cast<uchar>(short v) { return saturate_cast<uchar>((int)v); }
-template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
-template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
-template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
-template<> inline uchar saturate_cast<uchar>(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
-template<> inline uchar saturate_cast<uchar>(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); }
-
-template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); }
-template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
-template<> inline schar saturate_cast<schar>(int v) { return (schar)((unsigned)(v-SCHAR_MIN) <= (unsigned)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
-template<> inline schar saturate_cast<schar>(short v) { return saturate_cast<schar>((int)v); }
-template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
-template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
-template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
-template<> inline schar saturate_cast<schar>(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
-template<> inline schar saturate_cast<schar>(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); }
-
-template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); }
-template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); }
-template<> inline ushort saturate_cast<ushort>(int v) { return (ushort)((unsigned)v <= (unsigned)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
-template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
-template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
-template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
-template<> inline ushort saturate_cast<ushort>(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
-template<> inline ushort saturate_cast<ushort>(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); }
-
-template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); }
-template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
-template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); }
-template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
-template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
-template<> inline short saturate_cast<short>(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
-template<> inline short saturate_cast<short>(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); }
-
-template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
-template<> inline int saturate_cast<int>(double v) { return cvRound(v); }
-
-// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
-template<> inline unsigned saturate_cast<unsigned>(float v) { return cvRound(v); }
-template<> inline unsigned saturate_cast<unsigned>(double v) { return cvRound(v); }
-
-//! @}
-
-}
-
-#endif // __cplusplus
-
-#endif //__OPENCV_HAL_H__
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "arithm_simd.hpp"
-#include "arithm_core.hpp"
-#include "replacement.hpp"
-
-namespace cv { namespace hal {
-
-//=======================================
-
-#undef CALL_HAL
-#define CALL_HAL(fun) \
- int res = fun(src1, step1, src2, step2, dst, step, width, height); \
- if (res == Error::Ok) \
- return; \
- else if (res != Error::NotImplemented) \
- throw Failure(res);
-
-#if (ARITHM_USE_IPP == 1)
-static inline void fixSteps(width, height, size_t elemSize, size_t& step1, size_t& step2, size_t& step)
-{
- if( height == 1 )
- step1 = step2 = step = width*elemSize;
-}
-#define CALL_IPP_BIN_12(fun) \
- CV_IPP_CHECK() \
- { \
- fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
- if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \
- { \
- CV_IMPL_ADD(CV_IMPL_IPP); \
- return; \
- } \
- setIppErrorStatus(); \
- }
-#else
-#define CALL_IPP_BIN_12(fun)
-#endif
-
-//=======================================
-// Add
-//=======================================
-
-void add8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_add8u)
- CALL_IPP_BIN_12(ippiAdd_8u_C1RSfs)
- (vBinOp<uchar, cv::OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void add8s( const schar* src1, size_t step1,
- const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_add8s)
- vBinOp<schar, cv::OpAdd<schar>, IF_SIMD(VAdd<schar>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void add16u( const ushort* src1, size_t step1,
- const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_add16u)
- CALL_IPP_BIN_12(ippiAdd_16u_C1RSfs)
- (vBinOp<ushort, cv::OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void add16s( const short* src1, size_t step1,
- const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_add16s)
- CALL_IPP_BIN_12(ippiAdd_16s_C1RSfs)
- (vBinOp<short, cv::OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void add32s( const int* src1, size_t step1,
- const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_add32s)
- vBinOp32<int, cv::OpAdd<int>, IF_SIMD(VAdd<int>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void add32f( const float* src1, size_t step1,
- const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_add32f)
- CALL_IPP_BIN_12(ippiAdd_32f_C1R)
- (vBinOp32<float, cv::OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void add64f( const double* src1, size_t step1,
- const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_add64f)
- vBinOp64<double, cv::OpAdd<double>, IF_SIMD(VAdd<double>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-//=======================================
-
-#if (ARITHM_USE_IPP == 1)
-#define CALL_IPP_BIN_21(fun) \
- CV_IPP_CHECK() \
- { \
- fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
- if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \
- { \
- CV_IMPL_ADD(CV_IMPL_IPP); \
- return; \
- } \
- setIppErrorStatus(); \
- }
-#else
-#define CALL_IPP_BIN_21(fun)
-#endif
-
-//=======================================
-// Subtract
-//=======================================
-
-void sub8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_sub8u)
- CALL_IPP_BIN_21(ippiSub_8u_C1RSfs)
- (vBinOp<uchar, cv::OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void sub8s( const schar* src1, size_t step1,
- const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_sub8s)
- vBinOp<schar, cv::OpSub<schar>, IF_SIMD(VSub<schar>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void sub16u( const ushort* src1, size_t step1,
- const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_sub16u)
- CALL_IPP_BIN_21(ippiSub_16u_C1RSfs)
- (vBinOp<ushort, cv::OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void sub16s( const short* src1, size_t step1,
- const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_sub16s)
- CALL_IPP_BIN_21(ippiSub_16s_C1RSfs)
- (vBinOp<short, cv::OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void sub32s( const int* src1, size_t step1,
- const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_sub32s)
- vBinOp32<int, cv::OpSub<int>, IF_SIMD(VSub<int>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void sub32f( const float* src1, size_t step1,
- const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_sub32f)
- CALL_IPP_BIN_21(ippiSub_32f_C1R)
- (vBinOp32<float, cv::OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void sub64f( const double* src1, size_t step1,
- const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_sub64f)
- vBinOp64<double, cv::OpSub<double>, IF_SIMD(VSub<double>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-//=======================================
-
-#if (ARITHM_USE_IPP == 1)
-#define CALL_IPP_MIN_MAX(fun, type) \
- CV_IPP_CHECK() \
- { \
- type* s1 = (type*)src1; \
- type* s2 = (type*)src2; \
- type* d = dst; \
- fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
- int i = 0; \
- for(; i < height; i++) \
- { \
- if (0 > fun(s1, s2, d, width)) \
- break; \
- s1 = (type*)((uchar*)s1 + step1); \
- s2 = (type*)((uchar*)s2 + step2); \
- d = (type*)((uchar*)d + step); \
- } \
- if (i == height) \
- { \
- CV_IMPL_ADD(CV_IMPL_IPP); \
- return; \
- } \
- setIppErrorStatus(); \
- }
-#else
-#define CALL_IPP_MIN_MAX(fun, type)
-#endif
-
-//=======================================
-// Max
-//=======================================
-
-void max8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_max8u)
- CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar)
- vBinOp<uchar, cv::OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void max8s( const schar* src1, size_t step1,
- const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_max8s)
- vBinOp<schar, cv::OpMax<schar>, IF_SIMD(VMax<schar>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void max16u( const ushort* src1, size_t step1,
- const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_max16u)
- CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort)
- vBinOp<ushort, cv::OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void max16s( const short* src1, size_t step1,
- const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_max16s)
- vBinOp<short, cv::OpMax<short>, IF_SIMD(VMax<short>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void max32s( const int* src1, size_t step1,
- const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_max32s)
- vBinOp32<int, cv::OpMax<int>, IF_SIMD(VMax<int>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void max32f( const float* src1, size_t step1,
- const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_max32f)
- CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float)
- vBinOp32<float, cv::OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void max64f( const double* src1, size_t step1,
- const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_max64f)
- CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double)
- vBinOp64<double, cv::OpMax<double>, IF_SIMD(VMax<double>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-//=======================================
-// Min
-//=======================================
-
-void min8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_min8u)
- CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar)
- vBinOp<uchar, cv::OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void min8s( const schar* src1, size_t step1,
- const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_min8s)
- vBinOp<schar, cv::OpMin<schar>, IF_SIMD(VMin<schar>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void min16u( const ushort* src1, size_t step1,
- const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_min16u)
- CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort)
- vBinOp<ushort, cv::OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void min16s( const short* src1, size_t step1,
- const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_min16s)
- vBinOp<short, cv::OpMin<short>, IF_SIMD(VMin<short>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void min32s( const int* src1, size_t step1,
- const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_min32s)
- vBinOp32<int, cv::OpMin<int>, IF_SIMD(VMin<int>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void min32f( const float* src1, size_t step1,
- const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_min32f)
- CALL_IPP_MIN_MAX(ippsMinEvery_32f, float)
- vBinOp32<float, cv::OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void min64f( const double* src1, size_t step1,
- const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_min64f)
- CALL_IPP_MIN_MAX(ippsMinEvery_64f, double)
- vBinOp64<double, cv::OpMin<double>, IF_SIMD(VMin<double>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-//=======================================
-// AbsDiff
-//=======================================
-
-void absdiff8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_absdiff8u)
- CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R)
- (vBinOp<uchar, cv::OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void absdiff8s( const schar* src1, size_t step1,
- const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_absdiff8s)
- vBinOp<schar, cv::OpAbsDiff<schar>, IF_SIMD(VAbsDiff<schar>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void absdiff16u( const ushort* src1, size_t step1,
- const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_absdiff16u)
- CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R)
- (vBinOp<ushort, cv::OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void absdiff16s( const short* src1, size_t step1,
- const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_absdiff16s)
- vBinOp<short, cv::OpAbsDiff<short>, IF_SIMD(VAbsDiff<short>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void absdiff32s( const int* src1, size_t step1,
- const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_absdiff32s)
- vBinOp32<int, cv::OpAbsDiff<int>, IF_SIMD(VAbsDiff<int>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-void absdiff32f( const float* src1, size_t step1,
- const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_absdiff32f)
- CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R)
- (vBinOp32<float, cv::OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void absdiff64f( const double* src1, size_t step1,
- const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_absdiff64f)
- vBinOp64<double, cv::OpAbsDiff<double>, IF_SIMD(VAbsDiff<double>)>(src1, step1, src2, step2, dst, step, width, height);
-}
-
-//=======================================
-// Logical
-//=======================================
-
-void and8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_and8u)
- CALL_IPP_BIN_12(ippiAnd_8u_C1R)
- (vBinOp<uchar, cv::OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void or8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_or8u)
- CALL_IPP_BIN_12(ippiOr_8u_C1R)
- (vBinOp<uchar, cv::OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void xor8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_xor8u)
- CALL_IPP_BIN_12(ippiXor_8u_C1R)
- (vBinOp<uchar, cv::OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-void not8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* )
-{
- CALL_HAL(hal_not8u)
- CALL_IPP_BIN_12(ippiNot_8u_C1R)
- (vBinOp<uchar, cv::OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
-}
-
-//=======================================
-
-#undef CALL_HAL
-#define CALL_HAL(fun) \
- int res = fun(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); \
- if (res == Error::Ok) \
- return; \
- else if (res != Error::NotImplemented) \
- throw Failure(res);
-
-#if ARITHM_USE_IPP
-inline static IppCmpOp convert_cmp(int _cmpop)
-{
- return _cmpop == CMP_EQ ? ippCmpEq :
- _cmpop == CMP_GT ? ippCmpGreater :
- _cmpop == CMP_GE ? ippCmpGreaterEq :
- _cmpop == CMP_LT ? ippCmpLess :
- _cmpop == CMP_LE ? ippCmpLessEq :
- (IppCmpOp)-1;
-}
-#define CALL_IPP_CMP(fun) \
- CV_IPP_CHECK() \
- { \
- IppCmpOp op = convert_cmp(*(int *)_cmpop); \
- if( op >= 0 ) \
- { \
- fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
- if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \
- { \
- CV_IMPL_ADD(CV_IMPL_IPP); \
- return; \
- } \
- setIppErrorStatus(); \
- } \
- }
-#else
-#define CALL_IPP_CMP(fun)
-#endif
-
-//=======================================
-// Compare
-//=======================================
-
-void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* _cmpop)
-{
- CALL_HAL(hal_cmp8u)
- CALL_IPP_CMP(ippiCompare_8u_C1R)
- //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
- int code = *(int*)_cmpop;
- step1 /= sizeof(src1[0]);
- step2 /= sizeof(src2[0]);
- if( code == CMP_GE || code == CMP_LT )
- {
- std::swap(src1, src2);
- std::swap(step1, step2);
- code = code == CMP_GE ? CMP_LE : CMP_GT;
- }
-
- if( code == CMP_GT || code == CMP_LE )
- {
- int m = code == CMP_GT ? 0 : 255;
- for( ; height--; src1 += step1, src2 += step2, dst += step )
- {
- int x =0;
- #if CV_SSE2
- if( USE_SSE2 )
- {
- __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
- __m128i c128 = _mm_set1_epi8 (-128);
- for( ; x <= width - 16; x += 16 )
- {
- __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
- __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
- // no simd for 8u comparison, that's why we need the trick
- r00 = _mm_sub_epi8(r00,c128);
- r10 = _mm_sub_epi8(r10,c128);
-
- r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128);
- _mm_storeu_si128((__m128i*)(dst + x),r00);
-
- }
- }
- #elif CV_NEON
- uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
-
- for( ; x <= width - 16; x += 16 )
- {
- vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
- }
-
- #endif
-
- for( ; x < width; x++ ){
- dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
- }
- }
- }
- else if( code == CMP_EQ || code == CMP_NE )
- {
- int m = code == CMP_EQ ? 0 : 255;
- for( ; height--; src1 += step1, src2 += step2, dst += step )
- {
- int x = 0;
- #if CV_SSE2
- if( USE_SSE2 )
- {
- __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
- for( ; x <= width - 16; x += 16 )
- {
- __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
- __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
- r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128);
- _mm_storeu_si128((__m128i*)(dst + x), r00);
- }
- }
- #elif CV_NEON
- uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
-
- for( ; x <= width - 16; x += 16 )
- {
- vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
- }
- #endif
- for( ; x < width; x++ )
- dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
- }
- }
-}
-
-void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* _cmpop)
-{
- CALL_HAL(hal_cmp8s)
- cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
-}
-
-void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* _cmpop)
-{
- CALL_HAL(hal_cmp16u)
- CALL_IPP_CMP(ippiCompare_16u_C1R)
- cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
-}
-
-void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* _cmpop)
-{
- CALL_HAL(hal_cmp16s)
- CALL_IPP_CMP(ippiCompare_16s_C1R)
- //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
-
- int code = *(int*)_cmpop;
- step1 /= sizeof(src1[0]);
- step2 /= sizeof(src2[0]);
- if( code == CMP_GE || code == CMP_LT )
- {
- std::swap(src1, src2);
- std::swap(step1, step2);
- code = code == CMP_GE ? CMP_LE : CMP_GT;
- }
-
- if( code == CMP_GT || code == CMP_LE )
- {
- int m = code == CMP_GT ? 0 : 255;
- for( ; height--; src1 += step1, src2 += step2, dst += step )
- {
- int x =0;
- #if CV_SSE2
- if( USE_SSE2)
- {
- __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
- for( ; x <= width - 16; x += 16 )
- {
- __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
- __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
- r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
- __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
- __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
- r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128);
- r11 = _mm_packs_epi16(r00, r01);
- _mm_storeu_si128((__m128i*)(dst + x), r11);
- }
- if( x <= width-8)
- {
- __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
- __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
- r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
- r10 = _mm_packs_epi16(r00, r00);
- _mm_storel_epi64((__m128i*)(dst + x), r10);
-
- x += 8;
- }
- }
- #elif CV_NEON
- uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
-
- for( ; x <= width - 16; x += 16 )
- {
- int16x8_t in1 = vld1q_s16(src1 + x);
- int16x8_t in2 = vld1q_s16(src2 + x);
- uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2));
-
- in1 = vld1q_s16(src1 + x + 8);
- in2 = vld1q_s16(src2 + x + 8);
- uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2));
-
- vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
- }
- #endif
-
- for( ; x < width; x++ ){
- dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
- }
- }
- }
- else if( code == CMP_EQ || code == CMP_NE )
- {
- int m = code == CMP_EQ ? 0 : 255;
- for( ; height--; src1 += step1, src2 += step2, dst += step )
- {
- int x = 0;
- #if CV_SSE2
- if( USE_SSE2 )
- {
- __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
- for( ; x <= width - 16; x += 16 )
- {
- __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
- __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
- r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
- __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
- __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
- r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128);
- r11 = _mm_packs_epi16(r00, r01);
- _mm_storeu_si128((__m128i*)(dst + x), r11);
- }
- if( x <= width - 8)
- {
- __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
- __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
- r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
- r10 = _mm_packs_epi16(r00, r00);
- _mm_storel_epi64((__m128i*)(dst + x), r10);
-
- x += 8;
- }
- }
- #elif CV_NEON
- uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
-
- for( ; x <= width - 16; x += 16 )
- {
- int16x8_t in1 = vld1q_s16(src1 + x);
- int16x8_t in2 = vld1q_s16(src2 + x);
- uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2));
-
- in1 = vld1q_s16(src1 + x + 8);
- in2 = vld1q_s16(src2 + x + 8);
- uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2));
-
- vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
- }
- #endif
- for( ; x < width; x++ )
- dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
- }
- }
-}
-
-void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* _cmpop)
-{
- CALL_HAL(hal_cmp32s)
- cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
-}
-
-void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* _cmpop)
-{
- CALL_HAL(hal_cmp32f)
- CALL_IPP_CMP(ippiCompare_32f_C1R)
- cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
-}
-
-void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* _cmpop)
-{
- CALL_HAL(hal_cmp64f)
- cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
-}
-
-//=======================================
-
-#undef CALL_HAL
-#define CALL_HAL(fun) \
- int res = fun(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); \
- if (res == Error::Ok) \
- return; \
- else if (res != Error::NotImplemented) \
- throw Failure(res);
-
-#if defined HAVE_IPP
-#define CALL_IPP_MUL(fun) \
- CV_IPP_CHECK() \
- { \
- if (std::fabs(fscale - 1) <= FLT_EPSILON) \
- { \
- if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \
- { \
- CV_IMPL_ADD(CV_IMPL_IPP); \
- return; \
- } \
- setIppErrorStatus(); \
- } \
- }
-#else
-#define CALL_IPP_MUL(fun)
-#endif
-
-//=======================================
-// Multilpy
-//=======================================
-
-void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_mul8u)
- float fscale = (float)*(const double*)scale;
- CALL_IPP_MUL(ippiMul_8u_C1RSfs)
- mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
-}
-
-void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_mul8s)
- mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale);
-}
-
-void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_mul16u)
- float fscale = (float)*(const double*)scale;
- CALL_IPP_MUL(ippiMul_16u_C1RSfs)
- mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
-}
-
-void mul16s( const short* src1, size_t step1, const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_mul16s)
- float fscale = (float)*(const double*)scale;
- CALL_IPP_MUL(ippiMul_16s_C1RSfs)
- mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
-}
-
-void mul32s( const int* src1, size_t step1, const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_mul32s)
- mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void mul32f( const float* src1, size_t step1, const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_mul32f)
- float fscale = (float)*(const double*)scale;
- CALL_IPP_MUL(ippiMul_32f_C1R)
- mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
-}
-
-void mul64f( const double* src1, size_t step1, const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_mul64f)
- mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-//=======================================
-// Divide
-//=======================================
-
-void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_div8u)
- if( src1 )
- div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
- else
- recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_div8s)
- div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_div16u)
- div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void div16s( const short* src1, size_t step1, const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_div16s)
- div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void div32s( const int* src1, size_t step1, const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_div32s)
- div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void div32f( const float* src1, size_t step1, const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_div32f)
- div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void div64f( const double* src1, size_t step1, const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_div64f)
- div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-//=======================================
-// Reciprocial
-//=======================================
-
-void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_recip8u)
- recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_recip8s)
- recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_recip16u)
- recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void recip16s( const short* src1, size_t step1, const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_recip16s)
- recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void recip32s( const int* src1, size_t step1, const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_recip32s)
- recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void recip32f( const float* src1, size_t step1, const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_recip32f)
- recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-void recip64f( const double* src1, size_t step1, const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* scale)
-{
- CALL_HAL(hal_recip64f)
- recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
-}
-
-//=======================================
-
-#undef CALL_HAL
-#define CALL_HAL(fun) \
- int res = fun(src1, step1, src2, step2, dst, step, width, height, scalars); \
- if (res == Error::Ok) \
- return; \
- else if (res != Error::NotImplemented) \
- throw Failure(res);
-
-//=======================================
-// Add weighted
-//=======================================
-
-void
-addWeighted8u( const uchar* src1, size_t step1,
- const uchar* src2, size_t step2,
- uchar* dst, size_t step, int width, int height,
- void* scalars )
-{
- CALL_HAL(hal_addWeighted8u)
- const double* scalars_ = (const double*)scalars;
- float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2];
-
- for( ; height--; src1 += step1, src2 += step2, dst += step )
- {
- int x = 0;
-
-#if CV_SSE2
- if( USE_SSE2 )
- {
- __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
- __m128i z = _mm_setzero_si128();
-
- for( ; x <= width - 8; x += 8 )
- {
- __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
- __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);
-
- __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
- __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
- __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
- __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));
-
- u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
- u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
- u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4);
-
- u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
- u = _mm_packus_epi16(u, u);
-
- _mm_storel_epi64((__m128i*)(dst + x), u);
- }
- }
-#elif CV_NEON
- float32x4_t g = vdupq_n_f32 (gamma);
-
- for( ; x <= width - 8; x += 8 )
- {
- uint8x8_t in1 = vld1_u8(src1+x);
- uint16x8_t in1_16 = vmovl_u8(in1);
- float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
- float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));
-
- uint8x8_t in2 = vld1_u8(src2+x);
- uint16x8_t in2_16 = vmovl_u8(in2);
- float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
- float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));
-
- float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
- float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
- out_f_l = vaddq_f32(out_f_l, g);
- out_f_h = vaddq_f32(out_f_h, g);
-
- uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l));
- uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h));
-
- uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
- uint8x8_t out = vqmovn_u16(out_16);
-
- vst1_u8(dst+x, out);
- }
-#endif
- #if CV_ENABLE_UNROLLED
- for( ; x <= width - 4; x += 4 )
- {
- float t0, t1;
- t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
- t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma;
-
- dst[x] = saturate_cast<uchar>(t0);
- dst[x+1] = saturate_cast<uchar>(t1);
-
- t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma;
- t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma;
-
- dst[x+2] = saturate_cast<uchar>(t0);
- dst[x+3] = saturate_cast<uchar>(t1);
- }
- #endif
-
- for( ; x < width; x++ )
- {
- float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
- dst[x] = saturate_cast<uchar>(t0);
- }
- }
-}
-
-void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
- schar* dst, size_t step, int width, int height, void* scalars )
-{
- CALL_HAL(hal_addWeighted8s)
- addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
-}
-
-void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
- ushort* dst, size_t step, int width, int height, void* scalars )
-{
- CALL_HAL(hal_addWeighted16u)
- addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
-}
-
-void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2,
- short* dst, size_t step, int width, int height, void* scalars )
-{
- CALL_HAL(hal_addWeighted16s)
- addWeighted_<short, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
-}
-
-void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2,
- int* dst, size_t step, int width, int height, void* scalars )
-{
- CALL_HAL(hal_addWeighted32s)
- addWeighted_<int, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
-}
-
-void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2,
- float* dst, size_t step, int width, int height, void* scalars )
-{
- CALL_HAL(hal_addWeighted32f)
- addWeighted_<float, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
-}
-
-void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2,
- double* dst, size_t step, int width, int height, void* scalars )
-{
- CALL_HAL(hal_addWeighted64f)
- addWeighted_<double, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
-}
-
-}} // cv::hal::
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-namespace cv { namespace hal {
-
-}}
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-namespace cv { namespace hal {
-
-}}
+++ /dev/null
-#include "precomp.hpp"
-
-#if defined WIN32 || defined _WIN32 || defined WINCE
-#include <windows.h>
-#if defined _MSC_VER
- #if _MSC_VER >= 1400
- #include <intrin.h>
- #elif defined _M_IX86
- static void __cpuid(int* cpuid_data, int)
- {
- __asm
- {
- push ebx
- push edi
- mov edi, cpuid_data
- mov eax, 1
- cpuid
- mov [edi], eax
- mov [edi + 4], ebx
- mov [edi + 8], ecx
- mov [edi + 12], edx
- pop edi
- pop ebx
- }
- }
- static void __cpuidex(int* cpuid_data, int, int)
- {
- __asm
- {
- push edi
- mov edi, cpuid_data
- mov eax, 7
- mov ecx, 0
- cpuid
- mov [edi], eax
- mov [edi + 4], ebx
- mov [edi + 8], ecx
- mov [edi + 12], edx
- pop edi
- }
- }
- #endif
-#endif
-#endif
-
-#if defined ANDROID || defined __linux__
-# include <unistd.h>
-# include <fcntl.h>
-# include <elf.h>
-# include <linux/auxvec.h>
-#endif
-
-#if defined __linux__ || defined __APPLE__ || defined __EMSCRIPTEN__
-#include <unistd.h>
-#include <stdio.h>
-#include <sys/types.h>
-#if defined ANDROID
-#include <sys/sysconf.h>
-#endif
-#endif
-
-#ifdef ANDROID
-# include <android/log.h>
-#endif
-
-struct HWFeatures
-{
- enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
-
- HWFeatures(void)
- {
- memset( have, 0, sizeof(have) );
- x86_family = 0;
- }
-
- static HWFeatures initialize(void)
- {
- HWFeatures f;
- int cpuid_data[4] = { 0, 0, 0, 0 };
-
- #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
- __cpuid(cpuid_data, 1);
- #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
- #ifdef __x86_64__
- asm __volatile__
- (
- "movl $1, %%eax\n\t"
- "cpuid\n\t"
- :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
- :
- : "cc"
- );
- #else
- asm volatile
- (
- "pushl %%ebx\n\t"
- "movl $1,%%eax\n\t"
- "cpuid\n\t"
- "popl %%ebx\n\t"
- : "=a"(cpuid_data[0]), "=c"(cpuid_data[2]), "=d"(cpuid_data[3])
- :
- : "cc"
- );
- #endif
- #endif
-
- f.x86_family = (cpuid_data[0] >> 8) & 15;
- if( f.x86_family >= 6 )
- {
- f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0;
- f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
- f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
- f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
- f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
- f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
- f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
- f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
- f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
- f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
-
- // make the second call to the cpuid command in order to get
- // information about extended features like AVX2
- #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
- __cpuidex(cpuid_data, 7, 0);
- #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
- #ifdef __x86_64__
- asm __volatile__
- (
- "movl $7, %%eax\n\t"
- "movl $0, %%ecx\n\t"
- "cpuid\n\t"
- :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
- :
- : "cc"
- );
- #else
- asm volatile
- (
- "pushl %%ebx\n\t"
- "movl $7,%%eax\n\t"
- "movl $0,%%ecx\n\t"
- "cpuid\n\t"
- "movl %%ebx, %0\n\t"
- "popl %%ebx\n\t"
- : "=r"(cpuid_data[1]), "=c"(cpuid_data[2])
- :
- : "cc"
- );
- #endif
- #endif
- f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
-
- f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
- f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
- f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
- f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
- f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
- f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
- f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
- f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
- f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
- }
-
- #if defined ANDROID || defined __linux__
- #ifdef __aarch64__
- f.have[CV_CPU_NEON] = true;
- #else
- int cpufile = open("/proc/self/auxv", O_RDONLY);
-
- if (cpufile >= 0)
- {
- Elf32_auxv_t auxv;
- const size_t size_auxv_t = sizeof(auxv);
-
- while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t)
- {
- if (auxv.a_type == AT_HWCAP)
- {
- f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
- break;
- }
- }
-
- close(cpufile);
- }
- #endif
- #elif (defined __clang__ || defined __APPLE__) && (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
- f.have[CV_CPU_NEON] = true;
- #endif
-
- return f;
- }
-
- int x86_family;
- bool have[MAX_FEATURE+1];
-};
-
-static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures();
-static HWFeatures* currentFeatures = &featuresEnabled;
-volatile bool useOptimizedFlag = true;
-
-namespace cv { namespace hal {
-
-bool checkHardwareSupport(int feature)
-{
-// CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
- return currentFeatures->have[feature];
-}
-
-void setUseOptimized( bool flag )
-{
- useOptimizedFlag = flag;
- currentFeatures = flag ? &featuresEnabled : &featuresDisabled;
-}
-
-bool useOptimized(void)
-{
- return useOptimizedFlag;
-}
-
-}}
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "opencv2/hal.hpp"
-#include "opencv2/hal/intrin.hpp"
-#include <algorithm>
-#include <cmath>
-#include <cstdlib>
-#include <limits>
-#include <float.h>
-#include <cstring>
-#include <cassert>
-
-#include "opencv2/hal/sse_utils.hpp"
-#include "opencv2/hal/neon_utils.hpp"
-
-#if defined HAVE_IPP && (IPP_VERSION_X100 >= 700)
-#define ARITHM_USE_IPP 1
-#else
-#define ARITHM_USE_IPP 0
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_HAL_REPLACEMENT_HPP__
-#define __OPENCV_HAL_REPLACEMENT_HPP__
-
-#include "opencv2/hal.hpp"
-
-inline int hal_t_add8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_add8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_add16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_add16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_add32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_add32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_add64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_sub8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_sub8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_sub16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_sub16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_sub32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_sub32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_sub64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_max8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_max8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_max16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_max16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_max32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_max32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_max64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_min8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_min8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_min16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_min16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_min32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_min32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_min64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_absdiff8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_absdiff8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_absdiff16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_absdiff16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_absdiff32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_absdiff32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_absdiff64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_and8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_or8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_xor8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_not8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return cv::hal::Error::NotImplemented; }
-
-#define hal_add8u hal_t_add8u
-#define hal_add8s hal_t_add8s
-#define hal_add16u hal_t_add16u
-#define hal_add16s hal_t_add16s
-#define hal_add32s hal_t_add32s
-#define hal_add32f hal_t_add32f
-#define hal_add64f hal_t_add64f
-#define hal_sub8u hal_t_sub8u
-#define hal_sub8s hal_t_sub8s
-#define hal_sub16u hal_t_sub16u
-#define hal_sub16s hal_t_sub16s
-#define hal_sub32s hal_t_sub32s
-#define hal_sub32f hal_t_sub32f
-#define hal_sub64f hal_t_sub64f
-#define hal_max8u hal_t_max8u
-#define hal_max8s hal_t_max8s
-#define hal_max16u hal_t_max16u
-#define hal_max16s hal_t_max16s
-#define hal_max32s hal_t_max32s
-#define hal_max32f hal_t_max32f
-#define hal_max64f hal_t_max64f
-#define hal_min8u hal_t_min8u
-#define hal_min8s hal_t_min8s
-#define hal_min16u hal_t_min16u
-#define hal_min16s hal_t_min16s
-#define hal_min32s hal_t_min32s
-#define hal_min32f hal_t_min32f
-#define hal_min64f hal_t_min64f
-#define hal_absdiff8u hal_t_absdiff8u
-#define hal_absdiff8s hal_t_absdiff8s
-#define hal_absdiff16u hal_t_absdiff16u
-#define hal_absdiff16s hal_t_absdiff16s
-#define hal_absdiff32s hal_t_absdiff32s
-#define hal_absdiff32f hal_t_absdiff32f
-#define hal_absdiff64f hal_t_absdiff64f
-#define hal_and8u hal_t_and8u
-#define hal_or8u hal_t_or8u
-#define hal_xor8u hal_t_xor8u
-#define hal_not8u hal_t_not8u
-
-inline int hal_t_cmp8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_cmp8s(const schar*, size_t, const schar*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_cmp16u(const ushort*, size_t, const ushort*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_cmp16s(const short*, size_t, const short*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_cmp32s(const int*, size_t, const int*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_cmp32f(const float*, size_t, const float*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_cmp64f(const double*, size_t, const double*, size_t, uchar*, size_t, int, int, int) { return cv::hal::Error::NotImplemented; }
-
-#define hal_cmp8u hal_t_cmp8u
-#define hal_cmp8s hal_t_cmp8s
-#define hal_cmp16u hal_t_cmp16u
-#define hal_cmp16s hal_t_cmp16s
-#define hal_cmp32s hal_t_cmp32s
-#define hal_cmp32f hal_t_cmp32f
-#define hal_cmp64f hal_t_cmp64f
-
-inline int hal_t_mul8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_mul8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_mul16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_mul16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_mul32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_mul32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_mul64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_div8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_div8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_div16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_div16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_div32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_div32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_div64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_recip8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_recip8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_recip16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_recip16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_recip32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_recip32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_recip64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return cv::hal::Error::NotImplemented; }
-
-#define hal_mul8u hal_t_mul8u
-#define hal_mul8s hal_t_mul8s
-#define hal_mul16u hal_t_mul16u
-#define hal_mul16s hal_t_mul16s
-#define hal_mul32s hal_t_mul32s
-#define hal_mul32f hal_t_mul32f
-#define hal_mul64f hal_t_mul64f
-#define hal_div8u hal_t_div8u
-#define hal_div8s hal_t_div8s
-#define hal_div16u hal_t_div16u
-#define hal_div16s hal_t_div16s
-#define hal_div32s hal_t_div32s
-#define hal_div32f hal_t_div32f
-#define hal_div64f hal_t_div64f
-#define hal_recip8u hal_t_recip8u
-#define hal_recip8s hal_t_recip8s
-#define hal_recip16u hal_t_recip16u
-#define hal_recip16s hal_t_recip16s
-#define hal_recip32s hal_t_recip32s
-#define hal_recip32f hal_t_recip32f
-#define hal_recip64f hal_t_recip64f
-
-inline int hal_t_addWeighted8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_addWeighted8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_addWeighted16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_addWeighted16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_addWeighted32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_addWeighted32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; }
-inline int hal_t_addWeighted64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, void*) { return cv::hal::Error::NotImplemented; }
-
-#define hal_addWeighted8u hal_t_addWeighted8u
-#define hal_addWeighted8s hal_t_addWeighted8s
-#define hal_addWeighted16u hal_t_addWeighted16u
-#define hal_addWeighted16s hal_t_addWeighted16s
-#define hal_addWeighted32s hal_t_addWeighted32s
-#define hal_addWeighted32f hal_t_addWeighted32f
-#define hal_addWeighted64f hal_t_addWeighted64f
-
-#include "custom_hal.hpp"
-
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-namespace cv { namespace hal {
-
-}}
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-namespace cv { namespace hal {
-
-static const uchar popCountTable[] =
-{
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
-
-static const uchar popCountTable2[] =
-{
- 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
- 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
- 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
- 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
- 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
- 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
- 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
- 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
-};
-
-static const uchar popCountTable4[] =
-{
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
-};
-
-int normHamming(const uchar* a, int n)
-{
- int i = 0;
- int result = 0;
-#if CV_NEON
- {
- uint32x4_t bits = vmovq_n_u32(0);
- for (; i <= n - 16; i += 16) {
- uint8x16_t A_vec = vld1q_u8 (a + i);
- uint8x16_t bitsSet = vcntq_u8 (A_vec);
- uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
- uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
- bits = vaddq_u32(bits, bitSet4);
- }
- uint64x2_t bitSet2 = vpaddlq_u32 (bits);
- result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
- result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
- }
-#endif
- for( ; i <= n - 4; i += 4 )
- result += popCountTable[a[i]] + popCountTable[a[i+1]] +
- popCountTable[a[i+2]] + popCountTable[a[i+3]];
- for( ; i < n; i++ )
- result += popCountTable[a[i]];
- return result;
-}
-
-int normHamming(const uchar* a, const uchar* b, int n)
-{
- int i = 0;
- int result = 0;
-#if CV_NEON
- {
- uint32x4_t bits = vmovq_n_u32(0);
- for (; i <= n - 16; i += 16) {
- uint8x16_t A_vec = vld1q_u8 (a + i);
- uint8x16_t B_vec = vld1q_u8 (b + i);
- uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
- uint8x16_t bitsSet = vcntq_u8 (AxorB);
- uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
- uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
- bits = vaddq_u32(bits, bitSet4);
- }
- uint64x2_t bitSet2 = vpaddlq_u32 (bits);
- result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
- result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
- }
-#endif
- for( ; i <= n - 4; i += 4 )
- result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
- popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
- for( ; i < n; i++ )
- result += popCountTable[a[i] ^ b[i]];
- return result;
-}
-
-int normHamming(const uchar* a, int n, int cellSize)
-{
- if( cellSize == 1 )
- return normHamming(a, n);
- const uchar* tab = 0;
- if( cellSize == 2 )
- tab = popCountTable2;
- else if( cellSize == 4 )
- tab = popCountTable4;
- else
- return -1;
- int i = 0;
- int result = 0;
-#if CV_ENABLE_UNROLLED
- for( ; i <= n - 4; i += 4 )
- result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
-#endif
- for( ; i < n; i++ )
- result += tab[a[i]];
- return result;
-}
-
-int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
-{
- if( cellSize == 1 )
- return normHamming(a, b, n);
- const uchar* tab = 0;
- if( cellSize == 2 )
- tab = popCountTable2;
- else if( cellSize == 4 )
- tab = popCountTable4;
- else
- return -1;
- int i = 0;
- int result = 0;
- #if CV_ENABLE_UNROLLED
- for( ; i <= n - 4; i += 4 )
- result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
- tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
- #endif
- for( ; i < n; i++ )
- result += tab[a[i] ^ b[i]];
- return result;
-}
-
-float normL2Sqr_(const float* a, const float* b, int n)
-{
- int j = 0; float d = 0.f;
-#if CV_SSE
- float CV_DECL_ALIGNED(16) buf[4];
- __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
-
- for( ; j <= n - 8; j += 8 )
- {
- __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
- __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
- d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
- d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
- }
- _mm_store_ps(buf, _mm_add_ps(d0, d1));
- d = buf[0] + buf[1] + buf[2] + buf[3];
-#endif
- {
- for( ; j <= n - 4; j += 4 )
- {
- float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
- d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
- }
- }
-
- for( ; j < n; j++ )
- {
- float t = a[j] - b[j];
- d += t*t;
- }
- return d;
-}
-
-
-float normL1_(const float* a, const float* b, int n)
-{
- int j = 0; float d = 0.f;
-#if CV_SSE
- float CV_DECL_ALIGNED(16) buf[4];
- static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
- __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
- __m128 absmask = _mm_load_ps((const float*)absbuf);
-
- for( ; j <= n - 8; j += 8 )
- {
- __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
- __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
- d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
- d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
- }
- _mm_store_ps(buf, _mm_add_ps(d0, d1));
- d = buf[0] + buf[1] + buf[2] + buf[3];
-#elif CV_NEON
- float32x4_t v_sum = vdupq_n_f32(0.0f);
- for ( ; j <= n - 4; j += 4)
- v_sum = vaddq_f32(v_sum, vabdq_f32(vld1q_f32(a + j), vld1q_f32(b + j)));
-
- float CV_DECL_ALIGNED(16) buf[4];
- vst1q_f32(buf, v_sum);
- d = buf[0] + buf[1] + buf[2] + buf[3];
-#endif
- {
- for( ; j <= n - 4; j += 4 )
- {
- d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
- std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
- }
- }
-
- for( ; j < n; j++ )
- d += std::abs(a[j] - b[j]);
- return d;
-}
-
-int normL1_(const uchar* a, const uchar* b, int n)
-{
- int j = 0, d = 0;
-#if CV_SSE
- __m128i d0 = _mm_setzero_si128();
-
- for( ; j <= n - 16; j += 16 )
- {
- __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
- __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
-
- d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
- }
-
- for( ; j <= n - 4; j += 4 )
- {
- __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
- __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
-
- d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
- }
- d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
-#elif CV_NEON
- uint32x4_t v_sum = vdupq_n_u32(0.0f);
- for ( ; j <= n - 16; j += 16)
- {
- uint8x16_t v_dst = vabdq_u8(vld1q_u8(a + j), vld1q_u8(b + j));
- uint16x8_t v_low = vmovl_u8(vget_low_u8(v_dst)), v_high = vmovl_u8(vget_high_u8(v_dst));
- v_sum = vaddq_u32(v_sum, vaddl_u16(vget_low_u16(v_low), vget_low_u16(v_high)));
- v_sum = vaddq_u32(v_sum, vaddl_u16(vget_high_u16(v_low), vget_high_u16(v_high)));
- }
-
- uint CV_DECL_ALIGNED(16) buf[4];
- vst1q_u32(buf, v_sum);
- d = buf[0] + buf[1] + buf[2] + buf[3];
-#endif
- {
- for( ; j <= n - 4; j += 4 )
- {
- d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
- std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
- }
- }
- for( ; j < n; j++ )
- d += std::abs(a[j] - b[j]);
- return d;
-}
-
-}} //cv::hal
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-namespace cv { namespace hal {
-
-}}
+++ /dev/null
-#include "opencv2/ts.hpp"
-
-CV_TEST_MAIN("cv")
+++ /dev/null
-#ifndef __OPENCV_HAL_TEST_PRECOMP_HPP__
-#define __OPENCV_HAL_TEST_PRECOMP_HPP__
-
-#include <iostream>
-#include <limits>
-#include "opencv2/ts.hpp"
-#include "opencv2/hal.hpp"
-#include "opencv2/hal/defs.h"
-#include "opencv2/hal/intrin.hpp"
-
-#endif
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
-#include "opencv2/hal.hpp"
+#include "opencv2/core/hal/hal.hpp"
#include <math.h>
#include <assert.h>
#include "_geom.h"
#include "filterengine.hpp"
-#include "opencv2/hal/sse_utils.hpp"
+#include "opencv2/core/sse_utils.hpp"
#endif /*__OPENCV_CV_INTERNAL_H_*/
//M*/
#include "precomp.hpp"
-#include "opencv2/hal/intrin.hpp"
+#include "opencv2/core/hal/intrin.hpp"
#include <iostream>
namespace cv
ocv_list_filterout(opencv_hdrs ".h$")
ocv_list_filterout(opencv_hdrs "cuda")
ocv_list_filterout(opencv_hdrs "cudev")
+ocv_list_filterout(opencv_hdrs "/hal/")
ocv_list_filterout(opencv_hdrs "detection_based_tracker.hpp") # Conditional compilation
set(cv2_generated_hdrs
//M*/
#include "precomp.hpp"
+#include "opencv2/core/hal/hal.hpp"
using namespace cv;
namespace {
-template<typename _Tp> static inline bool
-decomposeCholesky(_Tp* A, size_t astep, int m)
+static inline bool decomposeCholesky(double* A, size_t astep, int m)
{
- if (!hal::Cholesky(A, astep, m, 0, 0, 0))
+ if (!hal::Cholesky64f(A, astep, m, 0, 0, 0))
return false;
astep /= sizeof(A[0]);
for (int i = 0; i < m; ++i)
- A[i*astep + i] = (_Tp)(1./A[i*astep + i]);
+ A[i*astep + i] = (double)(1./A[i*astep + i]);
return true;
}
--- /dev/null
+cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
+
+if(UNIX)
+ if(CMAKE_COMPILER_IS_GNUC OR CV_ICC)
+ set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+ endif()
+endif()
+
+add_library(broken_hal broken.c)
+set(OPENCV_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
+target_include_directories(broken_hal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${OPENCV_SRC_DIR}/modules/core/include)
--- /dev/null
+#include "broken.h"
+
+int broken_add8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_add8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_add16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_add16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_add32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_add32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_add64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_sub8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_sub8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_sub16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_sub16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_sub32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_sub32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_sub64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_max8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_max8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_max16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_max16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_max32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_max32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_max64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_min8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_min8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_min16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_min16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+// --- Intentionally failing HAL stubs: elementwise min / absdiff / bitwise ops ---
+// Every stub unconditionally returns CV_HAL_ERROR_UNKNOWN, i.e. it always
+// refuses the call. NOTE(review): the "broken_" prefix and the always-fail
+// behavior suggest these exist to verify that OpenCV falls back to its
+// built-in implementation when a custom HAL entry fails -- confirm against
+// the test that links this HAL.
+// Parameter layout follows the cv_hal_* binary-op contract
+// (src1, step1, src2, step2, dst, step, width, height); the sz1/sz2/sz names
+// appear to be row steps and w/h the image extent -- verify in interface.h.
+int broken_min32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_min32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_min64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_absdiff8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_absdiff8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_absdiff16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_absdiff16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_absdiff32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_absdiff32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_absdiff64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_and8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_or8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_xor8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+// Unary op: note not8u takes a single source (src1, sz1) only.
+int broken_not8u(const uchar* src1, size_t sz1, uchar* dst, size_t sz, int w, int h)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+// --- Intentionally failing HAL stubs: comparison ops ---
+// Same always-fail pattern (CV_HAL_ERROR_UNKNOWN). The trailing 'op' is the
+// comparison operation code per the cv_hal_cmp* contract; dst is uchar for
+// every element type, consistent with an 8-bit mask output.
+int broken_cmp8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_cmp8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_cmp16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_cmp16s(const short* src1, size_t sz1, const short* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_cmp32s(const int* src1, size_t sz1, const int* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_cmp32f(const float* src1, size_t sz1, const float* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_cmp64f(const double* src1, size_t sz1, const double* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+// --- Intentionally failing HAL stubs: scaled mul / div / recip ---
+// Always return CV_HAL_ERROR_UNKNOWN. The trailing 'scale' matches the
+// cv_hal_mul* / cv_hal_div* / cv_hal_recip* contract (a scalar multiplier
+// applied to the elementwise result).
+int broken_mul8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_mul8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_mul16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_mul16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_mul32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_mul32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_mul64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_div8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_div8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_div16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_div16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_div32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_div32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_div64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+// recip* stubs keep the same two-source signature as mul/div even though a
+// reciprocal only needs one source -- this mirrors the cv_hal_recip*
+// dispatch signature; NOTE(review): confirm against interface.h.
+int broken_recip8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_recip8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_recip16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_recip16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_recip32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_recip32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_recip64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+// --- Intentionally failing HAL stubs: addWeighted ---
+// Always return CV_HAL_ERROR_UNKNOWN. 'scales' points to the coefficient
+// array of the cv_hal_addWeighted* contract (presumably alpha/beta/gamma --
+// NOTE(review): confirm the expected length in interface.h).
+int broken_addWeighted8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, const double* scales)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_addWeighted8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, const double* scales)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_addWeighted16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, const double* scales)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_addWeighted16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, const double* scales)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_addWeighted32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, const double* scales)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_addWeighted32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, const double* scales)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
+
+int broken_addWeighted64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, const double* scales)
+{
+ return CV_HAL_ERROR_UNKNOWN;
+}
--- /dev/null
+// broken.h -- custom-HAL header that redirects a large set of cv_hal_*
+// dispatch macros to stub functions that always return CV_HAL_ERROR_UNKNOWN.
+// NOTE(review): presumably included via the custom_hal.hpp mechanism to test
+// that OpenCV falls back to its built-in kernels when a HAL entry fails --
+// confirm against the test CMake that wires OPENCV_HAL_HEADERS.
+// NOTE(review): the guard macro _BROKEN_H_INCLUDED_ begins with an underscore
+// followed by an uppercase letter, which is reserved to the implementation in
+// C and C++; consider renaming (left unchanged here, as this is a patch).
+#ifndef _BROKEN_H_INCLUDED_
+#define _BROKEN_H_INCLUDED_
+
+#include "opencv2/core/hal/interface.h"
+
+// C linkage for the stub symbols (definitions live in a separate TU).
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+// Prototypes: add / sub / max / min / absdiff / bitwise stubs (always fail).
+int broken_add8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h);
+int broken_add8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h);
+int broken_add16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h);
+int broken_add16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h);
+int broken_add32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h);
+int broken_add32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h);
+int broken_add64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h);
+int broken_sub8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h);
+int broken_sub8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h);
+int broken_sub16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h);
+int broken_sub16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h);
+int broken_sub32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h);
+int broken_sub32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h);
+int broken_sub64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h);
+int broken_max8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h);
+int broken_max8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h);
+int broken_max16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h);
+int broken_max16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h);
+int broken_max32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h);
+int broken_max32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h);
+int broken_max64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h);
+int broken_min8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h);
+int broken_min8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h);
+int broken_min16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h);
+int broken_min16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h);
+int broken_min32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h);
+int broken_min32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h);
+int broken_min64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h);
+int broken_absdiff8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h);
+int broken_absdiff8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h);
+int broken_absdiff16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h);
+int broken_absdiff16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h);
+int broken_absdiff32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h);
+int broken_absdiff32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h);
+int broken_absdiff64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h);
+int broken_and8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h);
+int broken_or8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h);
+int broken_xor8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h);
+int broken_not8u(const uchar* src1, size_t sz1, uchar* dst, size_t sz, int w, int h);
+
+// Redirect the cv_hal_* dispatch macros to the failing stubs. Each macro is
+// #undef'd first so this header can override a previously selected HAL.
+#undef cv_hal_add8u
+#define cv_hal_add8u broken_add8u
+#undef cv_hal_add8s
+#define cv_hal_add8s broken_add8s
+#undef cv_hal_add16u
+#define cv_hal_add16u broken_add16u
+#undef cv_hal_add16s
+#define cv_hal_add16s broken_add16s
+#undef cv_hal_add32s
+#define cv_hal_add32s broken_add32s
+#undef cv_hal_add32f
+#define cv_hal_add32f broken_add32f
+#undef cv_hal_add64f
+#define cv_hal_add64f broken_add64f
+#undef cv_hal_sub8u
+#define cv_hal_sub8u broken_sub8u
+#undef cv_hal_sub8s
+#define cv_hal_sub8s broken_sub8s
+#undef cv_hal_sub16u
+#define cv_hal_sub16u broken_sub16u
+#undef cv_hal_sub16s
+#define cv_hal_sub16s broken_sub16s
+#undef cv_hal_sub32s
+#define cv_hal_sub32s broken_sub32s
+#undef cv_hal_sub32f
+#define cv_hal_sub32f broken_sub32f
+#undef cv_hal_sub64f
+#define cv_hal_sub64f broken_sub64f
+#undef cv_hal_max8u
+#define cv_hal_max8u broken_max8u
+#undef cv_hal_max8s
+#define cv_hal_max8s broken_max8s
+#undef cv_hal_max16u
+#define cv_hal_max16u broken_max16u
+#undef cv_hal_max16s
+#define cv_hal_max16s broken_max16s
+#undef cv_hal_max32s
+#define cv_hal_max32s broken_max32s
+#undef cv_hal_max32f
+#define cv_hal_max32f broken_max32f
+#undef cv_hal_max64f
+#define cv_hal_max64f broken_max64f
+#undef cv_hal_min8u
+#define cv_hal_min8u broken_min8u
+#undef cv_hal_min8s
+#define cv_hal_min8s broken_min8s
+#undef cv_hal_min16u
+#define cv_hal_min16u broken_min16u
+#undef cv_hal_min16s
+#define cv_hal_min16s broken_min16s
+#undef cv_hal_min32s
+#define cv_hal_min32s broken_min32s
+#undef cv_hal_min32f
+#define cv_hal_min32f broken_min32f
+#undef cv_hal_min64f
+#define cv_hal_min64f broken_min64f
+#undef cv_hal_absdiff8u
+#define cv_hal_absdiff8u broken_absdiff8u
+#undef cv_hal_absdiff8s
+#define cv_hal_absdiff8s broken_absdiff8s
+#undef cv_hal_absdiff16u
+#define cv_hal_absdiff16u broken_absdiff16u
+#undef cv_hal_absdiff16s
+#define cv_hal_absdiff16s broken_absdiff16s
+#undef cv_hal_absdiff32s
+#define cv_hal_absdiff32s broken_absdiff32s
+#undef cv_hal_absdiff32f
+#define cv_hal_absdiff32f broken_absdiff32f
+#undef cv_hal_absdiff64f
+#define cv_hal_absdiff64f broken_absdiff64f
+#undef cv_hal_and8u
+#define cv_hal_and8u broken_and8u
+#undef cv_hal_or8u
+#define cv_hal_or8u broken_or8u
+#undef cv_hal_xor8u
+#define cv_hal_xor8u broken_xor8u
+#undef cv_hal_not8u
+#define cv_hal_not8u broken_not8u
+
+// Comparison stubs: trailing 'op' is the comparison code; dst is an 8-bit mask.
+int broken_cmp8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op);
+int broken_cmp8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op);
+int broken_cmp16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op);
+int broken_cmp16s(const short* src1, size_t sz1, const short* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op);
+int broken_cmp32s(const int* src1, size_t sz1, const int* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op);
+int broken_cmp32f(const float* src1, size_t sz1, const float* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op);
+int broken_cmp64f(const double* src1, size_t sz1, const double* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, int op);
+
+#undef cv_hal_cmp8u
+#define cv_hal_cmp8u broken_cmp8u
+#undef cv_hal_cmp8s
+#define cv_hal_cmp8s broken_cmp8s
+#undef cv_hal_cmp16u
+#define cv_hal_cmp16u broken_cmp16u
+#undef cv_hal_cmp16s
+#define cv_hal_cmp16s broken_cmp16s
+#undef cv_hal_cmp32s
+#define cv_hal_cmp32s broken_cmp32s
+#undef cv_hal_cmp32f
+#define cv_hal_cmp32f broken_cmp32f
+#undef cv_hal_cmp64f
+#define cv_hal_cmp64f broken_cmp64f
+
+// Scaled mul / div / recip stubs: trailing 'scale' per the cv_hal_* contract.
+int broken_mul8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale);
+int broken_mul8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale);
+int broken_mul16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale);
+int broken_mul16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale);
+int broken_mul32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale);
+int broken_mul32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale);
+int broken_mul64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale);
+int broken_div8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale);
+int broken_div8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale);
+int broken_div16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale);
+int broken_div16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale);
+int broken_div32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale);
+int broken_div32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale);
+int broken_div64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale);
+int broken_recip8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, double scale);
+int broken_recip8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, double scale);
+int broken_recip16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, double scale);
+int broken_recip16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, double scale);
+int broken_recip32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, double scale);
+int broken_recip32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, double scale);
+int broken_recip64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, double scale);
+
+#undef cv_hal_mul8u
+#define cv_hal_mul8u broken_mul8u
+#undef cv_hal_mul8s
+#define cv_hal_mul8s broken_mul8s
+#undef cv_hal_mul16u
+#define cv_hal_mul16u broken_mul16u
+#undef cv_hal_mul16s
+#define cv_hal_mul16s broken_mul16s
+#undef cv_hal_mul32s
+#define cv_hal_mul32s broken_mul32s
+#undef cv_hal_mul32f
+#define cv_hal_mul32f broken_mul32f
+#undef cv_hal_mul64f
+#define cv_hal_mul64f broken_mul64f
+#undef cv_hal_div8u
+#define cv_hal_div8u broken_div8u
+#undef cv_hal_div8s
+#define cv_hal_div8s broken_div8s
+#undef cv_hal_div16u
+#define cv_hal_div16u broken_div16u
+#undef cv_hal_div16s
+#define cv_hal_div16s broken_div16s
+#undef cv_hal_div32s
+#define cv_hal_div32s broken_div32s
+#undef cv_hal_div32f
+#define cv_hal_div32f broken_div32f
+#undef cv_hal_div64f
+#define cv_hal_div64f broken_div64f
+#undef cv_hal_recip8u
+#define cv_hal_recip8u broken_recip8u
+#undef cv_hal_recip8s
+#define cv_hal_recip8s broken_recip8s
+#undef cv_hal_recip16u
+#define cv_hal_recip16u broken_recip16u
+#undef cv_hal_recip16s
+#define cv_hal_recip16s broken_recip16s
+#undef cv_hal_recip32s
+#define cv_hal_recip32s broken_recip32s
+#undef cv_hal_recip32f
+#define cv_hal_recip32f broken_recip32f
+#undef cv_hal_recip64f
+#define cv_hal_recip64f broken_recip64f
+
+// addWeighted stubs: 'scales' points to the coefficient array
+// (presumably alpha/beta/gamma -- NOTE(review): confirm in interface.h).
+int broken_addWeighted8u(const uchar* src1, size_t sz1, const uchar* src2, size_t sz2, uchar* dst, size_t sz, int w, int h, const double* scales);
+int broken_addWeighted8s(const schar* src1, size_t sz1, const schar* src2, size_t sz2, schar* dst, size_t sz, int w, int h, const double* scales);
+int broken_addWeighted16u(const ushort* src1, size_t sz1, const ushort* src2, size_t sz2, ushort* dst, size_t sz, int w, int h, const double* scales);
+int broken_addWeighted16s(const short* src1, size_t sz1, const short* src2, size_t sz2, short* dst, size_t sz, int w, int h, const double* scales);
+int broken_addWeighted32s(const int* src1, size_t sz1, const int* src2, size_t sz2, int* dst, size_t sz, int w, int h, const double* scales);
+int broken_addWeighted32f(const float* src1, size_t sz1, const float* src2, size_t sz2, float* dst, size_t sz, int w, int h, const double* scales);
+int broken_addWeighted64f(const double* src1, size_t sz1, const double* src2, size_t sz2, double* dst, size_t sz, int w, int h, const double* scales);
+
+#undef cv_hal_addWeighted8u
+#define cv_hal_addWeighted8u broken_addWeighted8u
+#undef cv_hal_addWeighted8s
+#define cv_hal_addWeighted8s broken_addWeighted8s
+#undef cv_hal_addWeighted16u
+#define cv_hal_addWeighted16u broken_addWeighted16u
+#undef cv_hal_addWeighted16s
+#define cv_hal_addWeighted16s broken_addWeighted16s
+#undef cv_hal_addWeighted32s
+#define cv_hal_addWeighted32s broken_addWeighted32s
+#undef cv_hal_addWeighted32f
+#define cv_hal_addWeighted32f broken_addWeighted32f
+#undef cv_hal_addWeighted64f
+#define cv_hal_addWeighted64f broken_addWeighted64f
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
endif()
add_library(simple_hal simple.cpp)
-set(OPENCV_HAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..")
-target_include_directories(simple_hal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${OPENCV_HAL_DIR}/include)
+set(OPENCV_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
+target_include_directories(simple_hal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${OPENCV_SRC_DIR}/modules/core/include)
for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step)
for(int x = 0 ; x < width; x++ )
dst[x] = src1[x] & src2[x];
- return cv::hal::Error::Ok;
+ return CV_HAL_ERROR_OK;
}
int slow_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height)
for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step)
for(int x = 0 ; x < width; x++ )
dst[x] = src1[x] | src2[x];
- return cv::hal::Error::Ok;
+ return CV_HAL_ERROR_OK;
}
int slow_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height)
for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step)
for(int x = 0 ; x < width; x++ )
dst[x] = src1[x] ^ src2[x];
- return cv::hal::Error::Ok;
+ return CV_HAL_ERROR_OK;
}
int slow_not8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height)
for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step)
for(int x = 0 ; x < width; x++ )
dst[x] = ~src1[x];
- return cv::hal::Error::Ok;
+ return CV_HAL_ERROR_OK;
}
#ifndef _SIMPLE_HPP_INCLUDED_
#define _SIMPLE_HPP_INCLUDED_
-#include "opencv2/hal/interface.hpp"
+#include "opencv2/core/hal/interface.h"
int slow_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height);
int slow_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height);