From e07a4880123ac98759f9d96ce1dedc567722602a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 19 Nov 2019 21:08:45 +0300 Subject: [PATCH] Merge pull request #15925 from alalek:core_test_simd_cpp_emulation core(test): extending tests with SIMD C++ emulation code (intrin_cpp.hpp) * core(test): test SIMD CPP emulation code (intrin_cpp.hpp) * core(simd): eliminate build warnings from intrin_cpp.hpp --- modules/core/include/opencv2/core/hal/intrin.hpp | 14 +++++++------ .../core/include/opencv2/core/hal/intrin_cpp.hpp | 9 +++++++- modules/core/test/test_intrin.cpp | 17 +++++++++++++++ modules/core/test/test_intrin_emulator.cpp | 24 ++++++++++++++++++++++ 4 files changed, 57 insertions(+), 7 deletions(-) create mode 100644 modules/core/test/test_intrin_emulator.cpp diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 3bdbf05..427a842 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -143,6 +143,7 @@ CV_INTRIN_DEF_TYPE_TRAITS(double, int64, uint64, double, void, void, double, 2); #ifndef CV_DOXYGEN +#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE #ifdef CV_CPU_DISPATCH_MODE #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) { @@ -152,6 +153,7 @@ CV_INTRIN_DEF_TYPE_TRAITS(double, int64, uint64, double, void, void, double, 2); #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline { #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } #endif +#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END @@ -168,29 +170,29 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; # undef CV_MSA #endif -#if CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD +#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD) && !defined(CV_FORCE_SIMD128_CPP) #define CV__SIMD_FORWARD 128 #include "opencv2/core/hal/intrin_forward.hpp" #endif -#if CV_SSE2 +#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP) #include "opencv2/core/hal/intrin_sse_em.hpp" #include "opencv2/core/hal/intrin_sse.hpp" -#elif CV_NEON +#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP) #include "opencv2/core/hal/intrin_neon.hpp" -#elif CV_VSX +#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP) #include "opencv2/core/hal/intrin_vsx.hpp" -#elif CV_MSA +#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP) #include "opencv2/core/hal/intrin_msa.hpp" -#elif CV_WASM_SIMD +#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP) #include "opencv2/core/hal/intrin_wasm.hpp" #else diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 61ebd4b..ed5396e 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -365,6 +365,13 @@ template struct v_reg return c; } + v_reg& operator=(const v_reg<_Tp, n> & r) + { + for( int i = 0; i < n; i++ ) + s[i] = r.s[i]; + return *this; + } + _Tp s[n]; //! @endcond }; @@ -623,7 +630,7 @@ template inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) { v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); - for( int i = 0; i < n*sizeof(_Tp); i++ ) + for (int i = 0; i < n*(int)sizeof(_Tp); i++) b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]]; return b; } diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp index 9bc4981..321fa64 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -4,6 +4,19 @@ #include "test_precomp.hpp" #include "test_intrin128.simd.hpp" + +// see "test_intrin_emulator.cpp" +// see "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" +#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +#undef CV_CPU_OPTIMIZATION_NAMESPACE_END +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace opt_EMULATOR_CPP { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } +#include "test_intrin128.simd.hpp" +#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +#undef CV_CPU_OPTIMIZATION_NAMESPACE_END +#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + #include "test_intrin128.simd_declarations.hpp" #undef CV_CPU_DISPATCH_MODES_ALL @@ -22,6 +35,8 @@ namespace opencv_test { namespace hal { +#define CV_CPU_CALL_CPP_EMULATOR_(fn, args) return (opt_EMULATOR_CPP::fn args) + #define CV_CPU_CALL_BASELINE_(fn, args) CV_CPU_CALL_BASELINE(fn, args) #define DISPATCH_SIMD128(fn, cpu_opt) do { \ @@ -53,6 +68,8 @@ TEST(hal_intrin ## simd_size, float64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_siz namespace intrin128 { +DEFINE_SIMD_TESTS(128, CPP_EMULATOR) + DEFINE_SIMD_TESTS(128, BASELINE) #if defined CV_CPU_DISPATCH_COMPILE_SSE2 || defined CV_CPU_BASELINE_COMPILE_SSE2 diff --git a/modules/core/test/test_intrin_emulator.cpp b/modules/core/test/test_intrin_emulator.cpp new file mode 100644 index 0000000..0ae3c02 --- /dev/null +++ b/modules/core/test/test_intrin_emulator.cpp @@ -0,0 +1,24 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "test_precomp.hpp" + +// see "opencv2/core/hal/intrin.hpp" +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP { +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } + +// see "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" +//#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +#define CV_FORCE_SIMD128_CPP +#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +#undef CV_CPU_OPTIMIZATION_NAMESPACE_END +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace opt_EMULATOR_CPP { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } +#include "test_intrin128.simd.hpp" +#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +#undef CV_CPU_OPTIMIZATION_NAMESPACE_END +#undef CV_CPU_DISPATCH_MODE +#undef CV_FORCE_SIMD128_CPP + +// tests implementation is in test_intrin_utils.hpp -- 2.7.4