1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
// OpenCV-internal section, compiled only when __OPENCV_BUILD is defined.
// Pulls in the CPU configuration/helper headers and selects the namespace
// that wraps CPU-optimized code.
// NOTE(review): the #if line ends with a line-continuation backslash, so the
// physical line that follows it is spliced into the directive -- confirm a
// blank line separates it from the first #include.
5 #if defined __OPENCV_BUILD \
7 #include "cv_cpu_config.h"
8 #include "cv_cpu_helper.h"
// Dispatched build: the namespace name is "opt_" joined with the value of
// CV_CPU_DISPATCH_MODE through __CV_CAT (defined elsewhere; presumably token
// concatenation -- confirm).
10 #ifdef CV_CPU_DISPATCH_MODE
11 #define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
12 #define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
13 #define CV_CPU_OPTIMIZATION_NAMESPACE_END }
// Baseline definitions: fixed namespace cpu_baseline, and CV_CPU_BASELINE_MODE
// is set to 1.
// NOTE(review): presumably this group is the alternative to the
// CV_CPU_DISPATCH_MODE branch above -- confirm the #else/#endif placement,
// otherwise the three namespace macros are redefined with different bodies.
15 #define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
16 #define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
17 #define CV_CPU_OPTIMIZATION_NAMESPACE_END }
18 #define CV_CPU_BASELINE_MODE 1
// CV_CPU_DISPATCH(fn, args, mode...) appends the END sentinel to the mode list
// and expands to __CV_CPU_DISPATCH_CHAIN_<first mode>(fn, args, <remaining modes>).
// The END link below expands to nothing, which terminates the chain.
// NOTE(review): __CV_EXPAND and the per-mode __CV_CPU_DISPATCH_CHAIN_<mode>
// links are not defined in this section; presumably they come from the helper
// headers included earlier -- confirm.
22 #define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */
// Pastes the first mode onto the chain prefix and forwards the remaining modes.
23 #define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
// Extra expansion layer placed between the public macro and the pasting step.
24 #define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
// Public entry point; END is always passed as the last mode.
25 #define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
// SIMD intrinsics section: active only when CV_ENABLE_INTRINSICS is defined and
// neither CV_DISABLE_OPTIMIZATION nor __CUDACC__ is. Each CV_CPU_COMPILE_<feature>
// flag below includes the matching intrinsics header and/or sets CV_* feature
// macros to 1.
// NOTE(review): every line of this #if, including the last, ends with a
// continuation backslash, so the next physical line is spliced into the
// condition -- confirm a blank line follows it.
28 #if defined CV_ENABLE_INTRINSICS \
29 && !defined CV_DISABLE_OPTIMIZATION \
30 && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
32 #ifdef CV_CPU_COMPILE_SSE2
33 # include <emmintrin.h>
38 #ifdef CV_CPU_COMPILE_SSE3
39 # include <pmmintrin.h>
42 #ifdef CV_CPU_COMPILE_SSSE3
43 # include <tmmintrin.h>
46 #ifdef CV_CPU_COMPILE_SSE4_1
47 # include <smmintrin.h>
50 #ifdef CV_CPU_COMPILE_SSE4_2
51 # include <nmmintrin.h>
// POPCNT: two alternative definitions of CV_POPCNT_U64 / CV_POPCNT_U32 follow:
// the _mm_popcnt_* intrinsics with <nmmintrin.h>, or the __builtin_popcount*
// builtins with <popcntintrin.h> (there the 64-bit macro is under __x86_64__).
// NOTE(review): the conditionals that choose between the two variants are not
// visible here; presumably compiler-specific -- confirm.
54 #ifdef CV_CPU_COMPILE_POPCNT
56 # include <nmmintrin.h>
58 # define CV_POPCNT_U64 _mm_popcnt_u64
60 # define CV_POPCNT_U32 _mm_popcnt_u32
62 # include <popcntintrin.h>
63 # if defined(__x86_64__)
64 # define CV_POPCNT_U64 __builtin_popcountll
66 # define CV_POPCNT_U32 __builtin_popcount
70 #ifdef CV_CPU_COMPILE_AVX
71 # include <immintrin.h>
// FP16: ARM targets (__arm__, __aarch64__ or _M_ARM) use <arm_neon.h>;
// <immintrin.h> is presumably the non-ARM alternative -- confirm the #else.
74 #ifdef CV_CPU_COMPILE_FP16
75 # if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
76 # include <arm_neon.h>
78 # include <immintrin.h>
82 #ifdef CV_CPU_COMPILE_AVX2
83 # include <immintrin.h>
// AVX-512: each flag sets its group macro plus the individual instruction-subset
// macros listed under it to 1.
86 #ifdef CV_CPU_COMPILE_AVX_512F
87 # include <immintrin.h>
88 # define CV_AVX_512F 1
90 #ifdef CV_CPU_COMPILE_AVX512_COMMON
91 # define CV_AVX512_COMMON 1
92 # define CV_AVX_512CD 1
94 #ifdef CV_CPU_COMPILE_AVX512_KNL
95 # define CV_AVX512_KNL 1
96 # define CV_AVX_512ER 1
97 # define CV_AVX_512PF 1
99 #ifdef CV_CPU_COMPILE_AVX512_KNM
100 # define CV_AVX512_KNM 1
101 # define CV_AVX_5124FMAPS 1
102 # define CV_AVX_5124VNNIW 1
103 # define CV_AVX_512VPOPCNTDQ 1
105 #ifdef CV_CPU_COMPILE_AVX512_SKX
106 # define CV_AVX512_SKX 1
107 # define CV_AVX_512VL 1
108 # define CV_AVX_512BW 1
109 # define CV_AVX_512DQ 1
111 #ifdef CV_CPU_COMPILE_AVX512_CNL
112 # define CV_AVX512_CNL 1
113 # define CV_AVX_512IFMA 1
114 # define CV_AVX_512VBMI 1
116 #ifdef CV_CPU_COMPILE_AVX512_CLX
117 # define CV_AVX512_CLX 1
118 # define CV_AVX_512VNNI 1
// ICL sets IFMA, VBMI and VNNI again; the #undef before each keeps this clean
// when the CNL or CLX group above has already defined them.
// CV_AVX_512VPOPCNTDQ is defined here without #undef; the KNM group defines it
// to the same value, and an identical macro redefinition is permitted.
120 #ifdef CV_CPU_COMPILE_AVX512_ICL
121 # define CV_AVX512_ICL 1
122 # undef CV_AVX_512IFMA
123 # define CV_AVX_512IFMA 1
124 # undef CV_AVX_512VBMI
125 # define CV_AVX_512VBMI 1
126 # undef CV_AVX_512VNNI
127 # define CV_AVX_512VNNI 1
128 # define CV_AVX_512VBMI2 1
129 # define CV_AVX_512BITALG 1
130 # define CV_AVX_512VPOPCNTDQ 1
// NOTE(review): no body is shown for the FMA3 flag -- confirm what it guards.
132 #ifdef CV_CPU_COMPILE_FMA3
// ARM NEON: Windows-on-ARM (_WIN32 with _M_ARM) first, then compilers that
// define __ARM_NEON__, or __ARM_NEON together with __aarch64__.
136 #if defined _WIN32 && defined(_M_ARM)
138 # include <arm_neon.h>
140 #elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
141 # include <arm_neon.h>
// Second, broader ARM check: also matches __aarch64__ on its own.
145 #if defined(__ARM_NEON__) || defined(__aarch64__)
146 # include <arm_neon.h>
// PowerPC VSX.
149 #ifdef CV_CPU_COMPILE_VSX
150 # include <altivec.h>
// NOTE(review): no body is shown for the VSX3 flag -- confirm what it guards.
157 #ifdef CV_CPU_COMPILE_VSX3
// MIPS MSA: uses the project-local macro header.
161 #ifdef CV_CPU_COMPILE_MSA
162 # include "hal/msa_macros.h"
// Emscripten: sets CV_WASM_SIMD to 1 and includes the WebAssembly SIMD header.
166 #ifdef __EMSCRIPTEN__
167 # define CV_WASM_SIMD 1
168 # include <wasm_simd128.h>
171 #endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
// Scope guard for AVX code, compiled only when CV_CPU_COMPILE_AVX is defined and
// CV_CPU_BASELINE_COMPILE_AVX is not. It calls _mm256_zeroupper() when it is
// constructed and again when it is destroyed.
// NOTE(review): presumably this avoids AVX/SSE transition penalties around
// dispatched AVX code -- confirm against the intrinsic's documentation.
173 #if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
174 struct VZeroUpperGuard {
// NOTE(review): __attribute__ is a GNU-style extension; confirm it is guarded
// for compilers that do not accept it.
176 __attribute__((always_inline))
178 inline VZeroUpperGuard() { _mm256_zeroupper(); }
180 __attribute__((always_inline))
182 inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
// Expands to a local guard declaration followed by CV_UNUSED(...) on that
// variable. Both parts end in ';', so the macro is two statements: use it on
// its own line at block scope, not as the body of an unbraced if/for.
184 #define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
// Public spelling of the guard, provided whenever __CV_AVX_GUARD exists.
187 #ifdef __CV_AVX_GUARD
188 #define CV_AVX_GUARD __CV_AVX_GUARD
193 #endif // __OPENCV_BUILD
// Compatibility path for code built without __OPENCV_BUILD (and not under
// __CUDACC__): choose one SIMD header from the compiler's own predefined macros
// instead of the CV_CPU_COMPILE_* flags.
197 #if !defined __OPENCV_BUILD /* Compatibility code */ \
198 && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
// x86 with SSE2: __SSE2__, _M_X64, or _M_IX86_FP at level 2 or higher.
199 #if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
200 # include <emmintrin.h>
// Windows on ARM.
204 #elif defined _WIN32 && defined(_M_ARM)
206 # include <arm_neon.h>
// Other ARM toolchains that advertise NEON.
208 #elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
209 # include <arm_neon.h>
// VSX on little-endian 64-bit PowerPC.
211 #elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
212 # include <altivec.h>
219 #endif // !__OPENCV_BUILD && !__CUDACC__ (Compatibility code)
// Default values: every feature macro that was not set to 1 earlier is defined
// as 0 here.
// NOTE(review): several of the defaults below appear without a visible #ifndef
// guard; an unguarded "#define X 0" after "#define X 1" is a conflicting
// redefinition -- confirm each one is guarded.
260 # define CV_AVX_512F 0
263 # define CV_AVX_512BW 0
266 # define CV_AVX_512CD 0
269 # define CV_AVX_512DQ 0
272 # define CV_AVX_512ER 0
274 #ifndef CV_AVX_512IFMA
275 # define CV_AVX_512IFMA 0
// Older name kept as an alias of CV_AVX_512IFMA.
277 #define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated
279 # define CV_AVX_512PF 0
281 #ifndef CV_AVX_512VBMI
282 # define CV_AVX_512VBMI 0
285 # define CV_AVX_512VL 0
287 #ifndef CV_AVX_5124FMAPS
288 # define CV_AVX_5124FMAPS 0
290 #ifndef CV_AVX_5124VNNIW
291 # define CV_AVX_5124VNNIW 0
293 #ifndef CV_AVX_512VPOPCNTDQ
294 # define CV_AVX_512VPOPCNTDQ 0
296 #ifndef CV_AVX_512VNNI
297 # define CV_AVX_512VNNI 0
299 #ifndef CV_AVX_512VBMI2
300 # define CV_AVX_512VBMI2 0
302 #ifndef CV_AVX_512BITALG
303 # define CV_AVX_512BITALG 0
// Defaults for the AVX-512 group macros.
305 #ifndef CV_AVX512_COMMON
306 # define CV_AVX512_COMMON 0
308 #ifndef CV_AVX512_KNL
309 # define CV_AVX512_KNL 0
311 #ifndef CV_AVX512_KNM
312 # define CV_AVX512_KNM 0
314 #ifndef CV_AVX512_SKX
315 # define CV_AVX512_SKX 0
317 #ifndef CV_AVX512_CNL
318 # define CV_AVX512_CNL 0
320 #ifndef CV_AVX512_CLX
321 # define CV_AVX512_CLX 0
323 #ifndef CV_AVX512_ICL
324 # define CV_AVX512_ICL 0
// Default for the WebAssembly SIMD flag.
344 # define CV_WASM_SIMD 0