1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16 // Copyright (C) 2015, Itseez Inc., all rights reserved.
17 // Third party copyrights are property of their respective owners.
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
22 // * Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
25 // * Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
29 // * The name of the copyright holders may not be used to endorse or promote products
30 // derived from this software without specific prior written permission.
32 // This software is provided by the copyright holders and contributors "as is" and
33 // any express or implied warranties, including, but not limited to, the implied
34 // warranties of merchantability and fitness for a particular purpose are disclaimed.
35 // In no event shall the Intel Corporation or contributors be liable for any direct,
36 // indirect, incidental, special, exemplary, or consequential damages
37 // (including, but not limited to, procurement of substitute goods or services;
38 // loss of use, data, or profits; or business interruption) however caused
39 // and on any theory of liability, whether in contract, strict liability,
40 // or tort (including negligence or otherwise) arising in any way out of
41 // the use of this software, even if advised of the possibility of such damage.
45 #ifndef OPENCV_HAL_INTRIN_HPP
46 #define OPENCV_HAL_INTRIN_HPP
51 #include "opencv2/core/cvdef.h"
// Tiny function-like helper macros:
//   OPENCV_HAL_ADD(a, b) - parenthesized sum of both arguments
//   OPENCV_HAL_AND(a, b) - parenthesized bitwise AND of both arguments
//   OPENCV_HAL_NOP(a)    - passes its argument through unchanged
//   OPENCV_HAL_1ST(a, b) - selects the first argument and discards the second
// NOTE(review): their consumers are not visible in this part of the file; presumably they are
// passed as "operation" parameters to other macros that generate intrinsic wrappers - confirm.
53 #define OPENCV_HAL_ADD(a, b) ((a) + (b))
54 #define OPENCV_HAL_AND(a, b) ((a) & (b))
55 #define OPENCV_HAL_NOP(a) (a)
56 #define OPENCV_HAL_1ST(a, b) (a)
58 // Unlike the HAL API, which lives in cv::hal,
59 // the intrinsics are placed in the cv namespace so that
60 // they are easier to use from within OpenCV code.
// Name of the namespace that holds the intrinsics, plus macros that open and close it.
// With CV_CPU_DISPATCH_MODE defined, the name is "hal_" pasted together with the dispatch mode
// (via __CV_CAT), so each dispatched build gets its own namespace.
65 #ifdef CV_CPU_DISPATCH_MODE
66 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
67 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
68 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
// Fallback definitions using the fixed name hal_baseline.
// NOTE(review): the #else / #endif separating the two sets is not visible in this chunk - confirm.
70 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
71 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
72 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
// Open and immediately close the namespace so that it is declared before the
// using-directive below refers to it.
76 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
77 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
// Make every name of the selected HAL namespace visible in the enclosing scope.
78 using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
// Reopen the namespace; the trait definitions that follow are placed inside it.
79 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
82 //! @addtogroup core_hal_intrin
// Primary V_TypeTraits template: compile-time traits of a scalar lane type _Tp.
// In this generic form the unsigned companion type is _Tp itself, delta and shift are both 0,
// and the reinterpret helpers return the value as-is (reinterpret_from_int casts back to _Tp).
// NOTE(review): the unsigned helper here is spelled `reinterpet_uint` (missing 'r'), while the
// integer specializations in this file spell it `reinterpret_uint`. Generic code written against
// one spelling will not compile with the other; consider offering both names.
86 template<typename _Tp> struct V_TypeTraits
89 typedef _Tp uint_type;
// NOTE(review): the meaning of delta/shift is not shown here; presumably a rounding/offset
// constant and a bit count - confirm against the users of these enumerators.
93 enum { delta = 0, shift = 0 };
95 static int_type reinterpret_int(_Tp x) { return x; }
96 static uint_type reinterpet_uint(_Tp x) { return x; }
97 static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
// V_TypeTraits specialization for uchar lanes.
//   int_type / uint_type - signed and unsigned companions (schar / uchar)
//   abs_type, sum_type   - uchar and int respectively
//   w_type, q_type       - ushort and unsigned (presumably the 2x and 4x widened lane types - confirm)
100 template<> struct V_TypeTraits<uchar>
102 typedef uchar value_type;
103 typedef schar int_type;
104 typedef uchar uint_type;
105 typedef uchar abs_type;
106 typedef int sum_type;
108 typedef ushort w_type;
109 typedef unsigned q_type;
// delta = 128 (2^7) and shift = 8; presumably tied to the 8-bit lane width - confirm.
111 enum { delta = 128, shift = 8 };
// The helpers below are plain value casts between value_type and its integer companions.
113 static int_type reinterpret_int(value_type x) { return (int_type)x; }
114 static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
115 static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
// V_TypeTraits specialization for schar lanes.
//   int_type / uint_type - schar / uchar
//   abs_type, sum_type   - uchar and int respectively
//   w_type               - short (presumably the 2x widened lane type - confirm)
118 template<> struct V_TypeTraits<schar>
120 typedef schar value_type;
121 typedef schar int_type;
122 typedef uchar uint_type;
123 typedef uchar abs_type;
124 typedef int sum_type;
126 typedef short w_type;
// Same constants as the uchar specialization: delta = 128, shift = 8.
129 enum { delta = 128, shift = 8 };
// The helpers below are plain value casts between value_type and its integer companions.
131 static int_type reinterpret_int(value_type x) { return (int_type)x; }
132 static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
133 static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
// V_TypeTraits specialization for ushort lanes.
//   int_type / uint_type - short / ushort
//   abs_type, sum_type   - ushort and int respectively
//   w_type               - unsigned (presumably the 2x widened lane type - confirm)
//   nu_type              - uchar (presumably the narrowed unsigned lane type - confirm)
136 template<> struct V_TypeTraits<ushort>
138 typedef ushort value_type;
139 typedef short int_type;
140 typedef ushort uint_type;
141 typedef ushort abs_type;
142 typedef int sum_type;
144 typedef unsigned w_type;
145 typedef uchar nu_type;
// delta = 32768 (2^15) and shift = 16; presumably tied to the 16-bit lane width - confirm.
147 enum { delta = 32768, shift = 16 };
// The helpers below are plain value casts between value_type and its integer companions.
149 static int_type reinterpret_int(value_type x) { return (int_type)x; }
150 static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
151 static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
// V_TypeTraits specialization for short lanes.
//   int_type / uint_type - short / ushort
//   abs_type, sum_type   - ushort and int respectively
//   nu_type, n_type      - uchar and schar (presumably the narrowed unsigned/signed lane types - confirm)
154 template<> struct V_TypeTraits<short>
156 typedef short value_type;
157 typedef short int_type;
158 typedef ushort uint_type;
159 typedef ushort abs_type;
160 typedef int sum_type;
163 typedef uchar nu_type;
164 typedef schar n_type;
// NOTE(review): delta = 128 / shift = 8 are the values used by the 8-bit specializations
// (uchar, schar), whereas the other 16-bit specialization (ushort) uses delta = 32768 / shift = 16.
// This looks like a copy-paste leftover; verify against the users of delta/shift before changing it.
166 enum { delta = 128, shift = 8 };
// The helpers below are plain value casts between value_type and its integer companions.
168 static int_type reinterpret_int(value_type x) { return (int_type)x; }
169 static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
170 static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
// V_TypeTraits specialization for unsigned (32-bit unsigned) lanes.
//   int_type / uint_type - int / unsigned
//   abs_type, sum_type   - both unsigned
//   w_type               - uint64 (presumably the 2x widened lane type - confirm)
//   nu_type              - ushort (presumably the narrowed unsigned lane type - confirm)
// No delta/shift enumerators are visible for this specialization.
173 template<> struct V_TypeTraits<unsigned>
175 typedef unsigned value_type;
176 typedef int int_type;
177 typedef unsigned uint_type;
178 typedef unsigned abs_type;
179 typedef unsigned sum_type;
181 typedef uint64 w_type;
182 typedef ushort nu_type;
// The helpers below are plain value casts between value_type and its integer companions.
184 static int_type reinterpret_int(value_type x) { return (int_type)x; }
185 static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
186 static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
// V_TypeTraits specialization for int lanes.
//   int_type / uint_type - int / unsigned
//   abs_type, sum_type   - unsigned and int respectively
//   w_type               - int64 (presumably the 2x widened lane type - confirm)
//   n_type, nu_type      - short and ushort (presumably the narrowed signed/unsigned lane types - confirm)
// No delta/shift enumerators are visible for this specialization.
189 template<> struct V_TypeTraits<int>
191 typedef int value_type;
192 typedef int int_type;
193 typedef unsigned uint_type;
194 typedef unsigned abs_type;
195 typedef int sum_type;
197 typedef int64 w_type;
198 typedef short n_type;
199 typedef ushort nu_type;
// The helpers below are plain value casts between value_type and its integer companions.
201 static int_type reinterpret_int(value_type x) { return (int_type)x; }
202 static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
203 static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
// V_TypeTraits specialization for uint64 lanes.
//   int_type / uint_type - int64 / uint64
//   abs_type, sum_type   - both uint64
//   nu_type              - unsigned (presumably the narrowed unsigned lane type - confirm)
// No widened type and no delta/shift enumerators are visible for this specialization.
206 template<> struct V_TypeTraits<uint64>
208 typedef uint64 value_type;
209 typedef int64 int_type;
210 typedef uint64 uint_type;
211 typedef uint64 abs_type;
212 typedef uint64 sum_type;
214 typedef unsigned nu_type;
// The helpers below are plain value casts between value_type and its integer companions.
216 static int_type reinterpret_int(value_type x) { return (int_type)x; }
217 static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
218 static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
// V_TypeTraits specialization for int64 lanes.
//   int_type / uint_type - int64 / uint64
//   abs_type, sum_type   - uint64 and int64 respectively
// No widened/narrowed types and no delta/shift enumerators are visible for this specialization.
221 template<> struct V_TypeTraits<int64>
223 typedef int64 value_type;
224 typedef int64 int_type;
225 typedef uint64 uint_type;
226 typedef uint64 abs_type;
227 typedef int64 sum_type;
// The helpers below are plain value casts between value_type and its integer companions.
231 static int_type reinterpret_int(value_type x) { return (int_type)x; }
232 static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
233 static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
// V_TypeTraits specialization for float lanes.
//   int_type / uint_type - int / unsigned (integer types paired with float)
//   abs_type, sum_type   - both float
//   w_type               - double (presumably the widened lane type - confirm)
// NOTE(review): the bodies of the three helpers are not visible in this chunk, so how the
// float <-> integer reinterpretation is performed cannot be documented from here.
237 template<> struct V_TypeTraits<float>
239 typedef float value_type;
240 typedef int int_type;
241 typedef unsigned uint_type;
242 typedef float abs_type;
243 typedef float sum_type;
245 typedef double w_type;
247 static int_type reinterpret_int(value_type x)
// NOTE(review): spelled `reinterpet_uint` (missing 'r'), unlike `reinterpret_uint` in the
// integer specializations of this file.
253 static uint_type reinterpet_uint(value_type x)
259 static value_type reinterpret_from_int(int_type x)
// V_TypeTraits specialization for double lanes.
//   int_type / uint_type - int64 / uint64 (integer types paired with double)
//   abs_type, sum_type   - both double
// NOTE(review): the bodies of the three helpers are not visible in this chunk, so how the
// double <-> integer reinterpretation is performed cannot be documented from here.
267 template<> struct V_TypeTraits<double>
269 typedef double value_type;
270 typedef int64 int_type;
271 typedef uint64 uint_type;
272 typedef double abs_type;
273 typedef double sum_type;
274 static int_type reinterpret_int(value_type x)
// NOTE(review): spelled `reinterpet_uint` (missing 'r'), unlike `reinterpret_uint` in the
// integer specializations of this file.
280 static uint_type reinterpet_uint(value_type x)
286 static value_type reinterpret_from_int(int_type x)
// Lane count for element type T in a 16-byte (128-bit) vector.
294 template <typename T> struct V_SIMD128Traits
// nlanes = how many T-sized elements fit into 16 bytes (integer division).
296 enum { nlanes = 16 / sizeof(T) };
304 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
316 #include "opencv2/core/hal/intrin_sse.hpp"
320 #include "opencv2/core/hal/intrin_neon.hpp"
324 #include "opencv2/core/hal/intrin_vsx.hpp"
328 #include "opencv2/core/hal/intrin_cpp.hpp"
332 //! @addtogroup core_hal_intrin
336 //! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
// Provide a default of 0 when nothing defined CV_SIMD128_64F earlier (presumably one of the
// platform-specific intrinsic headers included above would define it - confirm).
340 #ifndef CV_SIMD128_64F
341 //! Set to 1 if the current intrinsics implementation supports 64-bit float vectors
342 #define CV_SIMD128_64F 0
347 //==================================================================================================
354 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
// Primary template, declared but not defined here: maps a scalar lane type R to its vector
// register types. Only the explicit specializations that follow provide members.
357 template <typename R> struct V_RegTrait128;
// Register traits for uchar lanes.
//   reg   - v_uint8x16, the vector type for this lane type
//   w_reg - v_uint16x8, q_reg - v_uint32x4 (presumably 2x / 4x widened vectors - confirm)
//   u_reg - v_uint8x16 (unsigned counterpart; same as reg here)
// zero() and all(val) forward to v_setzero_u8() and v_setall_u8(val).
359 template <> struct V_RegTrait128<uchar> {
360 typedef v_uint8x16 reg;
361 typedef v_uint16x8 w_reg;
362 typedef v_uint32x4 q_reg;
363 typedef v_uint8x16 u_reg;
364 static v_uint8x16 zero() { return v_setzero_u8(); }
365 static v_uint8x16 all(uchar val) { return v_setall_u8(val); }
// Register traits for schar lanes.
//   reg   - v_int8x16, the vector type for this lane type
//   w_reg - v_int16x8, q_reg - v_int32x4 (presumably 2x / 4x widened vectors - confirm)
//   u_reg - v_uint8x16 (unsigned counterpart)
// zero() and all(val) forward to v_setzero_s8() and v_setall_s8(val).
368 template <> struct V_RegTrait128<schar> {
369 typedef v_int8x16 reg;
370 typedef v_int16x8 w_reg;
371 typedef v_int32x4 q_reg;
372 typedef v_uint8x16 u_reg;
373 static v_int8x16 zero() { return v_setzero_s8(); }
374 static v_int8x16 all(schar val) { return v_setall_s8(val); }
// Register traits for ushort lanes.
//   reg     - v_uint16x8, the vector type for this lane type
//   w_reg   - v_uint32x4 (presumably the 2x widened vector - confirm)
//   int_reg - v_int16x8 (signed counterpart)
//   u_reg   - v_uint16x8 (unsigned counterpart; same as reg here)
// zero() and all(val) forward to v_setzero_u16() and v_setall_u16(val).
377 template <> struct V_RegTrait128<ushort> {
378 typedef v_uint16x8 reg;
379 typedef v_uint32x4 w_reg;
380 typedef v_int16x8 int_reg;
381 typedef v_uint16x8 u_reg;
382 static v_uint16x8 zero() { return v_setzero_u16(); }
383 static v_uint16x8 all(ushort val) { return v_setall_u16(val); }
// Register traits for short lanes.
//   reg   - v_int16x8, the vector type for this lane type
//   w_reg - v_int32x4 (presumably the 2x widened vector - confirm)
//   u_reg - v_uint16x8 (unsigned counterpart)
// zero() and all(val) forward to v_setzero_s16() and v_setall_s16(val).
386 template <> struct V_RegTrait128<short> {
387 typedef v_int16x8 reg;
388 typedef v_int32x4 w_reg;
389 typedef v_uint16x8 u_reg;
390 static v_int16x8 zero() { return v_setzero_s16(); }
391 static v_int16x8 all(short val) { return v_setall_s16(val); }
// Register traits for unsigned (32-bit) lanes.
//   reg     - v_uint32x4, the vector type for this lane type
//   w_reg   - v_uint64x2 (presumably the 2x widened vector - confirm)
//   int_reg - v_int32x4 (signed counterpart)
//   u_reg   - v_uint32x4 (unsigned counterpart; same as reg here)
// zero() and all(val) forward to v_setzero_u32() and v_setall_u32(val).
394 template <> struct V_RegTrait128<unsigned> {
395 typedef v_uint32x4 reg;
396 typedef v_uint64x2 w_reg;
397 typedef v_int32x4 int_reg;
398 typedef v_uint32x4 u_reg;
399 static v_uint32x4 zero() { return v_setzero_u32(); }
400 static v_uint32x4 all(unsigned val) { return v_setall_u32(val); }
// Register traits for int lanes.
//   reg   - v_int32x4, the vector type for this lane type
//   w_reg - v_int64x2 (presumably the 2x widened vector - confirm)
//   u_reg - v_uint32x4 (unsigned counterpart)
// zero() and all(val) forward to v_setzero_s32() and v_setall_s32(val).
403 template <> struct V_RegTrait128<int> {
404 typedef v_int32x4 reg;
405 typedef v_int64x2 w_reg;
406 typedef v_uint32x4 u_reg;
407 static v_int32x4 zero() { return v_setzero_s32(); }
408 static v_int32x4 all(int val) { return v_setall_s32(val); }
// Register traits for uint64 lanes: only the vector type (v_uint64x2) and the two
// constructors are provided; no widened/signed/unsigned companion typedefs are visible.
// zero() and all(val) forward to v_setzero_u64() and v_setall_u64(val).
411 template <> struct V_RegTrait128<uint64> {
412 typedef v_uint64x2 reg;
413 static v_uint64x2 zero() { return v_setzero_u64(); }
414 static v_uint64x2 all(uint64 val) { return v_setall_u64(val); }
// Register traits for int64 lanes: only the vector type (v_int64x2) and the two
// constructors are provided; no widened/signed/unsigned companion typedefs are visible.
// zero() and all(val) forward to v_setzero_s64() and v_setall_s64(val).
417 template <> struct V_RegTrait128<int64> {
418 typedef v_int64x2 reg;
419 static v_int64x2 zero() { return v_setzero_s64(); }
420 static v_int64x2 all(int64 val) { return v_setall_s64(val); }
// Register traits for float lanes.
//   reg     - v_float32x4, the vector type for this lane type
//   int_reg - v_int32x4 (integer vector paired with the float vector)
//   u_reg   - v_float32x4 (same as reg; there is no separate unsigned form for float)
// zero() and all(val) forward to v_setzero_f32() and v_setall_f32(val).
423 template <> struct V_RegTrait128<float> {
424 typedef v_float32x4 reg;
425 typedef v_int32x4 int_reg;
426 typedef v_float32x4 u_reg;
427 static v_float32x4 zero() { return v_setzero_f32(); }
428 static v_float32x4 all(float val) { return v_setall_f32(val); }
// Register traits for double lanes.
//   reg     - v_float64x2, the vector type for this lane type
//   int_reg - v_int32x4; note this is the 32-bit integer vector, not a 64-bit one
//             (NOTE(review): presumably matches what double->int conversions return - confirm)
//   u_reg   - v_float64x2 (same as reg)
// zero() and all(val) forward to v_setzero_f64() and v_setall_f64(val).
// NOTE(review): presumably only meaningful when CV_SIMD128_64F is enabled; any guard around
// this specialization is not visible in this chunk - confirm.
432 template <> struct V_RegTrait128<double> {
433 typedef v_float64x2 reg;
434 typedef v_int32x4 int_reg;
435 typedef v_float64x2 u_reg;
436 static v_float64x2 zero() { return v_setzero_f64(); }
437 static v_float64x2 all(double val) { return v_setall_f64(val); }
/** @brief Returns the zero-based index of the least significant set bit (number of trailing zero bits).

@param value 32-bit mask to scan. It must be non-zero: the result for 0 is not consistent across
             the implementations selected below (for example `_tzcnt_u32` yields 32, while
             `__builtin_ctz` is undefined for 0).
@return Position of the lowest set bit, in the range [0, 31] for a non-zero input.
*/
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
    // _tzcnt_u32 is missing in old MSVC and is an x86-only intrinsic, so ARM targets
    // have to use the bit-scan intrinsic as well.
    unsigned long index = 0;
    _BitScanForward(&index, value);
    return (unsigned int)index;
#else
    return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
    // clang provides the GCC builtin too; the LLVM IR intrinsic name is not callable from C++.
    return (unsigned int)__builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    return (unsigned int)_bit_scan_forward(value);
#else
    // Portable fallback: isolate the lowest set bit, multiply by a De Bruijn constant and use
    // the top 5 bits of the 32-bit product as an index into the position table.
    static const int MultiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    // (0u - value) is the two's complement negation without applying unary minus to an unsigned operand.
    return (unsigned int)MultiplyDeBruijnBitPosition[((uint32_t)((value & (0u - value)) * 0x077CB531U)) >> 27];
#endif
}
465 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END