modules/core/include/opencv2/core/hal/intrin.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                          License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16 // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17 // Third party copyrights are property of their respective owners.
  18 //
  19 // Redistribution and use in source and binary forms, with or without modification,
  20 // are permitted provided that the following conditions are met:
  21 //
  22 //   * Redistribution's of source code must retain the above copyright notice,
  23 //     this list of conditions and the following disclaimer.
  24 //
  25 //   * Redistribution's in binary form must reproduce the above copyright notice,
  26 //     this list of conditions and the following disclaimer in the documentation
  27 //     and/or other materials provided with the distribution.
  28 //
  29 //   * The name of the copyright holders may not be used to endorse or promote products
  30 //     derived from this software without specific prior written permission.
  31 //
  32 // This software is provided by the copyright holders and contributors "as is" and
  33 // any express or implied warranties, including, but not limited to, the implied
  34 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35 // In no event shall the Intel Corporation or contributors be liable for any direct,
  36 // indirect, incidental, special, exemplary, or consequential damages
  37 // (including, but not limited to, procurement of substitute goods or services;
  38 // loss of use, data, or profits; or business interruption) however caused
  39 // and on any theory of liability, whether in contract, strict liability,
  40 // or tort (including negligence or otherwise) arising in any way out of
  41 // the use of this software, even if advised of the possibility of such damage.
  42 //
  43 //M*/
  44
  45 #ifndef OPENCV_HAL_INTRIN_HPP
  46 #define OPENCV_HAL_INTRIN_HPP
  47
  48 #include <cmath>
  49 #include <float.h>
  50 #include <stdlib.h>
  51 #include "opencv2/core/cvdef.h"
  52
  53 #define OPENCV_HAL_ADD(a, b) ((a) + (b))
  54 #define OPENCV_HAL_AND(a, b) ((a) & (b))
  55 #define OPENCV_HAL_NOP(a) (a)
  56 #define OPENCV_HAL_1ST(a, b) (a)
  57
// Unlike the HAL API, which lives in cv::hal,
// the intrinsics are placed directly in the cv namespace
// so that OpenCV code can use them more conveniently.
  61 namespace cv {
  62
  63 #ifndef CV_DOXYGEN
  64
  65 #ifdef CV_CPU_DISPATCH_MODE
  66 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
  67 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
  68 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  69 #else
  70 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
  71 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
  72 #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  73 #endif
  74
  75
  76 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  77 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
  78 using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
  79 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  80 #endif
  81
  82 //! @addtogroup core_hal_intrin
  83 //! @{
  84
  85 //! @cond IGNORED
  86 template<typename _Tp> struct V_TypeTraits
  87 {
  88     typedef _Tp int_type;
  89     typedef _Tp uint_type;
  90     typedef _Tp abs_type;
  91     typedef _Tp sum_type;
  92
  93     enum { delta = 0, shift = 0 };
  94
  95     static int_type reinterpret_int(_Tp x) { return x; }
  96     static uint_type reinterpet_uint(_Tp x) { return x; }
  97     static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
  98 };
  99
 100 template<> struct V_TypeTraits<uchar>
 101 {
 102     typedef uchar value_type;
 103     typedef schar int_type;
 104     typedef uchar uint_type;
 105     typedef uchar abs_type;
 106     typedef int sum_type;
 107
 108     typedef ushort w_type;
 109     typedef unsigned q_type;
 110
 111     enum { delta = 128, shift = 8 };
 112
 113     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 114     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 115     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 116 };
 117
 118 template<> struct V_TypeTraits<schar>
 119 {
 120     typedef schar value_type;
 121     typedef schar int_type;
 122     typedef uchar uint_type;
 123     typedef uchar abs_type;
 124     typedef int sum_type;
 125
 126     typedef short w_type;
 127     typedef int q_type;
 128
 129     enum { delta = 128, shift = 8 };
 130
 131     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 132     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 133     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 134 };
 135
 136 template<> struct V_TypeTraits<ushort>
 137 {
 138     typedef ushort value_type;
 139     typedef short int_type;
 140     typedef ushort uint_type;
 141     typedef ushort abs_type;
 142     typedef int sum_type;
 143
 144     typedef unsigned w_type;
 145     typedef uchar nu_type;
 146
 147     enum { delta = 32768, shift = 16 };
 148
 149     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 150     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 151     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 152 };
 153
 154 template<> struct V_TypeTraits<short>
 155 {
 156     typedef short value_type;
 157     typedef short int_type;
 158     typedef ushort uint_type;
 159     typedef ushort abs_type;
 160     typedef int sum_type;
 161
 162     typedef int w_type;
 163     typedef uchar nu_type;
 164     typedef schar n_type;
 165
 166     enum { delta = 128, shift = 8 };
 167
 168     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 169     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 170     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 171 };
 172
 173 template<> struct V_TypeTraits<unsigned>
 174 {
 175     typedef unsigned value_type;
 176     typedef int int_type;
 177     typedef unsigned uint_type;
 178     typedef unsigned abs_type;
 179     typedef unsigned sum_type;
 180
 181     typedef uint64 w_type;
 182     typedef ushort nu_type;
 183
 184     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 185     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 186     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 187 };
 188
 189 template<> struct V_TypeTraits<int>
 190 {
 191     typedef int value_type;
 192     typedef int int_type;
 193     typedef unsigned uint_type;
 194     typedef unsigned abs_type;
 195     typedef int sum_type;
 196
 197     typedef int64 w_type;
 198     typedef short n_type;
 199     typedef ushort nu_type;
 200
 201     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 202     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 203     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 204 };
 205
 206 template<> struct V_TypeTraits<uint64>
 207 {
 208     typedef uint64 value_type;
 209     typedef int64 int_type;
 210     typedef uint64 uint_type;
 211     typedef uint64 abs_type;
 212     typedef uint64 sum_type;
 213
 214     typedef unsigned nu_type;
 215
 216     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 217     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 218     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 219 };
 220
 221 template<> struct V_TypeTraits<int64>
 222 {
 223     typedef int64 value_type;
 224     typedef int64 int_type;
 225     typedef uint64 uint_type;
 226     typedef uint64 abs_type;
 227     typedef int64 sum_type;
 228
 229     typedef int nu_type;
 230
 231     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 232     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 233     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 234 };
 235
 236
 237 template<> struct V_TypeTraits<float>
 238 {
 239     typedef float value_type;
 240     typedef int int_type;
 241     typedef unsigned uint_type;
 242     typedef float abs_type;
 243     typedef float sum_type;
 244
 245     typedef double w_type;
 246
 247     static int_type reinterpret_int(value_type x)
 248     {
 249         Cv32suf u;
 250         u.f = x;
 251         return u.i;
 252     }
 253     static uint_type reinterpet_uint(value_type x)
 254     {
 255         Cv32suf u;
 256         u.f = x;
 257         return u.u;
 258     }
 259     static value_type reinterpret_from_int(int_type x)
 260     {
 261         Cv32suf u;
 262         u.i = x;
 263         return u.f;
 264     }
 265 };
 266
 267 template<> struct V_TypeTraits<double>
 268 {
 269     typedef double value_type;
 270     typedef int64 int_type;
 271     typedef uint64 uint_type;
 272     typedef double abs_type;
 273     typedef double sum_type;
 274     static int_type reinterpret_int(value_type x)
 275     {
 276         Cv64suf u;
 277         u.f = x;
 278         return u.i;
 279     }
 280     static uint_type reinterpet_uint(value_type x)
 281     {
 282         Cv64suf u;
 283         u.f = x;
 284         return u.u;
 285     }
 286     static value_type reinterpret_from_int(int_type x)
 287     {
 288         Cv64suf u;
 289         u.i = x;
 290         return u.f;
 291     }
 292 };
 293
 294 template <typename T> struct V_SIMD128Traits
 295 {
 296     enum { nlanes = 16 / sizeof(T) };
 297 };
 298
 299 //! @endcond
 300
 301 //! @}
 302
 303 #ifndef CV_DOXYGEN
 304 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 305 #endif
 306 }
 307
 308 #ifdef CV_DOXYGEN
 309 #   undef CV_SSE2
 310 #   undef CV_NEON
 311 #endif
 312
 313 #if CV_SSE2
 314
 315 #include "opencv2/core/hal/intrin_sse.hpp"
 316
 317 #elif CV_NEON
 318
 319 #include "opencv2/core/hal/intrin_neon.hpp"
 320
 321 #else
 322
 323 #include "opencv2/core/hal/intrin_cpp.hpp"
 324
 325 #endif
 326
 327 //! @addtogroup core_hal_intrin
 328 //! @{
 329
 330 #ifndef CV_SIMD128
 331 //! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
 332 #define CV_SIMD128 0
 333 #endif
 334
 335 #ifndef CV_SIMD128_64F
 336 //! Set to 1 if current intrinsics implementation supports 64-bit float vectors
 337 #define CV_SIMD128_64F 0
 338 #endif
 339
 340 //! @}
 341
 342 //==================================================================================================
 343
 344 //! @cond IGNORED
 345
 346 namespace cv {
 347
 348 #ifndef CV_DOXYGEN
 349 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
 350 #endif
 351
 352 template <typename R> struct V_RegTrait128;
 353
 354 template <> struct V_RegTrait128<uchar> {
 355     typedef v_uint8x16 reg;
 356     typedef v_uint16x8 w_reg;
 357     typedef v_uint32x4 q_reg;
 358     typedef v_uint8x16 u_reg;
 359     static v_uint8x16 zero() { return v_setzero_u8(); }
 360     static v_uint8x16 all(uchar val) { return v_setall_u8(val); }
 361 };
 362
 363 template <> struct V_RegTrait128<schar> {
 364     typedef v_int8x16 reg;
 365     typedef v_int16x8 w_reg;
 366     typedef v_int32x4 q_reg;
 367     typedef v_uint8x16 u_reg;
 368     static v_int8x16 zero() { return v_setzero_s8(); }
 369     static v_int8x16 all(schar val) { return v_setall_s8(val); }
 370 };
 371
 372 template <> struct V_RegTrait128<ushort> {
 373     typedef v_uint16x8 reg;
 374     typedef v_uint32x4 w_reg;
 375     typedef v_int16x8 int_reg;
 376     typedef v_uint16x8 u_reg;
 377     static v_uint16x8 zero() { return v_setzero_u16(); }
 378     static v_uint16x8 all(ushort val) { return v_setall_u16(val); }
 379 };
 380
 381 template <> struct V_RegTrait128<short> {
 382     typedef v_int16x8 reg;
 383     typedef v_int32x4 w_reg;
 384     typedef v_uint16x8 u_reg;
 385     static v_int16x8 zero() { return v_setzero_s16(); }
 386     static v_int16x8 all(short val) { return v_setall_s16(val); }
 387 };
 388
 389 template <> struct V_RegTrait128<unsigned> {
 390     typedef v_uint32x4 reg;
 391     typedef v_uint64x2 w_reg;
 392     typedef v_int32x4 int_reg;
 393     typedef v_uint32x4 u_reg;
 394     static v_uint32x4 zero() { return v_setzero_u32(); }
 395     static v_uint32x4 all(unsigned val) { return v_setall_u32(val); }
 396 };
 397
 398 template <> struct V_RegTrait128<int> {
 399     typedef v_int32x4 reg;
 400     typedef v_int64x2 w_reg;
 401     typedef v_uint32x4 u_reg;
 402     static v_int32x4 zero() { return v_setzero_s32(); }
 403     static v_int32x4 all(int val) { return v_setall_s32(val); }
 404 };
 405
 406 template <> struct V_RegTrait128<uint64> {
 407     typedef v_uint64x2 reg;
 408     static v_uint64x2 zero() { return v_setzero_u64(); }
 409     static v_uint64x2 all(uint64 val) { return v_setall_u64(val); }
 410 };
 411
 412 template <> struct V_RegTrait128<int64> {
 413     typedef v_int64x2 reg;
 414     static v_int64x2 zero() { return v_setzero_s64(); }
 415     static v_int64x2 all(int64 val) { return v_setall_s64(val); }
 416 };
 417
 418 template <> struct V_RegTrait128<float> {
 419     typedef v_float32x4 reg;
 420     typedef v_int32x4 int_reg;
 421     typedef v_float32x4 u_reg;
 422     static v_float32x4 zero() { return v_setzero_f32(); }
 423     static v_float32x4 all(float val) { return v_setall_f32(val); }
 424 };
 425
 426 #if CV_SIMD128_64F
 427 template <> struct V_RegTrait128<double> {
 428     typedef v_float64x2 reg;
 429     typedef v_int32x4 int_reg;
 430     typedef v_float64x2 u_reg;
 431     static v_float64x2 zero() { return v_setzero_f64(); }
 432     static v_float64x2 all(double val) { return v_setall_f64(val); }
 433 };
 434 #endif
 435
 436 inline unsigned int trailingZeros32(unsigned int value) {
 437 #if defined(_MSC_VER)
 438 #if (_MSC_VER < 1700)
 439     unsigned long index = 0;
 440     _BitScanForward(&index, value);
 441     return (unsigned int)index;
 442 #else
 443     return _tzcnt_u32(value);
 444 #endif
 445 #elif defined(__GNUC__) || defined(__GNUG__)
 446     return __builtin_ctz(value);
 447 #elif defined(__ICC) || defined(__INTEL_COMPILER)
 448     return _bit_scan_forward(value);
 449 #elif defined(__clang__)
 450     return llvm.cttz.i32(value, true);
 451 #else
 452     static const int MultiplyDeBruijnBitPosition[32] = {
 453         0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
 454         31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
 455     return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
 456 #endif
 457 }
 458
 459 #ifndef CV_DOXYGEN
 460 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 461 #endif
 462
 463 } // cv::
 464
 465 //! @endcond
 466
 467 #endif