modules/core/include/opencv2/core/hal/intrin.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                          License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16 // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17 // Third party copyrights are property of their respective owners.
  18 //
  19 // Redistribution and use in source and binary forms, with or without modification,
  20 // are permitted provided that the following conditions are met:
  21 //
  22 //   * Redistribution's of source code must retain the above copyright notice,
  23 //     this list of conditions and the following disclaimer.
  24 //
  25 //   * Redistribution's in binary form must reproduce the above copyright notice,
  26 //     this list of conditions and the following disclaimer in the documentation
  27 //     and/or other materials provided with the distribution.
  28 //
  29 //   * The name of the copyright holders may not be used to endorse or promote products
  30 //     derived from this software without specific prior written permission.
  31 //
  32 // This software is provided by the copyright holders and contributors "as is" and
  33 // any express or implied warranties, including, but not limited to, the implied
  34 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35 // In no event shall the Intel Corporation or contributors be liable for any direct,
  36 // indirect, incidental, special, exemplary, or consequential damages
  37 // (including, but not limited to, procurement of substitute goods or services;
  38 // loss of use, data, or profits; or business interruption) however caused
  39 // and on any theory of liability, whether in contract, strict liability,
  40 // or tort (including negligence or otherwise) arising in any way out of
  41 // the use of this software, even if advised of the possibility of such damage.
  42 //
  43 //M*/
  44
  45 #ifndef OPENCV_HAL_INTRIN_HPP
  46 #define OPENCV_HAL_INTRIN_HPP
  47
  48 #include <cmath>
  49 #include <float.h>
  50 #include <stdlib.h>
  51 #include "opencv2/core/cvdef.h"
  52
// Small helper macros operating on their arguments.
// OPENCV_HAL_ADD / OPENCV_HAL_AND: fully parenthesized binary '+' and '&'.
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
// OPENCV_HAL_NOP: identity, expands to its (parenthesized) argument.
#define OPENCV_HAL_NOP(a) (a)
// OPENCV_HAL_1ST: expands to the first argument; the second one is discarded.
#define OPENCV_HAL_1ST(a, b) (a)
  57
// unlike the HAL API, which lives in cv::hal,
// we put the intrinsics into the cv namespace to make them
// easier to use from within OpenCV code
  61 namespace cv {
  62
#ifndef CV_DOXYGEN

// Select the nested namespace that wraps the intrinsics:
//  - with CV_CPU_DISPATCH_MODE defined, the name is built by __CV_CAT from
//    "hal_" and CV_CPU_DISPATCH_MODE;
//  - otherwise the fixed name hal_baseline is used.
// The *_BEGIN / *_END macros open and close that namespace.
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif


// Open and immediately close the namespace so that it is declared before the
// using-directive below refers to it.
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
// Make everything in the selected namespace visible directly from cv::.
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
// Re-open the namespace for the declarations that follow; the matching
// CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END appears further down in this file.
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
  81
  82 //! @addtogroup core_hal_intrin
  83 //! @{
  84
  85 //! @cond IGNORED
  86 template<typename _Tp> struct V_TypeTraits
  87 {
  88     typedef _Tp int_type;
  89     typedef _Tp uint_type;
  90     typedef _Tp abs_type;
  91     typedef _Tp sum_type;
  92
  93     enum { delta = 0, shift = 0 };
  94
  95     static int_type reinterpret_int(_Tp x) { return x; }
  96     static uint_type reinterpet_uint(_Tp x) { return x; }
  97     static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
  98 };
  99
 100 template<> struct V_TypeTraits<uchar>
 101 {
 102     typedef uchar value_type;
 103     typedef schar int_type;
 104     typedef uchar uint_type;
 105     typedef uchar abs_type;
 106     typedef int sum_type;
 107
 108     typedef ushort w_type;
 109     typedef unsigned q_type;
 110
 111     enum { delta = 128, shift = 8 };
 112
 113     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 114     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 115     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 116 };
 117
 118 template<> struct V_TypeTraits<schar>
 119 {
 120     typedef schar value_type;
 121     typedef schar int_type;
 122     typedef uchar uint_type;
 123     typedef uchar abs_type;
 124     typedef int sum_type;
 125
 126     typedef short w_type;
 127     typedef int q_type;
 128
 129     enum { delta = 128, shift = 8 };
 130
 131     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 132     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 133     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 134 };
 135
 136 template<> struct V_TypeTraits<ushort>
 137 {
 138     typedef ushort value_type;
 139     typedef short int_type;
 140     typedef ushort uint_type;
 141     typedef ushort abs_type;
 142     typedef int sum_type;
 143
 144     typedef unsigned w_type;
 145     typedef uchar nu_type;
 146
 147     enum { delta = 32768, shift = 16 };
 148
 149     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 150     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 151     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 152 };
 153
 154 template<> struct V_TypeTraits<short>
 155 {
 156     typedef short value_type;
 157     typedef short int_type;
 158     typedef ushort uint_type;
 159     typedef ushort abs_type;
 160     typedef int sum_type;
 161
 162     typedef int w_type;
 163     typedef uchar nu_type;
 164     typedef schar n_type;
 165
 166     enum { delta = 128, shift = 8 };
 167
 168     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 169     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 170     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 171 };
 172
 173 template<> struct V_TypeTraits<unsigned>
 174 {
 175     typedef unsigned value_type;
 176     typedef int int_type;
 177     typedef unsigned uint_type;
 178     typedef unsigned abs_type;
 179     typedef unsigned sum_type;
 180
 181     typedef uint64 w_type;
 182     typedef ushort nu_type;
 183
 184     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 185     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 186     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 187 };
 188
 189 template<> struct V_TypeTraits<int>
 190 {
 191     typedef int value_type;
 192     typedef int int_type;
 193     typedef unsigned uint_type;
 194     typedef unsigned abs_type;
 195     typedef int sum_type;
 196
 197     typedef int64 w_type;
 198     typedef short n_type;
 199     typedef ushort nu_type;
 200
 201     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 202     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 203     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 204 };
 205
 206 template<> struct V_TypeTraits<uint64>
 207 {
 208     typedef uint64 value_type;
 209     typedef int64 int_type;
 210     typedef uint64 uint_type;
 211     typedef uint64 abs_type;
 212     typedef uint64 sum_type;
 213
 214     typedef unsigned nu_type;
 215
 216     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 217     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 218     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 219 };
 220
 221 template<> struct V_TypeTraits<int64>
 222 {
 223     typedef int64 value_type;
 224     typedef int64 int_type;
 225     typedef uint64 uint_type;
 226     typedef uint64 abs_type;
 227     typedef int64 sum_type;
 228
 229     typedef int nu_type;
 230
 231     static int_type reinterpret_int(value_type x) { return (int_type)x; }
 232     static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
 233     static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
 234 };
 235
 236
 237 template<> struct V_TypeTraits<float>
 238 {
 239     typedef float value_type;
 240     typedef int int_type;
 241     typedef unsigned uint_type;
 242     typedef float abs_type;
 243     typedef float sum_type;
 244
 245     typedef double w_type;
 246
 247     static int_type reinterpret_int(value_type x)
 248     {
 249         Cv32suf u;
 250         u.f = x;
 251         return u.i;
 252     }
 253     static uint_type reinterpet_uint(value_type x)
 254     {
 255         Cv32suf u;
 256         u.f = x;
 257         return u.u;
 258     }
 259     static value_type reinterpret_from_int(int_type x)
 260     {
 261         Cv32suf u;
 262         u.i = x;
 263         return u.f;
 264     }
 265 };
 266
 267 template<> struct V_TypeTraits<double>
 268 {
 269     typedef double value_type;
 270     typedef int64 int_type;
 271     typedef uint64 uint_type;
 272     typedef double abs_type;
 273     typedef double sum_type;
 274     static int_type reinterpret_int(value_type x)
 275     {
 276         Cv64suf u;
 277         u.f = x;
 278         return u.i;
 279     }
 280     static uint_type reinterpet_uint(value_type x)
 281     {
 282         Cv64suf u;
 283         u.f = x;
 284         return u.u;
 285     }
 286     static value_type reinterpret_from_int(int_type x)
 287     {
 288         Cv64suf u;
 289         u.i = x;
 290         return u.f;
 291     }
 292 };
 293
 294 template <typename T> struct V_SIMD128Traits
 295 {
 296     enum { nlanes = 16 / sizeof(T) };
 297 };
 298
 299 //! @endcond
 300
 301 //! @}
 302
 303 #ifndef CV_DOXYGEN
 304 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 305 #endif
 306 }
 307
// When CV_DOXYGEN is defined, drop the platform selectors so that none of the
// SSE2 / NEON / VSX branches of the #if chain below is taken and the generic
// intrin_cpp.hpp header is included instead.
#ifdef CV_DOXYGEN
#   undef CV_SSE2
#   undef CV_NEON
#   undef CV_VSX
#endif
 313
 314 #if CV_SSE2
 315
 316 #include "opencv2/core/hal/intrin_sse.hpp"
 317
 318 #elif CV_NEON
 319
 320 #include "opencv2/core/hal/intrin_neon.hpp"
 321
 322 #elif CV_VSX
 323
 324 #include "opencv2/core/hal/intrin_vsx.hpp"
 325
 326 #else
 327
 328 #include "opencv2/core/hal/intrin_cpp.hpp"
 329
 330 #endif
 331
 332 //! @addtogroup core_hal_intrin
 333 //! @{
 334
 335 #ifndef CV_SIMD128
 336 //! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
 337 #define CV_SIMD128 0
 338 #endif
 339
 340 #ifndef CV_SIMD128_64F
 341 //! Set to 1 if current intrinsics implementation supports 64-bit float vectors
 342 #define CV_SIMD128_64F 0
 343 #endif
 344
 345 //! @}
 346
 347 //==================================================================================================
 348
 349 //! @cond IGNORED
 350
 351 namespace cv {
 352
 353 #ifndef CV_DOXYGEN
 354 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
 355 #endif
 356
// Maps a scalar element type R to its vector register type(s) and to
// zero()/all() factory functions. Only declared here: each supported element
// type gets an explicit specialization below.
template <typename R> struct V_RegTrait128;
 358
 359 template <> struct V_RegTrait128<uchar> {
 360     typedef v_uint8x16 reg;
 361     typedef v_uint16x8 w_reg;
 362     typedef v_uint32x4 q_reg;
 363     typedef v_uint8x16 u_reg;
 364     static v_uint8x16 zero() { return v_setzero_u8(); }
 365     static v_uint8x16 all(uchar val) { return v_setall_u8(val); }
 366 };
 367
 368 template <> struct V_RegTrait128<schar> {
 369     typedef v_int8x16 reg;
 370     typedef v_int16x8 w_reg;
 371     typedef v_int32x4 q_reg;
 372     typedef v_uint8x16 u_reg;
 373     static v_int8x16 zero() { return v_setzero_s8(); }
 374     static v_int8x16 all(schar val) { return v_setall_s8(val); }
 375 };
 376
 377 template <> struct V_RegTrait128<ushort> {
 378     typedef v_uint16x8 reg;
 379     typedef v_uint32x4 w_reg;
 380     typedef v_int16x8 int_reg;
 381     typedef v_uint16x8 u_reg;
 382     static v_uint16x8 zero() { return v_setzero_u16(); }
 383     static v_uint16x8 all(ushort val) { return v_setall_u16(val); }
 384 };
 385
 386 template <> struct V_RegTrait128<short> {
 387     typedef v_int16x8 reg;
 388     typedef v_int32x4 w_reg;
 389     typedef v_uint16x8 u_reg;
 390     static v_int16x8 zero() { return v_setzero_s16(); }
 391     static v_int16x8 all(short val) { return v_setall_s16(val); }
 392 };
 393
 394 template <> struct V_RegTrait128<unsigned> {
 395     typedef v_uint32x4 reg;
 396     typedef v_uint64x2 w_reg;
 397     typedef v_int32x4 int_reg;
 398     typedef v_uint32x4 u_reg;
 399     static v_uint32x4 zero() { return v_setzero_u32(); }
 400     static v_uint32x4 all(unsigned val) { return v_setall_u32(val); }
 401 };
 402
 403 template <> struct V_RegTrait128<int> {
 404     typedef v_int32x4 reg;
 405     typedef v_int64x2 w_reg;
 406     typedef v_uint32x4 u_reg;
 407     static v_int32x4 zero() { return v_setzero_s32(); }
 408     static v_int32x4 all(int val) { return v_setall_s32(val); }
 409 };
 410
 411 template <> struct V_RegTrait128<uint64> {
 412     typedef v_uint64x2 reg;
 413     static v_uint64x2 zero() { return v_setzero_u64(); }
 414     static v_uint64x2 all(uint64 val) { return v_setall_u64(val); }
 415 };
 416
 417 template <> struct V_RegTrait128<int64> {
 418     typedef v_int64x2 reg;
 419     static v_int64x2 zero() { return v_setzero_s64(); }
 420     static v_int64x2 all(int64 val) { return v_setall_s64(val); }
 421 };
 422
 423 template <> struct V_RegTrait128<float> {
 424     typedef v_float32x4 reg;
 425     typedef v_int32x4 int_reg;
 426     typedef v_float32x4 u_reg;
 427     static v_float32x4 zero() { return v_setzero_f32(); }
 428     static v_float32x4 all(float val) { return v_setall_f32(val); }
 429 };
 430
 431 #if CV_SIMD128_64F
 432 template <> struct V_RegTrait128<double> {
 433     typedef v_float64x2 reg;
 434     typedef v_int32x4 int_reg;
 435     typedef v_float64x2 u_reg;
 436     static v_float64x2 zero() { return v_setzero_f64(); }
 437     static v_float64x2 all(double val) { return v_setall_f64(val); }
 438 };
 439 #endif
 440
 441 inline unsigned int trailingZeros32(unsigned int value) {
 442 #if defined(_MSC_VER)
 443 #if (_MSC_VER < 1700)
 444     unsigned long index = 0;
 445     _BitScanForward(&index, value);
 446     return (unsigned int)index;
 447 #else
 448     return _tzcnt_u32(value);
 449 #endif
 450 #elif defined(__GNUC__) || defined(__GNUG__)
 451     return __builtin_ctz(value);
 452 #elif defined(__ICC) || defined(__INTEL_COMPILER)
 453     return _bit_scan_forward(value);
 454 #elif defined(__clang__)
 455     return llvm.cttz.i32(value, true);
 456 #else
 457     static const int MultiplyDeBruijnBitPosition[32] = {
 458         0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
 459         31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
 460     return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
 461 #endif
 462 }
 463
 464 #ifndef CV_DOXYGEN
 465 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 466 #endif
 467
 468 } // cv::
 469
 470 //! @endcond
 471
 472 #endif