1 // This file is part of Eigen, a lightweight C++ template library
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 // #include "src/Core/util/DisableStupidWarnings.h"
12 #ifndef EIGEN_WARNINGS_DISABLED
13 #define EIGEN_WARNINGS_DISABLED
15 #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
16 #pragma warning( push )
18 #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
19 #elif defined __INTEL_COMPILER
20 #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
23 #pragma warning disable 2196 279 1684 2259
24 #elif defined __clang__
25 #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
26 #pragma clang diagnostic push
28 #pragma clang diagnostic ignored "-Wconstant-logical-operand"
29 #elif defined __GNUC__ && __GNUC__>=6
30 #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
31 #pragma GCC diagnostic push
33 #pragma GCC diagnostic ignored "-Wignored-attributes"
36 #pragma diag_suppress code_is_unreachable
37 #pragma diag_suppress initialization_not_reachable
38 #pragma diag_suppress 1222
39 #pragma diag_suppress 2527
40 #pragma diag_suppress 2529
41 #pragma diag_suppress 2651
42 #pragma diag_suppress 2653
43 #pragma diag_suppress 2668
44 #pragma diag_suppress 2669
45 #pragma diag_suppress 2670
46 #pragma diag_suppress 2671
47 #pragma diag_suppress 2735
48 #pragma diag_suppress 2737
51 // end #include "src/Core/util/DisableStupidWarnings.h"
52 #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
53 #ifndef EIGEN_NO_DEBUG
54 #define EIGEN_NO_DEBUG
56 #ifdef EIGEN_INTERNAL_DEBUGGING
57 #undef EIGEN_INTERNAL_DEBUGGING
59 #ifdef EIGEN_EXCEPTIONS
60 #undef EIGEN_EXCEPTIONS
63 #ifndef EIGEN_DONT_VECTORIZE
64 #define EIGEN_DONT_VECTORIZE
66 #define EIGEN_DEVICE_FUNC __host__ __device__
67 #include <math_functions.hpp>
69 #define EIGEN_DEVICE_FUNC
72 #define EIGEN_DEVICE_FUNC
74 #if defined(__CUDA_ARCH__) && defined(__NVCC__)
75 #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
77 #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
79 #if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
80 #define EIGEN_EXCEPTIONS
82 #ifdef EIGEN_EXCEPTIONS
85 // #include "src/Core/util/Macros.h"
86 #ifndef EIGEN_MACROS_H
87 #define EIGEN_MACROS_H
88 #define EIGEN_WORLD_VERSION 3
89 #define EIGEN_MAJOR_VERSION 3
90 #define EIGEN_MINOR_VERSION 4
91 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
92 (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
93 EIGEN_MINOR_VERSION>=z))))
95 #define EIGEN_COMP_GNUC 1
97 #define EIGEN_COMP_GNUC 0
99 #if defined(__clang__)
100 #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__)
102 #define EIGEN_COMP_CLANG 0
104 #if defined(__llvm__)
105 #define EIGEN_COMP_LLVM 1
107 #define EIGEN_COMP_LLVM 0
109 #if defined(__INTEL_COMPILER)
110 #define EIGEN_COMP_ICC __INTEL_COMPILER
112 #define EIGEN_COMP_ICC 0
114 #if defined(__MINGW32__)
115 #define EIGEN_COMP_MINGW 1
117 #define EIGEN_COMP_MINGW 0
119 #if defined(__SUNPRO_CC)
120 #define EIGEN_COMP_SUNCC 1
122 #define EIGEN_COMP_SUNCC 0
124 #if defined(_MSC_VER)
125 #define EIGEN_COMP_MSVC _MSC_VER
127 #define EIGEN_COMP_MSVC 0
129 #if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)
130 #define EIGEN_COMP_MSVC_STRICT _MSC_VER
132 #define EIGEN_COMP_MSVC_STRICT 0
134 #if defined(__IBMCPP__) || defined(__xlc__)
135 #define EIGEN_COMP_IBM 1
137 #define EIGEN_COMP_IBM 0
140 #define EIGEN_COMP_PGI 1
142 #define EIGEN_COMP_PGI 0
144 #if defined(__CC_ARM) || defined(__ARMCC_VERSION)
145 #define EIGEN_COMP_ARM 1
147 #define EIGEN_COMP_ARM 0
149 #if defined(__EMSCRIPTEN__)
150 #define EIGEN_COMP_EMSCRIPTEN 1
152 #define EIGEN_COMP_EMSCRIPTEN 0
154 #if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN)
155 #define EIGEN_COMP_GNUC_STRICT 1
157 #define EIGEN_COMP_GNUC_STRICT 0
160 #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
161 #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
162 #define EIGEN_GNUC_AT(x,y) ( __GNUC__==x && __GNUC_MINOR__==y )
164 #define EIGEN_GNUC_AT_LEAST(x,y) 0
165 #define EIGEN_GNUC_AT_MOST(x,y) 0
166 #define EIGEN_GNUC_AT(x,y) 0
168 #if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
169 #define EIGEN_GCC3_OR_OLDER 1
171 #define EIGEN_GCC3_OR_OLDER 0
173 #if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
174 #define EIGEN_ARCH_x86_64 1
176 #define EIGEN_ARCH_x86_64 0
178 #if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
179 #define EIGEN_ARCH_i386 1
181 #define EIGEN_ARCH_i386 0
183 #if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386
184 #define EIGEN_ARCH_i386_OR_x86_64 1
186 #define EIGEN_ARCH_i386_OR_x86_64 0
189 #define EIGEN_ARCH_ARM 1
191 #define EIGEN_ARCH_ARM 0
193 #if defined(__aarch64__)
194 #define EIGEN_ARCH_ARM64 1
196 #define EIGEN_ARCH_ARM64 0
198 #if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
199 #define EIGEN_ARCH_ARM_OR_ARM64 1
201 #define EIGEN_ARCH_ARM_OR_ARM64 0
203 #if defined(__mips__) || defined(__mips)
204 #define EIGEN_ARCH_MIPS 1
206 #define EIGEN_ARCH_MIPS 0
208 #if defined(__sparc__) || defined(__sparc)
209 #define EIGEN_ARCH_SPARC 1
211 #define EIGEN_ARCH_SPARC 0
213 #if defined(__ia64__)
214 #define EIGEN_ARCH_IA64 1
216 #define EIGEN_ARCH_IA64 0
218 #if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
219 #define EIGEN_ARCH_PPC 1
221 #define EIGEN_ARCH_PPC 0
223 #if defined(__unix__) || defined(__unix)
224 #define EIGEN_OS_UNIX 1
226 #define EIGEN_OS_UNIX 0
228 #if defined(__linux__)
229 #define EIGEN_OS_LINUX 1
231 #define EIGEN_OS_LINUX 0
233 #if defined(__ANDROID__) || defined(ANDROID)
234 #define EIGEN_OS_ANDROID 1
236 #define EIGEN_OS_ANDROID 0
238 #if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)
239 #define EIGEN_OS_GNULINUX 1
241 #define EIGEN_OS_GNULINUX 0
243 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)
244 #define EIGEN_OS_BSD 1
246 #define EIGEN_OS_BSD 0
248 #if defined(__APPLE__)
249 #define EIGEN_OS_MAC 1
251 #define EIGEN_OS_MAC 0
254 #define EIGEN_OS_QNX 1
256 #define EIGEN_OS_QNX 0
259 #define EIGEN_OS_WIN 1
261 #define EIGEN_OS_WIN 0
264 #define EIGEN_OS_WIN64 1
266 #define EIGEN_OS_WIN64 0
268 #if defined(_WIN32_WCE)
269 #define EIGEN_OS_WINCE 1
271 #define EIGEN_OS_WINCE 0
273 #if defined(__CYGWIN__)
274 #define EIGEN_OS_CYGWIN 1
276 #define EIGEN_OS_CYGWIN 0
278 #if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN )
279 #define EIGEN_OS_WIN_STRICT 1
281 #define EIGEN_OS_WIN_STRICT 0
283 #if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
284 #define EIGEN_OS_SUN 1
286 #define EIGEN_OS_SUN 0
288 #if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
289 #define EIGEN_OS_SOLARIS 1
291 #define EIGEN_OS_SOLARIS 0
293 #if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
294 #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
296 #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
298 #define EIGEN_NOT_A_MACRO
299 #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
300 #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
302 #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
304 #ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
305 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
308 # define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
310 # define EIGEN_HAS_BUILTIN(x) 0
312 #ifndef __has_feature
313 # define __has_feature(x) 0
315 #ifndef EIGEN_MAX_CPP_VER
316 #define EIGEN_MAX_CPP_VER 99
318 #if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900)
319 #define EIGEN_HAS_CXX11 1
321 #define EIGEN_HAS_CXX11 0
323 #ifndef EIGEN_HAS_RVALUE_REFERENCES
324 #if EIGEN_MAX_CPP_VER>=11 && \
325 (__has_feature(cxx_rvalue_references) || \
326 (defined(__cplusplus) && __cplusplus >= 201103L) || \
327 (EIGEN_COMP_MSVC >= 1600))
328 #define EIGEN_HAS_RVALUE_REFERENCES 1
330 #define EIGEN_HAS_RVALUE_REFERENCES 0
333 #ifndef EIGEN_HAS_C99_MATH
334 #if EIGEN_MAX_CPP_VER>=11 && \
335 ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
336 || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
337 || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)))
338 #define EIGEN_HAS_C99_MATH 1
340 #define EIGEN_HAS_C99_MATH 0
343 #ifndef EIGEN_HAS_STD_RESULT_OF
344 #if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)))
345 #define EIGEN_HAS_STD_RESULT_OF 1
347 #define EIGEN_HAS_STD_RESULT_OF 0
350 #ifndef EIGEN_HAS_VARIADIC_TEMPLATES
351 #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
352 && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
353 #define EIGEN_HAS_VARIADIC_TEMPLATES 1
355 #define EIGEN_HAS_VARIADIC_TEMPLATES 0
358 #ifndef EIGEN_HAS_CONSTEXPR
360 #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
361 #define EIGEN_HAS_CONSTEXPR 1
363 #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
364 (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)))
365 #define EIGEN_HAS_CONSTEXPR 1
367 #ifndef EIGEN_HAS_CONSTEXPR
368 #define EIGEN_HAS_CONSTEXPR 0
371 #ifndef EIGEN_HAS_CXX11_MATH
372 #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
373 && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))
374 #define EIGEN_HAS_CXX11_MATH 1
376 #define EIGEN_HAS_CXX11_MATH 0
379 #ifndef EIGEN_HAS_CXX11_CONTAINERS
380 #if EIGEN_MAX_CPP_VER>=11 && \
381 ((__cplusplus > 201103L) \
382 || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
383 || EIGEN_COMP_MSVC >= 1900)
384 #define EIGEN_HAS_CXX11_CONTAINERS 1
386 #define EIGEN_HAS_CXX11_CONTAINERS 0
389 #ifndef EIGEN_HAS_CXX11_NOEXCEPT
390 #if EIGEN_MAX_CPP_VER>=11 && \
391 (__has_feature(cxx_noexcept) \
392 || (__cplusplus > 201103L) \
393 || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
394 || EIGEN_COMP_MSVC >= 1900)
395 #define EIGEN_HAS_CXX11_NOEXCEPT 1
397 #define EIGEN_HAS_CXX11_NOEXCEPT 0
400 #ifndef EIGEN_FAST_MATH
401 #define EIGEN_FAST_MATH 1
403 #define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
404 #define EIGEN_CAT2(a,b) a ## b
405 #define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
406 #define EIGEN_COMMA ,
407 #define EIGEN_MAKESTRING2(a) #a
408 #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
409 #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
410 #define EIGEN_STRONG_INLINE __forceinline
412 #define EIGEN_STRONG_INLINE inline
414 #if EIGEN_GNUC_AT_LEAST(4,2)
415 #define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
417 #define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
420 #define EIGEN_DONT_INLINE __attribute__((noinline))
421 #elif EIGEN_COMP_MSVC
422 #define EIGEN_DONT_INLINE __declspec(noinline)
424 #define EIGEN_DONT_INLINE
427 #define EIGEN_PERMISSIVE_EXPR __extension__
429 #define EIGEN_PERMISSIVE_EXPR
431 #define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
432 #define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline
434 # ifndef EIGEN_NO_DEBUG
435 # define EIGEN_NO_DEBUG
438 #ifdef EIGEN_NO_DEBUG
439 #define eigen_plain_assert(x)
441 #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
// Identity pass-through for a bool. Used by the eigen_plain_assert machinery
// below; presumably routed through a function call so the compiler cannot
// fold the asserted condition away / warn on constant conditions —
// TODO(review): confirm against Eigen's assert documentation.
// (Leading "444" is extraction residue: the original file's line number.)
444 inline bool copy_bool(bool b) { return b; }
447 #define eigen_plain_assert(x) assert(x)
// Non-inlined variant of copy_bool (EIGEN_DONT_INLINE is defined earlier in
// this file as __attribute__((noinline)) / __declspec(noinline)); selected on
// the branch where the standard assert macro is not safe to use, presumably
// to force an actual call so the condition is evaluated exactly once —
// TODO(review): confirm intent.
454 EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; }
456 inline void assert_fail(const char *condition, const char *function, const char *file, int line)
458 std::cerr << "assertion failed: " << condition << " in function " << function << " at " << file << ":" << line << std::endl;
463 #define eigen_plain_assert(x) \
465 if(!Eigen::internal::copy_bool(x)) \
466 Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \
471 #define eigen_assert(x) eigen_plain_assert(x)
473 #ifdef EIGEN_INTERNAL_DEBUGGING
474 #define eigen_internal_assert(x) eigen_assert(x)
476 #define eigen_internal_assert(x)
478 #ifdef EIGEN_NO_DEBUG
479 #define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x)
481 #define EIGEN_ONLY_USED_FOR_DEBUG(x)
483 #ifndef EIGEN_NO_DEPRECATED_WARNING
485 #define EIGEN_DEPRECATED __attribute__((deprecated))
486 #elif EIGEN_COMP_MSVC
487 #define EIGEN_DEPRECATED __declspec(deprecated)
489 #define EIGEN_DEPRECATED
492 #define EIGEN_DEPRECATED
495 #define EIGEN_UNUSED __attribute__((unused))
// No-op sink taking any value by const reference. The EIGEN_UNUSED_VARIABLE
// macro defined on the next line forwards its argument here, silencing
// unused-variable warnings without generating code. EIGEN_DEVICE_FUNC makes
// it callable from device code when compiling under CUDA (see the
// __host__ __device__ definition earlier in this file).
501 template<typename T> EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {}
504 #define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
505 #if !defined(EIGEN_ASM_COMMENT)
506 #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
507 #define EIGEN_ASM_COMMENT(X) __asm__("#" X)
509 #define EIGEN_ASM_COMMENT(X)
512 #if (defined __CUDACC__)
513 #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
514 #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
515 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
516 #elif EIGEN_COMP_MSVC
517 #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
518 #elif EIGEN_COMP_SUNCC
519 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
521 #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
523 #if defined(EIGEN_DONT_VECTORIZE)
524 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
525 #elif defined(EIGEN_VECTORIZE_AVX512)
526 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
527 #elif defined(__AVX__)
528 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
530 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
532 #define EIGEN_MIN_ALIGN_BYTES 16
533 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
534 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
536 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
537 #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
538 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
540 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
542 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
543 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
544 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
545 #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
546 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
548 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
550 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
551 && !EIGEN_GCC3_OR_OLDER \
552 && !EIGEN_COMP_SUNCC \
554 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
556 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
558 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
559 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
561 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
564 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
565 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
566 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
568 #if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
569 #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
571 #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
572 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
573 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
574 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
575 #if EIGEN_MAX_STATIC_ALIGN_BYTES>0
576 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
578 #define EIGEN_ALIGN_MAX
580 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
581 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
583 #ifdef EIGEN_DONT_ALIGN
584 #ifdef EIGEN_MAX_ALIGN_BYTES
585 #undef EIGEN_MAX_ALIGN_BYTES
587 #define EIGEN_MAX_ALIGN_BYTES 0
588 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
589 #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
591 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
592 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
594 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
596 #ifndef EIGEN_UNALIGNED_VECTORIZE
597 #define EIGEN_UNALIGNED_VECTORIZE 1
599 #ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
600 #define EIGEN_RESTRICT
602 #ifndef EIGEN_RESTRICT
603 #define EIGEN_RESTRICT __restrict
605 #ifndef EIGEN_STACK_ALLOCATION_LIMIT
606 #define EIGEN_STACK_ALLOCATION_LIMIT 131072
608 #ifndef EIGEN_DEFAULT_IO_FORMAT
609 #ifdef EIGEN_MAKING_DOCS
610 #define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, " ", "\n", "", "")
612 #define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
616 #if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__))
617 #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
618 using Base::operator =;
619 #elif EIGEN_COMP_CLANG
620 #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
621 using Base::operator =; \
622 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
623 template <typename OtherDerived> \
624 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
626 #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
627 using Base::operator =; \
628 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
630 Base::operator=(other); \
634 #define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
635 #define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
636 typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; \
637 typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; \
638 typedef typename Base::CoeffReturnType CoeffReturnType; \
639 typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
640 typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
641 typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
642 enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
643 ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
644 Flags = Eigen::internal::traits<Derived>::Flags, \
645 SizeAtCompileTime = Base::SizeAtCompileTime, \
646 MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
647 IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
648 using Base::derived; \
649 using Base::const_cast_derived;
650 #define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
651 EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
652 typedef typename Base::PacketScalar PacketScalar;
653 #define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
654 #define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
655 #define EIGEN_SIZE_MIN_PREFER_DYNAMIC(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
656 : ((int)a == 1 || (int)b == 1) ? 1 \
657 : ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
658 : ((int)a <= (int)b) ? (int)a : (int)b)
659 #define EIGEN_SIZE_MIN_PREFER_FIXED(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
660 : ((int)a == 1 || (int)b == 1) ? 1 \
661 : ((int)a == Dynamic && (int)b == Dynamic) ? Dynamic \
662 : ((int)a == Dynamic) ? (int)b \
663 : ((int)b == Dynamic) ? (int)a \
664 : ((int)a <= (int)b) ? (int)a : (int)b)
665 #define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
666 : ((int)a >= (int)b) ? (int)a : (int)b)
667 #define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b)))
668 #define EIGEN_IMPLIES(a,b) (!(a) || (b))
669 #define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \
671 EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \
672 typename internal::traits<LHS>::Scalar, \
673 typename internal::traits<RHS>::Scalar \
678 #define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \
679 template<typename OtherDerived> \
680 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \
681 (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
683 return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \
685 #define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \
686 (Eigen::internal::has_ReturnType<Eigen::ScalarBinaryOpTraits<TYPEA,TYPEB,EIGEN_CAT(EIGEN_CAT(Eigen::internal::scalar_,OPNAME),_op)<TYPEA,TYPEB> > >::value)
687 #define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \
688 CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<typename internal::traits<EXPR>::Scalar,SCALAR>, const EXPR, \
689 const typename internal::plain_constant_type<EXPR,SCALAR>::type>
690 #define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \
691 CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<SCALAR,typename internal::traits<EXPR>::Scalar>, \
692 const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>
693 #if EIGEN_COMP_MSVC_STRICT<=1600
694 #define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type
696 #define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X
698 #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \
699 template <typename T> EIGEN_DEVICE_FUNC inline \
700 EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\
701 (METHOD)(const T& scalar) const { \
702 typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \
703 return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \
704 typename internal::plain_constant_type<Derived,PromotedT>::type(derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \
706 #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
707 template <typename T> EIGEN_DEVICE_FUNC inline friend \
708 EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \
709 (METHOD)(const T& scalar, const StorageBaseType& matrix) { \
710 typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \
711 return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \
712 typename internal::plain_constant_type<Derived,PromotedT>::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), matrix.derived()); \
714 #define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \
715 EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
716 EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME)
717 #ifdef EIGEN_EXCEPTIONS
718 # define EIGEN_THROW_X(X) throw X
719 # define EIGEN_THROW throw
720 # define EIGEN_TRY try
721 # define EIGEN_CATCH(X) catch (X)
723 # ifdef __CUDA_ARCH__
724 # define EIGEN_THROW_X(X) asm("trap;")
725 # define EIGEN_THROW asm("trap;")
727 # define EIGEN_THROW_X(X) std::abort()
728 # define EIGEN_THROW std::abort()
730 # define EIGEN_TRY if (true)
731 # define EIGEN_CATCH(X) else
733 #if EIGEN_HAS_CXX11_NOEXCEPT
734 # define EIGEN_INCLUDE_TYPE_TRAITS
735 # define EIGEN_NOEXCEPT noexcept
736 # define EIGEN_NOEXCEPT_IF(x) noexcept(x)
737 # define EIGEN_NO_THROW noexcept(true)
738 # define EIGEN_EXCEPTION_SPEC(X) noexcept(false)
740 # define EIGEN_NOEXCEPT
741 # define EIGEN_NOEXCEPT_IF(x)
742 # define EIGEN_NO_THROW throw()
743 # define EIGEN_EXCEPTION_SPEC(X) throw(X)
746 // end #include "src/Core/util/Macros.h"
747 #if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
748 #pragma GCC optimize ("-fno-ipa-cp-clone")
751 #if EIGEN_MAX_ALIGN_BYTES==0
752 #ifndef EIGEN_DONT_VECTORIZE
753 #define EIGEN_DONT_VECTORIZE
758 #if (EIGEN_COMP_MSVC >= 1500)
759 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
760 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
764 #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
765 #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
768 #ifndef EIGEN_DONT_VECTORIZE
769 #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
770 #define EIGEN_VECTORIZE
771 #define EIGEN_VECTORIZE_SSE
772 #define EIGEN_VECTORIZE_SSE2
774 #define EIGEN_VECTORIZE_SSE3
777 #define EIGEN_VECTORIZE_SSSE3
780 #define EIGEN_VECTORIZE_SSE4_1
783 #define EIGEN_VECTORIZE_SSE4_2
786 #define EIGEN_VECTORIZE_AVX
787 #define EIGEN_VECTORIZE_SSE3
788 #define EIGEN_VECTORIZE_SSSE3
789 #define EIGEN_VECTORIZE_SSE4_1
790 #define EIGEN_VECTORIZE_SSE4_2
793 #define EIGEN_VECTORIZE_AVX2
796 #define EIGEN_VECTORIZE_FMA
798 #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
799 #define EIGEN_VECTORIZE_AVX512
800 #define EIGEN_VECTORIZE_AVX2
801 #define EIGEN_VECTORIZE_AVX
802 #define EIGEN_VECTORIZE_FMA
804 #define EIGEN_VECTORIZE_AVX512DQ
808 #if EIGEN_COMP_ICC >= 1110
809 #include <immintrin.h>
811 #include <mmintrin.h>
812 #include <emmintrin.h>
813 #include <xmmintrin.h>
814 #ifdef EIGEN_VECTORIZE_SSE3
815 #include <pmmintrin.h>
817 #ifdef EIGEN_VECTORIZE_SSSE3
818 #include <tmmintrin.h>
820 #ifdef EIGEN_VECTORIZE_SSE4_1
821 #include <smmintrin.h>
823 #ifdef EIGEN_VECTORIZE_SSE4_2
824 #include <nmmintrin.h>
826 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
827 #include <immintrin.h>
831 #elif defined __VSX__
832 #define EIGEN_VECTORIZE
833 #define EIGEN_VECTORIZE_VSX
838 #elif defined __ALTIVEC__
839 #define EIGEN_VECTORIZE
840 #define EIGEN_VECTORIZE_ALTIVEC
845 #elif (defined __ARM_NEON) || (defined __ARM_NEON__)
846 #define EIGEN_VECTORIZE
847 #define EIGEN_VECTORIZE_NEON
848 #include <arm_neon.h>
849 #elif (defined __s390x__ && defined __VEC__)
850 #define EIGEN_VECTORIZE
851 #define EIGEN_VECTORIZE_ZVECTOR
852 #include <vecintrin.h>
855 #if defined(__F16C__) && !defined(EIGEN_COMP_CLANG)
856 #define EIGEN_HAS_FP16_C
858 #if defined __CUDACC__
859 #define EIGEN_VECTORIZE_CUDA
860 #include <vector_types.h>
861 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
862 #define EIGEN_HAS_CUDA_FP16
865 #if defined EIGEN_HAS_CUDA_FP16
866 #include <host_defines.h>
867 #include <cuda_fp16.h>
869 #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
870 #define EIGEN_HAS_OPENMP
872 #ifdef EIGEN_HAS_OPENMP
875 #if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
876 #define EIGEN_HAS_ERRNO
878 #ifdef EIGEN_HAS_ERRNO
885 #include <functional>
892 #ifdef EIGEN_INCLUDE_TYPE_TRAITS
893 #include <type_traits>
895 #ifdef EIGEN_DEBUG_ASSIGN
898 #if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
902 inline static const char *SimdInstructionSetsInUse(void) {
903 #if defined(EIGEN_VECTORIZE_AVX512)
904 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
905 #elif defined(EIGEN_VECTORIZE_AVX)
906 return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
907 #elif defined(EIGEN_VECTORIZE_SSE4_2)
908 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
909 #elif defined(EIGEN_VECTORIZE_SSE4_1)
910 return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
911 #elif defined(EIGEN_VECTORIZE_SSSE3)
912 return "SSE, SSE2, SSE3, SSSE3";
913 #elif defined(EIGEN_VECTORIZE_SSE3)
914 return "SSE, SSE2, SSE3";
915 #elif defined(EIGEN_VECTORIZE_SSE2)
917 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
919 #elif defined(EIGEN_VECTORIZE_VSX)
921 #elif defined(EIGEN_VECTORIZE_NEON)
923 #elif defined(EIGEN_VECTORIZE_ZVECTOR)
924 return "S390X ZVECTOR";
930 #if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
931 #error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
935 using std::ptrdiff_t;
937 // #include "src/Core/util/Constants.h"
938 #ifndef EIGEN_CONSTANTS_H
939 #define EIGEN_CONSTANTS_H
// Core compile-time constants.
// Dynamic (-1) is the sentinel for a size/stride fixed only at run time; the
// EIGEN_SIZE_MIN/MAX macros earlier in this file special-case it explicitly.
// Infinity reuses -1 — presumably for norm selection; TODO(review): confirm.
// HugeCost is a large-but-finite cost used by expression cost heuristics —
// TODO(review): confirm against the evaluator cost model.
941 const int Dynamic = -1;
942 const int DynamicIndex = 0xffffff;
943 const int Infinity = -1;
944 const int HugeCost = 10000;
// Expression flag bits (disjoint powers of two, OR-combined in traits).
945 const unsigned int RowMajorBit = 0x1;
946 const unsigned int EvalBeforeNestingBit = 0x2;
948 const unsigned int EvalBeforeAssigningBit = 0x4;
949 const unsigned int PacketAccessBit = 0x8;
950 #ifdef EIGEN_VECTORIZE
951 const unsigned int ActualPacketAccessBit = PacketAccessBit;
953 const unsigned int ActualPacketAccessBit = 0x0;
// Remaining expression flag bits, continuing the power-of-two sequence above.
// AlignedBit is explicitly marked EIGEN_DEPRECATED (defined earlier in this
// file as the compiler's deprecation attribute).
955 const unsigned int LinearAccessBit = 0x10;
956 const unsigned int LvalueBit = 0x20;
957 const unsigned int DirectAccessBit = 0x40;
958 EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80;
959 const unsigned int NestByRefBit = 0x100;
960 const unsigned int NoPreferredStorageOrderBit = 0x200;
961 const unsigned int CompressedAccessBit = 0x400;
// HereditaryBits: flags propagated from operands to compound expressions —
// TODO(review): this two-line initializer carries fused line-number residue
// ("963") between its operands in this copy of the file; the original reads
// "RowMajorBit | EvalBeforeNestingBit".
962 const unsigned int HereditaryBits = RowMajorBit
963 | EvalBeforeNestingBit;
969 UnitLower=UnitDiag|Lower,
970 UnitUpper=UnitDiag|Upper,
971 StrictlyLower=ZeroDiag|Lower,
972 StrictlyUpper=ZeroDiag|Upper,
985 #if EIGEN_MAX_ALIGN_BYTES==128
986 AlignedMax = Aligned128
987 #elif EIGEN_MAX_ALIGN_BYTES==64
988 AlignedMax = Aligned64
989 #elif EIGEN_MAX_ALIGN_BYTES==32
990 AlignedMax = Aligned32
991 #elif EIGEN_MAX_ALIGN_BYTES==16
992 AlignedMax = Aligned16
993 #elif EIGEN_MAX_ALIGN_BYTES==8
994 AlignedMax = Aligned8
995 #elif EIGEN_MAX_ALIGN_BYTES==0
996 AlignedMax = Unaligned
998 #error Invalid value for EIGEN_MAX_ALIGN_BYTES
1001 enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
1002 enum DirectionType {
1007 enum TraversalType {
1010 InnerVectorizedTraversal,
1011 LinearVectorizedTraversal,
1012 SliceVectorizedTraversal,
1016 enum UnrollingType {
1021 enum SpecializedType {
1025 enum StorageOptions {
1035 enum NoChange_t { NoChange };
1036 enum Sequential_t { Sequential };
1037 enum Default_t { Default };
1038 enum AmbiVectorMode {
1042 enum AccessorLevels {
1046 DirectWriteAccessors
1048 enum DecompositionOptions {
1051 ComputeFullU = 0x04,
1052 ComputeThinU = 0x08,
1053 ComputeFullV = 0x10,
1054 ComputeThinV = 0x20,
1055 EigenvaluesOnly = 0x40,
1056 ComputeEigenvectors = 0x80,
1057 EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
1061 GenEigMask = Ax_lBx | ABx_lx | BAx_lx
1063 enum QRPreconditioners {
1065 HouseholderQRPreconditioner,
1066 ColPivHouseholderQRPreconditioner,
1067 FullPivHouseholderQRPreconditioner
1070 #error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
1072 enum ComputationInfo {
1078 enum TransformTraits {
1081 AffineCompact = 0x10 | Affine,
1084 namespace Architecture
1092 #if defined EIGEN_VECTORIZE_SSE
1094 #elif defined EIGEN_VECTORIZE_ALTIVEC
1096 #elif defined EIGEN_VECTORIZE_VSX
1098 #elif defined EIGEN_VECTORIZE_NEON
1105 enum ProductImplType
1106 { DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
1107 enum Action {GetAction, SetAction};
// Empty tag types identifying the storage kind of solver/permutation
// expressions; they carry no data and exist purely for overload dispatch.
struct SolverStorage {};
struct PermutationStorage {};
struct TranspositionsStorage {};

// Tag marking the matrix flavour of an expression.
struct MatrixXpr {};

// Shape tags used by the evaluator dispatch machinery. Each exposes a
// debugName() helper returning a human-readable identifier for debugging.
struct DenseShape {
  static std::string debugName() { return "DenseShape"; }
};
struct SolverShape {
  static std::string debugName() { return "SolverShape"; }
};
struct HomogeneousShape {
  static std::string debugName() { return "HomogeneousShape"; }
};
struct DiagonalShape {
  static std::string debugName() { return "DiagonalShape"; }
};
struct BandShape {
  static std::string debugName() { return "BandShape"; }
};
struct TriangularShape {
  static std::string debugName() { return "TriangularShape"; }
};
struct SelfAdjointShape {
  static std::string debugName() { return "SelfAdjointShape"; }
};
struct PermutationShape {
  static std::string debugName() { return "PermutationShape"; }
};
struct TranspositionsShape {
  static std::string debugName() { return "TranspositionsShape"; }
};
struct SparseShape {
  static std::string debugName() { return "SparseShape"; }
};
1125 namespace internal {
1126 struct IndexBased {};
1127 struct IteratorBased {};
1128 enum ComparisonName {
1140 // end #include "src/Core/util/Constants.h"
1141 // #include "src/Core/util/Meta.h"
1142 #ifndef EIGEN_META_H
1143 #define EIGEN_META_H
1144 #if defined(__CUDA_ARCH__)
1146 #include <math_constants.h>
1148 #if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
1152 typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;
1153 typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index;
1154 namespace internal {
1155 #if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
1156 typedef std::intptr_t IntPtr;
1157 typedef std::uintptr_t UIntPtr;
1159 typedef std::ptrdiff_t IntPtr;
1160 typedef std::size_t UIntPtr;
// Compile-time boolean tags (pre-C++11 stand-ins for std::true_type and
// std::false_type, exposing the result through an anonymous enum `value`).
struct true_type
{
  enum { value = 1 };
};
struct false_type
{
  enum { value = 0 };
};
// Compile-time type selection (pre-C++11 analogue of std::conditional):
// yields `Then` when the flag is true, `Else` otherwise.
template<bool Condition, typename Then, typename Else>
struct conditional
{
  // Primary template covers the true case.
  typedef Then type;
};
// Partial specialization covering the false case.
template<typename Then, typename Else>
struct conditional<false, Then, Else>
{
  typedef Else type;
};
// Compile-time type equality test (pre-C++11 analogue of std::is_same).
template<typename T, typename U> struct is_same
{
  enum { value = 0 }; // distinct types
};
template<typename T> struct is_same<T,T>
{
  enum { value = 1 }; // identical types
};
// Strips one level of lvalue reference from a type (pre-C++11 analogue of
// std::remove_reference; rvalue references are deliberately not handled).
template<typename T> struct remove_reference
{
  typedef T type; // non-reference types pass through unchanged
};
template<typename T> struct remove_reference<T&>
{
  typedef T type;
};
// Strips one level of pointer from a type; a const pointer (T* const) is
// unwrapped as well, while constness of the pointee is preserved.
template<typename T> struct remove_pointer
{
  typedef T type;
};
template<typename T> struct remove_pointer<T*>
{
  typedef T type;
};
template<typename T> struct remove_pointer<T*const>
{
  typedef T type;
};
// Removes a top-level const qualifier, with dedicated specializations so
// that arrays of const elements also lose the qualifier.
template <class T> struct remove_const
{
  typedef T type;
};
template <class T> struct remove_const<const T>
{
  typedef T type;
};
template <class T> struct remove_const<const T[]>
{
  typedef T type[]; // unbounded array of const T -> unbounded array of T
};
template <class T, unsigned int Size> struct remove_const<const T[Size]>
{
  typedef T type[Size]; // bounded array case
};
// Recursively strips every combination of const, reference and pointer
// layers from a type, e.g. `const Foo* const&` -> `Foo`.
template<typename T> struct remove_all
{
  typedef T type; // recursion terminates on the bare type
};
template<typename T> struct remove_all<const T>
{
  typedef typename remove_all<T>::type type;
};
template<typename T> struct remove_all<T const&>
{
  typedef typename remove_all<T>::type type;
};
template<typename T> struct remove_all<T&>
{
  typedef typename remove_all<T>::type type;
};
template<typename T> struct remove_all<T const*>
{
  typedef typename remove_all<T>::type type;
};
template<typename T> struct remove_all<T*>
{
  typedef typename remove_all<T>::type type;
};
// Compile-time arithmetic-type trait (pre-C++11 analogue of std::is_arithmetic,
// restricted to the built-in types Eigen cares about).
//
// Fix: added the (unsigned) long long specializations. Without them, 64-bit
// integer scalars fell through to the primary template and were classified as
// non-arithmetic, which is inconsistent with std::is_arithmetic (the file
// already relies on `long long` elsewhere, e.g. its numeric_limits shims).
template<typename T> struct is_arithmetic { enum { value = false }; };
template<> struct is_arithmetic<float> { enum { value = true }; };
template<> struct is_arithmetic<double> { enum { value = true }; };
template<> struct is_arithmetic<long double> { enum { value = true }; };
template<> struct is_arithmetic<bool> { enum { value = true }; };
template<> struct is_arithmetic<char> { enum { value = true }; };
template<> struct is_arithmetic<signed char> { enum { value = true }; };
template<> struct is_arithmetic<unsigned char> { enum { value = true }; };
template<> struct is_arithmetic<signed short> { enum { value = true }; };
template<> struct is_arithmetic<unsigned short>{ enum { value = true }; };
template<> struct is_arithmetic<signed int> { enum { value = true }; };
template<> struct is_arithmetic<unsigned int> { enum { value = true }; };
template<> struct is_arithmetic<signed long> { enum { value = true }; };
template<> struct is_arithmetic<unsigned long> { enum { value = true }; };
template<> struct is_arithmetic<signed long long> { enum { value = true }; };
template<> struct is_arithmetic<unsigned long long> { enum { value = true }; };
// Compile-time integral-type trait (pre-C++11 analogue of std::is_integral,
// restricted to the built-in types Eigen cares about).
//
// Fix: added the (unsigned) long long specializations. Without them, 64-bit
// integer index/scalar types were classified as non-integral, diverging from
// std::is_integral and from the sibling is_arithmetic trait above.
template<typename T> struct is_integral { enum { value = false }; };
template<> struct is_integral<bool> { enum { value = true }; };
template<> struct is_integral<char> { enum { value = true }; };
template<> struct is_integral<signed char> { enum { value = true }; };
template<> struct is_integral<unsigned char> { enum { value = true }; };
template<> struct is_integral<signed short> { enum { value = true }; };
template<> struct is_integral<unsigned short> { enum { value = true }; };
template<> struct is_integral<signed int> { enum { value = true }; };
template<> struct is_integral<unsigned int> { enum { value = true }; };
template<> struct is_integral<signed long> { enum { value = true }; };
template<> struct is_integral<unsigned long> { enum { value = true }; };
template<> struct is_integral<signed long long> { enum { value = true }; };
template<> struct is_integral<unsigned long long> { enum { value = true }; };
// Adds a top-level const qualifier. References pass through untouched,
// because a reference itself can never be const-qualified.
template <typename T> struct add_const
{
  typedef const T type;
};
template <typename T> struct add_const<T&>
{
  typedef T& type;
};
// Detects a top-level const qualifier on T.
template <typename T> struct is_const
{
  enum { value = 0 };
};
template <typename T> struct is_const<T const>
{
  enum { value = 1 };
};
// Applies const to the *value* a type gives access to: the referee for
// references, the pointee for pointers, and the type itself otherwise.
template<typename T> struct add_const_on_value_type
{
  typedef const T type;
};
template<typename T> struct add_const_on_value_type<T&>
{
  typedef T const& type;
};
template<typename T> struct add_const_on_value_type<T*>
{
  typedef T const* type;
};
template<typename T> struct add_const_on_value_type<T* const>
{
  typedef T const* const type;
};
template<typename T> struct add_const_on_value_type<T const* const>
{
  typedef T const* const type;
};
// --- Compile-time convertibility test ------------------------------------
// Pre-C++11 stand-in for std::is_convertible, implemented with the classic
// sizeof / overload-resolution trick.
// NOTE(review): several structural lines (braces, access specifiers,
// #endif's) are elided in this excerpt; only comments are added here.
template<typename From, typename To>
struct is_convertible_impl
// Catch-all type constructible from anything: makes the ellipsis overload of
// test() viable for every argument, but at the worst conversion rank.
struct any_conversion
template <typename T> any_conversion(const volatile T&);
template <typename T> any_conversion(T&);
// Result types distinguished purely by size, so sizeof() reveals which
// overload was selected.
struct yes {int a[1];};
struct no {int a[2];};
// Preferred overload: viable only when the argument converts to To.
static yes test(const To&, int);
static no test(any_conversion, ...);
// Declared but never defined: only ever used inside unevaluated sizeof().
static From ms_from;
#ifdef __INTEL_COMPILER
#pragma warning push
// ICC diagnostic 2259 fires on the narrowing implied by the sizeof trick.
#pragma warning ( disable : 2259 )
enum { value = sizeof(test(ms_from, 0))==sizeof(yes) };
#ifdef __INTEL_COMPILER
// Public wrapper: strips cv/ref/pointer layers before testing convertibility.
template<typename From, typename To>
struct is_convertible
enum { value = is_convertible_impl<typename remove_all<From>::type,
typename remove_all<To >::type>::value };
// Minimal SFINAE helper (pre-C++11 analogue of std::enable_if): the nested
// `type` member exists only when the compile-time condition holds, so
// substitution fails — and the overload drops out — when it does not.
template<bool Condition, typename T=void> struct enable_if;
template<typename T> struct enable_if<true,T>
{
  typedef T type;
};
1252 #if defined(__CUDA_ARCH__)
1253 #if !defined(__FLT_EPSILON__)
1254 #define __FLT_EPSILON__ FLT_EPSILON
1255 #define __DBL_EPSILON__ DBL_EPSILON
1258 template<typename T> struct numeric_limits
1261 static T epsilon() { return 0; }
1262 static T (max)() { assert(false && "Highest not supported for this type"); }
1263 static T (min)() { assert(false && "Lowest not supported for this type"); }
1264 static T infinity() { assert(false && "Infinity not supported for this type"); }
1265 static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); }
1267 template<> struct numeric_limits<float>
1270 static float epsilon() { return __FLT_EPSILON__; }
1272 static float (max)() { return CUDART_MAX_NORMAL_F; }
1274 static float (min)() { return FLT_MIN; }
1276 static float infinity() { return CUDART_INF_F; }
1278 static float quiet_NaN() { return CUDART_NAN_F; }
1280 template<> struct numeric_limits<double>
1283 static double epsilon() { return __DBL_EPSILON__; }
1285 static double (max)() { return DBL_MAX; }
1287 static double (min)() { return DBL_MIN; }
1289 static double infinity() { return CUDART_INF; }
1291 static double quiet_NaN() { return CUDART_NAN; }
1293 template<> struct numeric_limits<int>
1296 static int epsilon() { return 0; }
1298 static int (max)() { return INT_MAX; }
1300 static int (min)() { return INT_MIN; }
1302 template<> struct numeric_limits<unsigned int>
1305 static unsigned int epsilon() { return 0; }
1307 static unsigned int (max)() { return UINT_MAX; }
1309 static unsigned int (min)() { return 0; }
1311 template<> struct numeric_limits<long>
1314 static long epsilon() { return 0; }
1316 static long (max)() { return LONG_MAX; }
1318 static long (min)() { return LONG_MIN; }
1320 template<> struct numeric_limits<unsigned long>
1323 static unsigned long epsilon() { return 0; }
1325 static unsigned long (max)() { return ULONG_MAX; }
1327 static unsigned long (min)() { return 0; }
1329 template<> struct numeric_limits<long long>
1332 static long long epsilon() { return 0; }
1334 static long long (max)() { return LLONG_MAX; }
1336 static long long (min)() { return LLONG_MIN; }
1338 template<> struct numeric_limits<unsigned long long>
1341 static unsigned long long epsilon() { return 0; }
1343 static unsigned long long (max)() { return ULLONG_MAX; }
1345 static unsigned long long (min)() { return 0; }
1351 EIGEN_DEVICE_FUNC noncopyable(const noncopyable&);
1352 EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&);
1354 EIGEN_DEVICE_FUNC noncopyable() {}
1355 EIGEN_DEVICE_FUNC ~noncopyable() {}
1357 #if EIGEN_HAS_STD_RESULT_OF
1358 template<typename T> struct result_of {
1359 typedef typename std::result_of<T>::type type1;
1360 typedef typename remove_all<type1>::type type;
1363 template<typename T> struct result_of { };
1364 struct has_none {int a[1];};
1365 struct has_std_result_type {int a[2];};
1366 struct has_tr1_result {int a[3];};
1367 template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
1368 struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
1369 template<typename Func, typename ArgType>
1370 struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
1371 template<typename Func, typename ArgType>
1372 struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef typename Func::template result<Func(ArgType)>::type type;};
1373 template<typename Func, typename ArgType>
1374 struct result_of<Func(ArgType)> {
1375 template<typename T>
1376 static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
1377 template<typename T>
1378 static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
1379 static has_none testFunctor(...);
1380 enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
1381 typedef typename unary_result_of_select<Func, ArgType, FunctorType>::type type;
1383 template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>
1384 struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
1385 template<typename Func, typename ArgType0, typename ArgType1>
1386 struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
1387 {typedef typename Func::result_type type;};
1388 template<typename Func, typename ArgType0, typename ArgType1>
1389 struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>
1390 {typedef typename Func::template result<Func(ArgType0,ArgType1)>::type type;};
1391 template<typename Func, typename ArgType0, typename ArgType1>
1392 struct result_of<Func(ArgType0,ArgType1)> {
1393 template<typename T>
1394 static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
1395 template<typename T>
1396 static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
1397 static has_none testFunctor(...);
1398 enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
1399 typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
1401 template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2, int SizeOf=sizeof(has_none)>
1402 struct ternary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
1403 template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
1404 struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_std_result_type)>
1405 {typedef typename Func::result_type type;};
1406 template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
1407 struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_tr1_result)>
1408 {typedef typename Func::template result<Func(ArgType0,ArgType1,ArgType2)>::type type;};
1409 template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
1410 struct result_of<Func(ArgType0,ArgType1,ArgType2)> {
1411 template<typename T>
1412 static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
1413 template<typename T>
1414 static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1,ArgType2)>::type const * = 0);
1415 static has_none testFunctor(...);
1416 enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
1417 typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;
// Size-discriminated result types for sizeof-based member detection:
// sizeof(meta_yes) == 1 and sizeof(meta_no) == 2, so the outcome of an
// overload resolution can be read back with sizeof().
struct meta_yes
{
  char a[1];
};
struct meta_no
{
  char a[2];
};
// Detects whether T declares a nested type `ReturnType`, using the
// sizeof(meta_yes)/sizeof(meta_no) overload trick.
// NOTE(review): struct braces are elided in this excerpt; comments only.
template <typename T>
struct has_ReturnType
template <typename C> static meta_yes testFunctor(typename C::ReturnType const *);
template <typename C> static meta_no testFunctor(...);
enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
// Declared but never defined: yields a T* usable in unevaluated contexts only.
template<typename T> const T* return_ptr();
// Detects a callable operator()() taking no arguments.
// NOTE(review): IndexType appears unused in this detector; presumably kept
// for symmetry with has_unary/has_binary_operator below — confirm upstream.
template <typename T, typename IndexType=Index>
struct has_nullary_operator
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);
static meta_no testFunctor(...);
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
// Detects operator()(IndexType) — single-index (linear) access.
template <typename T, typename IndexType=Index>
struct has_unary_operator
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);
static meta_no testFunctor(...);
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
// Detects operator()(IndexType,IndexType) — row/column style access.
template <typename T, typename IndexType=Index>
struct has_binary_operator
template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
static meta_no testFunctor(...);
enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
// Tail of the meta_sqrt class template: compile-time integer square root by
// binary search over [InfX, SupX].
// NOTE(review): the template header opening this class, and several braces,
// are elided in this excerpt; only comments are added.
int SupX = ((Y==1) ? 1 : Y/2),
bool Done = ((SupX-InfX)<=1 ? true : ((SupX*SupX <= Y) && ((SupX+1)*(SupX+1) > Y))) >
MidX = (InfX+SupX)/2,
TakeInf = MidX*MidX > Y ? 1 : 0,
// Narrow the bracket to whichever half still contains floor(sqrt(Y)).
NewInf = int(TakeInf) ? InfX : int(MidX),
NewSup = int(TakeInf) ? int(MidX) : SupX
// Recurse with the tightened bounds until Done selects the terminal case.
enum { ret = meta_sqrt<Y,NewInf,NewSup>::ret };
// Terminal specialization (bracket width <= 1): pick the bound that is the
// floor square root.
template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
// Compile-time least common multiple of A and B: scan multiples A*K
// (K = 1, 2, ...) until one is evenly divisible by B.
template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
struct meta_least_common_multiple
enum { ret = meta_least_common_multiple<A,B,K+1>::ret };
template<int A, int B, int K>
struct meta_least_common_multiple<A,B,K,true>
// Default scalar-product traits: Defined==0 marks the T*U product as
// unsupported unless a specialization elsewhere says otherwise.
template<typename T, typename U> struct scalar_product_traits
enum { Defined = 0 };
#if defined(__CUDA_ARCH__)
// std::swap is unavailable in CUDA device code, so swap by hand there.
template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
#if defined(__CUDA_ARCH__)
// Pick the device-side numeric_limits replacement on CUDA, std's otherwise.
using internal::device::numeric_limits;
using std::numeric_limits;
// Integer division rounding up; the body is elided in this excerpt.
template<typename T>
T div_ceil(const T &a, const T &b)
1502 // end #include "src/Core/util/Meta.h"
1503 // #include "src/Core/util/ForwardDeclarations.h"
1504 #ifndef EIGEN_FORWARDDECLARATIONS_H
1505 #define EIGEN_FORWARDDECLARATIONS_H
1507 namespace internal {
1508 template<typename T> struct traits;
1509 template<typename T> struct traits<const T> : traits<T> {};
1510 template<typename Derived> struct has_direct_access
1512 enum { ret = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0 };
1514 template<typename Derived> struct accessors_level
1516 enum { has_direct_access = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0,
1517 has_write_access = (traits<Derived>::Flags & LvalueBit) ? 1 : 0,
1518 value = has_direct_access ? (has_write_access ? DirectWriteAccessors : DirectAccessors)
1519 : (has_write_access ? WriteAccessors : ReadOnlyAccessors)
1522 template<typename T> struct evaluator_traits;
1523 template< typename T> struct evaluator;
1525 template<typename T> struct NumTraits;
1526 template<typename Derived> struct EigenBase;
1527 template<typename Derived> class DenseBase;
1528 template<typename Derived> class PlainObjectBase;
1529 template<typename Derived,
1530 int Level = internal::accessors_level<Derived>::value >
1531 class DenseCoeffsBase;
1532 template<typename _Scalar, int _Rows, int _Cols,
1533 int _Options = AutoAlign |
1534 #if EIGEN_GNUC_AT(3,4)
1535 ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
1536 : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
1537 : Eigen::ColMajor ),
1539 ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
1540 : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
1541 : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
1543 int _MaxRows = _Rows,
1544 int _MaxCols = _Cols
1546 template<typename Derived> class MatrixBase;
1547 template<typename Derived> class ArrayBase;
1548 template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
1549 template<typename ExpressionType, template <typename> class StorageBase > class NoAlias;
1550 template<typename ExpressionType> class NestByValue;
1551 template<typename ExpressionType> class ForceAlignedAccess;
1552 template<typename ExpressionType> class SwapWrapper;
1553 template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
1554 template<typename MatrixType, int Size=Dynamic> class VectorBlock;
1555 template<typename MatrixType> class Transpose;
1556 template<typename MatrixType> class Conjugate;
1557 template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
1558 template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
1559 template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
1560 template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
1561 template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3> class CwiseTernaryOp;
1562 template<typename Decomposition, typename Rhstype> class Solve;
1563 template<typename XprType> class Inverse;
1564 template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
1565 template<typename Derived> class DiagonalBase;
1566 template<typename _DiagonalVectorType> class DiagonalWrapper;
1567 template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
1568 template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
1569 template<typename MatrixType, int Index = 0> class Diagonal;
1570 template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class PermutationMatrix;
1571 template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class Transpositions;
1572 template<typename Derived> class PermutationBase;
1573 template<typename Derived> class TranspositionsBase;
1574 template<typename _IndicesType> class PermutationWrapper;
1575 template<typename _IndicesType> class TranspositionsWrapper;
1576 template<typename Derived,
1577 int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
1579 template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
1580 template<int Value = Dynamic> class InnerStride;
1581 template<int Value = Dynamic> class OuterStride;
1582 template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
1583 template<typename Derived> class RefBase;
1584 template<typename PlainObjectType, int Options = 0,
1585 typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
1586 template<typename Derived> class TriangularBase;
1587 template<typename MatrixType, unsigned int Mode> class TriangularView;
1588 template<typename MatrixType, unsigned int Mode> class SelfAdjointView;
1589 template<typename MatrixType> class SparseView;
1590 template<typename ExpressionType> class WithFormat;
1591 template<typename MatrixType> struct CommaInitializer;
1592 template<typename Derived> class ReturnByValue;
1593 template<typename ExpressionType> class ArrayWrapper;
1594 template<typename Derived> class SolverBase;
1595 template<typename XprType> class InnerIterator;
1596 namespace internal {
1597 template<typename DecompositionType> struct kernel_retval_base;
1598 template<typename DecompositionType> struct kernel_retval;
1599 template<typename DecompositionType> struct image_retval_base;
1600 template<typename DecompositionType> struct image_retval;
1602 namespace internal {
1603 template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynamic, int Subs=Dynamic, int Options=0> class BandMatrix;
1605 namespace internal {
1606 template<typename Lhs, typename Rhs> struct product_type;
1607 template<bool> struct EnableIf;
1608 template< typename T,
1609 int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
1610 typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
1611 typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
1612 typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
1613 typename RhsScalar = typename traits<typename T::Rhs>::Scalar
1614 > struct product_evaluator;
1616 template<typename Lhs, typename Rhs,
1617 int ProductType = internal::product_type<Lhs,Rhs>::value>
1618 struct ProductReturnType;
1619 template<typename Lhs, typename Rhs> struct LazyProductReturnType;
1620 namespace internal {
1621 template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
1622 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;
1623 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;
1624 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;
1625 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op;
1626 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op;
1627 template<typename Scalar> struct scalar_opposite_op;
1628 template<typename Scalar> struct scalar_conjugate_op;
1629 template<typename Scalar> struct scalar_real_op;
1630 template<typename Scalar> struct scalar_imag_op;
1631 template<typename Scalar> struct scalar_abs_op;
1632 template<typename Scalar> struct scalar_abs2_op;
1633 template<typename Scalar> struct scalar_sqrt_op;
1634 template<typename Scalar> struct scalar_rsqrt_op;
1635 template<typename Scalar> struct scalar_exp_op;
1636 template<typename Scalar> struct scalar_log_op;
1637 template<typename Scalar> struct scalar_cos_op;
1638 template<typename Scalar> struct scalar_sin_op;
1639 template<typename Scalar> struct scalar_acos_op;
1640 template<typename Scalar> struct scalar_asin_op;
1641 template<typename Scalar> struct scalar_tan_op;
1642 template<typename Scalar> struct scalar_inverse_op;
1643 template<typename Scalar> struct scalar_square_op;
1644 template<typename Scalar> struct scalar_cube_op;
1645 template<typename Scalar, typename NewType> struct scalar_cast_op;
1646 template<typename Scalar> struct scalar_random_op;
1647 template<typename Scalar> struct scalar_constant_op;
1648 template<typename Scalar> struct scalar_identity_op;
1649 template<typename Scalar,bool iscpx> struct scalar_sign_op;
1650 template<typename Scalar,typename ScalarExponent> struct scalar_pow_op;
1651 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;
1652 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
1653 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
1654 template<typename Scalar> struct scalar_lgamma_op;
1655 template<typename Scalar> struct scalar_digamma_op;
1656 template<typename Scalar> struct scalar_erf_op;
1657 template<typename Scalar> struct scalar_erfc_op;
1658 template<typename Scalar> struct scalar_igamma_op;
1659 template<typename Scalar> struct scalar_igammac_op;
1660 template<typename Scalar> struct scalar_zeta_op;
1661 template<typename Scalar> struct scalar_betainc_op;
1664 template<typename _Scalar, int _Rows, int _Cols,
1665 int _Options = AutoAlign |
1666 #if EIGEN_GNUC_AT(3,4)
1667 ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
1668 : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
1669 : Eigen::ColMajor ),
1671 ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
1672 : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
1673 : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
1675 int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;
1676 template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;
1677 template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;
1678 template<typename ExpressionType, int Direction> class VectorwiseOp;
1679 template<typename MatrixType,int RowFactor,int ColFactor> class Replicate;
1680 template<typename MatrixType, int Direction = BothDirections> class Reverse;
1681 template<typename MatrixType> class FullPivLU;
1682 template<typename MatrixType> class PartialPivLU;
1683 namespace internal {
1684 template<typename MatrixType> struct inverse_impl;
1686 template<typename MatrixType> class HouseholderQR;
1687 template<typename MatrixType> class ColPivHouseholderQR;
1688 template<typename MatrixType> class FullPivHouseholderQR;
1689 template<typename MatrixType> class CompleteOrthogonalDecomposition;
1690 template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
1691 template<typename MatrixType> class BDCSVD;
1692 template<typename MatrixType, int UpLo = Lower> class LLT;
1693 template<typename MatrixType, int UpLo = Lower> class LDLT;
1694 template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
1695 template<typename Scalar> class JacobiRotation;
1696 template<typename Derived, int _Dim> class RotationBase;
1697 template<typename Lhs, typename Rhs> class Cross;
1698 template<typename Derived> class QuaternionBase;
1699 template<typename Scalar> class Rotation2D;
1700 template<typename Scalar> class AngleAxis;
1701 template<typename Scalar,int Dim> class Translation;
1702 template<typename Scalar,int Dim> class AlignedBox;
1703 template<typename Scalar, int Options = AutoAlign> class Quaternion;
1704 template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
1705 template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
1706 template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane;
1707 template<typename Scalar> class UniformScaling;
1708 template<typename MatrixType,int Direction> class Homogeneous;
1709 template<typename Derived> class SparseMatrixBase;
1710 template<typename Derived> struct MatrixExponentialReturnValue;
1711 template<typename Derived> class MatrixFunctionReturnValue;
1712 template<typename Derived> class MatrixSquareRootReturnValue;
1713 template<typename Derived> class MatrixLogarithmReturnValue;
1714 template<typename Derived> class MatrixPowerReturnValue;
1715 template<typename Derived> class MatrixComplexPowerReturnValue;
1716 namespace internal {
1717 template <typename Scalar>
1718 struct stem_function
1720 typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
1721 typedef ComplexScalar type(ComplexScalar, int);
1726 // end #include "src/Core/util/ForwardDeclarations.h"
1727 // #include "src/Core/util/StaticAssert.h"
1728 #ifndef EIGEN_STATIC_ASSERT_H
1729 #define EIGEN_STATIC_ASSERT_H
1730 #ifndef EIGEN_NO_STATIC_ASSERT
1731 #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
1732 #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
1735 namespace internal {
1736 template<bool condition>
1737 struct static_assertion {};
1739 struct static_assertion<true>
1742 YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
1743 YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
1744 YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
1745 THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
1746 THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
1747 THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
1748 OUT_OF_RANGE_ACCESS,
1749 YOU_MADE_A_PROGRAMMING_MISTAKE,
1750 EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
1751 EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
1752 YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
1753 YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
1754 UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
1755 THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
1756 FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
1757 NUMERIC_TYPE_MUST_BE_REAL,
1758 COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
1759 WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
1760 THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
1761 INVALID_MATRIX_PRODUCT,
1762 INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
1763 INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
1764 YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
1765 THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
1766 THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
1767 INVALID_MATRIX_TEMPLATE_PARAMETERS,
1768 INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
1769 BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
1770 THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
1771 THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
1772 THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
1773 YOU_ALREADY_SPECIFIED_THIS_STRIDE,
1774 INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
1775 THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
1776 PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
1777 THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
1778 YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
1779 YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
1780 THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
1781 YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
1782 THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
1783 THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
1784 THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
1785 THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
1786 YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
1787 YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
1788 THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
1789 THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
1790 OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
1791 IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
1792 STORAGE_LAYOUT_DOES_NOT_MATCH,
1793 EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
1794 THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
1795 MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
1796 THIS_TYPE_IS_NOT_SUPPORTED,
1797 STORAGE_KIND_MUST_MATCH,
1798 STORAGE_INDEX_MUST_MATCH,
1799 CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
1805 #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
1806 {Eigen::internal::static_assertion<bool(CONDITION)>::MSG;}
1808 #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
1809 if (Eigen::internal::static_assertion<static_cast<bool>(CONDITION)>::MSG) {}
1813 #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
// Static assertion: TYPE must be a compile-time vector (1 row or 1 column).
1815 #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
1816 EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \
1817 YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX)
// Static assertion: TYPE must have a fixed (non-Dynamic) compile-time size.
1818 #define EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) \
1819 EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime!=Eigen::Dynamic, \
1820 YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR)
// Static assertion: TYPE must have a Dynamic (runtime-determined) size.
1821 #define EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(TYPE) \
1822 EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime==Eigen::Dynamic, \
1823 YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR)
// Static assertion: TYPE must be a vector whose compile-time size equals SIZE.
1824 #define EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) \
1825 EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime && TYPE::SizeAtCompileTime==SIZE, \
1826 THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE)
// Static assertion: TYPE must be exactly ROWS x COLS at compile time.
1827 #define EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) \
1828 EIGEN_STATIC_ASSERT(TYPE::RowsAtCompileTime==ROWS && TYPE::ColsAtCompileTime==COLS, \
1829 THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)
// Static assertion: the two vector types have compatible compile-time sizes.
// Compatible means equal, or at least one of them is Dynamic (checked at runtime
// instead). The int(...) casts force the enum values into a common type before
// comparison, avoiding mixed-enum comparison warnings.
1830 #define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \
1831 EIGEN_STATIC_ASSERT( \
1832 (int(TYPE0::SizeAtCompileTime)==Eigen::Dynamic \
1833 || int(TYPE1::SizeAtCompileTime)==Eigen::Dynamic \
1834 || int(TYPE0::SizeAtCompileTime)==int(TYPE1::SizeAtCompileTime)),\
1835 YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES)
1836 #define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
1838 (int(Eigen::internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(Eigen::internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \
1840 (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
1841 || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
1842 || int(TYPE0::RowsAtCompileTime)==int(TYPE1::RowsAtCompileTime)) \
1843 && (int(TYPE0::ColsAtCompileTime)==Eigen::Dynamic \
1844 || int(TYPE1::ColsAtCompileTime)==Eigen::Dynamic \
1845 || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime))\
// Static assertion: TYPE's NumTraits must report a non-integer scalar
// (used by methods such as norms/inverses that need a field, not a ring).
1848 #define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \
1849 EIGEN_STATIC_ASSERT(!NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
// Static assertion: TYPE0 and TYPE1 have compatible compile-time dimensions.
// Delegates the row/column compatibility logic to EIGEN_PREDICATE_SAME_MATRIX_SIZE.
1850 #define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
1851 EIGEN_STATIC_ASSERT( \
1852 EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1),\
1853 YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)
// Static assertion: TYPE could be a 1x1 expression. Dynamic dimensions are
// accepted here since they can only be verified at runtime.
1854 #define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \
1855 EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Dynamic) && \
1856 (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Dynamic), \
1857 THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS)
// Static assertion: Derived must be a writable (lvalue) expression.
1858 #define EIGEN_STATIC_ASSERT_LVALUE(Derived) \
1859 EIGEN_STATIC_ASSERT(Eigen::internal::is_lvalue<Derived>::value, \
1860 THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY)
// Static assertion: Derived's expression kind must be ArrayXpr (not MatrixXpr).
1861 #define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \
1862 EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived>::XprKind, ArrayXpr>::value), \
1863 THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES)
1864 #define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \
1865 EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived1>::XprKind, \
1866 typename Eigen::internal::traits<Derived2>::XprKind \
1868 YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
// Internal sanity check: a cost-model value must lie in [0, HugeCost^2].
// Values outside this range indicate a bug in Eigen's evaluator cost logic.
1869 #define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
1870 EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);
1872 // end #include "src/Core/util/StaticAssert.h"
1873 // #include "src/Core/util/XprHelper.h"
1874 #ifndef EIGEN_XPRHELPER_H
1875 #define EIGEN_XPRHELPER_H
1876 #if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
1877 #define EIGEN_EMPTY_STRUCT_CTOR(X) \
1878 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
1879 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
1881 #define EIGEN_EMPTY_STRUCT_CTOR(X)
1884 namespace internal {
1885 template<typename IndexDest, typename IndexSrc>
1887 inline IndexDest convert_index(const IndexSrc& idx) {
1888 eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value to big for target type");
1889 return IndexDest(idx);
1891 template<typename ExprScalar,typename T, bool IsSupported>
1892 struct promote_scalar_arg;
1893 template<typename S,typename T>
1894 struct promote_scalar_arg<S,T,true>
1898 template<typename ExprScalar,typename T,typename PromotedType,
1899 bool ConvertibleToLiteral = internal::is_convertible<T,PromotedType>::value,
1900 bool IsSafe = NumTraits<T>::IsInteger || !NumTraits<PromotedType>::IsInteger>
1901 struct promote_scalar_arg_unsupported;
1902 template<typename S,typename T>
1903 struct promote_scalar_arg<S,T,false> : promote_scalar_arg_unsupported<S,T,typename NumTraits<S>::Literal> {};
1904 template<typename S,typename T, typename PromotedType>
1905 struct promote_scalar_arg_unsupported<S,T,PromotedType,true,true>
1907 typedef PromotedType type;
1909 template<typename ExprScalar,typename T, typename PromotedType>
1910 struct promote_scalar_arg_unsupported<ExprScalar,T,PromotedType,false,true>
1911 : promote_scalar_arg_unsupported<ExprScalar,T,ExprScalar>
1913 template<typename S,typename T, typename PromotedType, bool ConvertibleToLiteral>
1914 struct promote_scalar_arg_unsupported<S,T,PromotedType,ConvertibleToLiteral,false> {};
1915 template<typename S,typename T>
1916 struct promote_scalar_arg_unsupported<S,T,S,false,true> {};
1917 class no_assignment_operator
1920 no_assignment_operator& operator=(const no_assignment_operator&);
1922 template<typename I1, typename I2>
1923 struct promote_index_type
1925 typedef typename conditional<(sizeof(I1)<sizeof(I2)), I2, I1>::type type;
1927 template<typename T, int Value> class variable_if_dynamic
1930 EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic)
1931 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
1932 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
1933 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
1935 template<typename T> class variable_if_dynamic<T, Dynamic>
1938 EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
1940 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {}
1941 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
1942 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
1944 template<typename T, int Value> class variable_if_dynamicindex
1947 EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)
1948 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
1949 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
1950 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
1952 template<typename T> class variable_if_dynamicindex<T, DynamicIndex>
1955 EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); }
1957 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T value) : m_value(value) {}
1958 EIGEN_DEVICE_FUNC T EIGEN_STRONG_INLINE value() const { return m_value; }
1959 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
1961 template<typename T> struct functor_traits
1966 PacketAccess = false,
1967 IsRepeatable = false
1970 template<typename T> struct packet_traits;
1971 template<typename T> struct unpacket_traits
1981 template<int Size, typename PacketType,
1982 bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
1983 struct find_best_packet_helper;
1984 template< int Size, typename PacketType>
1985 struct find_best_packet_helper<Size,PacketType,true>
1987 typedef PacketType type;
1989 template<int Size, typename PacketType>
1990 struct find_best_packet_helper<Size,PacketType,false>
1992 typedef typename find_best_packet_helper<Size,typename unpacket_traits<PacketType>::half>::type type;
1994 template<typename T, int Size>
1995 struct find_best_packet
1997 typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type;
1999 #if EIGEN_MAX_STATIC_ALIGN_BYTES>0
2000 template<int ArrayBytes, int AlignmentBytes,
2001 bool Match = bool((ArrayBytes%AlignmentBytes)==0),
2002 bool TryHalf = bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >
2003 struct compute_default_alignment_helper
2007 template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
2008 struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, true, TryHalf>
2010 enum { value = AlignmentBytes };
2012 template<int ArrayBytes, int AlignmentBytes>
2013 struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, false, true>
2015 enum { value = compute_default_alignment_helper<ArrayBytes, AlignmentBytes/2>::value };
2018 template<int ArrayBytes, int AlignmentBytes>
2019 struct compute_default_alignment_helper
2024 template<typename T, int Size> struct compute_default_alignment {
2025 enum { value = compute_default_alignment_helper<Size*sizeof(T),EIGEN_MAX_STATIC_ALIGN_BYTES>::value };
2027 template<typename T> struct compute_default_alignment<T,Dynamic> {
2028 enum { value = EIGEN_MAX_ALIGN_BYTES };
2030 template<typename _Scalar, int _Rows, int _Cols,
2031 int _Options = AutoAlign |
2032 ( (_Rows==1 && _Cols!=1) ? RowMajor
2033 : (_Cols==1 && _Rows!=1) ? ColMajor
2034 : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
2035 int _MaxRows = _Rows,
2036 int _MaxCols = _Cols
2037 > class make_proper_matrix_type
2040 IsColVector = _Cols==1 && _Rows!=1,
2041 IsRowVector = _Rows==1 && _Cols!=1,
2042 Options = IsColVector ? (_Options | ColMajor) & ~RowMajor
2043 : IsRowVector ? (_Options | RowMajor) & ~ColMajor
2047 typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;
2049 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
2050 class compute_matrix_flags
2052 enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
2054 enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
2056 template<int _Rows, int _Cols> struct size_at_compile_time
2058 enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
2060 template<typename XprType> struct size_of_xpr_at_compile_time
2062 enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret };
2064 template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_matrix_type;
2065 template<typename T, typename BaseClassType, int Flags> struct plain_matrix_type_dense;
2066 template<typename T> struct plain_matrix_type<T,Dense>
2068 typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, traits<T>::Flags>::type type;
2070 template<typename T> struct plain_matrix_type<T,DiagonalShape>
2072 typedef typename T::PlainObject type;
2074 template<typename T, int Flags> struct plain_matrix_type_dense<T,MatrixXpr,Flags>
2076 typedef Matrix<typename traits<T>::Scalar,
2077 traits<T>::RowsAtCompileTime,
2078 traits<T>::ColsAtCompileTime,
2079 AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
2080 traits<T>::MaxRowsAtCompileTime,
2081 traits<T>::MaxColsAtCompileTime
2084 template<typename T, int Flags> struct plain_matrix_type_dense<T,ArrayXpr,Flags>
2086 typedef Array<typename traits<T>::Scalar,
2087 traits<T>::RowsAtCompileTime,
2088 traits<T>::ColsAtCompileTime,
2089 AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
2090 traits<T>::MaxRowsAtCompileTime,
2091 traits<T>::MaxColsAtCompileTime
2094 template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct eval;
2095 template<typename T> struct eval<T,Dense>
2097 typedef typename plain_matrix_type<T>::type type;
2099 template<typename T> struct eval<T,DiagonalShape>
2101 typedef typename plain_matrix_type<T>::type type;
2103 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
2104 struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
2106 typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
2108 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
2109 struct eval<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
2111 typedef const Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
2113 template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_object_eval;
2114 template<typename T>
2115 struct plain_object_eval<T,Dense>
2117 typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, evaluator<T>::Flags>::type type;
2119 template<typename T> struct plain_matrix_type_column_major
2121 enum { Rows = traits<T>::RowsAtCompileTime,
2122 Cols = traits<T>::ColsAtCompileTime,
2123 MaxRows = traits<T>::MaxRowsAtCompileTime,
2124 MaxCols = traits<T>::MaxColsAtCompileTime
2126 typedef Matrix<typename traits<T>::Scalar,
2129 (MaxRows==1&&MaxCols!=1) ? RowMajor : ColMajor,
2134 template<typename T> struct plain_matrix_type_row_major
2136 enum { Rows = traits<T>::RowsAtCompileTime,
2137 Cols = traits<T>::ColsAtCompileTime,
2138 MaxRows = traits<T>::MaxRowsAtCompileTime,
2139 MaxCols = traits<T>::MaxColsAtCompileTime
2141 typedef Matrix<typename traits<T>::Scalar,
2144 (MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor,
2149 template <typename T>
2152 typedef typename conditional<
2153 bool(traits<T>::Flags & NestByRefBit),
2157 typedef typename conditional<
2158 bool(traits<T>::Flags & NestByRefBit),
2161 >::type non_const_type;
2163 template<typename T1, typename T2>
2164 struct transfer_constness
2166 typedef typename conditional<
2167 bool(internal::is_const<T1>::value),
2168 typename internal::add_const_on_value_type<T2>::type,
2172 template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
2175 ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
2176 CoeffReadCost = evaluator<T>::CoeffReadCost,
2177 NAsInteger = n == Dynamic ? HugeCost : n,
2178 CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
2179 CostNoEval = NAsInteger * CoeffReadCost,
2180 Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
2182 typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
2184 template<typename T>
2186 inline T* const_cast_ptr(const T* ptr)
2188 return const_cast<T*>(ptr);
2190 template<typename Derived, typename XprKind = typename traits<Derived>::XprKind>
2191 struct dense_xpr_base
2194 template<typename Derived>
2195 struct dense_xpr_base<Derived, MatrixXpr>
2197 typedef MatrixBase<Derived> type;
2199 template<typename Derived>
2200 struct dense_xpr_base<Derived, ArrayXpr>
2202 typedef ArrayBase<Derived> type;
2204 template<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind>
2205 struct generic_xpr_base;
2206 template<typename Derived, typename XprKind>
2207 struct generic_xpr_base<Derived, XprKind, Dense>
2209 typedef typename dense_xpr_base<Derived,XprKind>::type type;
2211 template<typename XprType, typename CastType> struct cast_return_type
2213 typedef typename XprType::Scalar CurrentScalarType;
2214 typedef typename remove_all<CastType>::type _CastType;
2215 typedef typename _CastType::Scalar NewScalarType;
2216 typedef typename conditional<is_same<CurrentScalarType,NewScalarType>::value,
2217 const XprType&,CastType>::type type;
2219 template <typename A, typename B> struct promote_storage_type;
2220 template <typename A> struct promote_storage_type<A,A>
2224 template <typename A> struct promote_storage_type<A, const A>
2228 template <typename A> struct promote_storage_type<const A, A>
// Storage-kind promotion for coefficient-wise binary operations: given the two
// operands' storage kinds, yields the result's storage kind in `ret`.
// Per the specializations below, equal kinds stay unchanged, Dense absorbs a
// generic kind, and a Sparse/Dense mix yields Sparse (the functor parameter is
// available for finer-grained specialization but unused here).
2232 template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
2233 template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; };
2234 template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; };
2235 template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; };
2236 template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; };
2237 template <typename Functor> struct cwise_promote_storage_type<Sparse,Dense,Functor> { typedef Sparse ret; };
2238 template <typename Functor> struct cwise_promote_storage_type<Dense,Sparse,Functor> { typedef Sparse ret; };
2239 template <typename LhsKind, typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order {
2240 enum { value = LhsOrder };
// Specializations: when exactly one side is Sparse, the result takes the
// non-sparse side's preference indirectly — Sparse on the right selects
// RhsOrder, Sparse on the left selects LhsOrder; Sparse/Sparse with matching
// orders keeps that common order.
2242 template <typename LhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<LhsKind,Sparse,LhsOrder,RhsOrder> { enum { value = RhsOrder }; };
2243 template <typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<Sparse,RhsKind,LhsOrder,RhsOrder> { enum { value = LhsOrder }; };
2244 template <int Order> struct cwise_promote_storage_order<Sparse,Sparse,Order,Order> { enum { value = Order }; };
// Storage-kind promotion for products: given the two factors' storage kinds
// (and a ProductTag available for further specialization), yields the product
// result's storage kind in `ret`. Per the specializations: equal kinds are
// preserved; Dense absorbs a generic kind; DiagonalShape and PermutationStorage
// are "transparent" — multiplying by them keeps the other operand's kind, and
// combined with Dense the result is Dense.
2245 template <typename A, typename B, int ProductTag> struct product_promote_storage_type;
2246 template <typename A, int ProductTag> struct product_promote_storage_type<A, A, ProductTag> { typedef A ret;};
2247 template <int ProductTag> struct product_promote_storage_type<Dense, Dense, ProductTag> { typedef Dense ret;};
2248 template <typename A, int ProductTag> struct product_promote_storage_type<A, Dense, ProductTag> { typedef Dense ret; };
2249 template <typename B, int ProductTag> struct product_promote_storage_type<Dense, B, ProductTag> { typedef Dense ret; };
2250 template <typename A, int ProductTag> struct product_promote_storage_type<A, DiagonalShape, ProductTag> { typedef A ret; };
2251 template <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape, B, ProductTag> { typedef B ret; };
2252 template <int ProductTag> struct product_promote_storage_type<Dense, DiagonalShape, ProductTag> { typedef Dense ret; };
2253 template <int ProductTag> struct product_promote_storage_type<DiagonalShape, Dense, ProductTag> { typedef Dense ret; };
2254 template <typename A, int ProductTag> struct product_promote_storage_type<A, PermutationStorage, ProductTag> { typedef A ret; };
2255 template <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B, ProductTag> { typedef B ret; };
2256 template <int ProductTag> struct product_promote_storage_type<Dense, PermutationStorage, ProductTag> { typedef Dense ret; };
2257 template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; };
2258 template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
2259 struct plain_row_type
2261 typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime,
2262 ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
2263 typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime,
2264 ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
2265 typedef typename conditional<
2266 is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
2271 template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
2272 struct plain_col_type
2274 typedef Matrix<Scalar, ExpressionType::RowsAtCompileTime, 1,
2275 ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> MatrixColType;
2276 typedef Array<Scalar, ExpressionType::RowsAtCompileTime, 1,
2277 ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> ArrayColType;
2278 typedef typename conditional<
2279 is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
2284 template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
2285 struct plain_diag_type
2287 enum { diag_size = EIGEN_SIZE_MIN_PREFER_DYNAMIC(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime),
2288 max_diag_size = EIGEN_SIZE_MIN_PREFER_FIXED(ExpressionType::MaxRowsAtCompileTime, ExpressionType::MaxColsAtCompileTime)
2290 typedef Matrix<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> MatrixDiagType;
2291 typedef Array<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> ArrayDiagType;
2292 typedef typename conditional<
2293 is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
2298 template<typename Expr,typename Scalar = typename Expr::Scalar>
2299 struct plain_constant_type
2301 enum { Options = (traits<Expr>::Flags&RowMajorBit)?RowMajor:0 };
2302 typedef Array<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime,
2303 Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> array_type;
2304 typedef Matrix<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime,
2305 Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> matrix_type;
2306 typedef CwiseNullaryOp<scalar_constant_op<Scalar>, const typename conditional<is_same< typename traits<Expr>::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type;
2308 template<typename ExpressionType>
2311 enum { value = (!bool(is_const<ExpressionType>::value)) &&
2312 bool(traits<ExpressionType>::Flags & LvalueBit) };
// Compile-time predicate: is_diagonal<T>::ret is true only for Eigen's diagonal
// expression types (DiagonalBase, DiagonalWrapper, DiagonalMatrix); any other
// type falls through to the primary template and yields false.
2314 template<typename T> struct is_diagonal
2315 { enum { ret = false }; };
2316 template<typename T> struct is_diagonal<DiagonalBase<T> >
2317 { enum { ret = true }; };
2318 template<typename T> struct is_diagonal<DiagonalWrapper<T> >
2319 { enum { ret = true }; };
2320 template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
2321 { enum { ret = true }; };
// Combines two shape tags into the resulting shape of their composition.
// Only the Dense+Triangular combination is defined here: restricting a dense
// shape to a triangular view yields TriangularShape.
2322 template<typename S1, typename S2> struct glue_shapes;
2323 template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; };
2324 template<typename T1, typename T2>
2325 bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<has_direct_access<T1>::ret&&has_direct_access<T2>::ret, T1>::type * = 0)
2327 return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
2329 template<typename T1, typename T2>
2330 bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_access<T1>::ret&&has_direct_access<T2>::ret), T1>::type * = 0)
2334 template<typename T,bool Vectorized=false,typename EnaleIf = void>
2335 struct scalar_div_cost {
2336 enum { value = 8*NumTraits<T>::MulCost };
2338 template<typename T,bool Vectorized>
2339 struct scalar_div_cost<std::complex<T>, Vectorized> {
2340 enum { value = 2*scalar_div_cost<T>::value
2341 + 6*NumTraits<T>::MulCost
2342 + 3*NumTraits<T>::AddCost
// Cost-model specializations for 64-bit long division. The conditional<...>
// third argument is SFINAE-style gating: it matches the primary template's
// void parameter only when sizeof(long)==8, so these fixed costs (24 signed,
// 21 unsigned) apply only on LP64-style targets.
2345 template<bool Vectorized>
2346 struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
2347 template<bool Vectorized>
2348 struct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };
2349 #ifdef EIGEN_DEBUG_ASSIGN
2350 std::string demangle_traversal(int t)
2352 if(t==DefaultTraversal) return "DefaultTraversal";
2353 if(t==LinearTraversal) return "LinearTraversal";
2354 if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal";
2355 if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal";
2356 if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal";
2359 std::string demangle_unrolling(int t)
2361 if(t==NoUnrolling) return "NoUnrolling";
2362 if(t==InnerUnrolling) return "InnerUnrolling";
2363 if(t==CompleteUnrolling) return "CompleteUnrolling";
2366 std::string demangle_flags(int f)
2369 if(f&RowMajorBit) res += " | RowMajor";
2370 if(f&PacketAccessBit) res += " | Packet";
2371 if(f&LinearAccessBit) res += " | Linear";
2372 if(f&LvalueBit) res += " | Lvalue";
2373 if(f&DirectAccessBit) res += " | Direct";
2374 if(f&NestByRefBit) res += " | NestByRef";
2375 if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit";
2380 template<typename ScalarA, typename ScalarB, typename BinaryOp=internal::scalar_product_op<ScalarA,ScalarB> >
2381 struct ScalarBinaryOpTraits
2382 #ifndef EIGEN_PARSED_BY_DOXYGEN
2383 : internal::scalar_product_traits<ScalarA,ScalarB>
2386 template<typename T, typename BinaryOp>
2387 struct ScalarBinaryOpTraits<T,T,BinaryOp>
2389 typedef T ReturnType;
2391 template <typename T, typename BinaryOp>
2392 struct ScalarBinaryOpTraits<T, typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, BinaryOp>
2394 typedef T ReturnType;
2396 template <typename T, typename BinaryOp>
2397 struct ScalarBinaryOpTraits<typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, T, BinaryOp>
2399 typedef T ReturnType;
2401 template<typename T, typename BinaryOp>
2402 struct ScalarBinaryOpTraits<T,void,BinaryOp>
2404 typedef T ReturnType;
2406 template<typename T, typename BinaryOp>
2407 struct ScalarBinaryOpTraits<void,T,BinaryOp>
2409 typedef T ReturnType;
2411 template<typename BinaryOp>
2412 struct ScalarBinaryOpTraits<void,void,BinaryOp>
2414 typedef void ReturnType;
// Static assertion: the scalar types LHS and RHS may be mixed under BINOP,
// i.e. ScalarBinaryOpTraits<LHS,RHS,BINOP> defines a ReturnType.
// NOTE(review): "COMPATIBILIY" is misspelled, but the macro name is part of
// the public interface and must not be changed here.
2416 #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
2417 EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType<ScalarBinaryOpTraits<LHS, RHS,BINOP> >::value), \
2418 YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
2421 // end #include "src/Core/util/XprHelper.h"
2422 // #include "src/Core/util/Memory.h"
2423 #ifndef EIGEN_MEMORY_H
2424 #define EIGEN_MEMORY_H
2425 #ifndef EIGEN_MALLOC_ALREADY_ALIGNED
2426 #if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
2427 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
2428 #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
2430 #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
2432 #if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
2433 #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
2435 #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
2437 #if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
2438 || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
2439 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
2440 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
2441 #define EIGEN_MALLOC_ALREADY_ALIGNED 1
2443 #define EIGEN_MALLOC_ALREADY_ALIGNED 0
2447 namespace internal {
2449 inline void throw_std_bad_alloc()
2451 #ifdef EIGEN_EXCEPTIONS
2452 throw std::bad_alloc();
2454 std::size_t huge = static_cast<std::size_t>(-1);
2458 inline void* handmade_aligned_malloc(std::size_t size)
2460 void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
2461 if (original == 0) return 0;
2462 void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
2463 *(reinterpret_cast<void**>(aligned) - 1) = original;
2466 inline void handmade_aligned_free(void *ptr)
2468 if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
2470 inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
2472 if (ptr == 0) return handmade_aligned_malloc(size);
2473 void *original = *(reinterpret_cast<void**>(ptr) - 1);
2474 std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
2475 original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
2476 if (original == 0) return 0;
2477 void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
2478 void *previous_aligned = static_cast<char *>(original)+previous_offset;
2479 if(aligned!=previous_aligned)
2480 std::memmove(aligned, previous_aligned, size);
2481 *(reinterpret_cast<void**>(aligned) - 1) = original;
2484 #ifdef EIGEN_NO_MALLOC
2485 EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
2487 eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
2489 #elif defined EIGEN_RUNTIME_NO_MALLOC
2490 EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
2492 static bool value = true;
2497 EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
2498 EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
2499 EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
2501 eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
2504 EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
2507 EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
2509 check_that_malloc_is_allowed();
2511 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
2512 result = std::malloc(size);
2513 #if EIGEN_DEFAULT_ALIGN_BYTES==16
2514 eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator.");
2517 result = handmade_aligned_malloc(size);
2520 throw_std_bad_alloc();
2523 EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
2525 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
2528 handmade_aligned_free(ptr);
2531 inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
2533 EIGEN_UNUSED_VARIABLE(old_size);
2535 #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
2536 result = std::realloc(ptr,new_size);
2538 result = handmade_aligned_realloc(ptr,new_size,old_size);
2540 if (!result && new_size)
2541 throw_std_bad_alloc();
2544 template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
2546 return aligned_malloc(size);
2548 template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
2550 check_that_malloc_is_allowed();
2551 void *result = std::malloc(size);
2553 throw_std_bad_alloc();
2556 template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
2560 template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
2564 template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
2566 return aligned_realloc(ptr, new_size, old_size);
2568 template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
2570 return std::realloc(ptr, new_size);
// Destroy `size` objects of type T in place, in reverse order of construction.
2572 template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
2575 while(size) ptr[--size].~T();
// Default-construct `size` objects of type T in raw storage via placement new.
// The visible cleanup call suggests an exception handler that destroys the
// `i` elements already built before rethrowing — TODO confirm (try/catch lines
// are not visible in this excerpt).
2577 template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
2582 for (i = 0; i < size; ++i) ::new (ptr + i) T;
2587 destruct_elements_of_array(ptr, i);
// Guard against std::size_t overflow in the byte count sizeof(T)*size:
// if size exceeds SIZE_MAX / sizeof(T), the multiplication would wrap,
// so fail early with bad_alloc.
2592 template<typename T>
2593 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
2595 if(size > std::size_t(-1) / sizeof(T))
2596 throw_std_bad_alloc();
// Allocate aligned storage for `size` objects of T and default-construct them.
// On construction failure the visible aligned_free call suggests the raw
// buffer is released before rethrowing — TODO confirm against upstream.
2598 template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
2600 check_size_for_overflow<T>(size);
2601 T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
2604 return construct_elements_of_array(result, size);
2608 aligned_free(result);
// Same as aligned_new but alignment is selected by the Align template flag.
2613 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
2615 check_size_for_overflow<T>(size);
2616 T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
2619 return construct_elements_of_array(result, size);
2623 conditional_aligned_free<Align>(result);
// Destroy the `size` elements, then release the storage (aligned_free call
// for aligned_delete is not visible in this excerpt).
2628 template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
2630 destruct_elements_of_array<T>(ptr, size);
2633 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
2635 destruct_elements_of_array<T>(ptr, size);
2636 conditional_aligned_free<Align>(ptr);
// Grow or shrink an array of T in place (realloc semantics) while keeping
// element lifetimes correct: elements beyond the new size are destroyed
// BEFORE the realloc (their storage may disappear), and newly exposed
// storage is constructed AFTER the realloc.
2638 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
2640 check_size_for_overflow<T>(new_size);
2641 check_size_for_overflow<T>(old_size);
2642 if(new_size < old_size)
2643 destruct_elements_of_array(pts+new_size, old_size-new_size);
2644 T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
2645 if(new_size > old_size)
2649 construct_elements_of_array(result+old_size, new_size-old_size);
// Visible free call suggests cleanup on construction failure — TODO confirm.
2653 conditional_aligned_free<Align>(result);
// "_auto" variants: skip construction/destruction entirely for types that
// do not require initialization (NumTraits<T>::RequireInitialization == 0),
// e.g. plain arithmetic scalars.
2659 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
2663 check_size_for_overflow<T>(size);
2664 T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
2665 if(NumTraits<T>::RequireInitialization)
2669 construct_elements_of_array(result, size);
2673 conditional_aligned_free<Align>(result);
2679 template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
2681 check_size_for_overflow<T>(new_size);
2682 check_size_for_overflow<T>(old_size);
2683 if(NumTraits<T>::RequireInitialization && (new_size < old_size))
2684 destruct_elements_of_array(pts+new_size, old_size-new_size);
2685 T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
2686 if(NumTraits<T>::RequireInitialization && (new_size > old_size))
2690 construct_elements_of_array(result+old_size, new_size-old_size);
2694 conditional_aligned_free<Align>(result);
// Delete counterpart of the _auto allocators: destructors run only when the
// element type requires initialization.
2700 template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
2702 if(NumTraits<T>::RequireInitialization)
2703 destruct_elements_of_array<T>(ptr, size);
2704 conditional_aligned_free<Align>(ptr);
// Return the index of the first element of `array` that is aligned on an
// `Alignment`-byte boundary, clamped to `size`. Used to split vectorized
// loops into an unaligned prologue and an aligned body.
2706 template<int Alignment, typename Scalar, typename Index>
2707 EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
2709 const Index ScalarSize = sizeof(Scalar);
2710 const Index AlignmentSize = Alignment / ScalarSize;
2711 const Index AlignmentMask = AlignmentSize-1;
// AlignmentSize<=1: every element is trivially "aligned" (return-0 branch is
// presumably on an elided line — TODO confirm).
2712 if(AlignmentSize<=1)
// If the base pointer is not even scalar-aligned, or the requested alignment
// is not a multiple of the scalar size, no element can ever be aligned.
2716 else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
// Otherwise compute how many elements to skip until the next aligned address;
// AlignmentSize is a power of two here, so the mask arithmetic is exact.
2722 Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
2723 return (first < size) ? first : size;
// Convenience overload using the default packet type's alignment for Scalar.
2726 template<typename Scalar, typename Index>
2727 EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
2729 typedef typename packet_traits<Scalar>::type DefaultPacketType;
2730 return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
// Smallest multiple of `base` that is >= size (rounds up).
2732 template<typename Index>
2733 inline Index first_multiple(Index size, Index base)
2735 return ((size+base-1)/base)*base;
// smart_copy / smart_memmove: copy a [start,end) range into `target`,
// using raw memcpy/memmove for trivial types (no RequireInitialization)
// and element-wise std::copy for types with real constructors.
2737 template<typename T, bool UseMemcpy> struct smart_copy_helper;
2738 template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
2740 smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
// Trivial-type path: byte-wise memcpy of the whole range.
2742 template<typename T> struct smart_copy_helper<T,true> {
2743 EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
2745 IntPtr size = IntPtr(end)-IntPtr(start);
2747 eigen_internal_assert(start!=0 && end!=0 && target!=0);
2748 memcpy(target, start, size);
// Non-trivial path: invoke copy assignment per element.
2751 template<typename T> struct smart_copy_helper<T,false> {
2752 EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
2753 { std::copy(start, end, target); }
2755 template<typename T, bool UseMemmove> struct smart_memmove_helper;
2756 template<typename T> void smart_memmove(const T* start, const T* end, T* target)
2758 smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
2760 template<typename T> struct smart_memmove_helper<T,true> {
2761 static inline void run(const T* start, const T* end, T* target)
2763 IntPtr size = IntPtr(end)-IntPtr(start);
2765 eigen_internal_assert(start!=0 && end!=0 && target!=0);
2766 std::memmove(target, start, size);
// Non-trivial memmove: pick copy direction so overlapping ranges are safe —
// forward copy when target precedes source, backward copy otherwise.
2769 template<typename T> struct smart_memmove_helper<T,false> {
2770 static inline void run(const T* start, const T* end, T* target)
2772 if (UIntPtr(target) < UIntPtr(start))
2774 std::copy(start, end, target);
2778 std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
2779 std::copy_backward(start, end, target + count);
// Select the platform's stack-allocation primitive: alloca on Linux/Mac (or
// wherever `alloca` is already a macro), _alloca on MSVC.
2783 #ifndef EIGEN_ALLOCA
2784 #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
2785 #define EIGEN_ALLOCA alloca
2786 #elif EIGEN_COMP_MSVC
2787 #define EIGEN_ALLOCA _alloca
// RAII guard for a buffer created by ei_declare_aligned_stack_constructed_variable:
// constructs the elements on entry (when T needs construction) and, on scope
// exit, destroys them and frees the buffer — but only when m_deallocate is
// set, i.e. the storage came from the heap rather than alloca.
2790 template<typename T> class aligned_stack_memory_handler : noncopyable
2793 aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
2794 : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
2796 if(NumTraits<T>::RequireInitialization && m_ptr)
2797 Eigen::internal::construct_elements_of_array(m_ptr, size)
2799 ~aligned_stack_memory_handler()
2801 if(NumTraits<T>::RequireInitialization && m_ptr)
2802 Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
// Guarded by m_deallocate on an elided line — TODO confirm.
2804 Eigen::internal::aligned_free(m_ptr);
// Minimal RAII array owner (non-copyable): new[] on construction, with the
// matching delete[] presumably in the (elided) destructor — TODO confirm.
// ptr() exposes a mutable reference to the raw pointer so two scoped_arrays
// can be swapped without copying elements.
2811 template<typename T> class scoped_array : noncopyable
2815 explicit scoped_array(std::ptrdiff_t size)
2817 m_ptr = new T[size];
2823 T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
2824 const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
2825 T* &ptr() { return m_ptr; }
2826 const T* ptr() const { return m_ptr; }
2827 operator const T*() const { return m_ptr; }
// Swap ownership of the two underlying buffers (O(1), no element copies).
2829 template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
2831 std::swap(a.ptr(),b.ptr());
// EIGEN_ALIGNED_ALLOCA: over-allocate by ALIGN-1 bytes on the stack and round
// the pointer up to the next EIGEN_DEFAULT_ALIGN_BYTES boundary; when no
// alignment is required it degrades to plain EIGEN_ALLOCA.
2835 #if EIGEN_DEFAULT_ALIGN_BYTES>0
2836 #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
2838 #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
// Declare TYPE* NAME pointing at SIZE elements: use the caller-provided
// BUFFER if non-null, else alloca for small requests (within
// EIGEN_STACK_ALLOCATION_LIMIT), else the aligned heap. The companion
// handler object constructs/destroys elements and frees heap storage at
// scope exit (alloca variant).
2840 #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
2841 Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
2842 TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
2843 : reinterpret_cast<TYPE*>( \
2844 (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
2845 : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
2846 Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
// Fallback (no alloca available): always heap-allocate; the handler always
// deallocates (last argument is `true`).
2848 #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
2849 Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
2850 TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
2851 Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
// Class-scope operator new/delete overloads routing allocation through
// Eigen's conditionally-aligned allocator, so fixed-size vectorizable types
// are heap-aligned even via `new`. Only generated when alignment is enabled
// (EIGEN_MAX_ALIGN_BYTES!=0); otherwise the macro expands to nothing.
2853 #if EIGEN_MAX_ALIGN_BYTES!=0
// nothrow new: swallow bad_alloc and return null, per nothrow semantics.
2854 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
2855 void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
2856 EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
2857 EIGEN_CATCH (...) { return 0; } \
2859 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
2860 void *operator new(std::size_t size) { \
2861 return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
2863 void *operator new[](std::size_t size) { \
2864 return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
2866 void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
2867 void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
// Sized-delete overloads (the size argument is ignored by the allocator).
2868 void operator delete(void * ptr, std::size_t ) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
2869 void operator delete[](void * ptr, std::size_t ) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
// Placement new/delete forward to the global versions unchanged.
2873 static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
2874 static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
2875 void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
2876 void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
2878 EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
// Matching delete for the nothrow new, called if a constructor throws.
2879 void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
2880 Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
2882 typedef void eigen_aligned_operator_new_marker_type;
2884 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
2886 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
// Align only when the type's static size is a multiple of the max alignment
// (dynamic-size types are excluded).
2887 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
2888 EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
// STL-compatible allocator that routes allocation through Eigen's
// aligned_malloc/aligned_free, for use with std containers holding
// vectorizable Eigen types (e.g. std::vector<Vector4f, aligned_allocator<...>>).
2890 class aligned_allocator : public std::allocator<T>
2893 typedef std::size_t size_type;
2894 typedef std::ptrdiff_t difference_type;
2896 typedef const T* const_pointer;
2897 typedef T& reference;
2898 typedef const T& const_reference;
2899 typedef T value_type;
// rebind: required by the pre-C++17 allocator protocol.
2903 typedef aligned_allocator<U> other;
2905 aligned_allocator() : std::allocator<T>() {}
2906 aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
2908 aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
2909 ~aligned_allocator() {}
// Overflow-checked aligned allocation; the hint pointer is ignored.
2910 pointer allocate(size_type num, const void* = 0)
2912 internal::check_size_for_overflow<T>(num);
2913 return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
2915 void deallocate(pointer p, size_type )
2917 internal::aligned_free(p);
// Portable CPUID macro: EIGEN_CPUID(abcd,func,id) fills abcd[0..3] with
// EAX/EBX/ECX/EDX for leaf `func`, subleaf `id`.
2920 #if !defined(EIGEN_NO_CPUID)
2921 # if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
// 32-bit PIC: EBX holds the GOT pointer and cannot be clobbered, so swap it
// in and out around the cpuid instruction.
2922 # if defined(__PIC__) && EIGEN_ARCH_i386
2923 # define EIGEN_CPUID(abcd,func,id) \
2924 __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
// x86-64 PIC: same trick with RBX.
2925 # elif defined(__PIC__) && EIGEN_ARCH_x86_64
2926 # define EIGEN_CPUID(abcd,func,id) \
2927 __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
// Non-PIC: EBX may be written directly.
2929 # define EIGEN_CPUID(abcd,func,id) \
2930 __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
// MSVC: use the __cpuidex intrinsic (available from VS2008 SP1 on).
2932 # elif EIGEN_COMP_MSVC
2933 # if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
2934 # define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
2938 namespace internal {
// Compare the 12-byte CPUID vendor string (returned in EBX,EDX,ECX order)
// against a vendor id given as three packed ints.
2940 inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
2942 return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
// Query L1/L2/L3 data-cache sizes via CPUID leaf 4 ("deterministic cache
// parameters"), iterating over cache_id subleaves until an invalid entry.
2944 inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
2951 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
2952 EIGEN_CPUID(abcd,0x4,cache_id);
// cache_type: 1 = data cache, 3 = unified — both count here.
2953 cache_type = (abcd[0] & 0x0F) >> 0;
2954 if(cache_type==1||cache_type==3)
2955 int cache_level = (abcd[0] & 0xE0) >> 5;
2957 int ways = (abcd[1] & 0xFFC00000) >> 22;
2958 int partitions = (abcd[1] & 0x003FF000) >> 12;
2959 int line_size = (abcd[1] & 0x00000FFF) >> 0;
2960 int sets = (abcd[2]);
// Per the CPUID spec all four fields are encoded minus one, hence the +1s.
2961 int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
2964 case 1: l1 = cache_size; break;
2965 case 2: l2 = cache_size; break;
2966 case 3: l3 = cache_size; break;
// cache_type==0 marks "no more caches"; 16 is a safety bound on subleaves.
2971 } while(cache_type>0 && cache_id<16);
// Legacy fallback for CPUs without CPUID leaf 4: decode the one-byte cache
// descriptors returned by CPUID leaf 2. Each case maps a descriptor byte to
// an L1/L2/L3 data-cache size in KB (values per Intel's descriptor table).
2973 inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
2976 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
2978 EIGEN_CPUID(abcd,0x00000002,0);
// Skip the low byte of EAX (it is the leaf-2 call count, not a descriptor).
2979 unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
2980 bool check_for_p2_core2 = false;
2981 for(int i=0; i<14; ++i)
2985 case 0x0A: l1 = 8; break;
2986 case 0x0C: l1 = 16; break;
2987 case 0x0E: l1 = 24; break;
2988 case 0x10: l1 = 16; break;
2989 case 0x15: l1 = 16; break;
2990 case 0x2C: l1 = 32; break;
2991 case 0x30: l1 = 32; break;
2992 case 0x60: l1 = 16; break;
2993 case 0x66: l1 = 8; break;
2994 case 0x67: l1 = 16; break;
2995 case 0x68: l1 = 32; break;
2996 case 0x1A: l2 = 96; break;
2997 case 0x22: l3 = 512; break;
2998 case 0x23: l3 = 1024; break;
2999 case 0x25: l3 = 2048; break;
3000 case 0x29: l3 = 4096; break;
3001 case 0x39: l2 = 128; break;
3002 case 0x3A: l2 = 192; break;
3003 case 0x3B: l2 = 128; break;
3004 case 0x3C: l2 = 256; break;
3005 case 0x3D: l2 = 384; break;
3006 case 0x3E: l2 = 512; break;
3007 case 0x40: l2 = 0; break;
3008 case 0x41: l2 = 128; break;
3009 case 0x42: l2 = 256; break;
3010 case 0x43: l2 = 512; break;
3011 case 0x44: l2 = 1024; break;
3012 case 0x45: l2 = 2048; break;
3013 case 0x46: l3 = 4096; break;
3014 case 0x47: l3 = 8192; break;
3015 case 0x48: l2 = 3072; break;
// 0x49 is ambiguous: it means L3=4MB, except on P2/Core2 where it is the L2.
3016 case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;
3017 case 0x4A: l3 = 6144; break;
3018 case 0x4B: l3 = 8192; break;
3019 case 0x4C: l3 = 12288; break;
3020 case 0x4D: l3 = 16384; break;
3021 case 0x4E: l2 = 6144; break;
3022 case 0x78: l2 = 1024; break;
3023 case 0x79: l2 = 128; break;
3024 case 0x7A: l2 = 256; break;
3025 case 0x7B: l2 = 512; break;
3026 case 0x7C: l2 = 1024; break;
3027 case 0x7D: l2 = 2048; break;
3028 case 0x7E: l2 = 256; break;
3029 case 0x7F: l2 = 512; break;
3030 case 0x80: l2 = 512; break;
3031 case 0x81: l2 = 128; break;
3032 case 0x82: l2 = 256; break;
3033 case 0x83: l2 = 512; break;
3034 case 0x84: l2 = 1024; break;
3035 case 0x85: l2 = 2048; break;
3036 case 0x86: l2 = 512; break;
3037 case 0x87: l2 = 1024; break;
3038 case 0x88: l3 = 2048; break;
3039 case 0x89: l3 = 4096; break;
3040 case 0x8A: l3 = 8192; break;
3041 case 0x8D: l3 = 3072; break;
// Resolve the 0x49 ambiguity after the scan (resolution lines elided here).
3045 if(check_for_p2_core2 && l2 == l3)
// Intel dispatch: leaf 4 (direct enumeration) when the CPU reports at least
// 4 standard CPUID functions, else fall back to the legacy leaf-2 codes.
3051 inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
3053 if(max_std_funcs>=4)
3054 queryCacheSizes_intel_direct(l1,l2,l3);
3056 queryCacheSizes_intel_codes(l1,l2,l3);
// AMD path: extended leaves 0x80000005 (L1) and 0x80000006 (L2/L3) report
// sizes directly. L1/L2 are given in KB in the top bits of ECX; L3 in
// 512KB units in the top 14 bits of EDX.
3058 inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
3061 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
3062 EIGEN_CPUID(abcd,0x80000005,0);
3063 l1 = (abcd[2] >> 24) * 1024;
3064 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
3065 EIGEN_CPUID(abcd,0x80000006,0);
3066 l2 = (abcd[2] >> 16) * 1024;
3067 l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024;
// Vendor-dispatching entry point: read leaf 0 for the vendor string and the
// highest supported standard function, then route to the Intel or AMD query.
// Unknown vendors default to the Intel protocol (a de-facto standard).
3070 inline void queryCacheSizes(int& l1, int& l2, int& l3)
3074 const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
3075 const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
3076 const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574};
3077 EIGEN_CPUID(abcd,0x0,0);
3078 int max_std_funcs = abcd[1];
3079 if(cpuid_is_vendor(abcd,GenuineIntel))
3080 queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
3081 else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
3082 queryCacheSizes_amd(l1,l2,l3);
3084 queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
// Convenience accessors used by the blocking-size heuristics.
3089 inline int queryL1CacheSize()
3092 queryCacheSizes(l1,l2,l3);
// Top-level cache = the larger of L2/L3 (both initialized to -1 = unknown).
3095 inline int queryTopLevelCacheSize()
3097 int l1, l2(-1), l3(-1);
3098 queryCacheSizes(l1,l2,l3);
3099 return (std::max)(l2,l3);
3104 // end #include "src/Core/util/Memory.h"
3105 // #include "src/Core/NumTraits.h"
3106 #ifndef EIGEN_NUMTRAITS_H
3107 #define EIGEN_NUMTRAITS_H
3109 namespace internal {
// Default implementation of NumTraits<T>::digits10(), dispatched on whether
// std::numeric_limits is specialized for T and whether T is an integer.
3110 template< typename T,
3111 bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
3112 bool is_integer = NumTraits<T>::IsInteger>
3113 struct default_digits10_impl
// Preferred path: defer to numeric_limits.
3115 static int run() { return std::numeric_limits<T>::digits10; }
// No numeric_limits, non-integer (e.g. custom scalar types): derive digits10
// from the type's epsilon, digits10 ~= ceil(-log10(eps)).
3117 template<typename T>
3118 struct default_digits10_impl<T,false,false>
3123 typedef typename NumTraits<T>::Real Real;
3124 return int(ceil(-log10(NumTraits<Real>::epsilon())));
// No numeric_limits, integer: no fractional digits to report.
3127 template<typename T>
3128 struct default_digits10_impl<T,false,true>
3130 static int run() { return 0; }
// Base class providing default NumTraits for a scalar type T, mostly by
// forwarding to std::numeric_limits (via the numext wrappers).
3133 template<typename T> struct GenericNumTraits
3136 IsInteger = std::numeric_limits<T>::is_integer,
3137 IsSigned = std::numeric_limits<T>::is_signed,
// Arithmetic types need no construction; everything else does.
3139 RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,
// NonInteger type selection: small integers promote to float, larger to
// double (surrounding conditional lines are elided in this excerpt).
3145 typedef typename internal::conditional<
3147 typename internal::conditional<sizeof(T)<=2, float, double>::type,
3153 static inline Real epsilon()
3155 return numext::numeric_limits<T>::epsilon();
3158 static inline int digits10()
3160 return internal::default_digits10_impl<T>::run();
// dummy_precision: tolerance used by isApprox-style comparisons (value
// lines elided here; specializations below override it).
3163 static inline Real dummy_precision()
3168 static inline T highest() {
3169 return (numext::numeric_limits<T>::max)();
// lowest: min() for integers; for floating point min() is the smallest
// positive value, so use -max() instead.
3172 static inline T lowest() {
3173 return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
3176 static inline T infinity() {
3177 return numext::numeric_limits<T>::infinity();
3180 static inline T quiet_NaN() {
3181 return numext::numeric_limits<T>::quiet_NaN();
// Primary NumTraits template: inherit all defaults from GenericNumTraits.
3184 template<typename T> struct NumTraits : GenericNumTraits<T>
// Built-in floating-point specializations override dummy_precision with a
// looser, type-appropriate tolerance for approximate comparisons.
3186 template<> struct NumTraits<float>
3187 : GenericNumTraits<float>
3190 static inline float dummy_precision() { return 1e-5f; }
3192 template<> struct NumTraits<double> : GenericNumTraits<double>
3195 static inline double dummy_precision() { return 1e-12; }
3197 template<> struct NumTraits<long double>
3198 : GenericNumTraits<long double>
3200 static inline long double dummy_precision() { return 1e-15l; }
// std::complex: costs double/quadruple the underlying real's costs; epsilon,
// dummy_precision and digits10 are those of the real component type.
3202 template<typename _Real> struct NumTraits<std::complex<_Real> >
3203 : GenericNumTraits<std::complex<_Real> >
3206 typedef typename NumTraits<_Real>::Literal Literal;
3209 RequireInitialization = NumTraits<_Real>::RequireInitialization,
3210 ReadCost = 2 * NumTraits<_Real>::ReadCost,
3211 AddCost = 2 * NumTraits<Real>::AddCost,
// complex multiply = 4 real multiplies + 2 real adds.
3212 MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
3215 static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
3217 static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
3219 static inline int digits10() { return NumTraits<Real>::digits10(); }
// NumTraits for an Array-of-scalars, enabling Arrays to be used as the
// scalar type of another expression: Real/NonInteger become Arrays of the
// corresponding scalar traits, and costs scale by the compile-time size
// (HugeCost when the size is Dynamic).
3221 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
3222 struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
3224 typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> ArrayType;
3225 typedef typename NumTraits<Scalar>::Real RealScalar;
3226 typedef Array<RealScalar, Rows, Cols, Options, MaxRows, MaxCols> Real;
3227 typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
3228 typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
3229 typedef ArrayType & Nested;
3230 typedef typename NumTraits<Scalar>::Literal Literal;
3232 IsComplex = NumTraits<Scalar>::IsComplex,
3233 IsInteger = NumTraits<Scalar>::IsInteger,
3234 IsSigned = NumTraits<Scalar>::IsSigned,
// Arrays always need construction, whatever the scalar is.
3235 RequireInitialization = 1,
3236 ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
3237 AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
3238 MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
3241 static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
3243 static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
3244 static inline int digits10() { return NumTraits<Scalar>::digits10(); }
// NumTraits for std::string: lets string-valued Arrays compile (e.g. for
// I/O and testing). Numeric queries are declared but deliberately left
// undefined here — using them is a link-time error.
3246 template<> struct NumTraits<std::string>
3247 : GenericNumTraits<std::string>
3250 RequireInitialization = 1,
3251 ReadCost = HugeCost,
3255 static inline int digits10() { return 0; }
3257 static inline std::string epsilon();
3258 static inline std::string dummy_precision();
3259 static inline std::string lowest();
3260 static inline std::string highest();
3261 static inline std::string infinity();
3262 static inline std::string quiet_NaN();
// void has no traits; the empty specialization allows NumTraits<void>
// to be named without instantiating the generic template.
3264 template<> struct NumTraits<void> {};
3267 // end #include "src/Core/NumTraits.h"
3268 // #include "src/Core/MathFunctions.h"
3269 #ifndef EIGEN_MATHFUNCTIONS_H
3270 #define EIGEN_MATHFUNCTIONS_H
3271 #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
// Old MSVC for Windows CE lacks the C++ abs overloads; supply them from the
// C functions so generic code calling abs() compiles there.
3273 #if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500
3274 long abs(long x) { return (labs(x)); }
3275 double abs(double x) { return (fabs(x)); }
3276 float abs(float x) { return (fabsf(x)); }
3277 long double abs(long double x) { return (fabsl(x)); }
3279 namespace internal {
// SFINAE hook: maps a scalar type T to the type on which the *_impl math
// functor templates are specialized. By default it is T itself; a type can
// redirect to a base by declaring the nested typedef below, so one
// specialization covers a whole family of derived scalar types.
3280 template<typename T, typename dummy = void>
3281 struct global_math_functions_filtering_base
3285 template<typename T> struct always_void { typedef void type; };
// Partial specialization selected (via always_void/SFINAE) only when T
// declares Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl.
3286 template<typename T>
3287 struct global_math_functions_filtering_base
3289 typename always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type
3292 typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type;
// Helper macros: resolve the implementation struct and return type of a math
// function for a given scalar, going through the filtering base above.
3294 #define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>
3295 #define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>::type
// real(): identity for real scalars; the complex specialization extracts the
// real part (extraction lines elided in this excerpt).
3296 template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
3297 struct real_default_impl
3299 typedef typename NumTraits<Scalar>::Real RealScalar;
3301 static inline RealScalar run(const Scalar& x)
3306 template<typename Scalar>
3307 struct real_default_impl<Scalar,true>
3309 typedef typename NumTraits<Scalar>::Real RealScalar;
3311 static inline RealScalar run(const Scalar& x)
3317 template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
// On CUDA device code std::real is unavailable, so provide a direct
// std::complex specialization.
3318 #ifdef __CUDA_ARCH__
3319 template<typename T>
3320 struct real_impl<std::complex<T> >
3322 typedef T RealScalar;
3324 static inline T run(const std::complex<T>& x)
// real_retval: real() always returns the Real type of the scalar.
3330 template<typename Scalar>
3333 typedef typename NumTraits<Scalar>::Real type;
// imag(): zero for real scalars, imaginary part for complex (same dispatch
// pattern as real_impl above).
3335 template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
3336 struct imag_default_impl
3338 typedef typename NumTraits<Scalar>::Real RealScalar;
3340 static inline RealScalar run(const Scalar&)
3342 return RealScalar(0);
3345 template<typename Scalar>
3346 struct imag_default_impl<Scalar,true>
3348 typedef typename NumTraits<Scalar>::Real RealScalar;
3350 static inline RealScalar run(const Scalar& x)
3356 template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
// CUDA device specialization, as for real_impl.
3357 #ifdef __CUDA_ARCH__
3358 template<typename T>
3359 struct imag_impl<std::complex<T> >
3361 typedef T RealScalar;
3363 static inline T run(const std::complex<T>& x)
3369 template<typename Scalar>
3372 typedef typename NumTraits<Scalar>::Real type;
// real_ref(): mutable/const reference access to the real part, implemented
// by reinterpreting the scalar as an array of two Reals — element [0] is the
// real part (relies on the standard std::complex layout guarantee).
3374 template<typename Scalar>
3375 struct real_ref_impl
3377 typedef typename NumTraits<Scalar>::Real RealScalar;
3379 static inline RealScalar& run(Scalar& x)
3381 return reinterpret_cast<RealScalar*>(&x)[0];
3384 static inline const RealScalar& run(const Scalar& x)
3386 return reinterpret_cast<const RealScalar*>(&x)[0];
3389 template<typename Scalar>
3390 struct real_ref_retval
3392 typedef typename NumTraits<Scalar>::Real & type;
// imag_ref(): reference access to the imaginary part — element [1] of the
// two-Real layout for complex scalars; for real scalars there is no
// imaginary component to reference, so a zero value is returned instead.
3394 template<typename Scalar, bool IsComplex>
3395 struct imag_ref_default_impl
3397 typedef typename NumTraits<Scalar>::Real RealScalar;
3399 static inline RealScalar& run(Scalar& x)
3401 return reinterpret_cast<RealScalar*>(&x)[1];
3404 static inline const RealScalar& run(const Scalar& x)
3406 return reinterpret_cast<RealScalar*>(&x)[1];
// Real-scalar case: returns by value (Scalar(0) presumably — the return
// lines are elided in this excerpt).
3409 template<typename Scalar>
3410 struct imag_ref_default_impl<Scalar, false>
3413 static inline Scalar run(Scalar&)
3418 static inline const Scalar run(const Scalar&)
3423 template<typename Scalar>
3424 struct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
3425 template<typename Scalar>
3426 struct imag_ref_retval
3428 typedef typename NumTraits<Scalar>::Real & type;
// conj(): identity for real scalars; the complex specialization negates the
// imaginary part (body lines elided in this excerpt).
3430 template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
3434 static inline Scalar run(const Scalar& x)
3439 template<typename Scalar>
3440 struct conj_impl<Scalar,true>
3443 static inline Scalar run(const Scalar& x)
3449 template<typename Scalar>
3452 typedef Scalar type;
// abs2(): squared magnitude — x*x for real scalars, re^2+im^2 for complex.
3454 template<typename Scalar,bool IsComplex>
3455 struct abs2_impl_default
3457 typedef typename NumTraits<Scalar>::Real RealScalar;
3459 static inline RealScalar run(const Scalar& x)
3464 template<typename Scalar>
3465 struct abs2_impl_default<Scalar, true>
3467 typedef typename NumTraits<Scalar>::Real RealScalar;
3469 static inline RealScalar run(const Scalar& x)
3471 return real(x)*real(x) + imag(x)*imag(x);
3474 template<typename Scalar>
3477 typedef typename NumTraits<Scalar>::Real RealScalar;
3479 static inline RealScalar run(const Scalar& x)
3481 return abs2_impl_default<Scalar,NumTraits<Scalar>::IsComplex>::run(x);
3484 template<typename Scalar>
3487 typedef typename NumTraits<Scalar>::Real type;
// norm1(): the "L1 magnitude" of a scalar — |re|+|im| for complex, plain
// abs for real scalars.
3489 template<typename Scalar, bool IsComplex>
3490 struct norm1_default_impl
3492 typedef typename NumTraits<Scalar>::Real RealScalar;
3494 static inline RealScalar run(const Scalar& x)
3496 EIGEN_USING_STD_MATH(abs);
3497 return abs(real(x)) + abs(imag(x));
3500 template<typename Scalar>
3501 struct norm1_default_impl<Scalar, false>
3504 static inline Scalar run(const Scalar& x)
3506 EIGEN_USING_STD_MATH(abs);
3510 template<typename Scalar>
3511 struct norm1_impl : norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
3512 template<typename Scalar>
3515 typedef typename NumTraits<Scalar>::Real type;
// hypot(x,y): overflow/underflow-safe sqrt(x^2+y^2). Scales by p=max(|x|,|y|)
// and computes p*sqrt(1+(q/p)^2), where q/p<=1 (the lines computing p and qp
// from _x,_y are elided in this excerpt).
3517 template<typename Scalar>
3520 typedef typename NumTraits<Scalar>::Real RealScalar;
3521 static inline RealScalar run(const Scalar& x, const Scalar& y)
3523 EIGEN_USING_STD_MATH(abs);
3524 EIGEN_USING_STD_MATH(sqrt);
3525 RealScalar _x = abs(x);
3526 RealScalar _y = abs(y);
// p==0 means both inputs are zero; avoid 0*sqrt(...) and a 0/0 in qp.
3538 if(p==RealScalar(0)) return RealScalar(0);
3539 return p * sqrt(RealScalar(1) + qp*qp);
3542 template<typename Scalar>
3545 typedef typename NumTraits<Scalar>::Real type;
// cast<Old,New>(x): customization point for scalar conversion; the default
// is a plain static_cast. Specialize cast_impl for user types.
3547 template<typename OldType, typename NewType>
3551 static inline NewType run(const OldType& x)
3553 return static_cast<NewType>(x);
3556 template<typename OldType, typename NewType>
3558 inline NewType cast(const OldType& x)
3560 return cast_impl<OldType, NewType>::run(x);
// round(): use std::round where C++11 math is available; otherwise emulate
// round-half-away-from-zero via floor/ceil with a 0.5 offset.
3562 #if EIGEN_HAS_CXX11_MATH
3563 template<typename Scalar>
3565 static inline Scalar run(const Scalar& x)
3567 EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
3573 template<typename Scalar>
3576 static inline Scalar run(const Scalar& x)
3578 EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
3579 EIGEN_USING_STD_MATH(floor);
3580 EIGEN_USING_STD_MATH(ceil);
3581 return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5));
3585 template<typename Scalar>
3588 typedef Scalar type;
// arg(): complex argument (phase angle). With C++11 math, defer to std::arg
// for all scalars; otherwise the real-scalar default returns pi for negative
// values and 0 for non-negative, and the complex specialization uses arg.
3590 #if EIGEN_HAS_CXX11_MATH
3591 template<typename Scalar>
3593 static inline Scalar run(const Scalar& x)
3595 EIGEN_USING_STD_MATH(arg);
3600 template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
3601 struct arg_default_impl
3603 typedef typename NumTraits<Scalar>::Real RealScalar;
3605 static inline RealScalar run(const Scalar& x)
3607 return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); }
3609 template<typename Scalar>
3610 struct arg_default_impl<Scalar,true>
3612 typedef typename NumTraits<Scalar>::Real RealScalar;
3614 static inline RealScalar run(const Scalar& x)
3616 EIGEN_USING_STD_MATH(arg);
3620 template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
// arg() returns the Real type of the scalar.
3622 template<typename Scalar>
3625 typedef typename NumTraits<Scalar>::Real type;
// Fallback log1p(x) = log(1+x) for toolchains without std::log1p, using the
// classic correction log(1+x) = x * log(x1p)/(x1p-1) with x1p = 1+x, which
// limits cancellation error for small x; returns x itself when 1+x rounds
// to 1.
3627 namespace std_fallback {
3628 template<typename Scalar>
3629 EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
3630 EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
3631 typedef typename NumTraits<Scalar>::Real RealScalar;
3632 EIGEN_USING_STD_MATH(log);
3633 Scalar x1p = RealScalar(1) + x;
3634 return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
// log1p_impl: prefer std::log1p when C++11 math is available, else pull in
// the fallback above via using-declaration (call line elided in excerpt).
3637 template<typename Scalar>
3639 static inline Scalar run(const Scalar& x)
3641 EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
3642 #if EIGEN_HAS_CXX11_MATH
3645 using std_fallback::log1p;
3649 template<typename Scalar>
3652 typedef Scalar type;
// pow(x,y): for non-integer operands defer to std::pow, with the result type
// chosen by ScalarBinaryOpTraits; the integer-integer specialization
// (exponentiation by repeated multiplication — loop lines elided) asserts
// against negative exponents, which are meaningless for integer results.
3654 template<typename ScalarX,typename ScalarY, bool IsInteger = NumTraits<ScalarX>::IsInteger&&NumTraits<ScalarY>::IsInteger>
3657 typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
3658 static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
3660 EIGEN_USING_STD_MATH(pow);
3664 template<typename ScalarX,typename ScalarY>
3665 struct pow_impl<ScalarX,ScalarY, true>
3667 typedef ScalarX result_type;
3668 static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y)
3671 eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);
// random(): dispatched on (IsComplex, IsInteger). The primary template is
// empty; each combination is a partial specialization below.
3683 template<typename Scalar,
3686 struct random_default_impl {};
3687 template<typename Scalar>
3688 struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
3689 template<typename Scalar>
3690 struct random_retval
3692 typedef Scalar type;
// Forward declarations of the ranged and default random() entry points.
3694 template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
3695 template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
// Real (non-complex, non-integer) case: uniform-ish value in [x,y] built
// from std::rand(); the no-arg form uses [-1,1] for signed types, [0,1]
// otherwise. Not a high-quality generator — test/benchmark use only.
3696 template<typename Scalar>
3697 struct random_default_impl<Scalar, false, false>
3699 static inline Scalar run(const Scalar& x, const Scalar& y)
3701 return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX);
3703 static inline Scalar run()
3705 return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1));
// Compile-time floor(log2(n)) via binary search on the bit position.
// The selector decides the next step: terminate when the [lower,upper]
// window is down to one bit, otherwise move toward the half containing n;
// n==0 is flagged as bogus (log2 undefined).
3709 meta_floor_log2_terminate,
3710 meta_floor_log2_move_up,
3711 meta_floor_log2_move_down,
3712 meta_floor_log2_bogus
3714 template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
3716 enum { middle = (lower + upper) / 2,
3717 value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
3718 : (n < (1 << middle)) ? int(meta_floor_log2_move_down)
3719 : (n==0) ? int(meta_floor_log2_bogus)
3720 : int(meta_floor_log2_move_up)
// Recursive cases: shrink the window to [lower,middle] or [middle,upper].
3723 template<unsigned int n,
3725 int upper = sizeof(unsigned int) * CHAR_BIT - 1,
3726 int selector = meta_floor_log2_selector<n, lower, upper>::value>
3727 struct meta_floor_log2 {};
3728 template<unsigned int n, int lower, int upper>
3729 struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
3731 enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
3733 template<unsigned int n, int lower, int upper>
3734 struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
3736 enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
// Terminal case: pick lower or lower+1 depending on which power of two n
// reaches. The bogus case (n==0) intentionally defines no `value`.
3738 template<unsigned int n, int lower, int upper>
3739 struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
3741 enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
3743 template<unsigned int n, int lower, int upper>
3744 struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
// Integer scalars: uniform value in [x,y] using rejection sampling so the
// distribution stays unbiased when (y-x+1) does not divide RAND_MAX+1.
// NOTE(review): extraction dropped braces and the do{ line of the loop.
3747 template<typename Scalar>
3748 struct random_default_impl<Scalar, false, true>
3750 static inline Scalar run(const Scalar& x, const Scalar& y)
// Widen to ptrdiff_t/size_t so the range arithmetic cannot overflow Scalar.
3752 typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
3755 std::size_t range = ScalarX(y)-ScalarX(x);
3756 std::size_t offset = 0;
3757 std::size_t divisor = 1;
3758 std::size_t multiplier = 1;
// Shrink (divisor) or stretch (multiplier) rand()'s output to cover the range.
3759 if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
3760 else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
// Rejection loop: resample until the offset falls inside [0, range].
3762 offset = (std::size_t(std::rand()) * multiplier) / divisor;
3763 } while (offset > range);
3764 return Scalar(ScalarX(x) + offset);
3766 static inline Scalar run()
// Docs builds use a small human-friendly range; real builds use the full
// representable range derived from RAND_MAX and the scalar's bit width.
3768 #ifdef EIGEN_MAKING_DOCS
3769 return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
3771 enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
3772 scalar_bits = sizeof(Scalar) * CHAR_BIT,
3773 shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
3774 offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
3776 return Scalar((std::rand() >> shift) - offset);
// Complex scalars: draw real and imaginary parts independently.
// NOTE(review): extraction dropped struct/function braces; kept byte-identical.
3780 template<typename Scalar>
3781 struct random_default_impl<Scalar, true, false>
3783 static inline Scalar run(const Scalar& x, const Scalar& y)
3785 return Scalar(random(real(x), real(y)),
3786 random(imag(x), imag(y)))
3788 static inline Scalar run()
3790 typedef typename NumTraits<Scalar>::Real RealScalar;
3791 return Scalar(random<RealScalar>(), random<RealScalar>());
// Public entry points: forward to the selected random_impl specialization.
3794 template<typename Scalar>
3795 inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y)
3797 return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
3799 template<typename Scalar>
3800 inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
3802 return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
// Portable isnan/isinf/isfinite. EIGEN_USE_STD_FPCLASSIFY is 1 when the
// C++11 classification functions are usable (not broken by -ffinite-math-only
// on strict GCC); otherwise a NumTraits-based range test is used.
// NOTE(review): extraction dropped #else/#endif lines; kept byte-identical.
3804 #if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG)
3805 #define EIGEN_USE_STD_FPCLASSIFY 1
3807 #define EIGEN_USE_STD_FPCLASSIFY 0
// Integral types: trivially never NaN/Inf, always finite.
3809 template<typename T>
3811 typename internal::enable_if<internal::is_integral<T>::value,bool>::type
3812 isnan_impl(const T&) { return false; }
3813 template<typename T>
3815 typename internal::enable_if<internal::is_integral<T>::value,bool>::type
3816 isinf_impl(const T&) { return false; }
3817 template<typename T>
3819 typename internal::enable_if<internal::is_integral<T>::value,bool>::type
3820 isfinite_impl(const T&) { return true; }
// Real floating-point types: CUDA device code uses the ::isfinite builtin,
// host code uses std::isfinite, and the fallback is a range check.
3821 template<typename T>
3823 typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
3824 isfinite_impl(const T& x)
3826 #ifdef __CUDA_ARCH__
3827 return (::isfinite)(x);
3828 #elif EIGEN_USE_STD_FPCLASSIFY
3829 using std::isfinite;
// EIGEN_NOT_A_MACRO blocks expansion when isfinite is a C99 macro.
3830 return isfinite EIGEN_NOT_A_MACRO (x);
3832 return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
3835 template<typename T>
3837 typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
3838 isinf_impl(const T& x)
3840 #ifdef __CUDA_ARCH__
3841 return (::isinf)(x);
3842 #elif EIGEN_USE_STD_FPCLASSIFY
3844 return isinf EIGEN_NOT_A_MACRO (x);
3846 return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
3849 template<typename T>
3851 typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
3852 isnan_impl(const T& x)
3854 #ifdef __CUDA_ARCH__
3855 return (::isnan)(x);
3856 #elif EIGEN_USE_STD_FPCLASSIFY
3858 return isnan EIGEN_NOT_A_MACRO (x);
// Fallbacks when std::fpclassify is unavailable (old MSVC: _isnan/_fpclass)
// or unreliable (GCC with -ffinite-math-only: force no-finite-math-only
// codegen on these helpers so the builtins are not optimized to constants).
// NOTE(review): extraction dropped the MSVC-guard and #endif lines.
3863 #if (!EIGEN_USE_STD_FPCLASSIFY)
3865 template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
3867 return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF;
3869 EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
3870 EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
3871 EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
3872 EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
3873 EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
3874 EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); }
3875 #elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC)
// GCC >= 5 can apply the attribute without disabling inlining.
3876 #if EIGEN_GNUC_AT_LEAST(5,0)
3877 #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only")))
3879 #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only")))
3881 template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }
3882 template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); }
3883 template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); }
3884 template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); }
3885 template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); }
3886 template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }
3887 #undef EIGEN_TMP_NOOPT_ATTRIB
// Forward declarations: complex overloads are defined further below, and
// generic_fast_tanh_float is defined in the packet-math code.
3890 template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);
3891 template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
3892 template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
3893 template<typename T> T generic_fast_tanh_float(const T& a_x);
// numext::mini / numext::maxi. Host builds defer to (std::)min/max via ADL;
// CUDA device builds use plain comparisons plus fminf/fmaxf specializations
// for float (bodies of those specializations were dropped by the extraction).
3896 #ifndef __CUDA_ARCH__
3897 template<typename T>
3899 EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
3901 EIGEN_USING_STD_MATH(min);
3902 return min EIGEN_NOT_A_MACRO (x,y);
3904 template<typename T>
3906 EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
3908 EIGEN_USING_STD_MATH(max);
3909 return max EIGEN_NOT_A_MACRO (x,y);
// Device-side branch (#else dropped by extraction): comparison-based min/max.
3912 template<typename T>
3914 EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
3916 return y < x ? y : x;
3920 EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
3924 template<typename T>
3926 EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
3928 return x < y ? y : x;
3932 EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
// numext wrappers: thin, device-safe forwarders to the EIGEN_MATHFUNC_IMPL
// machinery for real/imag/conj/abs2/norm1/hypot/log1p and friends.
// NOTE(review): function braces were dropped by the extraction.
3937 template<typename Scalar>
3939 inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
3941 return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
// real_ref: const and mutable reference access to the real part.
3943 template<typename Scalar>
3945 inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
3947 return internal::real_ref_impl<Scalar>::run(x);
3949 template<typename Scalar>
3951 inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
3953 return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
3955 template<typename Scalar>
3957 inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
3959 return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
// arg: complex argument (phase angle).
3961 template<typename Scalar>
3963 inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)
3965 return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
3967 template<typename Scalar>
3969 inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
3971 return internal::imag_ref_impl<Scalar>::run(x);
3973 template<typename Scalar>
3975 inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
3977 return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
3979 template<typename Scalar>
3981 inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
3983 return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
// abs2: squared magnitude; norm1: L1 norm of a scalar (|re|+|im| for complex).
3985 template<typename Scalar>
3987 inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
3989 return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
3991 template<typename Scalar>
3993 inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
3995 return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
// hypot: overflow-safe sqrt(x^2+y^2).
3997 template<typename Scalar>
3999 inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
4001 return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
// log1p: accurate log(1+x) for small x.
4003 template<typename Scalar>
4005 inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
4007 return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
// CUDA-friendly specializations routing to the C math library, the pow()
// dispatcher, the classification wrappers, and round/floor/ceil.
// NOTE(review): surrounding #if defined(__CUDACC__)/#endif lines were
// dropped by the extraction; kept byte-identical.
4010 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4011 float log1p(const float &x) { return ::log1pf(x); }
4012 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4013 double log1p(const double &x) { return ::log1p(x); }
// pow: result type comes from pow_impl so mixed scalar/exponent types work.
4015 template<typename ScalarX,typename ScalarY>
4017 inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
4019 return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
// Parenthesized names prevent expansion if isnan/isinf/isfinite are macros.
4021 template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
4022 template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); }
4023 template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
4024 template<typename Scalar>
4026 inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
4028 return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
4030 template<typename T>
4032 T (floor)(const T& x)
4034 EIGEN_USING_STD_MATH(floor);
4038 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4039 float floor(const float &x) { return ::floorf(x); }
4040 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4041 double floor(const double &x) { return ::floor(x); }
4043 template<typename T>
4045 T (ceil)(const T& x)
4047 EIGEN_USING_STD_MATH(ceil);
4051 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4052 float ceil(const float &x) { return ::ceilf(x); }
4053 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4054 double ceil(const double &x) { return ::ceil(x); }
// Integer log2 via de Bruijn multiplication on the rounded-up power of two
// (the bit-smearing shift sequence was dropped by the extraction).
4056 inline int log2(int x)
4060 static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
4066 return table[(v * 0x07C4ACDDU) >> 27];
// Generic device-safe wrappers for the elementary functions. Each generic
// version pulls the std:: name into scope (EIGEN_USING_STD_MATH) and the
// float/double specializations call the C library directly so they also
// work in CUDA device code. Signatures/bodies of the generic versions were
// partially dropped by the extraction; kept byte-identical.
4068 template<typename T>
4069 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4072 EIGEN_USING_STD_MATH(sqrt);
4075 template<typename T>
4076 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4078 EIGEN_USING_STD_MATH(log);
4082 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4083 float log(const float &x) { return ::logf(x); }
4084 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4085 double log(const double &x) { return ::log(x); }
// abs: enabled only for signed/complex scalars (returns the Real type);
// the unsigned overload below is the identity.
4087 template<typename T>
4088 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4089 typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
4091 EIGEN_USING_STD_MATH(abs);
4094 template<typename T>
4095 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4096 typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
// SYCL device builds route float/double abs through cl::sycl::fabs.
4100 #if defined(__SYCL_DEVICE_ONLY__)
4101 EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
4102 EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
4105 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4106 float abs(const float &x) { return ::fabsf(x); }
4107 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4108 double abs(const double &x) { return ::fabs(x); }
// Complex abs via hypot: avoids overflow/underflow in re^2+im^2.
4109 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4110 float abs(const std::complex<float>& x) {
4111 return ::hypotf(x.real(), x.imag());
4113 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4114 double abs(const std::complex<double>& x) {
4115 return ::hypot(x.real(), x.imag());
4118 template<typename T>
4119 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4121 EIGEN_USING_STD_MATH(exp);
4125 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4126 float exp(const float &x) { return ::expf(x); }
4127 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4128 double exp(const double &x) { return ::exp(x); }
4130 template<typename T>
4131 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4133 EIGEN_USING_STD_MATH(cos);
4137 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4138 float cos(const float &x) { return ::cosf(x); }
4139 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4140 double cos(const double &x) { return ::cos(x); }
4142 template<typename T>
4143 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4145 EIGEN_USING_STD_MATH(sin);
4149 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4150 float sin(const float &x) { return ::sinf(x); }
4151 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4152 double sin(const double &x) { return ::sin(x); }
4154 template<typename T>
4155 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4157 EIGEN_USING_STD_MATH(tan);
4161 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4162 float tan(const float &x) { return ::tanf(x); }
4163 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4164 double tan(const double &x) { return ::tan(x); }
// Inverse-trig, hyperbolic, and fmod wrappers — same pattern as above:
// generic EIGEN_USING_STD_MATH version plus C-library float/double
// specializations usable on CUDA devices. The #if defined(__CUDACC__)
// guards around the specializations were dropped by the extraction.
4166 template<typename T>
4167 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4168 T acos(const T &x) {
4169 EIGEN_USING_STD_MATH(acos);
4173 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4174 float acos(const float &x) { return ::acosf(x); }
4175 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4176 double acos(const double &x) { return ::acos(x); }
4178 template<typename T>
4179 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4180 T asin(const T &x) {
4181 EIGEN_USING_STD_MATH(asin);
4185 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4186 float asin(const float &x) { return ::asinf(x); }
4187 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4188 double asin(const double &x) { return ::asin(x); }
4190 template<typename T>
4191 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4192 T atan(const T &x) {
4193 EIGEN_USING_STD_MATH(atan);
4197 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4198 float atan(const float &x) { return ::atanf(x); }
4199 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4200 double atan(const double &x) { return ::atan(x); }
4202 template<typename T>
4203 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4204 T cosh(const T &x) {
4205 EIGEN_USING_STD_MATH(cosh);
4209 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4210 float cosh(const float &x) { return ::coshf(x); }
4211 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4212 double cosh(const double &x) { return ::cosh(x); }
4214 template<typename T>
4215 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4216 T sinh(const T &x) {
4217 EIGEN_USING_STD_MATH(sinh);
4221 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4222 float sinh(const float &x) { return ::sinhf(x); }
4223 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4224 double sinh(const double &x) { return ::sinh(x); }
4226 template<typename T>
4227 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4228 T tanh(const T &x) {
4229 EIGEN_USING_STD_MATH(tanh);
// Host fast-math builds replace float tanh by the vectorizable polynomial
// approximation generic_fast_tanh_float (declared earlier).
4232 #if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
4233 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4234 float tanh(float x) { return internal::generic_fast_tanh_float(x); }
4237 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4238 float tanh(const float &x) { return ::tanhf(x); }
4239 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4240 double tanh(const double &x) { return ::tanh(x); }
4242 template <typename T>
4243 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4244 T fmod(const T& a, const T& b) {
4245 EIGEN_USING_STD_MATH(fmod);
4250 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4251 float fmod(const float& a, const float& b) {
4252 return ::fmodf(a, b);
4255 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
4256 double fmod(const double& a, const double& b) {
4257 return ::fmod(a, b);
// internal: complex classification overloads plus the fuzzy-comparison
// machinery used by isApprox/isMuchSmallerThan/isApproxOrLessThan.
// NOTE(review): braces and a few trivial bodies were dropped by the
// extraction; kept byte-identical.
4261 namespace internal {
// A complex number is finite iff both parts are; NaN if either part is;
// Inf if either part is infinite and it is not NaN.
4262 template<typename T>
4263 EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x)
4265 return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
4267 template<typename T>
4268 EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
4270 return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
4272 template<typename T>
4273 EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x)
4275 return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
// Primary template is empty; specializations on (IsComplex, IsInteger) follow.
4277 template<typename Scalar,
4280 struct scalar_fuzzy_default_impl {};
// Real non-integer scalars: relative comparisons scaled by |x|,|y|.
4281 template<typename Scalar>
4282 struct scalar_fuzzy_default_impl<Scalar, false, false>
4284 typedef typename NumTraits<Scalar>::Real RealScalar;
4285 template<typename OtherScalar> EIGEN_DEVICE_FUNC
4286 static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
4288 return numext::abs(x) <= numext::abs(y) * prec;
4291 static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
// Symmetric relative test: tolerance scales with the smaller magnitude.
4293 return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
4296 static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
4298 return x <= y || isApprox(x, y, prec);
// Integer scalars: comparisons are exact; precision is ignored.
4301 template<typename Scalar>
4302 struct scalar_fuzzy_default_impl<Scalar, false, true>
4304 typedef typename NumTraits<Scalar>::Real RealScalar;
4305 template<typename OtherScalar> EIGEN_DEVICE_FUNC
4306 static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
4308 return x == Scalar(0);
4311 static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
4316 static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
// Complex scalars: same relative tests expressed with squared magnitudes
// (abs2) so no square roots are needed.
4321 template<typename Scalar>
4322 struct scalar_fuzzy_default_impl<Scalar, true, false>
4324 typedef typename NumTraits<Scalar>::Real RealScalar;
4325 template<typename OtherScalar> EIGEN_DEVICE_FUNC
4326 static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
4328 return numext::abs2(x) <= numext::abs2(y) * prec * prec;
4331 static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
4333 return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
4336 template<typename Scalar>
4337 struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
// Public entry points with the NumTraits dummy_precision default.
4338 template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
4339 inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
4340 const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
4342 return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
4344 template<typename Scalar> EIGEN_DEVICE_FUNC
4345 inline bool isApprox(const Scalar& x, const Scalar& y,
4346 const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
4348 return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
4350 template<typename Scalar> EIGEN_DEVICE_FUNC
4351 inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
4352 const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
4354 return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
// bool specializations: random coin flip and exact logical comparisons
// (bodies of the comparison members were dropped by the extraction).
4356 template<> struct random_impl<bool>
4358 static inline bool run()
4360 return random<int>(0,1)==0 ? false : true;
4363 template<> struct scalar_fuzzy_impl<bool>
4365 typedef bool RealScalar;
4366 template<typename OtherScalar> EIGEN_DEVICE_FUNC
4367 static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
4372 static inline bool isApprox(bool x, bool y, bool)
4377 static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
4386 // #include "src/Core/GenericPacketMath.h"
4387 #ifndef EIGEN_GENERIC_PACKET_MATH_H
4388 #define EIGEN_GENERIC_PACKET_MATH_H
4390 namespace internal {
4391 #ifndef EIGEN_DEBUG_ALIGNED_LOAD
4392 #define EIGEN_DEBUG_ALIGNED_LOAD
4394 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
4395 #define EIGEN_DEBUG_UNALIGNED_LOAD
4397 #ifndef EIGEN_DEBUG_ALIGNED_STORE
4398 #define EIGEN_DEBUG_ALIGNED_STORE
4400 #ifndef EIGEN_DEBUG_UNALIGNED_STORE
4401 #define EIGEN_DEBUG_UNALIGNED_STORE
4403 struct default_packet_traits
// Generic packet API: default traits plus scalar fallbacks for every packet
// operation. Architecture headers specialize these with SIMD intrinsics.
// NOTE(review): trait enum bodies are partially missing (extraction damage).
4451 template<typename T> struct packet_traits : default_packet_traits
4458 AlignedOnScalar = 0,
4474 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
4475 template <typename Src, typename Tgt> struct type_casting_traits {
// pcast fallbacks: 1-, 2- and 4-source overloads all reduce to a plain cast
// of the first argument in the scalar case.
4482 template <typename SrcPacket, typename TgtPacket>
4483 EIGEN_DEVICE_FUNC inline TgtPacket
4484 pcast(const SrcPacket& a) {
4485 return static_cast<TgtPacket>(a);
4487 template <typename SrcPacket, typename TgtPacket>
4488 EIGEN_DEVICE_FUNC inline TgtPacket
4489 pcast(const SrcPacket& a, const SrcPacket& ) {
4490 return static_cast<TgtPacket>(a);
4492 template <typename SrcPacket, typename TgtPacket>
4493 EIGEN_DEVICE_FUNC inline TgtPacket
4494 pcast(const SrcPacket& a, const SrcPacket& , const SrcPacket& , const SrcPacket& ) {
4495 return static_cast<TgtPacket>(a);
// Elementwise arithmetic fallbacks (scalar "packets" of size 1).
4497 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4498 padd(const Packet& a,
4499 const Packet& b) { return a+b; }
4500 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4501 psub(const Packet& a,
4502 const Packet& b) { return a-b; }
4503 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4504 pnegate(const Packet& a) { return -a; }
4505 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4506 pconj(const Packet& a) { return numext::conj(a); }
4507 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4508 pmul(const Packet& a,
4509 const Packet& b) { return a*b; }
4510 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4511 pdiv(const Packet& a,
4512 const Packet& b) { return a/b; }
4513 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4514 pmin(const Packet& a,
4515 const Packet& b) { return numext::mini(a, b); }
4516 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4517 pmax(const Packet& a,
4518 const Packet& b) { return numext::maxi(a, b); }
4519 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4520 pabs(const Packet& a) { using std::abs; return abs(a); }
4521 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4522 parg(const Packet& a) { using numext::arg; return arg(a); }
// Bitwise fallbacks — NOTE(review): pandnot uses logical-not (!b), which
// differs from the SIMD specializations' bitwise andnot; this matches the
// upstream Eigen 3.3 source, so it is kept as-is.
4523 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4524 pand(const Packet& a, const Packet& b) { return a & b; }
4525 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4526 por(const Packet& a, const Packet& b) { return a | b; }
4527 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4528 pxor(const Packet& a, const Packet& b) { return a ^ b; }
4529 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4530 pandnot(const Packet& a, const Packet& b) { return a & (!b); }
// Load/splat fallbacks: a scalar packet is just the dereferenced value.
4531 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4532 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
4533 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4534 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
4535 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4536 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
4537 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4538 pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
4539 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
4540 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
4541 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4542 ploadquad(const typename unpacket_traits<Packet>::type* from)
4543 { return pload1<Packet>(from); }
// Broadcast, store, gather/scatter, prefetch and reduction fallbacks.
// NOTE(review): several braces and one-line bodies (pstore/pstoreu/pfirst/
// predux variants) were dropped by the extraction; kept byte-identical.
// pbroadcast4/2: splat a[0..3] (resp. a[0..1]) into separate packets.
4544 template<typename Packet> EIGEN_DEVICE_FUNC
4545 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
4546 Packet& a0, Packet& a1, Packet& a2, Packet& a3)
4548 a0 = pload1<Packet>(a+0);
4549 a1 = pload1<Packet>(a+1);
4550 a2 = pload1<Packet>(a+2);
4551 a3 = pload1<Packet>(a+3);
4553 template<typename Packet> EIGEN_DEVICE_FUNC
4554 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
4555 Packet& a0, Packet& a1)
4557 a0 = pload1<Packet>(a+0);
4558 a1 = pload1<Packet>(a+1);
// plset: packet of {a, a+1, ...}; trivially a for scalar packets.
4560 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
4561 plset(const typename unpacket_traits<Packet>::type& a) { return a; }
4562 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
4564 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
// Gather/scatter fallbacks ignore the stride (scalar packet = one element).
4566 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index )
4567 { return ploadu<Packet>(from); }
4568 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index )
4569 { pstore(to, from); }
// prefetch: PTX prefetch.L1 on CUDA, __builtin_prefetch on GNU-compatible
// host compilers, no-op elsewhere.
4570 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
4572 #ifdef __CUDA_ARCH__
4573 #if defined(__LP64__)
4574 asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
4576 asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
4578 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
4579 __builtin_prefetch(addr);
// Reduction fallbacks: for scalar packets every reduction is the identity.
4582 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
4584 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4585 preduxp(const Packet* vecs) { return vecs[0]; }
4586 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
4588 template<typename Packet> EIGEN_DEVICE_FUNC inline
// predux_downto4: halves packets of size >= 8, otherwise returns a as-is.
4589 typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
4590 predux_downto4(const Packet& a)
4592 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
4594 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
4596 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
4598 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
// pcplxflip: swap real and imaginary parts of each complex element.
4600 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
4602 return Packet(imag(a),real(a));
// Transcendental packet ops: each generic fallback forwards elementwise to
// the std::/numext:: scalar function; SIMD backends override them.
// EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS permits per-TU
// redefinition by architecture headers.
4604 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4605 Packet psin(const Packet& a) { using std::sin; return sin(a); }
4606 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4607 Packet pcos(const Packet& a) { using std::cos; return cos(a); }
4608 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4609 Packet ptan(const Packet& a) { using std::tan; return tan(a); }
4610 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4611 Packet pasin(const Packet& a) { using std::asin; return asin(a); }
4612 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4613 Packet pacos(const Packet& a) { using std::acos; return acos(a); }
4614 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4615 Packet patan(const Packet& a) { using std::atan; return atan(a); }
4616 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4617 Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
4618 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4619 Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
4620 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4621 Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
4622 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4623 Packet pexp(const Packet& a) { using std::exp; return exp(a); }
4624 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4625 Packet plog(const Packet& a) { using std::log; return log(a); }
4626 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4627 Packet plog1p(const Packet& a) { return numext::log1p(a); }
4628 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4629 Packet plog10(const Packet& a) { using std::log10; return log10(a); }
4630 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4631 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
// prsqrt: reciprocal square root expressed via pdiv/psqrt.
4632 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4633 Packet prsqrt(const Packet& a) {
4634 return pdiv(pset1<Packet>(1), psqrt(a));
4636 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4637 Packet pround(const Packet& a) { using numext::round; return round(a); }
4638 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4639 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
4640 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
4641 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
// Higher-level packet helpers: stores, fused ops, aligned-dispatch loads,
// alignment shifting, complex pmul specializations, transpose/blend/insert.
// NOTE(review): braces, pmadd's b/c parameter lines, ploadt/pstoret bodies
// and some struct interiors were dropped by the extraction.
4642 template<typename Packet>
4643 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
4645 pstore(to, pset1<Packet>(a));
// pmadd fallback: a*b + c (no fused instruction in the generic path).
4647 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4648 pmadd(const Packet& a,
4651 { return padd(pmul(a, b),c); }
// ploadt/pstoret: pick aligned vs unaligned access from the compile-time
// Alignment parameter.
4652 template<typename Packet, int Alignment>
4653 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
4655 if(Alignment >= unpacket_traits<Packet>::alignment)
4656 return pload<Packet>(from);
4658 return ploadu<Packet>(from);
4660 template<typename Scalar, typename Packet, int Alignment>
4661 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
4663 if(Alignment >= unpacket_traits<Packet>::alignment)
// ploadt_ro: read-only variant; generic path just forwards to ploadt.
4668 template<typename Packet, int LoadMode>
4669 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
4671 return ploadt<Packet, LoadMode>(from);
// palign: shift two packets by Offset elements; generic impl is a no-op.
4673 template<int Offset,typename PacketType>
4676 static inline void run(PacketType&, const PacketType&) {}
4678 template<int Offset,typename PacketType>
4679 inline void palign(PacketType& first, const PacketType& second)
4681 palign_impl<Offset,PacketType>::run(first,second);
// Complex pmul written out explicitly so it works with -ffast-math and
// without depending on std::complex operator* semantics.
4684 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
4685 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
4686 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
4687 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
// PacketBlock: N packets viewed as a tile; 1x1 transpose is trivially empty.
4689 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
4692 template<typename Packet> EIGEN_DEVICE_FUNC inline void
4693 ptranspose(PacketBlock<Packet,1>& ) {
// Selector: per-lane boolean mask used by pblend.
4695 template <size_t N> struct Selector {
4698 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4699 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
4700 return ifPacket.select[0] ? thenPacket : elsePacket;
// pinsertfirst/pinsertlast: replace the first/last lane of a with scalar b,
// built generically out of pblend with a one-hot mask.
4702 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4703 pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
4705 Selector<unpacket_traits<Packet>::size> mask;
4706 mask.select[0] = true;
4707 for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
4708 mask.select[i] = false;
4709 return pblend(mask, pset1<Packet>(b), a);
4711 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
4712 pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
4714 Selector<unpacket_traits<Packet>::size> mask;
4715 for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
4716 mask.select[i] = false;
4717 mask.select[unpacket_traits<Packet>::size-1] = true;
4718 return pblend(mask, pset1<Packet>(b), a);
4723 // end #include "src/Core/GenericPacketMath.h"
// Start of the SSE packet-math layer (guarded include of the original
// src/Core/arch/SSE/PacketMath.h, flattened into this single header).
4724 #if defined EIGEN_VECTORIZE_AVX512
4725 // #include "src/Core/arch/SSE/PacketMath.h"
4726 #ifndef EIGEN_PACKET_MATH_SSE_H
4727 #define EIGEN_PACKET_MATH_SSE_H
4729 namespace internal {
// Tunables with overridable defaults.
4730 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
4731 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
4733 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
// 8 XMM registers in 32-bit mode, 16 in 64-bit mode (2 * pointer size).
4734 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
4737 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
4738 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
// Workaround for old GCC ABI (< 1004) with AVX: wrap the raw __m128 types in a
// struct so overload resolution can distinguish Packet4f/4i/2d.
4741 #if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
4742 template<typename T>
4743 struct eigen_packet_wrapper
4745 EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
4746 EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
4747 EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
4748 EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
4749 EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
4755 typedef eigen_packet_wrapper<__m128> Packet4f;
4756 typedef eigen_packet_wrapper<__m128i> Packet4i;
4757 typedef eigen_packet_wrapper<__m128d> Packet2d;
// Normal path: packets are the raw SSE vector types.
4759 typedef __m128 Packet4f;
4760 typedef __m128i Packet4i;
4761 typedef __m128d Packet2d;
4763 template<> struct is_arithmetic<__m128> { enum { value = true }; };
4764 template<> struct is_arithmetic<__m128i> { enum { value = true }; };
4765 template<> struct is_arithmetic<__m128d> { enum { value = true }; };
// Lane-permute helper macros. swizzle1 permutes one vector's lanes (p,q,r,s
// select source lanes); swizzle2 interleaves lanes from two vectors. Float and
// double variants round-trip through the integer domain via cast intrinsics
// (bit-preserving, no conversion).
4766 #define vec4f_swizzle1(v,p,q,r,s) \
4767 (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
4768 #define vec4i_swizzle1(v,p,q,r,s) \
4769 (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
// vec2d_swizzle1 expands each 64-bit lane index into its two 32-bit halves.
4770 #define vec2d_swizzle1(v,p,q) \
4771 (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
4772 #define vec4f_swizzle2(a,b,p,q,r,s) \
4773 (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
4774 #define vec4i_swizzle2(a,b,p,q,r,s) \
4775 (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
// Convenience macros declaring broadcast (pset1) packet constants.
4776 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
4777 const Packet4f p4f_##NAME = pset1<Packet4f>(X)
4778 #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
4779 const Packet2d p2d_##NAME = pset1<Packet2d>(X)
4780 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
4781 const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
4782 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
4783 const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// packet_traits specializations advertising SSE capabilities (only when AVX is
// not active — AVX provides wider packets later in the file). Many enum lines
// are missing from this dump; the visible ones are kept verbatim.
4784 #ifndef EIGEN_VECTORIZE_AVX
4785 template<> struct packet_traits<float> : default_packet_traits
4787 typedef Packet4f type;
4788 typedef Packet4f half;
4791 AlignedOnScalar = 1,
4795 HasSin = EIGEN_FAST_MATH,
4796 HasCos = EIGEN_FAST_MATH,
4801 HasTanh = EIGEN_FAST_MATH,
4803 #ifdef EIGEN_VECTORIZE_SSE4_1
4811 template<> struct packet_traits<double> : default_packet_traits
4813 typedef Packet2d type;
4814 typedef Packet2d half;
4817 AlignedOnScalar = 1,
4825 #ifdef EIGEN_VECTORIZE_SSE4_1
4834 template<> struct packet_traits<int> : default_packet_traits
4836 typedef Packet4i type;
4837 typedef Packet4i half;
4840 AlignedOnScalar = 1,
// unpacket_traits: element type, lane count, and required alignment per packet.
4845 template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
4846 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
4847 template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
// Vectorized-division cost model used by the expression evaluator.
4848 #ifndef EIGEN_VECTORIZE_AVX
4849 template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
4850 template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
// pset1: broadcast a scalar to all lanes. MSVC 2008 (==1500) path avoids the
// _mm_set1_* forms (compiler workaround); other compilers use them directly.
4852 #if EIGEN_COMP_MSVC==1500
4853 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
4854 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
4855 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
4857 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
4858 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
4859 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
// pload1 for strict GCC without AVX: scalar load + swizzle broadcast.
4861 #if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
4862 template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
4863 return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
// plset: build {a, a+1, a+2, ...} — a linear ramp starting at `a`.
4866 template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
4867 template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
4868 template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
// Lane-wise add / subtract for the three SSE packet types.
4869 template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
4870 template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
4871 template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
4872 template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
4873 template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
4874 template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
// pnegate (float/double): flip the IEEE sign bit of every lane via XOR mask —
// cheaper than a subtraction and preserves -0.0/NaN payloads.
4875 template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
4877 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
4878 return _mm_xor_ps(a,mask);
4880 template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
// setr places the high word (sign bit) of each 64-bit double second.
4882 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
4883 return _mm_xor_pd(a,mask);
// Integer negate: 0 - a (no sign-bit trick for two's complement).
4885 template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
4887 return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
// pconj is the identity for real packet types.
4889 template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
4890 template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
4891 template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
// Lane-wise multiply. Floats map directly to mul intrinsics.
4892 template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
4893 template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
// 32-bit integer multiply: SSE4.1 has _mm_mullo_epi32; pre-SSE4.1 emulates it
// with two widening _mm_mul_epu32 on even/odd lane pairs plus a re-interleave
// (part of the emulation sequence is on lines missing from this dump).
4894 template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
4896 #ifdef EIGEN_VECTORIZE_SSE4_1
4897 return _mm_mullo_epi32(a,b);
4899 return vec4i_swizzle1(
4902 _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
4903 vec4i_swizzle1(b,1,0,3,2)),
// Lane-wise division (floating point only).
4908 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
4909 template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
// pmadd: a*b + c. Integer version composes pmul/padd; with FMA the float and
// double versions map to a single fused _mm_fmadd_* instruction.
4910 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
4912 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
4913 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
// Lane-wise min/max. Integer variants need SSE4.1 intrinsics; pre-SSE4.1 they
// are emulated with a compare mask and and/andnot/or selection.
4915 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
4916 template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
4917 template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
4919 #ifdef EIGEN_VECTORIZE_SSE4_1
4920 return _mm_min_epi32(a,b);
// mask lanes are all-ones where a<b; pick a there, b elsewhere.
4922 Packet4i mask = _mm_cmplt_epi32(a,b);
4923 return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
4926 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
4927 template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
4928 template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
4930 #ifdef EIGEN_VECTORIZE_SSE4_1
4931 return _mm_max_epi32(a,b);
4933 Packet4i mask = _mm_cmpgt_epi32(a,b);
4934 return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
// Rounding ops are only available with SSE4.1 (_mm_round/ceil/floor).
// _mm_round_* with mode 0 rounds to nearest (even).
4937 #ifdef EIGEN_VECTORIZE_SSE4_1
4938 template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
4939 template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
4940 template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
4941 template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
4942 template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
4943 template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
// Lane-wise bitwise ops (and / or / xor / andnot). Note pandnot(a,b) follows
// the _mm_andnot_* convention: computes (~a) & b.
4945 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
4946 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
4947 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
4948 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
4949 template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
4950 template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
4951 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
4952 template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
4953 template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
4954 template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
4955 template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
4956 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
// Aligned loads (caller guarantees 16-byte alignment).
4957 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
4958 template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
4959 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
// Unaligned float load: MSVC 2010 (==1600) gets a two-half loadl/loadh
// workaround; everyone else uses _mm_loadu_ps.
4961 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
4962 EIGEN_DEBUG_UNALIGNED_LOAD
4963 #if (EIGEN_COMP_MSVC==1600)
4964 __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
4965 res = _mm_loadh_pi(res, (const __m64*)(from+2));
4968 return _mm_loadu_ps(from);
// Alternate ploadu definitions (other branch of a #if absent from this dump).
4972 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
4974 EIGEN_DEBUG_UNALIGNED_LOAD
4975 return _mm_loadu_ps(from);
4978 template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
4980 EIGEN_DEBUG_UNALIGNED_LOAD
4981 return _mm_loadu_pd(from);
4983 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
4985 EIGEN_DEBUG_UNALIGNED_LOAD
4986 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
// ploaddup: load 2 scalars and duplicate each -> {a0,a0,a1,a1}.
4988 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
4990 return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
4992 template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
4993 { return pset1<Packet2d>(from[0]); }
4994 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
4997 tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
4998 return vec4i_swizzle1(tmp, 0, 0, 1, 1);
// Aligned and unaligned stores.
5000 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
5001 template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
5002 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
5003 template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
5004 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
5005 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
// pgather: strided load — element i comes from from[i*stride] (no SSE gather
// instruction exists, so lanes are assembled with _mm_set_*).
5006 template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
5008 return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
5010 template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
5012 return _mm_set_pd(from[1*stride], from[0*stride]);
5014 template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
5016 return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
// pscatter: strided store — extract each lane (shuffle to lane 0, then
// cvt*_f32/f64/si32) and write to to[i*stride].
5018 template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
5020 to[stride*0] = _mm_cvtss_f32(from);
5021 to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
5022 to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
5023 to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
5025 template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
5027 to[stride*0] = _mm_cvtsd_f64(from);
5028 to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
5030 template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
5032 to[stride*0] = _mm_cvtsi128_si32(from);
5033 to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
5034 to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
5035 to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
// pstore1: broadcast a scalar, then do an aligned store of the full packet.
5037 template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
5039 Packet4f pa = _mm_set_ss(a);
5040 pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
5042 template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
5044 Packet2d pa = _mm_set_sd(a);
5045 pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
// prefetch: hint the line into L1 (T0). Skipped under AVX (defined elsewhere).
5047 #ifndef EIGEN_VECTORIZE_AVX
5048 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
5049 template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
5050 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
// pfirst: extract lane 0. Three compiler-specific variants: MSVC/Win64 reads
// the union members directly (workaround), 32-bit MSVC goes through a named
// temporary, others call the cvt intrinsics directly.
5052 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
5053 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
5054 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
5055 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
5056 #elif EIGEN_COMP_MSVC_STRICT
5057 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
5058 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
5059 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
5061 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
5062 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
5063 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
// preverse: reverse lane order (0x1B = shuffle pattern 3,2,1,0).
5065 template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
5066 { return _mm_shuffle_ps(a,a,0x1B); }
5067 template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
5068 { return _mm_shuffle_pd(a,a,0x1); }
5069 template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
5070 { return _mm_shuffle_epi32(a,0x1B); }
// pabs (float/double): clear the IEEE sign bit of each lane with an AND mask.
5071 template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
5073 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
5074 return _mm_and_ps(a,mask);
5076 template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
// per double: keep the low 32 bits entirely, mask only the high word's sign.
5078 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
5079 return _mm_and_pd(a,mask);
// Integer pabs: SSSE3 has _mm_abs_epi32; fallback uses the classic
// (a ^ (a>>31)) - (a>>31) two's-complement trick.
5081 template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
5083 #ifdef EIGEN_VECTORIZE_SSSE3
5084 return _mm_abs_epi32(a);
5086 Packet4i aux = _mm_srai_epi32(a,31);
5087 return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
// pbroadcast4: load 4 consecutive scalars and broadcast each into its own
// packet (a0..a3). Float version loads once and swizzles four times; a3 is
// overwritten last so the source packet survives the first three swizzles.
5091 template<> EIGEN_STRONG_INLINE void
5092 pbroadcast4<Packet4f>(const float *a,
5093 Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
5095 a3 = pload<Packet4f>(a);
5096 a0 = vec4f_swizzle1(a3, 0,0,0,0);
5097 a1 = vec4f_swizzle1(a3, 1,1,1,1);
5098 a2 = vec4f_swizzle1(a3, 2,2,2,2);
5099 a3 = vec4f_swizzle1(a3, 3,3,3,3);
// Double version: SSE3 _mm_loaddup_pd broadcasts directly; fallback loads two
// pairs and swizzles, again writing the source packet (a1/a3) last.
5101 template<> EIGEN_STRONG_INLINE void
5102 pbroadcast4<Packet2d>(const double *a,
5103 Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
5105 #ifdef EIGEN_VECTORIZE_SSE3
5106 a0 = _mm_loaddup_pd(a+0);
5107 a1 = _mm_loaddup_pd(a+1);
5108 a2 = _mm_loaddup_pd(a+2);
5109 a3 = _mm_loaddup_pd(a+3);
5111 a1 = pload<Packet2d>(a);
5112 a0 = vec2d_swizzle1(a1, 0,0);
5113 a1 = vec2d_swizzle1(a1, 1,1);
5114 a3 = pload<Packet2d>(a+2);
5115 a2 = vec2d_swizzle1(a3, 0,0);
5116 a3 = vec2d_swizzle1(a3, 1,1);
// punpackp: in-place — broadcast lane 1,2,3,0 of vecs[0] into vecs[1..3] and
// vecs[0]; vecs[0] is rewritten last because it is the shared source.
5120 EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
5122 vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
5123 vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
5124 vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
5125 vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
// preduxp: sum-reduce four packets into one packet whose lane i is the sum of
// vecs[i]. SSE3 path uses horizontal adds; fallback transposes with
// unpack/movehl/movelh then adds.
5127 #ifdef EIGEN_VECTORIZE_SSE3
5128 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
5130 return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
5132 template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
5134 return _mm_hadd_pd(vecs[0], vecs[1]);
5137 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
5139 Packet4f tmp0, tmp1, tmp2;
5140 tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
5141 tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
5142 tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
5143 tmp0 = _mm_add_ps(tmp0, tmp1);
5144 tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
5145 tmp1 = _mm_add_ps(tmp1, tmp2);
5146 tmp2 = _mm_movehl_ps(tmp1, tmp0);
5147 tmp0 = _mm_movelh_ps(tmp0, tmp1);
5148 return _mm_add_ps(tmp0, tmp2);
5150 template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
5152 return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
// predux: horizontal sum of one packet — fold high half onto low half, then
// fold the remaining pair, and extract lane 0.
5155 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
5157 Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
5158 return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
5160 template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
5162 return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
// Integer reductions: SSSE3 path uses _mm_hadd_epi32; fallback folds halves
// with unpack/shuffle and, for preduxp, a manual 4x4 transpose-and-add.
5164 #ifdef EIGEN_VECTORIZE_SSSE3
5165 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
5167 return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
5169 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
5171 Packet4i tmp0 = _mm_hadd_epi32(a,a);
5172 return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
5175 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
5177 Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
5178 return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
5180 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
5182 Packet4i tmp0, tmp1, tmp2;
5183 tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
5184 tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
5185 tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
5186 tmp0 = _mm_add_epi32(tmp0, tmp1);
5187 tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
5188 tmp1 = _mm_add_epi32(tmp1, tmp2);
5189 tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
5190 tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
5191 return _mm_add_epi32(tmp0, tmp2);
// predux_mul: horizontal product — same half-folding pattern as predux, with
// multiplies instead of adds.
5194 template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
5196 Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
5197 return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
5199 template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
5201 return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
// Integer product: spilled to an aligned scalar buffer (the pstore line is
// missing from this dump) and multiplied lane-by-lane — no SSE int-mul chain.
5203 template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
5205 EIGEN_ALIGN16 int aux[4];
5207 return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
// predux_min / predux_max: horizontal min/max via half folding; the integer
// versions use SSE4.1 min/max intrinsics or a scalar spill as fallback.
5209 template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
5211 Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
5212 return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
5214 template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
5216 return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
5218 template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
5220 #ifdef EIGEN_VECTORIZE_SSE4_1
5221 Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
5222 return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
5224 EIGEN_ALIGN16 int aux[4];
5226 int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
5227 int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
5228 return aux0<aux2 ? aux0 : aux2;
5231 template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
5233 Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
5234 return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
5236 template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
5238 return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
5240 template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
5242 #ifdef EIGEN_VECTORIZE_SSE4_1
5243 Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
5244 return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
5246 EIGEN_ALIGN16 int aux[4];
5248 int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
5249 int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
5250 return aux0>aux2 ? aux0 : aux2;
// palign_impl SSE specializations: shift `first` left by Offset elements,
// pulling in elements from `second`. SSSE3 gets a single _mm_alignr_epi8;
// the fallback composes move/shuffle per Offset (the per-Offset `if` guards
// are on lines missing from this dump).
5255 #ifdef EIGEN_VECTORIZE_SSSE3
5256 template<int Offset>
5257 struct palign_impl<Offset,Packet4f>
5259 static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
5262 first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
5265 template<int Offset>
5266 struct palign_impl<Offset,Packet4i>
5268 static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
5271 first = _mm_alignr_epi8(second,first, Offset*4);
5274 template<int Offset>
5275 struct palign_impl<Offset,Packet2d>
5277 static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
5280 first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
// Pre-SSSE3 fallback versions.
5284 template<int Offset>
5285 struct palign_impl<Offset,Packet4f>
5287 static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
5291 first = _mm_move_ss(first,second);
5292 first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
5296 first = _mm_movehl_ps(first,first);
5297 first = _mm_movelh_ps(first,second);
5301 first = _mm_move_ss(first,second);
5302 first = _mm_shuffle_ps(first,second,0x93);
5306 template<int Offset>
5307 struct palign_impl<Offset,Packet4i>
5309 static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
// Integer variant reuses the float shuffles through bit-preserving casts.
5313 first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
5314 first = _mm_shuffle_epi32(first,0x39);
5318 first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
5319 first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
5323 first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
5324 first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
5328 template<int Offset>
5329 struct palign_impl<Offset,Packet2d>
5331 static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
5335 first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
5336 first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
// In-register transposes of square packet blocks.
// 4x4 float: delegate to the SSE helper macro.
5341 EIGEN_DEVICE_FUNC inline void
5342 ptranspose(PacketBlock<Packet4f,4>& kernel) {
5343 _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
// 2x2 double: swap the off-diagonal halves via unpack hi/lo.
5345 EIGEN_DEVICE_FUNC inline void
5346 ptranspose(PacketBlock<Packet2d,2>& kernel) {
5347 __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
5348 kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
5349 kernel.packet[1] = tmp;
// 4x4 int: classic two-stage unpack (32-bit then 64-bit interleave).
5351 EIGEN_DEVICE_FUNC inline void
5352 ptranspose(PacketBlock<Packet4i,4>& kernel) {
5353 __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
5354 __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
5355 __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
5356 __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
5357 kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
5358 kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
5359 kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
5360 kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
// Per-lane blend: build a "false" mask (all-ones where selector lane == 0) and
// choose thenPacket where the mask is clear, elsePacket where it is set.
// _mm_blendv_* picks its SECOND operand where the mask bit is set, which is
// why elsePacket is passed second with the inverted (false) mask.
5362 template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
5363 const __m128i zero = _mm_setzero_si128();
5364 const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
5365 __m128i false_mask = _mm_cmpeq_epi32(select, zero);
5366 #ifdef EIGEN_VECTORIZE_SSE4_1
5367 return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
5369 return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
5372 template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
5373 const __m128 zero = _mm_setzero_ps();
5374 const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
5375 __m128 false_mask = _mm_cmpeq_ps(select, zero);
5376 #ifdef EIGEN_VECTORIZE_SSE4_1
5377 return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
5379 return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
5382 template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
5383 const __m128d zero = _mm_setzero_pd();
5384 const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
5385 __m128d false_mask = _mm_cmpeq_pd(select, zero);
5386 #ifdef EIGEN_VECTORIZE_SSE4_1
5387 return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
5389 return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
// pinsertfirst: replace lane 0 with scalar b. SSE4.1 uses a single-bit blend;
// fallback uses move_ss / move_sd (copies only the low lane).
5392 template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
5394 #ifdef EIGEN_VECTORIZE_SSE4_1
5395 return _mm_blend_ps(a,pset1<Packet4f>(b),1);
5397 return _mm_move_ss(a, _mm_load_ss(&b));
5400 template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
5402 #ifdef EIGEN_VECTORIZE_SSE4_1
5403 return _mm_blend_pd(a,pset1<Packet2d>(b),1);
5405 return _mm_move_sd(a, _mm_load_sd(&b));
// pinsertlast: replace the highest lane. SSE4.1 blends on the top bit;
// fallback builds an explicit top-lane bit mask and and/or-combines.
5408 template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
5410 #ifdef EIGEN_VECTORIZE_SSE4_1
5411 return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
5413 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
5414 return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
5417 template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
5419 #ifdef EIGEN_VECTORIZE_SSE4_1
5420 return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
5422 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
5423 return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
// Scalar pmadd overloads: route through the C99 fused multiply-add so scalar
// and packet code produce consistent results when FMA is in play.
5427 template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
5428 return ::fmaf(a,b,c);
5430 template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
5431 return ::fma(a,b,c);
5437 // end #include "src/Core/arch/SSE/PacketMath.h"
5438 // #include "src/Core/arch/AVX/PacketMath.h"
// Include guard for the inlined AVX packet-math section of the amalgamated header.
5439 #ifndef EIGEN_PACKET_MATH_AVX_H
5440 #define EIGEN_PACKET_MATH_AVX_H
5442 namespace internal {
// Tuning knobs, only defined if the user/platform has not already set them.
5443 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
5444 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
5446 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
// x86-64 has 16 SIMD registers; (2*sizeof(void*)) yields 16 on 64-bit, 8 on 32-bit.
5447 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
5450 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
5451 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
// AVX packet aliases: 8 floats / 8 ints / 4 doubles per 256-bit register.
5454 typedef __m256 Packet8f;
5455 typedef __m256i Packet8i;
5456 typedef __m256d Packet4d;
// Mark the raw intrinsic vector types as arithmetic so Eigen's meta machinery accepts them.
5457 template<> struct is_arithmetic<__m256> { enum { value = true }; };
5458 template<> struct is_arithmetic<__m256i> { enum { value = true }; };
5459 template<> struct is_arithmetic<__m256d> { enum { value = true }; };
// Helper macros to declare named packet constants inside math kernels.
5460 #define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
5461 const Packet8f p8f_##NAME = pset1<Packet8f>(X)
5462 #define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
5463 const Packet4d p4d_##NAME = pset1<Packet4d>(X)
5464 #define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
5465 const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
5466 #define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
5467 const Packet8i p8i_##NAME = pset1<Packet8i>(X)
// AVX packet_traits only apply when AVX512 is not the active (wider) ISA.
// NOTE(review): the enum bodies below appear truncated in this dump (several
// Has* flags and the opening braces are missing) — confirm against upstream Eigen.
5468 #ifndef EIGEN_VECTORIZE_AVX512
5469 template<> struct packet_traits<float> : default_packet_traits
5471 typedef Packet8f type;
5472 typedef Packet4f half;
5475 AlignedOnScalar = 1,
5479 HasSin = EIGEN_FAST_MATH,
5485 HasTanh = EIGEN_FAST_MATH,
5492 template<> struct packet_traits<double> : default_packet_traits
5494 typedef Packet4d type;
5495 typedef Packet2d half;
5498 AlignedOnScalar = 1,
// Cost-model hints for vectorized division (used by the evaluator's cost heuristics).
5512 template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
5513 template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
// Reverse mapping packet -> (scalar type, half-width packet, size, alignment).
5514 template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
5515 template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
5516 template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
// Broadcast a scalar into every lane of the packet.
5517 template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
5518 template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
5519 template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
// Broadcast a scalar loaded from memory (single broadcast instruction).
5520 template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
5521 template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
// plset: returns {a, a+1, a+2, ...} — note _mm256_set_* takes lanes in reverse order.
5522 template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
5523 template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
// Lane-wise addition / subtraction.
5524 template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
5525 template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
5526 template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
5527 template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
5528 template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
5530 return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
5532 template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
5534 return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
// pconj is the identity for real packets (no imaginary part to flip).
5536 template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
5537 template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
5538 template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
// Lane-wise multiplication / division.
5539 template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
5540 template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
5541 template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
5542 template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
5543 template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& , const Packet8i& )
5544 { eigen_assert(false && "packet integer division are not supported by AVX");
5545 return pset1<Packet8i>(0);
5548 template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
5549 #if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
5551 __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
5554 return _mm256_fmadd_ps(a,b,c);
5557 template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
5558 #if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
5560 __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
5563 return _mm256_fmadd_pd(a,b,c);
// Lane-wise min/max. Note: these inherit the SSE/AVX NaN semantics of
// min/max instructions (the second operand wins on NaN).
5567 template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
5568 template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
5569 template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
5570 template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
// Rounding: pround uses the current rounding mode; pceil/pfloor are directed.
5571 template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
5572 template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
5573 template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
5574 template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
5575 template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
5576 template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
// Bitwise logical operations on the raw bit patterns.
5577 template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
5578 template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
5579 template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
5580 template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
5581 template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
5582 template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
// pandnot(a,b) computes (~a) & b per the andnot instruction's operand order.
5583 template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
5584 template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
// Aligned (pload, 32-byte) and unaligned (ploadu) packet loads.
5585 template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
5586 template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
5587 template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
5588 template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
5589 template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
5590 template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
5591 template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
5593 Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
5594 tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
5595 return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
5597 template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
5599 Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
5600 return _mm256_permute_pd(tmp, 3<<2);
5602 template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
5604 Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
5605 return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
5607 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
5608 template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
5609 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
5610 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
5611 template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
5612 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
5613 template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
5615 return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
5616 from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
5618 template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
5620 return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
5622 template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
5624 __m128 low = _mm256_extractf128_ps(from, 0);
5625 to[stride*0] = _mm_cvtss_f32(low);
5626 to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
5627 to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
5628 to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
5629 __m128 high = _mm256_extractf128_ps(from, 1);
5630 to[stride*4] = _mm_cvtss_f32(high);
5631 to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
5632 to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
5633 to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
5635 template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
5637 __m128d low = _mm256_extractf128_pd(from, 0);
5638 to[stride*0] = _mm_cvtsd_f64(low);
5639 to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
5640 __m128d high = _mm256_extractf128_pd(from, 1);
5641 to[stride*2] = _mm_cvtsd_f64(high);
5642 to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
5644 template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
5646 Packet8f pa = pset1<Packet8f>(a);
5649 template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
5651 Packet4d pa = pset1<Packet4d>(a);
5654 template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
5656 Packet8i pa = pset1<Packet8i>(a);
5659 #ifndef EIGEN_VECTORIZE_AVX512
5660 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
5661 template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
5662 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
5664 template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
5665 return _mm_cvtss_f32(_mm256_castps256_ps128(a));
5667 template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
5668 return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
5670 template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
5671 return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
5673 template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
5675 __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
5676 return _mm256_permute2f128_ps(tmp, tmp, 1);
5678 template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
5680 __m256d tmp = _mm256_shuffle_pd(a,a,5);
5681 return _mm256_permute2f128_pd(tmp, tmp, 1);
5682 __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
5683 return _mm256_permute_pd(swap_halves,5);
5685 template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
5687 const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
5688 return _mm256_and_ps(a,mask);
5690 template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
5692 const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
5693 return _mm256_and_pd(a,mask);
5695 template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
5697 __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
5698 __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
5699 __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
5700 __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
5701 __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
5702 __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
5703 __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
5704 __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
5705 __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
5706 __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
5707 __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
5708 __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
5709 __m256 sum1 = _mm256_add_ps(perm1, hsum5);
5710 __m256 sum2 = _mm256_add_ps(perm2, hsum6);
5711 __m256 sum3 = _mm256_add_ps(perm3, hsum7);
5712 __m256 sum4 = _mm256_add_ps(perm4, hsum8);
5713 __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
5714 __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
5715 __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
5718 template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
5720 Packet4d tmp0, tmp1;
5721 tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
5722 tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
5723 tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
5724 tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
5725 return _mm256_blend_pd(tmp0, tmp1, 0xC);
5727 template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
5729 return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
5731 template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
5733 return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
5735 template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
5737 return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
5739 template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
5742 tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
5743 tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
5744 return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
5746 template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
5749 tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
5750 return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
5752 template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
5754 Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
5755 tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
5756 return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
5758 template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
5760 Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
5761 return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
5763 template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
5765 Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
5766 tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
5767 return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
5769 template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
5771 Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
5772 return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
5774 template<int Offset>
5775 struct palign_impl<Offset,Packet8f>
5777 static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
5781 first = _mm256_blend_ps(first, second, 1);
5782 Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
5783 Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
5784 first = _mm256_blend_ps(tmp1, tmp2, 0x88);
5788 first = _mm256_blend_ps(first, second, 3);
5789 Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
5790 Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
5791 first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
5795 first = _mm256_blend_ps(first, second, 7);
5796 Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
5797 Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
5798 first = _mm256_blend_ps(tmp1, tmp2, 0xee);
5802 first = _mm256_blend_ps(first, second, 15);
5803 Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
5804 Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
5805 first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
5809 first = _mm256_blend_ps(first, second, 31);
5810 first = _mm256_permute2f128_ps(first, first, 1);
5811 Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
5812 first = _mm256_permute2f128_ps(tmp, tmp, 1);
5813 first = _mm256_blend_ps(tmp, first, 0x88);
5817 first = _mm256_blend_ps(first, second, 63);
5818 first = _mm256_permute2f128_ps(first, first, 1);
5819 Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
5820 first = _mm256_permute2f128_ps(tmp, tmp, 1);
5821 first = _mm256_blend_ps(tmp, first, 0xcc);
5825 first = _mm256_blend_ps(first, second, 127);
5826 first = _mm256_permute2f128_ps(first, first, 1);
5827 Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
5828 first = _mm256_permute2f128_ps(tmp, tmp, 1);
5829 first = _mm256_blend_ps(tmp, first, 0xee);
5833 template<int Offset>
5834 struct palign_impl<Offset,Packet4d>
5836 static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
5840 first = _mm256_blend_pd(first, second, 1);
5841 __m256d tmp = _mm256_permute_pd(first, 5);
5842 first = _mm256_permute2f128_pd(tmp, tmp, 1);
5843 first = _mm256_blend_pd(tmp, first, 0xA);
5847 first = _mm256_blend_pd(first, second, 3);
5848 first = _mm256_permute2f128_pd(first, first, 1);
5852 first = _mm256_blend_pd(first, second, 7);
5853 __m256d tmp = _mm256_permute_pd(first, 5);
5854 first = _mm256_permute2f128_pd(tmp, tmp, 1);
5855 first = _mm256_blend_pd(tmp, first, 5);
5859 EIGEN_DEVICE_FUNC inline void
5860 ptranspose(PacketBlock<Packet8f,8>& kernel) {
5861 __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
5862 __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
5863 __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
5864 __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
5865 __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
5866 __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
5867 __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
5868 __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
5869 __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
5870 __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
5871 __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
5872 __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
5873 __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
5874 __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
5875 __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
5876 __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
5877 kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
5878 kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
5879 kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
5880 kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
5881 kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
5882 kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
5883 kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
5884 kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
5886 EIGEN_DEVICE_FUNC inline void
5887 ptranspose(PacketBlock<Packet8f,4>& kernel) {
5888 __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
5889 __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
5890 __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
5891 __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
5892 __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
5893 __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
5894 __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
5895 __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
5896 kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
5897 kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
5898 kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
5899 kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
5901 EIGEN_DEVICE_FUNC inline void
5902 ptranspose(PacketBlock<Packet4d,4>& kernel) {
5903 __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
5904 __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
5905 __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
5906 __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
5907 kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
5908 kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
5909 kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
5910 kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
5912 template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
5913 const __m256 zero = _mm256_setzero_ps();
5914 const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
5915 __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
5916 return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
5918 template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
5919 const __m256d zero = _mm256_setzero_pd();
5920 const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
5921 __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
5922 return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
5924 template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
5926 return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
5928 template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
5930 return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
5932 template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
5934 return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
5936 template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
5938 return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
5943 // end #include "src/Core/arch/AVX/PacketMath.h"
5944 // #include "src/Core/arch/AVX512/PacketMath.h"
// Include guard for the inlined AVX512 packet-math section.
5945 #ifndef EIGEN_PACKET_MATH_AVX512_H
5946 #define EIGEN_PACKET_MATH_AVX512_H
5948 namespace internal {
// Tuning knobs, only set when not already provided.
5949 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
5950 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
5952 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
5953 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
5956 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
5957 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
// AVX512 packet aliases: 16 floats / 16 ints / 8 doubles per 512-bit register.
5960 typedef __m512 Packet16f;
5961 typedef __m512i Packet16i;
5962 typedef __m512d Packet8d;
// Mark raw __m512* vector types as arithmetic for Eigen's meta machinery.
// NOTE(review): the `template <>` headers and closing braces of these
// specializations are missing from this dump — confirm against upstream.
5964 struct is_arithmetic<__m512> {
5965 enum { value = true };
5968 struct is_arithmetic<__m512i> {
5969 enum { value = true };
5972 struct is_arithmetic<__m512d> {
5973 enum { value = true };
// packet_traits for float/double at 512-bit width. The enum bodies are
// truncated here; the surviving lines show half-width packets and the
// gcc>=5.3 / AVX512DQ guards around the transcendental-support flags.
5975 template<> struct packet_traits<float> : default_packet_traits
5977 typedef Packet16f type;
5978 typedef Packet8f half;
5981 AlignedOnScalar = 1,
5984 #if EIGEN_GNUC_AT_LEAST(5, 3)
5985 #ifdef EIGEN_VECTORIZE_AVX512DQ
5995 template<> struct packet_traits<double> : default_packet_traits
5997 typedef Packet8d type;
5998 typedef Packet4d half;
6001 AlignedOnScalar = 1,
6004 #if EIGEN_GNUC_AT_LEAST(5, 3)
6006 HasRsqrt = EIGEN_FAST_MATH,
// Reverse mapping packet -> (scalar type, half packet, size, 64-byte alignment).
6012 struct unpacket_traits<Packet16f> {
6014 typedef Packet8f half;
6015 enum { size = 16, alignment=Aligned64 };
6018 struct unpacket_traits<Packet8d> {
6019 typedef double type;
6020 typedef Packet4d half;
6021 enum { size = 8, alignment=Aligned64 };
6024 struct unpacket_traits<Packet16i> {
6026 typedef Packet8i half;
6027 enum { size = 16, alignment=Aligned64 };
6030 EIGEN_STRONG_INLINE Packet16f pset1<Packet16f>(const float& from) {
6031 return _mm512_set1_ps(from);
6034 EIGEN_STRONG_INLINE Packet8d pset1<Packet8d>(const double& from) {
6035 return _mm512_set1_pd(from);
6038 EIGEN_STRONG_INLINE Packet16i pset1<Packet16i>(const int& from) {
6039 return _mm512_set1_epi32(from);
6042 EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
6043 return _mm512_broadcastss_ps(_mm_load_ps1(from));
6046 EIGEN_STRONG_INLINE Packet8d pload1<Packet8d>(const double* from) {
6047 return _mm512_broadcastsd_pd(_mm_load_pd1(from));
6050 EIGEN_STRONG_INLINE Packet16f plset<Packet16f>(const float& a) {
6051 return _mm512_add_ps(
6053 _mm512_set_ps(15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f,
6054 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
6057 EIGEN_STRONG_INLINE Packet8d plset<Packet8d>(const double& a) {
6058 return _mm512_add_pd(_mm512_set1_pd(a),
6059 _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0));
6062 EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a,
6063 const Packet16f& b) {
6064 return _mm512_add_ps(a, b);
6067 EIGEN_STRONG_INLINE Packet8d padd<Packet8d>(const Packet8d& a,
6068 const Packet8d& b) {
6069 return _mm512_add_pd(a, b);
6072 EIGEN_STRONG_INLINE Packet16f psub<Packet16f>(const Packet16f& a,
6073 const Packet16f& b) {
6074 return _mm512_sub_ps(a, b);
6077 EIGEN_STRONG_INLINE Packet8d psub<Packet8d>(const Packet8d& a,
6078 const Packet8d& b) {
6079 return _mm512_sub_pd(a, b);
6082 EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
6083 return _mm512_sub_ps(_mm512_set1_ps(0.0), a);
6086 EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {
6087 return _mm512_sub_pd(_mm512_set1_pd(0.0), a);
6090 EIGEN_STRONG_INLINE Packet16f pconj(const Packet16f& a) {
6094 EIGEN_STRONG_INLINE Packet8d pconj(const Packet8d& a) {
6098 EIGEN_STRONG_INLINE Packet16i pconj(const Packet16i& a) {
6102 EIGEN_STRONG_INLINE Packet16f pmul<Packet16f>(const Packet16f& a,
6103 const Packet16f& b) {
6104 return _mm512_mul_ps(a, b);
6107 EIGEN_STRONG_INLINE Packet8d pmul<Packet8d>(const Packet8d& a,
6108 const Packet8d& b) {
6109 return _mm512_mul_pd(a, b);
6112 EIGEN_STRONG_INLINE Packet16f pdiv<Packet16f>(const Packet16f& a,
6113 const Packet16f& b) {
6114 return _mm512_div_ps(a, b);
6117 EIGEN_STRONG_INLINE Packet8d pdiv<Packet8d>(const Packet8d& a,
6118 const Packet8d& b) {
6119 return _mm512_div_pd(a, b);
6123 EIGEN_STRONG_INLINE Packet16f pmadd(const Packet16f& a, const Packet16f& b,
6124 const Packet16f& c) {
6125 return _mm512_fmadd_ps(a, b, c);
6128 EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b,
6129 const Packet8d& c) {
6130 return _mm512_fmadd_pd(a, b, c);
6134 EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a,
6135 const Packet16f& b) {
6136 return _mm512_min_ps(a, b);
6139 EIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a,
6140 const Packet8d& b) {
6141 return _mm512_min_pd(a, b);
6144 EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a,
6145 const Packet16f& b) {
6146 return _mm512_max_ps(a, b);
6149 EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,
6150 const Packet8d& b) {
6151 return _mm512_max_pd(a, b);
6154 EIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a,
6155 const Packet16f& b) {
6156 #ifdef EIGEN_VECTORIZE_AVX512DQ
6157 return _mm512_and_ps(a, b);
6159 Packet16f res = _mm512_undefined_ps();
6160 Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
6161 Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
6162 res = _mm512_insertf32x4(res, _mm_and_ps(lane0_a, lane0_b), 0);
6163 Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
6164 Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
6165 res = _mm512_insertf32x4(res, _mm_and_ps(lane1_a, lane1_b), 1);
6166 Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
6167 Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
6168 res = _mm512_insertf32x4(res, _mm_and_ps(lane2_a, lane2_b), 2);
6169 Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
6170 Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
6171 res = _mm512_insertf32x4(res, _mm_and_ps(lane3_a, lane3_b), 3);
6176 EIGEN_STRONG_INLINE Packet8d pand<Packet8d>(const Packet8d& a,
6177 const Packet8d& b) {
6178 #ifdef EIGEN_VECTORIZE_AVX512DQ
6179 return _mm512_and_pd(a, b);
6181 Packet8d res = _mm512_undefined_pd();
6182 Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
6183 Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
6184 res = _mm512_insertf64x4(res, _mm256_and_pd(lane0_a, lane0_b), 0);
6185 Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
6186 Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
6187 res = _mm512_insertf64x4(res, _mm256_and_pd(lane1_a, lane1_b), 1);
6192 EIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a,
6193 const Packet16f& b) {
6194 #ifdef EIGEN_VECTORIZE_AVX512DQ
6195 return _mm512_or_ps(a, b);
6197 Packet16f res = _mm512_undefined_ps();
6198 Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
6199 Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
6200 res = _mm512_insertf32x4(res, _mm_or_ps(lane0_a, lane0_b), 0);
6201 Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
6202 Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
6203 res = _mm512_insertf32x4(res, _mm_or_ps(lane1_a, lane1_b), 1);
6204 Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
6205 Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
6206 res = _mm512_insertf32x4(res, _mm_or_ps(lane2_a, lane2_b), 2);
6207 Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
6208 Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
6209 res = _mm512_insertf32x4(res, _mm_or_ps(lane3_a, lane3_b), 3);
6214 EIGEN_STRONG_INLINE Packet8d por<Packet8d>(const Packet8d& a,
6215 const Packet8d& b) {
6216 #ifdef EIGEN_VECTORIZE_AVX512DQ
6217 return _mm512_or_pd(a, b);
6219 Packet8d res = _mm512_undefined_pd();
6220 Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
6221 Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
6222 res = _mm512_insertf64x4(res, _mm256_or_pd(lane0_a, lane0_b), 0);
6223 Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
6224 Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
6225 res = _mm512_insertf64x4(res, _mm256_or_pd(lane1_a, lane1_b), 1);
6230 EIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a,
6231 const Packet16f& b) {
6232 #ifdef EIGEN_VECTORIZE_AVX512DQ
6233 return _mm512_xor_ps(a, b);
6235 Packet16f res = _mm512_undefined_ps();
6236 Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
6237 Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
6238 res = _mm512_insertf32x4(res, _mm_xor_ps(lane0_a, lane0_b), 0);
6239 Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
6240 Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
6241 res = _mm512_insertf32x4(res, _mm_xor_ps(lane1_a, lane1_b), 1);
6242 Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
6243 Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
6244 res = _mm512_insertf32x4(res, _mm_xor_ps(lane2_a, lane2_b), 2);
6245 Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
6246 Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
6247 res = _mm512_insertf32x4(res, _mm_xor_ps(lane3_a, lane3_b), 3);
6252 EIGEN_STRONG_INLINE Packet8d pxor<Packet8d>(const Packet8d& a,
6253 const Packet8d& b) {
6254 #ifdef EIGEN_VECTORIZE_AVX512DQ
6255 return _mm512_xor_pd(a, b);
6257 Packet8d res = _mm512_undefined_pd();
6258 Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
6259 Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
6260 res = _mm512_insertf64x4(res, _mm256_xor_pd(lane0_a, lane0_b), 0);
6261 Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
6262 Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
6263 res = _mm512_insertf64x4(res, _mm256_xor_pd(lane1_a, lane1_b), 1);
6268 EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a,
6269 const Packet16f& b) {
6270 #ifdef EIGEN_VECTORIZE_AVX512DQ
6271 return _mm512_andnot_ps(a, b);
6273 Packet16f res = _mm512_undefined_ps();
6274 Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
6275 Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
6276 res = _mm512_insertf32x4(res, _mm_andnot_ps(lane0_a, lane0_b), 0);
6277 Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
6278 Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
6279 res = _mm512_insertf32x4(res, _mm_andnot_ps(lane1_a, lane1_b), 1);
6280 Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
6281 Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
6282 res = _mm512_insertf32x4(res, _mm_andnot_ps(lane2_a, lane2_b), 2);
6283 Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
6284 Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
6285 res = _mm512_insertf32x4(res, _mm_andnot_ps(lane3_a, lane3_b), 3);
6290 EIGEN_STRONG_INLINE Packet8d pandnot<Packet8d>(const Packet8d& a,
6291 const Packet8d& b) {
6292 #ifdef EIGEN_VECTORIZE_AVX512DQ
6293 return _mm512_andnot_pd(a, b);
6295 Packet8d res = _mm512_undefined_pd();
6296 Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
6297 Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
6298 res = _mm512_insertf64x4(res, _mm256_andnot_pd(lane0_a, lane0_b), 0);
6299 Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
6300 Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
6301 res = _mm512_insertf64x4(res, _mm256_andnot_pd(lane1_a, lane1_b), 1);
6306 EIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) {
6307 EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from);
6310 EIGEN_STRONG_INLINE Packet8d pload<Packet8d>(const double* from) {
6311 EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_pd(from);
6314 EIGEN_STRONG_INLINE Packet16i pload<Packet16i>(const int* from) {
6315 EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
6316 reinterpret_cast<const __m512i*>(from));
6319 EIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from) {
6320 EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ps(from);
6323 EIGEN_STRONG_INLINE Packet8d ploadu<Packet8d>(const double* from) {
6324 EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_pd(from);
6327 EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
6328 EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
6329 reinterpret_cast<const __m512i*>(from));
6332 EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
6333 Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from);
6334 lane0 = _mm256_blend_ps(
6335 lane0, _mm256_castps128_ps256(_mm_permute_ps(
6336 _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))),
6338 lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2));
6339 Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4));
6340 lane1 = _mm256_blend_ps(
6341 lane1, _mm256_castps128_ps256(_mm_permute_ps(
6342 _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))),
6344 lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2));
6345 #ifdef EIGEN_VECTORIZE_AVX512DQ
6346 Packet16f res = _mm512_undefined_ps();
6347 return _mm512_insertf32x8(res, lane0, 0);
6348 return _mm512_insertf32x8(res, lane1, 1);
6351 Packet16f res = _mm512_undefined_ps();
6352 res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0);
6353 res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1);
6354 res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2);
6355 res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3);
6360 EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
6361 Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from);
6362 lane0 = _mm256_permute_pd(lane0, 3 << 2);
6363 Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2));
6364 lane1 = _mm256_permute_pd(lane1, 3 << 2);
6365 Packet8d res = _mm512_undefined_pd();
6366 res = _mm512_insertf64x4(res, lane0, 0);
6367 return _mm512_insertf64x4(res, lane1, 1);
6370 EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) {
6371 Packet16f tmp = _mm512_undefined_ps();
6372 tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from), 0);
6373 tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 1), 1);
6374 tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 2), 2);
6375 tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 3), 3);
6379 EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) {
6380 Packet8d tmp = _mm512_undefined_pd();
6381 Packet2d tmp0 = _mm_load_pd1(from);
6382 Packet2d tmp1 = _mm_load_pd1(from + 1);
6383 Packet4d lane0 = _mm256_broadcastsd_pd(tmp0);
6384 Packet4d lane1 = _mm256_broadcastsd_pd(tmp1);
6385 tmp = _mm512_insertf64x4(tmp, lane0, 0);
6386 return _mm512_insertf64x4(tmp, lane1, 1);
6389 EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet16f& from) {
6390 EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ps(to, from);
6393 EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet8d& from) {
6394 EIGEN_DEBUG_ALIGNED_STORE _mm512_store_pd(to, from);
6397 EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet16i& from) {
6398 EIGEN_DEBUG_ALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to),
6402 EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from) {
6403 EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ps(to, from);
6406 EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet8d& from) {
6407 EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_pd(to, from);
6410 EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {
6411 EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
6412 reinterpret_cast<__m512i*>(to), from);
6415 EIGEN_DEVICE_FUNC inline Packet16f pgather<float, Packet16f>(const float* from,
6417 Packet16i stride_vector = _mm512_set1_epi32(stride);
6418 Packet16i stride_multiplier =
6419 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
6420 Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
6421 return _mm512_i32gather_ps(indices, from, 4);
6424 EIGEN_DEVICE_FUNC inline Packet8d pgather<double, Packet8d>(const double* from,
6426 Packet8i stride_vector = _mm256_set1_epi32(stride);
6427 Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
6428 Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
6429 return _mm512_i32gather_pd(indices, from, 8);
6432 EIGEN_DEVICE_FUNC inline void pscatter<float, Packet16f>(float* to,
6433 const Packet16f& from,
6435 Packet16i stride_vector = _mm512_set1_epi32(stride);
6436 Packet16i stride_multiplier =
6437 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
6438 Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
6439 _mm512_i32scatter_ps(to, indices, from, 4);
6442 EIGEN_DEVICE_FUNC inline void pscatter<double, Packet8d>(double* to,
6443 const Packet8d& from,
6445 Packet8i stride_vector = _mm256_set1_epi32(stride);
6446 Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
6447 Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
6448 _mm512_i32scatter_pd(to, indices, from, 8);
6451 EIGEN_STRONG_INLINE void pstore1<Packet16f>(float* to, const float& a) {
6452 Packet16f pa = pset1<Packet16f>(a);
6456 EIGEN_STRONG_INLINE void pstore1<Packet8d>(double* to, const double& a) {
6457 Packet8d pa = pset1<Packet8d>(a);
6461 EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
6462 Packet16i pa = pset1<Packet16i>(a);
6465 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
6466 template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
6467 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
6469 EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
6470 return _mm_cvtss_f32(_mm512_extractf32x4_ps(a, 0));
6473 EIGEN_STRONG_INLINE double pfirst<Packet8d>(const Packet8d& a) {
6474 return _mm_cvtsd_f64(_mm256_extractf128_pd(_mm512_extractf64x4_pd(a, 0), 0));
6477 EIGEN_STRONG_INLINE int pfirst<Packet16i>(const Packet16i& a) {
6478 return _mm_extract_epi32(_mm512_extracti32x4_epi32(a, 0), 0);
6480 template<> EIGEN_STRONG_INLINE Packet16f preverse(const Packet16f& a)
6482 return _mm512_permutexvar_ps(_mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), a);
6484 template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
6486 return _mm512_permutexvar_pd(_mm512_set_epi32(0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7), a);
6488 template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
6490 return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff));
6493 EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
6494 return (__m512d)_mm512_and_si512((__m512i)a,
6495 _mm512_set1_epi64(0x7fffffffffffffff));
// Splits a Packet16f INPUT into two __m256 halves named OUTPUT_0 / OUTPUT_1.
// Fix: the DQ branch was missing the statement separator and line
// continuation between the two declarations, which could not compile.
#ifdef EIGEN_VECTORIZE_AVX512DQ
#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT)        \
  __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \
  __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1)
#else
#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT)                \
  __m256 OUTPUT##_0 = _mm256_insertf128_ps(                     \
      _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 0)), \
      _mm512_extractf32x4_ps(INPUT, 1), 1);                     \
  __m256 OUTPUT##_1 = _mm256_insertf128_ps(                     \
      _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 2)), \
      _mm512_extractf32x4_ps(INPUT, 3), 1);
#endif
// Assembles a Packet16f OUTPUT from two __m256 halves INPUTA (low) and
// INPUTB (high).
#ifdef EIGEN_VECTORIZE_AVX512DQ
#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \
  OUTPUT = _mm512_insertf32x8(OUTPUT, INPUTA, 0);        \
  OUTPUT = _mm512_insertf32x8(OUTPUT, INPUTB, 1);
#else
#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB)                    \
  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 0), 0); \
  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 1), 1); \
  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \
  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3);
#endif
6521 template<> EIGEN_STRONG_INLINE Packet16f preduxp<Packet16f>(const Packet16f*
6524 EIGEN_EXTRACT_8f_FROM_16f(vecs[0], vecs0);
6525 EIGEN_EXTRACT_8f_FROM_16f(vecs[1], vecs1);
6526 EIGEN_EXTRACT_8f_FROM_16f(vecs[2], vecs2);
6527 EIGEN_EXTRACT_8f_FROM_16f(vecs[3], vecs3);
6528 EIGEN_EXTRACT_8f_FROM_16f(vecs[4], vecs4);
6529 EIGEN_EXTRACT_8f_FROM_16f(vecs[5], vecs5);
6530 EIGEN_EXTRACT_8f_FROM_16f(vecs[6], vecs6);
6531 EIGEN_EXTRACT_8f_FROM_16f(vecs[7], vecs7);
6532 EIGEN_EXTRACT_8f_FROM_16f(vecs[8], vecs8);
6533 EIGEN_EXTRACT_8f_FROM_16f(vecs[9], vecs9);
6534 EIGEN_EXTRACT_8f_FROM_16f(vecs[10], vecs10);
6535 EIGEN_EXTRACT_8f_FROM_16f(vecs[11], vecs11);
6536 EIGEN_EXTRACT_8f_FROM_16f(vecs[12], vecs12);
6537 EIGEN_EXTRACT_8f_FROM_16f(vecs[13], vecs13);
6538 EIGEN_EXTRACT_8f_FROM_16f(vecs[14], vecs14);
6539 EIGEN_EXTRACT_8f_FROM_16f(vecs[15], vecs15);
6540 __m256 hsum1 = _mm256_hadd_ps(vecs0_0, vecs1_0);
6541 __m256 hsum2 = _mm256_hadd_ps(vecs2_0, vecs3_0);
6542 __m256 hsum3 = _mm256_hadd_ps(vecs4_0, vecs5_0);
6543 __m256 hsum4 = _mm256_hadd_ps(vecs6_0, vecs7_0);
6544 __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
6545 __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
6546 __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
6547 __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
6548 __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
6549 __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
6550 __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
6551 __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
6552 __m256 sum1 = _mm256_add_ps(perm1, hsum5);
6553 __m256 sum2 = _mm256_add_ps(perm2, hsum6);
6554 __m256 sum3 = _mm256_add_ps(perm3, hsum7);
6555 __m256 sum4 = _mm256_add_ps(perm4, hsum8);
6556 __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
6557 __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
6558 __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
6559 hsum1 = _mm256_hadd_ps(vecs0_1, vecs1_1);
6560 hsum2 = _mm256_hadd_ps(vecs2_1, vecs3_1);
6561 hsum3 = _mm256_hadd_ps(vecs4_1, vecs5_1);
6562 hsum4 = _mm256_hadd_ps(vecs6_1, vecs7_1);
6563 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
6564 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
6565 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
6566 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
6567 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
6568 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
6569 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
6570 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
6571 sum1 = _mm256_add_ps(perm1, hsum5);
6572 sum2 = _mm256_add_ps(perm2, hsum6);
6573 sum3 = _mm256_add_ps(perm3, hsum7);
6574 sum4 = _mm256_add_ps(perm4, hsum8);
6575 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
6576 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
6577 final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0));
6578 hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0);
6579 hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0);
6580 hsum3 = _mm256_hadd_ps(vecs12_0, vecs13_0);
6581 hsum4 = _mm256_hadd_ps(vecs14_0, vecs15_0);
6582 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
6583 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
6584 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
6585 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
6586 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
6587 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
6588 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
6589 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
6590 sum1 = _mm256_add_ps(perm1, hsum5);
6591 sum2 = _mm256_add_ps(perm2, hsum6);
6592 sum3 = _mm256_add_ps(perm3, hsum7);
6593 sum4 = _mm256_add_ps(perm4, hsum8);
6594 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
6595 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
6596 __m256 final_1 = _mm256_blend_ps(blend1, blend2, 0xf0);
6597 hsum1 = _mm256_hadd_ps(vecs8_1, vecs9_1);
6598 hsum2 = _mm256_hadd_ps(vecs10_1, vecs11_1);
6599 hsum3 = _mm256_hadd_ps(vecs12_1, vecs13_1);
6600 hsum4 = _mm256_hadd_ps(vecs14_1, vecs15_1);
6601 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
6602 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
6603 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
6604 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
6605 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
6606 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
6607 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
6608 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
6609 sum1 = _mm256_add_ps(perm1, hsum5);
6610 sum2 = _mm256_add_ps(perm2, hsum6);
6611 sum3 = _mm256_add_ps(perm3, hsum7);
6612 sum4 = _mm256_add_ps(perm4, hsum8);
6613 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
6614 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
6615 final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0));
6616 __m512 final_output;
6617 EIGEN_INSERT_8f_INTO_16f(final_output, final, final_1);
6618 return final_output;
6620 template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs)
6622 Packet4d vecs0_0 = _mm512_extractf64x4_pd(vecs[0], 0);
6623 Packet4d vecs0_1 = _mm512_extractf64x4_pd(vecs[0], 1);
6624 Packet4d vecs1_0 = _mm512_extractf64x4_pd(vecs[1], 0);
6625 Packet4d vecs1_1 = _mm512_extractf64x4_pd(vecs[1], 1);
6626 Packet4d vecs2_0 = _mm512_extractf64x4_pd(vecs[2], 0);
6627 Packet4d vecs2_1 = _mm512_extractf64x4_pd(vecs[2], 1);
6628 Packet4d vecs3_0 = _mm512_extractf64x4_pd(vecs[3], 0);
6629 Packet4d vecs3_1 = _mm512_extractf64x4_pd(vecs[3], 1);
6630 Packet4d vecs4_0 = _mm512_extractf64x4_pd(vecs[4], 0);
6631 Packet4d vecs4_1 = _mm512_extractf64x4_pd(vecs[4], 1);
6632 Packet4d vecs5_0 = _mm512_extractf64x4_pd(vecs[5], 0);
6633 Packet4d vecs5_1 = _mm512_extractf64x4_pd(vecs[5], 1);
6634 Packet4d vecs6_0 = _mm512_extractf64x4_pd(vecs[6], 0);
6635 Packet4d vecs6_1 = _mm512_extractf64x4_pd(vecs[6], 1);
6636 Packet4d vecs7_0 = _mm512_extractf64x4_pd(vecs[7], 0);
6637 Packet4d vecs7_1 = _mm512_extractf64x4_pd(vecs[7], 1);
6638 Packet4d tmp0, tmp1;
6639 tmp0 = _mm256_hadd_pd(vecs0_0, vecs1_0);
6640 tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
6641 tmp1 = _mm256_hadd_pd(vecs2_0, vecs3_0);
6642 tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
6643 __m256d final_0 = _mm256_blend_pd(tmp0, tmp1, 0xC);
6644 tmp0 = _mm256_hadd_pd(vecs0_1, vecs1_1);
6645 tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
6646 tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1);
6647 tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
6648 final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC));
6649 tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0);
6650 tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
6651 tmp1 = _mm256_hadd_pd(vecs6_0, vecs7_0);
6652 tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
6653 __m256d final_1 = _mm256_blend_pd(tmp0, tmp1, 0xC);
6654 tmp0 = _mm256_hadd_pd(vecs4_1, vecs5_1);
6655 tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
6656 tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1);
6657 tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
6658 final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC));
6659 __m512d final_output = _mm512_insertf64x4(final_output, final_0, 0);
6660 return _mm512_insertf64x4(final_output, final_1, 1);
6663 EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
6665 Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
6666 Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
6667 Packet8f sum = padd(lane0, lane1);
6668 Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1));
6669 tmp0 = _mm256_hadd_ps(tmp0, tmp0);
6670 return pfirst(_mm256_hadd_ps(tmp0, tmp0));
6672 Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
6673 Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
6674 Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
6675 Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
6676 Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3));
6677 sum = _mm_hadd_ps(sum, sum);
6678 sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1));
6683 EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
6684 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
6685 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
6686 Packet4d sum = padd(lane0, lane1);
6687 Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1));
6688 return pfirst(_mm256_hadd_pd(tmp0, tmp0));
6691 EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) {
6692 #ifdef EIGEN_VECTORIZE_AVX512DQ
6693 Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
6694 Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
6695 return padd(lane0, lane1);
6697 Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
6698 Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
6699 Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
6700 Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
6701 Packet4f sum0 = padd(lane0, lane2);
6702 Packet4f sum1 = padd(lane1, lane3);
6703 return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1);
6707 EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) {
6708 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
6709 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
6710 Packet4d res = padd(lane0, lane1);
6714 EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
6716 Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
6717 Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
6718 Packet8f res = pmul(lane0, lane1);
6719 res = pmul(res, _mm256_permute2f128_ps(res, res, 1));
6720 res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
6721 return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
6723 Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
6724 Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
6725 Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
6726 Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
6727 Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3));
6728 res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
6729 return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
6733 EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
6734 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
6735 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
6736 Packet4d res = pmul(lane0, lane1);
6737 res = pmul(res, _mm256_permute2f128_pd(res, res, 1));
6738 return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));
6741 EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {
6742 Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
6743 Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
6744 Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
6745 Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
6746 Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3));
6747 res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
6748 return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
6751 EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) {
6752 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
6753 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
6754 Packet4d res = _mm256_min_pd(lane0, lane1);
6755 res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));
6756 return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));
6759 EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {
6760 Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
6761 Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
6762 Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
6763 Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
6764 Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3));
6765 res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
6766 return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
6769 EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
6770 Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
6771 Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
6772 Packet4d res = _mm256_max_pd(lane0, lane1);
6773 res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));
6774 return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
6776 template <int Offset>
6777 struct palign_impl<Offset, Packet16f> {
6778 static EIGEN_STRONG_INLINE void run(Packet16f& first,
6779 const Packet16f& second) {
6781 __m512i first_idx = _mm512_set_epi32(
6782 Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
6783 Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
6784 Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
6785 __m512i second_idx =
6786 _mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
6787 Offset - 5, Offset - 6, Offset - 7, Offset - 8,
6788 Offset - 9, Offset - 10, Offset - 11, Offset - 12,
6789 Offset - 13, Offset - 14, Offset - 15, Offset - 16);
6790 unsigned short mask = 0xFFFF;
6791 mask <<= (16 - Offset);
6792 first = _mm512_permutexvar_ps(first_idx, first);
6793 Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
6794 first = _mm512_mask_blend_ps(mask, first, tmp);
6798 template <int Offset>
6799 struct palign_impl<Offset, Packet8d> {
6800 static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
6802 __m512i first_idx = _mm512_set_epi32(
6803 0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
6804 Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
6805 __m512i second_idx = _mm512_set_epi32(
6806 0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
6807 Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
6808 unsigned char mask = 0xFF;
6809 mask <<= (8 - Offset);
6810 first = _mm512_permutexvar_pd(first_idx, first);
6811 Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
6812 first = _mm512_mask_blend_pd(mask, first, tmp);
// Reassembles OUTPUT[INDEX] (a Packet16f) from the two Packet8f halves
// INPUT[INDEX] (low) and INPUT[INDEX + STRIDE] (high).
#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]);
6818 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 16>& kernel) {
6819 __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
6820 __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
6821 __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
6822 __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
6823 __m512 T4 = _mm512_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
6824 __m512 T5 = _mm512_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
6825 __m512 T6 = _mm512_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
6826 __m512 T7 = _mm512_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
6827 __m512 T8 = _mm512_unpacklo_ps(kernel.packet[8], kernel.packet[9]);
6828 __m512 T9 = _mm512_unpackhi_ps(kernel.packet[8], kernel.packet[9]);
6829 __m512 T10 = _mm512_unpacklo_ps(kernel.packet[10], kernel.packet[11]);
6830 __m512 T11 = _mm512_unpackhi_ps(kernel.packet[10], kernel.packet[11]);
6831 __m512 T12 = _mm512_unpacklo_ps(kernel.packet[12], kernel.packet[13]);
6832 __m512 T13 = _mm512_unpackhi_ps(kernel.packet[12], kernel.packet[13]);
6833 __m512 T14 = _mm512_unpacklo_ps(kernel.packet[14], kernel.packet[15]);
6834 __m512 T15 = _mm512_unpackhi_ps(kernel.packet[14], kernel.packet[15]);
6835 __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
6836 __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
6837 __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
6838 __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
6839 __m512 S4 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(1, 0, 1, 0));
6840 __m512 S5 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(3, 2, 3, 2));
6841 __m512 S6 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(1, 0, 1, 0));
6842 __m512 S7 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(3, 2, 3, 2));
6843 __m512 S8 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(1, 0, 1, 0));
6844 __m512 S9 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(3, 2, 3, 2));
6845 __m512 S10 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(1, 0, 1, 0));
6846 __m512 S11 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(3, 2, 3, 2));
6847 __m512 S12 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(1, 0, 1, 0));
6848 __m512 S13 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(3, 2, 3, 2));
6849 __m512 S14 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(1, 0, 1, 0));
6850 __m512 S15 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(3, 2, 3, 2));
6851 EIGEN_EXTRACT_8f_FROM_16f(S0, S0);
6852 EIGEN_EXTRACT_8f_FROM_16f(S1, S1);
6853 EIGEN_EXTRACT_8f_FROM_16f(S2, S2);
6854 EIGEN_EXTRACT_8f_FROM_16f(S3, S3);
6855 EIGEN_EXTRACT_8f_FROM_16f(S4, S4);
6856 EIGEN_EXTRACT_8f_FROM_16f(S5, S5);
6857 EIGEN_EXTRACT_8f_FROM_16f(S6, S6);
6858 EIGEN_EXTRACT_8f_FROM_16f(S7, S7);
6859 EIGEN_EXTRACT_8f_FROM_16f(S8, S8);
6860 EIGEN_EXTRACT_8f_FROM_16f(S9, S9);
6861 EIGEN_EXTRACT_8f_FROM_16f(S10, S10);
6862 EIGEN_EXTRACT_8f_FROM_16f(S11, S11);
6863 EIGEN_EXTRACT_8f_FROM_16f(S12, S12);
6864 EIGEN_EXTRACT_8f_FROM_16f(S13, S13);
6865 EIGEN_EXTRACT_8f_FROM_16f(S14, S14);
6866 EIGEN_EXTRACT_8f_FROM_16f(S15, S15);
6867 PacketBlock<Packet8f, 32> tmp;
6868 tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S4_0, 0x20);
6869 tmp.packet[1] = _mm256_permute2f128_ps(S1_0, S5_0, 0x20);
6870 tmp.packet[2] = _mm256_permute2f128_ps(S2_0, S6_0, 0x20);
6871 tmp.packet[3] = _mm256_permute2f128_ps(S3_0, S7_0, 0x20);
6872 tmp.packet[4] = _mm256_permute2f128_ps(S0_0, S4_0, 0x31);
6873 tmp.packet[5] = _mm256_permute2f128_ps(S1_0, S5_0, 0x31);
6874 tmp.packet[6] = _mm256_permute2f128_ps(S2_0, S6_0, 0x31);
6875 tmp.packet[7] = _mm256_permute2f128_ps(S3_0, S7_0, 0x31);
6876 tmp.packet[8] = _mm256_permute2f128_ps(S0_1, S4_1, 0x20);
6877 tmp.packet[9] = _mm256_permute2f128_ps(S1_1, S5_1, 0x20);
6878 tmp.packet[10] = _mm256_permute2f128_ps(S2_1, S6_1, 0x20);
6879 tmp.packet[11] = _mm256_permute2f128_ps(S3_1, S7_1, 0x20);
6880 tmp.packet[12] = _mm256_permute2f128_ps(S0_1, S4_1, 0x31);
6881 tmp.packet[13] = _mm256_permute2f128_ps(S1_1, S5_1, 0x31);
6882 tmp.packet[14] = _mm256_permute2f128_ps(S2_1, S6_1, 0x31);
6883 tmp.packet[15] = _mm256_permute2f128_ps(S3_1, S7_1, 0x31);
6884 tmp.packet[16] = _mm256_permute2f128_ps(S8_0, S12_0, 0x20);
6885 tmp.packet[17] = _mm256_permute2f128_ps(S9_0, S13_0, 0x20);
6886 tmp.packet[18] = _mm256_permute2f128_ps(S10_0, S14_0, 0x20);
6887 tmp.packet[19] = _mm256_permute2f128_ps(S11_0, S15_0, 0x20);
6888 tmp.packet[20] = _mm256_permute2f128_ps(S8_0, S12_0, 0x31);
6889 tmp.packet[21] = _mm256_permute2f128_ps(S9_0, S13_0, 0x31);
6890 tmp.packet[22] = _mm256_permute2f128_ps(S10_0, S14_0, 0x31);
6891 tmp.packet[23] = _mm256_permute2f128_ps(S11_0, S15_0, 0x31);
6892 tmp.packet[24] = _mm256_permute2f128_ps(S8_1, S12_1, 0x20);
6893 tmp.packet[25] = _mm256_permute2f128_ps(S9_1, S13_1, 0x20);
6894 tmp.packet[26] = _mm256_permute2f128_ps(S10_1, S14_1, 0x20);
6895 tmp.packet[27] = _mm256_permute2f128_ps(S11_1, S15_1, 0x20);
6896 tmp.packet[28] = _mm256_permute2f128_ps(S8_1, S12_1, 0x31);
6897 tmp.packet[29] = _mm256_permute2f128_ps(S9_1, S13_1, 0x31);
6898 tmp.packet[30] = _mm256_permute2f128_ps(S10_1, S14_1, 0x31);
6899 tmp.packet[31] = _mm256_permute2f128_ps(S11_1, S15_1, 0x31);
6900 PACK_OUTPUT(kernel.packet, tmp.packet, 0, 16);
6901 PACK_OUTPUT(kernel.packet, tmp.packet, 1, 16);
6902 PACK_OUTPUT(kernel.packet, tmp.packet, 2, 16);
6903 PACK_OUTPUT(kernel.packet, tmp.packet, 3, 16);
6904 PACK_OUTPUT(kernel.packet, tmp.packet, 4, 16);
6905 PACK_OUTPUT(kernel.packet, tmp.packet, 5, 16);
6906 PACK_OUTPUT(kernel.packet, tmp.packet, 6, 16);
6907 PACK_OUTPUT(kernel.packet, tmp.packet, 7, 16);
6908 PACK_OUTPUT(kernel.packet, tmp.packet, 8, 16);
6909 PACK_OUTPUT(kernel.packet, tmp.packet, 9, 16);
6910 PACK_OUTPUT(kernel.packet, tmp.packet, 10, 16);
6911 PACK_OUTPUT(kernel.packet, tmp.packet, 11, 16);
6912 PACK_OUTPUT(kernel.packet, tmp.packet, 12, 16);
6913 PACK_OUTPUT(kernel.packet, tmp.packet, 13, 16);
6914 PACK_OUTPUT(kernel.packet, tmp.packet, 14, 16);
6915 PACK_OUTPUT(kernel.packet, tmp.packet, 15, 16);
// Reassembles OUTPUT[INDEX] from the Packet8f halves INPUT[2*INDEX] (low)
// and INPUT[2*INDEX + STRIDE] (high).
#define PACK_OUTPUT_2(OUTPUT, INPUT, INDEX, STRIDE)    \
  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[2 * INDEX], \
                           INPUT[2 * INDEX + STRIDE]);
// In-place transpose of a 4x16 float block: kernel.packet[0..3] are four
// rows of 16 floats each.  The transpose is done on 256-bit halves
// (unpack + shuffle build 4x4 transposed tiles, then permute2f128 and
// PACK_OUTPUT_2 reassemble the 512-bit outputs).
// NOTE(review): the closing brace is absent in this extraction — confirm
// against upstream before editing.
6920 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 4>& kernel) {
// Interleave rows pairwise (lane-local within each 128-bit sub-lane).
6921 __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
6922 __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
6923 __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
6924 __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
// Combine the interleaved halves into fully transposed 4-float columns.
6925 __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
6926 __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
6927 __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
6928 __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
// EIGEN_EXTRACT_8f_FROM_16f declares Sk_0 / Sk_1 holding the low / high
// 256-bit halves of Sk.
6929 EIGEN_EXTRACT_8f_FROM_16f(S0, S0);
6930 EIGEN_EXTRACT_8f_FROM_16f(S1, S1);
6931 EIGEN_EXTRACT_8f_FROM_16f(S2, S2);
6932 EIGEN_EXTRACT_8f_FROM_16f(S3, S3);
6933 PacketBlock<Packet8f, 8> tmp;
// 0x20 selects (low(a), low(b)); 0x31 selects (high(a), high(b)).
6934 tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S1_0, 0x20);
6935 tmp.packet[1] = _mm256_permute2f128_ps(S2_0, S3_0, 0x20);
6936 tmp.packet[2] = _mm256_permute2f128_ps(S0_0, S1_0, 0x31);
6937 tmp.packet[3] = _mm256_permute2f128_ps(S2_0, S3_0, 0x31);
6938 tmp.packet[4] = _mm256_permute2f128_ps(S0_1, S1_1, 0x20);
6939 tmp.packet[5] = _mm256_permute2f128_ps(S2_1, S3_1, 0x20);
6940 tmp.packet[6] = _mm256_permute2f128_ps(S0_1, S1_1, 0x31);
6941 tmp.packet[7] = _mm256_permute2f128_ps(S2_1, S3_1, 0x31);
// Write the transposed columns back into kernel.packet[0..3].
6942 PACK_OUTPUT_2(kernel.packet, tmp.packet, 0, 1);
6943 PACK_OUTPUT_2(kernel.packet, tmp.packet, 1, 1);
6944 PACK_OUTPUT_2(kernel.packet, tmp.packet, 2, 1);
6945 PACK_OUTPUT_2(kernel.packet, tmp.packet, 3, 1);
// "Square" double packing: OUTPUT[INDEX] gets INPUT[INDEX] as low 256 bits
// and INPUT[INDEX + STRIDE] as high 256 bits.
6947 #define PACK_OUTPUT_SQ_D(OUTPUT, INPUT, INDEX, STRIDE) \
6948 OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX], 0); \
6949 OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX + STRIDE], 1);
// Strided double packing: low half from INPUT[2*INDEX], high half from
// INPUT[2*INDEX + STRIDE].
// NOTE(review): the assignment target of the second statement appears
// truncated in this extraction (a "OUTPUT[INDEX] =" line seems to be
// missing before the final insert) — confirm against upstream.
6950 #define PACK_OUTPUT_D(OUTPUT, INPUT, INDEX, STRIDE) \
6951 OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \
6953 _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 1);
// In-place transpose of a 4x8 double block (kernel.packet[0..3] are four
// rows of 8 doubles).  shuffle_pd pairs elements across two rows; the
// extract/permute2f128 sequence then rearranges 128-bit lanes into the
// transposed layout before PACK_OUTPUT_D writes the results back.
6954 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 4>& kernel) {
// imm 0 takes the even elements of each pair, 0xff the odd elements.
6955 __m512d T0 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
6956 __m512d T1 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xff);
6957 __m512d T2 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
6958 __m512d T3 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0xff);
6959 PacketBlock<Packet4d, 8> tmp;
// 0x20 selects (low(a), low(b)); 0x31 selects (high(a), high(b)).
6960 tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
6961 _mm512_extractf64x4_pd(T2, 0), 0x20);
6962 tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
6963 _mm512_extractf64x4_pd(T3, 0), 0x20);
6964 tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
6965 _mm512_extractf64x4_pd(T2, 0), 0x31);
6966 tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
6967 _mm512_extractf64x4_pd(T3, 0), 0x31);
6968 tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
6969 _mm512_extractf64x4_pd(T2, 1), 0x20);
6970 tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
6971 _mm512_extractf64x4_pd(T3, 1), 0x20);
6972 tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
6973 _mm512_extractf64x4_pd(T2, 1), 0x31);
6974 tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
6975 _mm512_extractf64x4_pd(T3, 1), 0x31);
// Reassemble 512-bit outputs from consecutive 256-bit pieces.
6976 PACK_OUTPUT_D(kernel.packet, tmp.packet, 0, 1);
6977 PACK_OUTPUT_D(kernel.packet, tmp.packet, 1, 1);
6978 PACK_OUTPUT_D(kernel.packet, tmp.packet, 2, 1);
6979 PACK_OUTPUT_D(kernel.packet, tmp.packet, 3, 1);
// In-place transpose of a full 8x8 double matrix held in
// kernel.packet[0..7].  Stage 1: unpacklo/unpackhi interleave row pairs.
// Stage 2: extract 256-bit halves and permute2f128 to gather transposed
// columns.  Stage 3: PACK_OUTPUT_SQ_D recombines the sixteen 256-bit
// pieces into eight 512-bit rows (stride 8 pairs tmp[i] with tmp[i+8]).
6981 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 8>& kernel) {
6982 __m512d T0 = _mm512_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
6983 __m512d T1 = _mm512_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
6984 __m512d T2 = _mm512_unpacklo_pd(kernel.packet[2], kernel.packet[3]);
6985 __m512d T3 = _mm512_unpackhi_pd(kernel.packet[2], kernel.packet[3]);
6986 __m512d T4 = _mm512_unpacklo_pd(kernel.packet[4], kernel.packet[5]);
6987 __m512d T5 = _mm512_unpackhi_pd(kernel.packet[4], kernel.packet[5]);
6988 __m512d T6 = _mm512_unpacklo_pd(kernel.packet[6], kernel.packet[7]);
6989 __m512d T7 = _mm512_unpackhi_pd(kernel.packet[6], kernel.packet[7]);
6990 PacketBlock<Packet4d, 16> tmp;
// tmp[0..7]: pieces built from rows 0-3 (T0..T3).
6991 tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
6992 _mm512_extractf64x4_pd(T2, 0), 0x20);
6993 tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
6994 _mm512_extractf64x4_pd(T3, 0), 0x20);
6995 tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
6996 _mm512_extractf64x4_pd(T2, 0), 0x31);
6997 tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
6998 _mm512_extractf64x4_pd(T3, 0), 0x31);
6999 tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
7000 _mm512_extractf64x4_pd(T2, 1), 0x20);
7001 tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
7002 _mm512_extractf64x4_pd(T3, 1), 0x20);
7003 tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
7004 _mm512_extractf64x4_pd(T2, 1), 0x31);
7005 tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
7006 _mm512_extractf64x4_pd(T3, 1), 0x31);
// tmp[8..15]: same pattern for rows 4-7 (T4..T7).
7007 tmp.packet[8] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),
7008 _mm512_extractf64x4_pd(T6, 0), 0x20);
7009 tmp.packet[9] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),
7010 _mm512_extractf64x4_pd(T7, 0), 0x20);
7011 tmp.packet[10] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),
7012 _mm512_extractf64x4_pd(T6, 0), 0x31);
7013 tmp.packet[11] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),
7014 _mm512_extractf64x4_pd(T7, 0), 0x31);
7015 tmp.packet[12] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),
7016 _mm512_extractf64x4_pd(T6, 1), 0x20);
7017 tmp.packet[13] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),
7018 _mm512_extractf64x4_pd(T7, 1), 0x20);
7019 tmp.packet[14] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),
7020 _mm512_extractf64x4_pd(T6, 1), 0x31);
7021 tmp.packet[15] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),
7022 _mm512_extractf64x4_pd(T7, 1), 0x31);
// Output row i = (tmp[i] low, tmp[i+8] high).
7023 PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 0, 8);
7024 PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 1, 8);
7025 PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 2, 8);
7026 PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 3, 8);
7027 PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 4, 8);
7028 PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 5, 8);
7029 PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 6, 8);
7030 PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 7, 8);
// pblend for the AVX-512 packets is not implemented: both overloads
// unconditionally assert at runtime ("To be implemented").
// NOTE(review): parameter lists / braces appear truncated in this
// extraction — confirm against upstream before editing.
7033 EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& ,
7035 const Packet16f& ) {
7036 assert(false && "To be implemented");
7040 EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ,
7043 assert(false && "To be implemented");
7049 // end #include "src/Core/arch/AVX512/PacketMath.h"
7050 // #include "src/Core/arch/AVX512/MathFunctions.h"
7051 #ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
7052 #define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
7054 namespace internal {
7055 #if EIGEN_GNUC_AT_LEAST(5, 3)
// Helpers that declare a function-local packet constant with a canonical
// name (p16f_NAME / p8d_NAME), broadcast from a scalar value.
7056 #define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
7057 const Packet16f p16f_##NAME = pset1<Packet16f>(X)
// Bit-pattern variant: broadcasts a raw int32 pattern and reinterprets
// the lanes as floats (used for masks / special values like -inf, NaN).
7058 #define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
7059 const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
7060 #define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
7061 const Packet8d p8d_##NAME = pset1<Packet8d>(X)
// int64 bit-pattern variant for doubles.
7062 #define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
7063 const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
7064 #if defined(EIGEN_VECTORIZE_AVX512DQ)
// Natural logarithm for 16 packed floats, Cephes-style: split the input
// into exponent e and mantissa in [sqrt(1/2), sqrt(2)), evaluate a
// degree-8 polynomial on the mantissa, then recombine with e*ln(2).
// Special cases: x <= 0 -> NaN (x == 0 -> -inf), handled via masks.
// NOTE(review): several statements (e.g. the local "x = _x" copy and the
// final polynomial-recombination adds) are missing from this extraction —
// compare with upstream before modifying.
7066 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
7067 plog<Packet16f>(const Packet16f& _x) {
7069 _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
7070 _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
// 126 (not 127) because the mantissa is OR'ed with 0.5 below instead of 1.0.
7071 _EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
7072 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
7073 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
7074 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
7075 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
7076 _EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
// Cephes logf polynomial coefficients.
7077 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
7078 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
7079 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
7080 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
7081 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
7082 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
7083 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
7084 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
7085 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
7086 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
7087 _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
// NGE_UQ is true for x < 0 and for NaN inputs.
7088 __mmask16 invalid_mask =
7089 _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
7090 __mmask16 iszero_mask =
7091 _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_UQ);
// Clamp denormals up to the smallest normal so the exponent extract works.
7092 x = pmax(x, p16f_min_norm_pos);
// Extract the biased exponent from the float bit pattern.
7093 Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
7094 Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
// Keep only the mantissa bits and force it into [0.5, 1).
7095 x = _mm512_and_ps(x, p16f_inv_mant_mask);
7096 x = _mm512_or_ps(x, p16f_half);
// If mantissa < sqrt(1/2), shift the range: x += x; e -= 1.
// NOTE(review): _mm512_mask_blend_ps(k, a, b) returns b where k is set —
// verify the operand order here matches the intended selection.
7097 __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
7098 Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
7099 x = psub(x, p16f_1);
7100 e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
// Evaluate the polynomial in three interleaved chains for ILP.
7102 Packet16f x2 = pmul(x, x);
7103 Packet16f x3 = pmul(x2, x);
7104 Packet16f y, y1, y2;
7105 y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
7106 y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
7107 y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
7108 y = pmadd(y, x, p16f_cephes_log_p2);
7109 y1 = pmadd(y1, x, p16f_cephes_log_p5);
7110 y2 = pmadd(y2, x, p16f_cephes_log_p8);
7111 y = pmadd(y, x3, y1);
7112 y = pmadd(y, x3, y2);
// Recombine with the exponent: result = mantissa_poly + e*ln(2)
// (split into q1/q2 for extra precision).
7114 y1 = pmul(e, p16f_cephes_log_q1);
7115 tmp = pmul(x2, p16f_half);
7118 y2 = pmul(e, p16f_cephes_log_q2);
// Negative/NaN inputs -> NaN; exact zeros -> -inf.
7121 return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
7122 _mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
// Exponential for 16 packed floats: clamp to the representable range,
// perform range reduction x = m*ln(2) + r with |r| <= ln(2)/2, evaluate a
// degree-5 polynomial for e^r, then scale by 2^m via exponent-bit math.
7126 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
7127 pexp<Packet16f>(const Packet16f& _x) {
7128 _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
7129 _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
7130 _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
// Beyond these bounds float exp overflows / underflows.
7131 _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
7132 _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
// 1/ln(2), used to pick the integer multiple m of ln(2).
7133 _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
7134 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
7135 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
7136 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
7137 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
7138 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
7139 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
7140 Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
// m = floor(x/ln(2) + 0.5): round-to-nearest multiple of ln(2).
7141 Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
// r = x - m*ln(2), computed with a single FMA for accuracy.
7142 _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
7143 Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
7144 Packet16f r2 = pmul(r, r);
// Horner evaluation of the degree-5 polynomial approximating e^r.
7145 Packet16f y = p16f_cephes_exp_p0;
7146 y = pmadd(y, r, p16f_cephes_exp_p1);
7147 y = pmadd(y, r, p16f_cephes_exp_p2);
7148 y = pmadd(y, r, p16f_cephes_exp_p3);
7149 y = pmadd(y, r, p16f_cephes_exp_p4);
7150 y = pmadd(y, r, p16f_cephes_exp_p5);
7151 y = pmadd(y, r2, r);
7152 y = padd(y, p16f_1);
// Build 2^m by placing (m + 127) into the exponent bits.
7153 Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
7154 emm0 = _mm512_slli_epi32(emm0, 23);
// The final pmax with _x propagates NaN inputs.
7155 return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
// Fast sqrt via rsqrt14 + Newton-Raphson refinement: sqrt(x) = x*rsqrt(x).
// rsqrt14 gives ~2^-14 relative error, so one NR step suffices for float;
// the double version below applies two steps.  Inputs below the smallest
// normal are flushed to zero via the blend.
// NOTE(review): _mm512_mask_blend_ps(k, a, b) selects b where k is set;
// as written the non-zero lanes select _mm512_setzero_ps() — this matches
// a known operand-order issue fixed in later Eigen; confirm upstream.
7159 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
7160 psqrt<Packet16f>(const Packet16f& _x) {
7161 _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
7162 _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
7163 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
7164 Packet16f neg_half = pmul(_x, p16f_minus_half);
7165 __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
7166 Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
7167 _mm512_setzero_ps());
// One Newton-Raphson iteration: x *= 1.5 - 0.5*x0*x*x.
7168 x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
// Double variant: same scheme with two Newton-Raphson iterations.
7172 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
7173 psqrt<Packet8d>(const Packet8d& _x) {
7174 _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
7175 _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
7176 _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
7177 Packet8d neg_half = pmul(_x, p8d_minus_half);
7178 __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
7179 Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
7180 _mm512_setzero_pd());
7181 x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
7182 x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Non-fast-math fallback: use the exact hardware sqrt instructions.
7187 EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
7188 return _mm512_sqrt_ps(x);
7191 EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
7192 return _mm512_sqrt_pd(x);
7195 #ifdef EIGEN_FAST_MATH
// Fast reciprocal sqrt: rsqrt14 estimate refined by Newton-Raphson
// (one step for float, two for double).  Special cases are patched in at
// the end: +0 -> +inf, negative input -> NaN.
// NOTE(review): _mm512_mask_blend_ps(k, a, b) selects b where k is set;
// the operand order in these blends matches a known inversion fixed in
// later Eigen releases — confirm against upstream before relying on the
// special-case behavior.
7197 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
7198 prsqrt<Packet16f>(const Packet16f& _x) {
7199 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
7200 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
7201 _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
7202 _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
7203 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
7204 Packet16f neg_half = pmul(_x, p16f_minus_half);
// Lanes below the smallest normal (incl. zero/negative) are special-cased.
7205 __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
7206 Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
7207 _mm512_rsqrt14_ps(_x));
7208 __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
7209 Packet16f infs_and_nans = _mm512_mask_blend_ps(
7211 _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
// Newton-Raphson refinement: x *= 1.5 - 0.5*x0*x*x.
7212 x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
7213 return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
// Double variant: identical scheme with two refinement iterations.
7216 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
7217 prsqrt<Packet8d>(const Packet8d& _x) {
7218 _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
7219 _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
7220 _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
7221 _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
7222 _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
7223 Packet8d neg_half = pmul(_x, p8d_minus_half);
7224 __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
7225 Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
7226 _mm512_rsqrt14_pd(_x));
7227 __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
7228 Packet8d infs_and_nans = _mm512_mask_blend_pd(
7230 _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
7231 x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
7232 x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
7233 return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
// AVX512ER path: single-instruction 28-bit rsqrt, no refinement needed.
7237 EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
7238 return _mm512_rsqrt28_ps(x);
7245 // end #include "src/Core/arch/AVX512/MathFunctions.h"
7246 #elif defined EIGEN_VECTORIZE_AVX
7247 // #include "src/Core/arch/SSE/PacketMath.h"
7248 #ifndef EIGEN_PACKET_MATH_SSE_H
7249 #define EIGEN_PACKET_MATH_SSE_H
7251 namespace internal {
7252 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
7253 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
7255 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
7256 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
7259 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
7260 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
7263 #if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
// Workaround for old GCC ABI (< 1004) with AVX: wrap the raw SIMD type in
// a trivial struct so packets can be passed by value safely.  The wrapper
// converts implicitly to/from T, so it is a drop-in replacement.
// NOTE(review): the m_val member and closing brace are missing from this
// extraction — confirm against upstream.
7264 template<typename T>
7265 struct eigen_packet_wrapper
7267 EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
7268 EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
7269 EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
7270 EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
7271 EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
// Wrapped typedefs (old-ABI path) vs. plain intrinsic typedefs.
7277 typedef eigen_packet_wrapper<__m128> Packet4f;
7278 typedef eigen_packet_wrapper<__m128i> Packet4i;
7279 typedef eigen_packet_wrapper<__m128d> Packet2d;
7281 typedef __m128 Packet4f;
7282 typedef __m128i Packet4i;
7283 typedef __m128d Packet2d;
// Mark the raw SIMD types as arithmetic so Eigen's traits accept them.
7285 template<> struct is_arithmetic<__m128> { enum { value = true }; };
7286 template<> struct is_arithmetic<__m128i> { enum { value = true }; };
7287 template<> struct is_arithmetic<__m128d> { enum { value = true }; };
// Single-operand swizzles: build {v[p], v[q], v[r], v[s]} (float/int via
// pshufd on the integer view; the float version casts through __m128i).
7288 #define vec4f_swizzle1(v,p,q,r,s) \
7289 (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
7290 #define vec4i_swizzle1(v,p,q,r,s) \
7291 (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
// Double swizzle {v[p], v[q]} expressed as a 32-bit shuffle.
// NOTE(review): p and q are used unparenthesized (q*2+1 etc.), so only
// pass plain literals/identifiers, never compound expressions.
7292 #define vec2d_swizzle1(v,p,q) \
7293 (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
// Two-operand swizzles: {a[p], a[q], b[r], b[s]}.
7294 #define vec4f_swizzle2(a,b,p,q,r,s) \
7295 (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
7296 #define vec4i_swizzle2(a,b,p,q,r,s) \
7297 (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
// Function-local packet constants with canonical p4f_/p2d_/p4i_ names.
7298 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
7299 const Packet4f p4f_##NAME = pset1<Packet4f>(X)
7300 #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
7301 const Packet2d p2d_##NAME = pset1<Packet2d>(X)
// Bit-pattern constant: int32 pattern reinterpreted as float lanes.
7302 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
7303 const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
7304 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
7305 const Packet4i p4i_##NAME = pset1<Packet4i>(X)
7306 #ifndef EIGEN_VECTORIZE_AVX
// SSE capability tables consumed by Eigen's vectorization logic (only when
// AVX is not enabled; AVX provides its own wider traits).
// NOTE(review): the enum bodies are partially truncated in this extraction
// (size/Vectorizable/Has* entries missing) — confirm against upstream.
7307 template<> struct packet_traits<float> : default_packet_traits
7309 typedef Packet4f type;
7310 typedef Packet4f half;
7313 AlignedOnScalar = 1,
// Trig/tanh are only accurate enough under fast-math.
7317 HasSin = EIGEN_FAST_MATH,
7318 HasCos = EIGEN_FAST_MATH,
7323 HasTanh = EIGEN_FAST_MATH,
7325 #ifdef EIGEN_VECTORIZE_SSE4_1
7333 template<> struct packet_traits<double> : default_packet_traits
7335 typedef Packet2d type;
7336 typedef Packet2d half;
7339 AlignedOnScalar = 1,
7347 #ifdef EIGEN_VECTORIZE_SSE4_1
7356 template<> struct packet_traits<int> : default_packet_traits
7358 typedef Packet4i type;
7359 typedef Packet4i half;
7362 AlignedOnScalar = 1,
// Reverse mapping: packet type -> scalar type, lane count, alignment.
7367 template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
7368 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
7369 template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
7370 #ifndef EIGEN_VECTORIZE_AVX
// Cost model: vectorized division cost estimates for SSE.
7371 template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
7372 template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
7374 #if EIGEN_COMP_MSVC==1500
7375 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
7376 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
7377 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
7379 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
7380 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
7381 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
7383 #if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
7384 template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
7385 return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
7388 template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
7389 template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
7390 template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
7391 template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
7392 template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
7393 template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
7394 template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
7395 template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
7396 template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
// pnegate: flip the sign of every lane.  Floats/doubles XOR the sign bit;
// integers subtract from zero.
7397 template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
7399 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
7400 return _mm_xor_ps(a,mask);
7402 template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
// setr lists lanes low->high: each double's sign bit sits in the upper
// 32-bit word of its 64-bit lane.
7404 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
7405 return _mm_xor_pd(a,mask);
7407 template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
7409 return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
// pconj is the identity for real (non-complex) packets.
7411 template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
7412 template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
7413 template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
// Element-wise multiplication.
7414 template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
7415 template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
// 32-bit int multiply: single instruction on SSE4.1; otherwise emulated
// with two widening 32x32->64 multiplies plus swizzles.
// NOTE(review): part of the SSE2 emulation path is missing from this
// extraction — confirm against upstream before editing.
7416 template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
7418 #ifdef EIGEN_VECTORIZE_SSE4_1
7419 return _mm_mullo_epi32(a,b);
7421 return vec4i_swizzle1(
7424 _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
7425 vec4i_swizzle1(b,1,0,3,2)),
// Element-wise division (float/double only; no SSE integer divide).
7430 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
7431 template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
// Integer madd is mul+add; the float/double versions use real FMA.
7432 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
7434 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
7435 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
// Element-wise minimum.  Integer min/max need SSE4.1; the SSE2 fallback
// builds the result from a compare mask with and/andnot/or.
7437 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
7438 template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
7439 template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
7441 #ifdef EIGEN_VECTORIZE_SSE4_1
7442 return _mm_min_epi32(a,b);
// SSE2: select a where a<b, else b.
7444 Packet4i mask = _mm_cmplt_epi32(a,b);
7445 return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
// Element-wise maximum (same structure as pmin).
7448 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
7449 template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
7450 template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
7452 #ifdef EIGEN_VECTORIZE_SSE4_1
7453 return _mm_max_epi32(a,b);
7455 Packet4i mask = _mm_cmpgt_epi32(a,b);
7456 return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
// Rounding ops are only available from SSE4.1 (_mm_round/ceil/floor).
// pround uses rounding mode 0 = round-to-nearest-even.
7459 #ifdef EIGEN_VECTORIZE_SSE4_1
7460 template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
7461 template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
7462 template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
7463 template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
7464 template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
7465 template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
7467 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
7468 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
7469 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
7470 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
7471 template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
7472 template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
7473 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
7474 template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
7475 template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
7476 template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
7477 template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
7478 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
// Aligned loads (pointer must be 16-byte aligned).
7479 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
7480 template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
7481 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
// Unaligned float load.  MSVC 2010 gets a two-half load (loadl/loadh) to
// work around poor _mm_loadu_ps code generation.
7483 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
7484 EIGEN_DEBUG_UNALIGNED_LOAD
7485 #if (EIGEN_COMP_MSVC==1600)
7486 __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
7487 res = _mm_loadh_pi(res, (const __m64*)(from+2));
7490 return _mm_loadu_ps(from);
// Generic unaligned loads for the remaining compilers/types.
7494 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
7496 EIGEN_DEBUG_UNALIGNED_LOAD
7497 return _mm_loadu_ps(from);
7500 template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
7502 EIGEN_DEBUG_UNALIGNED_LOAD
7503 return _mm_loadu_pd(from);
7505 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
7507 EIGEN_DEBUG_UNALIGNED_LOAD
7508 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
// ploaddup: load size/2 scalars and duplicate each -> {a0,a0,a1,a1}.
7510 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
7512 return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
7514 template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
7515 { return pset1<Packet2d>(from[0]); }
7516 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
7519 tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
7520 return vec4i_swizzle1(tmp, 0, 0, 1, 1);
// Aligned / unaligned stores.
7522 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
7523 template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
7524 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
7525 template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
7526 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
7527 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
// Strided gather: read `size` scalars at from[i*stride] into a packet
// (scalar loads; SSE has no gather instruction).
7528 template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
7530 return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
7532 template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
7534 return _mm_set_pd(from[1*stride], from[0*stride]);
7536 template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
7538 return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
// Strided scatter: each lane is rotated to position 0 via shuffle and
// written out with a scalar store.
7540 template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
7542 to[stride*0] = _mm_cvtss_f32(from);
7543 to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
7544 to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
7545 to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
7547 template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
7549 to[stride*0] = _mm_cvtsd_f64(from);
7550 to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
7552 template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
7554 to[stride*0] = _mm_cvtsi128_si32(from);
7555 to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
7556 to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
7557 to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
// pstore1: broadcast a scalar and store the packet to an aligned address
// (set scalar lane, swizzle-broadcast, aligned store).
7559 template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
7561 Packet4f pa = _mm_set_ss(a);
7562 pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
7564 template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
7566 Packet2d pa = _mm_set_sd(a);
7567 pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
// Prefetch into all cache levels (T0 hint); AVX provides its own versions.
7569 #ifndef EIGEN_VECTORIZE_AVX
7570 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
7571 template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
7572 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
// pfirst: extract lane 0.  Strict MSVC on Win64 reads the union member
// directly; other strict MSVC builds use a named temporary to dodge an
// alignment/codegen bug; everyone else uses the cvt intrinsics directly.
7574 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
7575 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
7576 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
7577 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
7578 #elif EIGEN_COMP_MSVC_STRICT
7579 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
7580 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
7581 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
7583 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
7584 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
7585 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
7587 template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
7588 { return _mm_shuffle_ps(a,a,0x1B); }
7589 template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
7590 { return _mm_shuffle_pd(a,a,0x1); }
7591 template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
7592 { return _mm_shuffle_epi32(a,0x1B); }
7593 template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
7595 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
7596 return _mm_and_ps(a,mask);
7598 template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
7600 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
7601 return _mm_and_pd(a,mask);
7603 template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
7605 #ifdef EIGEN_VECTORIZE_SSSE3
7606 return _mm_abs_epi32(a);
7608 Packet4i aux = _mm_srai_epi32(a,31);
7609 return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
7613 template<> EIGEN_STRONG_INLINE void
7614 pbroadcast4<Packet4f>(const float *a,
7615 Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
7617 a3 = pload<Packet4f>(a);
7618 a0 = vec4f_swizzle1(a3, 0,0,0,0);
7619 a1 = vec4f_swizzle1(a3, 1,1,1,1);
7620 a2 = vec4f_swizzle1(a3, 2,2,2,2);
7621 a3 = vec4f_swizzle1(a3, 3,3,3,3);
7623 template<> EIGEN_STRONG_INLINE void
7624 pbroadcast4<Packet2d>(const double *a,
7625 Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
7627 #ifdef EIGEN_VECTORIZE_SSE3
7628 a0 = _mm_loaddup_pd(a+0);
7629 a1 = _mm_loaddup_pd(a+1);
7630 a2 = _mm_loaddup_pd(a+2);
7631 a3 = _mm_loaddup_pd(a+3);
7633 a1 = pload<Packet2d>(a);
7634 a0 = vec2d_swizzle1(a1, 0,0);
7635 a1 = vec2d_swizzle1(a1, 1,1);
7636 a3 = pload<Packet2d>(a+2);
7637 a2 = vec2d_swizzle1(a3, 0,0);
7638 a3 = vec2d_swizzle1(a3, 1,1);
7642 EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
7644 vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
7645 vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
7646 vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
7647 vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
7649 #ifdef EIGEN_VECTORIZE_SSE3
7650 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
7652 return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
7654 template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
7656 return _mm_hadd_pd(vecs[0], vecs[1]);
7659 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
7661 Packet4f tmp0, tmp1, tmp2;
7662 tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
7663 tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
7664 tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
7665 tmp0 = _mm_add_ps(tmp0, tmp1);
7666 tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
7667 tmp1 = _mm_add_ps(tmp1, tmp2);
7668 tmp2 = _mm_movehl_ps(tmp1, tmp0);
7669 tmp0 = _mm_movelh_ps(tmp0, tmp1);
7670 return _mm_add_ps(tmp0, tmp2);
7672 template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
7674 return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
7677 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
7679 Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
7680 return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
7682 template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
7684 return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
7686 #ifdef EIGEN_VECTORIZE_SSSE3
7687 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
7689 return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
7691 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
7693 Packet4i tmp0 = _mm_hadd_epi32(a,a);
7694 return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
7697 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
7699 Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
7700 return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
7702 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
7704 Packet4i tmp0, tmp1, tmp2;
7705 tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
7706 tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
7707 tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
7708 tmp0 = _mm_add_epi32(tmp0, tmp1);
7709 tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
7710 tmp1 = _mm_add_epi32(tmp1, tmp2);
7711 tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
7712 tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
7713 return _mm_add_epi32(tmp0, tmp2);
7716 template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
7718 Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
7719 return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
7721 template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
7723 return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
7725 template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
7727 EIGEN_ALIGN16 int aux[4];
7729 return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
7731 template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
7733 Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
7734 return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
7736 template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
7738 return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
7740 template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
7742 #ifdef EIGEN_VECTORIZE_SSE4_1
7743 Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
7744 return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
7746 EIGEN_ALIGN16 int aux[4];
7748 int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
7749 int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
7750 return aux0<aux2 ? aux0 : aux2;
7753 template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
7755 Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
7756 return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
7758 template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
7760 return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
7762 template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
7764 #ifdef EIGEN_VECTORIZE_SSE4_1
7765 Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
7766 return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
7768 EIGEN_ALIGN16 int aux[4];
7770 int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
7771 int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
7772 return aux0>aux2 ? aux0 : aux2;
7777 #ifdef EIGEN_VECTORIZE_SSSE3
7778 template<int Offset>
7779 struct palign_impl<Offset,Packet4f>
7781 static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
7784 first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
7787 template<int Offset>
7788 struct palign_impl<Offset,Packet4i>
7790 static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
7793 first = _mm_alignr_epi8(second,first, Offset*4);
7796 template<int Offset>
7797 struct palign_impl<Offset,Packet2d>
7799 static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
7802 first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
7806 template<int Offset>
7807 struct palign_impl<Offset,Packet4f>
7809 static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
7813 first = _mm_move_ss(first,second);
7814 first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
7818 first = _mm_movehl_ps(first,first);
7819 first = _mm_movelh_ps(first,second);
7823 first = _mm_move_ss(first,second);
7824 first = _mm_shuffle_ps(first,second,0x93);
7828 template<int Offset>
7829 struct palign_impl<Offset,Packet4i>
7831 static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
7835 first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
7836 first = _mm_shuffle_epi32(first,0x39);
7840 first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
7841 first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
7845 first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
7846 first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
7850 template<int Offset>
7851 struct palign_impl<Offset,Packet2d>
7853 static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
7857 first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
7858 first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
7863 EIGEN_DEVICE_FUNC inline void
7864 ptranspose(PacketBlock<Packet4f,4>& kernel) {
7865 _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
7867 EIGEN_DEVICE_FUNC inline void
7868 ptranspose(PacketBlock<Packet2d,2>& kernel) {
7869 __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
7870 kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
7871 kernel.packet[1] = tmp;
7873 EIGEN_DEVICE_FUNC inline void
7874 ptranspose(PacketBlock<Packet4i,4>& kernel) {
7875 __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
7876 __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
7877 __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
7878 __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
7879 kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
7880 kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
7881 kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
7882 kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
7884 template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
7885 const __m128i zero = _mm_setzero_si128();
7886 const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
7887 __m128i false_mask = _mm_cmpeq_epi32(select, zero);
7888 #ifdef EIGEN_VECTORIZE_SSE4_1
7889 return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
7891 return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
7894 template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
7895 const __m128 zero = _mm_setzero_ps();
7896 const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
7897 __m128 false_mask = _mm_cmpeq_ps(select, zero);
7898 #ifdef EIGEN_VECTORIZE_SSE4_1
7899 return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
7901 return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
7904 template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
7905 const __m128d zero = _mm_setzero_pd();
7906 const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
7907 __m128d false_mask = _mm_cmpeq_pd(select, zero);
7908 #ifdef EIGEN_VECTORIZE_SSE4_1
7909 return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
7911 return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
7914 template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
7916 #ifdef EIGEN_VECTORIZE_SSE4_1
7917 return _mm_blend_ps(a,pset1<Packet4f>(b),1);
7919 return _mm_move_ss(a, _mm_load_ss(&b));
7922 template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
7924 #ifdef EIGEN_VECTORIZE_SSE4_1
7925 return _mm_blend_pd(a,pset1<Packet2d>(b),1);
7927 return _mm_move_sd(a, _mm_load_sd(&b));
7930 template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
7932 #ifdef EIGEN_VECTORIZE_SSE4_1
7933 return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
7935 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
7936 return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
7939 template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
7941 #ifdef EIGEN_VECTORIZE_SSE4_1
7942 return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
7944 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
7945 return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
// Scalar fused multiply-add specializations so the scalar path also uses FMA.
// NOTE(review): the EIGEN_VECTORIZE_FMA guard is reconstructed from canonical Eigen —
// confirm against upstream; ::fmaf/::fma would be valid but slow without hardware FMA.
#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
  return ::fmaf(a,b,c);
}
template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
  return ::fma(a,b,c);
}
#endif
7959 // end #include "src/Core/arch/SSE/PacketMath.h"
7960 // #include "src/Core/arch/SSE/Complex.h"
7961 #ifndef EIGEN_COMPLEX_SSE_H
7962 #define EIGEN_COMPLEX_SSE_H
7964 namespace internal {
7967 EIGEN_STRONG_INLINE Packet2cf() {}
7968 EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
7971 #ifndef EIGEN_VECTORIZE_AVX
7972 template<> struct packet_traits<std::complex<float> > : default_packet_traits
7974 typedef Packet2cf type;
7975 typedef Packet2cf half;
7978 AlignedOnScalar = 1,
7995 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
7996 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
7997 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
7998 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
8000 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
8001 return Packet2cf(_mm_xor_ps(a.v,mask));
8003 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
8005 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
8006 return Packet2cf(_mm_xor_ps(a.v,mask));
8008 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
8010 #ifdef EIGEN_VECTORIZE_SSE3
8011 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
8012 _mm_mul_ps(_mm_movehdup_ps(a.v),
8013 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
8015 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
8016 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
8017 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
8018 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
8021 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
8022 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
8023 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
8024 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
8025 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
8026 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
8027 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
8030 #if EIGEN_GNUC_AT_MOST(4,2)
8031 res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
8032 #elif EIGEN_GNUC_AT_LEAST(4,6)
8033 #pragma GCC diagnostic push
8034 #pragma GCC diagnostic ignored "-Wuninitialized"
8035 res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
8036 #pragma GCC diagnostic pop
8038 res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
8040 return Packet2cf(_mm_movelh_ps(res.v,res.v));
8042 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
8043 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
8044 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
8045 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
8047 return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
8048 std::imag(from[0*stride]), std::real(from[0*stride])));
8050 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
8052 to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
8053 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
8054 to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
8055 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
8057 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
8058 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
8060 #if EIGEN_GNUC_AT_MOST(4,3)
8061 EIGEN_ALIGN16 std::complex<float> res[2];
8062 _mm_store_ps((float*)res, a.v);
8065 std::complex<float> res;
8066 _mm_storel_pi((__m64*)&res, a.v);
8070 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
8071 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
8073 return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
8075 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
8077 return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
8079 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
8081 return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
8083 template<int Offset>
8084 struct palign_impl<Offset,Packet2cf>
8086 static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
8090 first.v = _mm_movehl_ps(first.v, first.v);
8091 first.v = _mm_movelh_ps(first.v, second.v);
8095 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
8097 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
8098 { return padd(pmul(x,y),c); }
8099 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
8101 #ifdef EIGEN_VECTORIZE_SSE3
8102 return internal::pmul(a, pconj(b));
8104 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
8105 return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
8106 _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
8107 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
8111 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
8113 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
8114 { return padd(pmul(x,y),c); }
8115 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
8117 #ifdef EIGEN_VECTORIZE_SSE3
8118 return internal::pmul(pconj(a), b);
8120 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
8121 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
8122 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
8123 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
8127 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
8129 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
8130 { return padd(pmul(x,y),c); }
8131 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
8133 #ifdef EIGEN_VECTORIZE_SSE3
8134 return pconj(internal::pmul(a, b));
8136 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
8137 return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
8138 _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
8139 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
8143 template<> struct conj_helper<Packet4f, Packet2cf, false,false>
8145 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
8146 { return padd(c, pmul(x,y)); }
8147 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
8148 { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
8150 template<> struct conj_helper<Packet2cf, Packet4f, false,false>
8152 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
8153 { return padd(c, pmul(x,y)); }
8154 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
8155 { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
8157 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
8159 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
8160 __m128 s = _mm_mul_ps(b.v,b.v);
8161 return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
8163 EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
8165 return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
8169 EIGEN_STRONG_INLINE Packet1cd() {}
8170 EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
8173 #ifndef EIGEN_VECTORIZE_AVX
8174 template<> struct packet_traits<std::complex<double> > : default_packet_traits
8176 typedef Packet1cd type;
8177 typedef Packet1cd half;
8180 AlignedOnScalar = 0,
8196 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
8197 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
8198 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
8199 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
8200 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
8202 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
8203 return Packet1cd(_mm_xor_pd(a.v,mask));
8205 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
8207 #ifdef EIGEN_VECTORIZE_SSE3
8208 return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
8209 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
8210 vec2d_swizzle1(b.v, 1, 0))));
8212 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
8213 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
8214 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
8215 vec2d_swizzle1(b.v, 1, 0)), mask)));
8218 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
8219 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
8220 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
8221 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
8222 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
8223 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
8224 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
8225 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
8226 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
8227 { return ploadu<Packet1cd>(&from); }
8228 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
8229 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
8230 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
8231 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
8232 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
8234 EIGEN_ALIGN16 double res[2];
8235 _mm_store_pd(res, a.v);
8236 return std::complex<double>(res[0],res[1]);
8238 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
8239 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
8243 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
8247 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
8251 template<int Offset>
8252 struct palign_impl<Offset,Packet1cd>
8254 static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
8258 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
8260 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
8261 { return padd(pmul(x,y),c); }
8262 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
8264 #ifdef EIGEN_VECTORIZE_SSE3
8265 return internal::pmul(a, pconj(b));
8267 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
8268 return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
8269 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
8270 vec2d_swizzle1(b.v, 1, 0))));
8274 template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
8276 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
8277 { return padd(pmul(x,y),c); }
8278 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
8280 #ifdef EIGEN_VECTORIZE_SSE3
8281 return internal::pmul(pconj(a), b);
8283 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
8284 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
8285 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
8286 vec2d_swizzle1(b.v, 1, 0)), mask)));
8290 template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
8292 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
8293 { return padd(pmul(x,y),c); }
8294 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
8296 #ifdef EIGEN_VECTORIZE_SSE3
8297 return pconj(internal::pmul(a, b));
8299 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
8300 return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
8301 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
8302 vec2d_swizzle1(b.v, 1, 0))));
8306 template<> struct conj_helper<Packet2d, Packet1cd, false,false>
8308 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
8309 { return padd(c, pmul(x,y)); }
8310 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
8311 { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
8313 template<> struct conj_helper<Packet1cd, Packet2d, false,false>
8315 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
8316 { return padd(c, pmul(x,y)); }
8317 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
8318 { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
8320 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
8322 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
8323 __m128d s = _mm_mul_pd(b.v,b.v);
8324 return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
8326 EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
8328 return Packet1cd(preverse(Packet2d(x.v)));
8330 EIGEN_DEVICE_FUNC inline void
8331 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
8332 __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
8333 __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
8334 __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
8335 kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
8336 kernel.packet[1].v = tmp;
8338 template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
8339 __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
8340 return Packet2cf(_mm_castpd_ps(result));
8342 template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
8344 return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
8346 template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
8348 return pset1<Packet1cd>(b);
8350 template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
8352 return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
8354 template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
8356 return pset1<Packet1cd>(b);
8361 // end #include "src/Core/arch/SSE/Complex.h"
8362 // #include "src/Core/arch/SSE/MathFunctions.h"
8363 #ifndef EIGEN_MATH_FUNCTIONS_SSE_H
8364 #define EIGEN_MATH_FUNCTIONS_SSE_H
8366 namespace internal {
// Vectorized natural log of 4 floats, Cephes-style:
//   decompose x = m * 2^e (e extracted from the IEEE exponent field,
//   mantissa m renormalized into [sqrt(1/2), sqrt(2)) via the SQRTHF test),
//   evaluate a degree-8 minimax polynomial in (m-1), then recombine with
//   e * ln(2) split into q1/q2 for extra precision.
// Invalid inputs are patched at the end: x < 0 or NaN -> NaN via
// invalid_mask (cmpnge also catches NaN), x == 0 -> -inf via iszero_mask.
8367 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8368 Packet4f plog<Packet4f>(const Packet4f& _x)
8371 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
8372 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
8373 _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
8374 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
8375 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
8376 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
// 0.7071... is sqrt(1/2): threshold deciding whether the mantissa is halved
// (exponent bumped) so the polynomial argument stays near 1.
8377 _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
8378 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
8379 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
8380 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
8381 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
8382 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
8383 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
8384 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
8385 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
8386 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
// ln(2) split into a small correction (q1) and a coarse part (q2) so the
// e*ln(2) recombination loses less precision.
8387 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
8388 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
// cmpnge is true for x<0 AND for NaN inputs, so both map to NaN below.
8390 Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps());
8391 Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
// Clamp away denormals so the exponent-field extraction below is valid.
8392 x = pmax(x, p4f_min_norm_pos);
// Shift out the 23 mantissa bits to isolate the biased exponent.
8393 emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
// Clear the exponent field and force it to 0.5 <= x < 1 (OR with 0.5).
8394 x = _mm_and_ps(x, p4f_inv_mant_mask);
8395 x = _mm_or_ps(x, p4f_half);
// Remove the IEEE bias (0x7f) to get the true exponent e.
8396 emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
8397 Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
// If mantissa < sqrt(1/2): use 2*x instead and decrement e accordingly.
8398 Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
8399 Packet4f tmp = pand(x, mask);
8401 e = psub(e, pand(p4f_1, mask));
8403 Packet4f x2 = pmul(x,x);
8404 Packet4f x3 = pmul(x2,x);
// Degree-8 polynomial evaluated as three interleaved degree-2 chunks
// (y, y1, y2) to expose instruction-level parallelism, then merged via x3.
8406 y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
8407 y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
8408 y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
8409 y = pmadd(y , x, p4f_cephes_log_p2);
8410 y1 = pmadd(y1, x, p4f_cephes_log_p5);
8411 y2 = pmadd(y2, x, p4f_cephes_log_p8);
8412 y = pmadd(y, x3, y1);
8413 y = pmadd(y, x3, y2);
8415 y1 = pmul(e, p4f_cephes_log_q1);
8416 tmp = pmul(x2, p4f_half);
8419 y2 = pmul(e, p4f_cephes_log_q2);
// Select: -inf where x was 0, NaN where x was negative/NaN, result otherwise.
8422 return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
8423 _mm_and_ps(iszero_mask, p4f_minus_inf));
// Vectorized exp of 4 floats, Cephes-style range reduction:
//   m = floor(x * log2(e) + 0.5), r = x - m*ln(2) (ln(2) split into C1+C2),
//   e^r approximated by a degree-5 polynomial, result scaled by 2^m via
//   direct construction of the IEEE exponent field.
8425 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8426 Packet4f pexp<Packet4f>(const Packet4f& _x)
8429 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
8430 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
8431 _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
// Clamp bounds: beyond these exp overflows/underflows in float.
8432 _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
8433 _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
8434 _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
// ln(2) split into coarse (C1) + correction (C2) for precise reduction.
8435 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
8436 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
8437 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
8438 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
8439 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
8440 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
8441 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
8442 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
8445 x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
// fx = x/ln(2) + 0.5, then floored to get the integer power-of-two m.
8446 fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
8447 #ifdef EIGEN_VECTORIZE_SSE4_1
8448 fx = _mm_floor_ps(fx);
// Pre-SSE4.1 floor emulation: truncate, then subtract 1 where the
// truncation rounded up (i.e. for negative non-integer fx).
8450 emm0 = _mm_cvttps_epi32(fx);
8451 tmp = _mm_cvtepi32_ps(emm0);
8452 Packet4f mask = _mm_cmpgt_ps(tmp, fx);
8453 mask = _mm_and_ps(mask, p4f_1);
8454 fx = psub(tmp, mask);
8456 tmp = pmul(fx, p4f_cephes_exp_C1);
8457 Packet4f z = pmul(fx, p4f_cephes_exp_C2);
// Horner evaluation of the degree-5 polynomial for e^r.
8461 Packet4f y = p4f_cephes_exp_p0;
8462 y = pmadd(y, x, p4f_cephes_exp_p1);
8463 y = pmadd(y, x, p4f_cephes_exp_p2);
8464 y = pmadd(y, x, p4f_cephes_exp_p3);
8465 y = pmadd(y, x, p4f_cephes_exp_p4);
8466 y = pmadd(y, x, p4f_cephes_exp_p5);
// Build 2^m by writing (m + 127) into the float exponent field.
8469 emm0 = _mm_cvttps_epi32(fx);
8470 emm0 = _mm_add_epi32(emm0, p4i_0x7f);
8471 emm0 = _mm_slli_epi32(emm0, 23);
// Final pmax with _x propagates NaN inputs (NaN compares false in max).
8472 return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
// Vectorized exp of 2 doubles: same range reduction as the float version,
// but e^r is computed as a Pade-style rational px/(qx-px) (higher accuracy
// than a plain polynomial at double precision), and the 2^m scaling writes
// the 11-bit double exponent field (bias 1023, shifted into the high word).
8474 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8475 Packet2d pexp<Packet2d>(const Packet2d& _x)
8478 _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
8479 _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
8480 _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
8481 _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
8482 _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
8483 _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
8484 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
8485 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
8486 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
8487 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
8488 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
8489 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
8490 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
// ln(2) split (C1 coarse + C2 correction) for precise argument reduction.
8491 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
8492 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
// Exponent bias (1023) for the two doubles, laid out in the low two 32-bit
// lanes; it is shuffled into the high words of each double at the end.
8493 static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
8496 x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
8497 fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
8498 #ifdef EIGEN_VECTORIZE_SSE4_1
8499 fx = _mm_floor_pd(fx);
// Pre-SSE4.1 floor emulation (truncate, fix up negative non-integers).
8501 emm0 = _mm_cvttpd_epi32(fx);
8502 tmp = _mm_cvtepi32_pd(emm0);
8503 Packet2d mask = _mm_cmpgt_pd(tmp, fx);
8504 mask = _mm_and_pd(mask, p2d_1);
8505 fx = psub(tmp, mask);
8507 tmp = pmul(fx, p2d_cephes_exp_C1);
8508 Packet2d z = pmul(fx, p2d_cephes_exp_C2);
8511 Packet2d x2 = pmul(x,x);
// Numerator polynomial px (odd part) and denominator polynomial qx (even
// part) of the rational approximation; both are Horner in x^2.
8512 Packet2d px = p2d_cephes_exp_p0;
8513 px = pmadd(px, x2, p2d_cephes_exp_p1);
8514 px = pmadd(px, x2, p2d_cephes_exp_p2);
8516 Packet2d qx = p2d_cephes_exp_q0;
8517 qx = pmadd(qx, x2, p2d_cephes_exp_q1);
8518 qx = pmadd(qx, x2, p2d_cephes_exp_q2);
8519 qx = pmadd(qx, x2, p2d_cephes_exp_q3);
// e^r = 1 + 2 * px/(qx - px).
8520 x = pdiv(px,psub(qx,px));
8521 x = pmadd(p2d_2,x,p2d_1);
// Build 2^m: add the bias, shift into exponent position (bit 52 = bit 20
// of the high 32-bit word), and shuffle the lanes into the high words.
8522 emm0 = _mm_cvttpd_epi32(fx);
8523 emm0 = _mm_add_epi32(emm0, p4i_1023_0);
8524 emm0 = _mm_slli_epi32(emm0, 20);
8525 emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
// Final pmax with _x propagates NaN inputs.
8526 return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
// Vectorized sin of 4 floats, Cephes-style quadrant reduction:
//   j = nearest even-ized integer of x * 4/pi (FOPI = 4/pi ~= 1.2732),
//   x reduced by j * pi/4 in three extended-precision steps (DP1..DP3),
//   then either the sin or the cos minimax polynomial is evaluated per
//   lane depending on the quadrant (poly_mask), and the result sign is
//   fixed up from the input sign xor the quadrant sign (sign_bit).
8528 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8529 Packet4f psin<Packet4f>(const Packet4f& _x)
8532 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
8533 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
8534 _EIGEN_DECLARE_CONST_Packet4i(1, 1);
8535 _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
8536 _EIGEN_DECLARE_CONST_Packet4i(2, 2);
8537 _EIGEN_DECLARE_CONST_Packet4i(4, 4);
8538 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
// -pi/4 split into three parts for extended-precision argument reduction.
8539 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
8540 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
8541 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
8542 _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
8543 _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
8544 _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
8545 _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
8546 _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
8547 _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
// 1.2732... = 4/pi.
8548 _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
8549 Packet4f xmm1, xmm2, xmm3, sign_bit, y;
8550 Packet4i emm0, emm2;
// sign_bit holds the input's sign bit (its initialization from _x is on
// lines elided from this view -- presumably sign_bit = x before |x|).
8553 sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
8554 y = pmul(x, p4f_cephes_FOPI);
// Quadrant index j: round x*4/pi up to the next even-ish value
// (j = (j+1) & ~1 makes j a multiple of 2 as Cephes requires).
8555 emm2 = _mm_cvttps_epi32(y);
8556 emm2 = _mm_add_epi32(emm2, p4i_1);
8557 emm2 = _mm_and_si128(emm2, p4i_not1);
8558 y = _mm_cvtepi32_ps(emm2);
// Bit 2 of j selects the half-period sign flip; shifted to the float
// sign-bit position (bit 31) via << 29.
8559 emm0 = _mm_and_si128(emm2, p4i_4);
8560 emm0 = _mm_slli_epi32(emm0, 29);
// Bit 1 of j selects the polynomial (sin vs cos branch) per lane.
8561 emm2 = _mm_and_si128(emm2, p4i_2);
8562 emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
8563 Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
8564 Packet4f poly_mask = _mm_castsi128_ps(emm2);
8565 sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
// Extended-precision reduction: x += y*DP1 + y*DP2 + y*DP3 (adds elided).
8566 xmm1 = pmul(y, p4f_minus_cephes_DP1);
8567 xmm2 = pmul(y, p4f_minus_cephes_DP2);
8568 xmm3 = pmul(y, p4f_minus_cephes_DP3);
// Cosine branch polynomial in z = x^2.
8573 Packet4f z = _mm_mul_ps(x,x);
8574 y = pmadd(y, z, p4f_coscof_p1);
8575 y = pmadd(y, z, p4f_coscof_p2);
8578 Packet4f tmp = pmul(z, p4f_half);
// Sine branch polynomial in z.
8581 Packet4f y2 = p4f_sincof_p0;
8582 y2 = pmadd(y2, z, p4f_sincof_p1);
8583 y2 = pmadd(y2, z, p4f_sincof_p2);
// Per-lane select between the two branch results, then apply the sign.
8587 y2 = _mm_and_ps(poly_mask, y2);
8588 y = _mm_andnot_ps(poly_mask, y);
8589 y = _mm_or_ps(y,y2);
8590 return _mm_xor_ps(y, sign_bit);
// Vectorized cos of 4 floats: same Cephes quadrant-reduction machinery as
// psin above, with the quadrant arithmetic shifted (j -= 2, sign taken from
// ~j & 4) so the cosine is obtained from the same pair of sin/cos
// polynomials.  Unlike psin, no input sign is kept (cos is even).
8592 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8593 Packet4f pcos<Packet4f>(const Packet4f& _x)
8596 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
8597 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
8598 _EIGEN_DECLARE_CONST_Packet4i(1, 1);
8599 _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
8600 _EIGEN_DECLARE_CONST_Packet4i(2, 2);
8601 _EIGEN_DECLARE_CONST_Packet4i(4, 4);
// -pi/4 in three parts for extended-precision argument reduction.
8602 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
8603 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
8604 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
8605 _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
8606 _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
8607 _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
8608 _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
8609 _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
8610 _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
// 1.2732... = 4/pi.
8611 _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
8612 Packet4f xmm1, xmm2, xmm3, y;
8613 Packet4i emm0, emm2;
8615 y = pmul(x, p4f_cephes_FOPI);
// Quadrant index j rounded up to even, as in psin.
8616 emm2 = _mm_cvttps_epi32(y);
8617 emm2 = _mm_add_epi32(emm2, p4i_1);
8618 emm2 = _mm_and_si128(emm2, p4i_not1);
8619 y = _mm_cvtepi32_ps(emm2);
// cos(x) = sin(x + pi/2): shift the quadrant by 2 instead of adjusting x.
8620 emm2 = _mm_sub_epi32(emm2, p4i_2);
// Result sign comes from ~j & 4, moved to the float sign-bit position.
8621 emm0 = _mm_andnot_si128(emm2, p4i_4);
8622 emm0 = _mm_slli_epi32(emm0, 29);
// Bit 1 of the shifted j selects the polynomial branch per lane.
8623 emm2 = _mm_and_si128(emm2, p4i_2);
8624 emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
8625 Packet4f sign_bit = _mm_castsi128_ps(emm0);
8626 Packet4f poly_mask = _mm_castsi128_ps(emm2);
// Extended-precision reduction x += y*(DP1+DP2+DP3) (adds elided here).
8627 xmm1 = pmul(y, p4f_minus_cephes_DP1);
8628 xmm2 = pmul(y, p4f_minus_cephes_DP2);
8629 xmm3 = pmul(y, p4f_minus_cephes_DP3);
// Cosine branch polynomial in z = x^2.
8634 Packet4f z = pmul(x,x);
8635 y = pmadd(y,z,p4f_coscof_p1);
8636 y = pmadd(y,z,p4f_coscof_p2);
8639 Packet4f tmp = _mm_mul_ps(z, p4f_half);
// Sine branch polynomial in z.
8642 Packet4f y2 = p4f_sincof_p0;
8643 y2 = pmadd(y2, z, p4f_sincof_p1);
8644 y2 = pmadd(y2, z, p4f_sincof_p2);
8646 y2 = pmadd(y2, x, x);
// Per-lane select between the branches, then apply the quadrant sign.
8647 y2 = _mm_and_ps(poly_mask, y2);
8648 y = _mm_andnot_ps(poly_mask, y);
8649 y = _mm_or_ps(y,y2);
8650 return _mm_xor_ps(y, sign_bit);
// Fast-math float sqrt: rsqrt estimate refined with one Newton-Raphson
// step (x *= 1.5 - 0.5*_x*x*x), then multiplied by _x to get sqrt(_x).
// Inputs in [0, FLT_MIN) -- i.e. +0 and positive denormals, where the
// rsqrt estimate is unusable -- are flushed to zero via denormal_mask.
8653 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8654 Packet4f psqrt<Packet4f>(const Packet4f& _x)
8656 Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
8657 Packet4f denormal_mask = _mm_and_ps(
8658 _mm_cmpge_ps(_x, _mm_setzero_ps()),
8659 _mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
8660 Packet4f x = _mm_rsqrt_ps(_x);
// One Newton-Raphson iteration to improve ~12-bit rsqrt to near full float.
8661 x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
8662 return _mm_andnot_ps(denormal_mask, pmul(_x,x));
// Exact (non-fast-math) sqrt paths: defer directly to the hardware
// sqrt instructions.
8665 template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8666 Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
8668 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8669 Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
// Fast-math float rsqrt: rsqrt estimate + one Newton-Raphson step
// (x *= 1.5 + (-0.5*_x) * x^2).  Special cases patched via masks:
//   _x < 0            -> quiet NaN  (neg_mask & p4f_nan)
//   0 <= _x <= FLT_MIN -> +inf      (zero_mask & p4f_inf)
// The le_zero_mask also zeroes the estimate so the NR step cannot
// produce garbage for those lanes before the OR at the end.
8671 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8672 Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
8673 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
8674 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
8675 _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
8676 _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
// 0x00800000 is the smallest normalized float (FLT_MIN) as a bit pattern.
8677 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
8678 Packet4f neg_half = pmul(_x, p4f_minus_half);
8679 Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
8680 Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
8681 Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
8682 Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
8683 Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
8684 _mm_and_ps(zero_mask, p4f_inf));
// One Newton-Raphson refinement of the rsqrt estimate.
8685 x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
8686 return _mm_or_ps(x, infs_and_nans);
// Exact rsqrt paths: computed as 1 / sqrt(x) with full-precision
// division and sqrt (no estimate instructions), so IEEE special cases
// fall out naturally.
8689 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8690 Packet4f prsqrt<Packet4f>(const Packet4f& x) {
8691 return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
8694 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
8695 Packet2d prsqrt<Packet2d>(const Packet2d& x) {
8696 return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
// Vectorized tanh: delegates to the architecture-independent rational
// approximation shared by all SIMD backends.
8699 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
8700 ptanh<Packet4f>(const Packet4f& x) {
8701 return internal::generic_fast_tanh_float(x);
// Scalar sqrt overloads routed through the SSE scalar sqrt instructions
// instead of the libm calls.
8706 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
8707 float sqrt(const float &x)
8709 return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
8712 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
8713 double sqrt(const double &x)
8715 #if EIGEN_COMP_GNUC_STRICT
// Strict-GCC path uses the builtin directly -- presumably to work around
// intrinsic codegen issues on that compiler; TODO confirm the rationale.
8716 return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
8718 return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
8724 // end #include "src/Core/arch/SSE/MathFunctions.h"
8725 // #include "src/Core/arch/AVX/PacketMath.h"
8726 #ifndef EIGEN_PACKET_MATH_AVX_H
8727 #define EIGEN_PACKET_MATH_AVX_H
8729 namespace internal {
// Tunables, overridable by the user before including Eigen.
8730 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
8731 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
8733 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
// 2*sizeof(void*): 16 SIMD registers on 64-bit targets, 8 on 32-bit.
8734 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
8737 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
8738 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
// AVX packet types: 8 floats, 8 ints, 4 doubles per 256-bit register.
8741 typedef __m256 Packet8f;
8742 typedef __m256i Packet8i;
8743 typedef __m256d Packet4d;
// Raw vector types behave like arithmetic scalars for Eigen's traits.
8744 template<> struct is_arithmetic<__m256> { enum { value = true }; };
8745 template<> struct is_arithmetic<__m256i> { enum { value = true }; };
8746 template<> struct is_arithmetic<__m256d> { enum { value = true }; };
// Helpers declaring function-local broadcast constants (p8f_NAME etc.).
8747 #define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
8748 const Packet8f p8f_##NAME = pset1<Packet8f>(X)
8749 #define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
8750 const Packet4d p4d_##NAME = pset1<Packet4d>(X)
8751 #define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
8752 const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
8753 #define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
8754 const Packet8i p8i_##NAME = pset1<Packet8i>(X)
// Packet traits for the AVX backend (skipped when AVX512 provides wider
// packets): float -> Packet8f with a Packet4f half, double -> Packet4d
// with a Packet2d half.  Enum bodies list supported vectorized ops.
8755 #ifndef EIGEN_VECTORIZE_AVX512
8756 template<> struct packet_traits<float> : default_packet_traits
8758 typedef Packet8f type;
8759 typedef Packet4f half;
8762 AlignedOnScalar = 1,
8766 HasSin = EIGEN_FAST_MATH,
8772 HasTanh = EIGEN_FAST_MATH,
8779 template<> struct packet_traits<double> : default_packet_traits
8781 typedef Packet4d type;
8782 typedef Packet2d half;
8785 AlignedOnScalar = 1,
// Cost-model hints for vectorized division on this target.
8799 template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
8800 template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
// Reverse mapping packet -> (scalar type, half packet, size, alignment).
8801 template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
8802 template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
8803 template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
// Element-wise AVX primitives: broadcast, linear-sequence, add/sub/negate,
// conjugate (identity on reals), multiply and divide.
8804 template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
8805 template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
8806 template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
// pload1 broadcasts directly from memory (single broadcast instruction).
8807 template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
8808 template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
// plset(a) = {a, a+1, ..., a+N-1}.
8809 template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
8810 template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
8811 template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
8812 template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
8813 template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
8814 template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
// Negation as 0 - a (note: maps -0.0 -> +0.0, matching the SSE backend).
8816 template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
8817 return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
8819 template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
8821 return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
// Conjugate of a real packet is the packet itself.
8823 template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
8824 template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
8825 template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
8826 template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
8827 template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
8828 template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
8829 template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
// Integer packet division is unsupported on AVX: assert in debug builds
// and return a zero packet.
8830 template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& , const Packet8i& )
8831 { eigen_assert(false && "packet integer division are not supported by AVX");
8832 return pset1<Packet8i>(0);
// Fused multiply-add a*b+c.  On buggy compilers (strict GCC, old clang)
// the FMA is emitted via inline asm to force a true fused instruction
// instead of a separate mul+add; otherwise the _mm256_fmadd intrinsic.
8835 template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
8836 #if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
8838 __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
8841 return _mm256_fmadd_ps(a,b,c);
8844 template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
8845 #if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
8847 __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
8850 return _mm256_fmadd_pd(a,b,c);
// Min/max, rounding (current rounding mode / ceil / floor) and bitwise
// logic ops, each a single AVX instruction.
8854 template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
8855 template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
8856 template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
8857 template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
8858 template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
8859 template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
8860 template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
8861 template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
8862 template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
8863 template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
8864 template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
8865 template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
8866 template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
8867 template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
8868 template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
8869 template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
// pandnot(a,b) computes (~a) & b, matching the hardware andnot semantics.
8870 template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
8871 template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
// Aligned (pload, requires 32-byte alignment) and unaligned (ploadu)
// full-packet loads.
8872 template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
8873 template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
8874 template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
8875 template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
8876 template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
8877 template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
// ploaddup: load N/2 scalars and duplicate each -> {a0,a0,a1,a1,...}.
// The float version broadcasts the 128-bit half then blends/permutes the
// lanes into the pairwise-duplicated order.
8878 template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
8880 Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
8881 tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
8882 return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
8884 template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
8886 Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
8887 return _mm256_permute_pd(tmp, 3<<2);
// ploadquad: load 2 scalars, replicate each 4x -> {a0 x4, a1 x4}.
8889 template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
8891 Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
8892 return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
// Aligned (pstore) and unaligned (pstoreu) full-packet stores.
8894 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
8895 template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
// NOTE(review): the aligned int pstore uses the *unaligned* storeu
// intrinsic, unlike the float/double versions -- correct but possibly
// unintended; confirm against upstream before "fixing".
8896 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
8897 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
8898 template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
8899 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
// Strided gather built from scalar loads (note _mm256_set_* takes
// arguments high-lane-first, hence the reversed order).
8900 template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
8902 return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
8903 from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
8905 template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
8907 return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
// Strided scatter: extract each 128-bit half, then peel off individual
// elements with shuffles and scalar stores.
8909 template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
8911 __m128 low = _mm256_extractf128_ps(from, 0);
8912 to[stride*0] = _mm_cvtss_f32(low);
8913 to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
8914 to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
8915 to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
8916 __m128 high = _mm256_extractf128_ps(from, 1);
8917 to[stride*4] = _mm_cvtss_f32(high);
8918 to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
8919 to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
8920 to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
8922 template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
8924 __m128d low = _mm256_extractf128_pd(from, 0);
8925 to[stride*0] = _mm_cvtsd_f64(low);
8926 to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
8927 __m128d high = _mm256_extractf128_pd(from, 1);
8928 to[stride*2] = _mm_cvtsd_f64(high);
8929 to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
// pstore1: presumably stores the scalar `a` broadcast across a full
// packet to `to` -- only the broadcast is visible here; the store of
// `pa` appears on lines elided from this view (TODO confirm).
8931 template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
8933 Packet8f pa = pset1<Packet8f>(a);
8936 template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
8938 Packet4d pa = pset1<Packet4d>(a);
8941 template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
8943 Packet8i pa = pset1<Packet8i>(a);
// Prefetch hints (T0 = into all cache levels) and pfirst (extract the
// first scalar of a packet, no shuffle needed for lane 0).
8946 #ifndef EIGEN_VECTORIZE_AVX512
8947 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
8948 template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
8949 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
8951 template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
8952 return _mm_cvtss_f32(_mm256_castps256_ps128(a));
8954 template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
8955 return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
8957 template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
8958 return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
// Reverse element order: shuffle within each 128-bit lane, then swap the
// two lanes.
8960 template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
8962 __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
8963 return _mm256_permute2f128_ps(tmp, tmp, 1);
8965 template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
8967 __m256d tmp = _mm256_shuffle_pd(a,a,5);
8968 return _mm256_permute2f128_pd(tmp, tmp, 1);
// NOTE(review): the two statements below are unreachable (dead code after
// the return above) -- an alternative swap-halves-first implementation
// that was left in; candidate for removal upstream.
8969 __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
8970 return _mm256_permute_pd(swap_halves,5);
// Absolute value by masking off the IEEE sign bit.  For doubles the sign
// bit lives in the high 32-bit word of each element, hence the alternating
// 0xFFFFFFFF / 0x7FFFFFFF pattern in setr order.
8972 template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
8974 const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
8975 return _mm256_and_ps(a,mask);
8977 template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
8979 const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
8980 return _mm256_and_pd(a,mask);
// preduxp: transpose-and-sum of N packets -> one packet whose i-th element
// is the horizontal sum of vecs[i].  Built from pairwise horizontal adds,
// cross-lane permutes (0x23 swaps the 128-bit halves) and blends.
8982 template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
8984 __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
8985 __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
8986 __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
8987 __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
8988 __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
8989 __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
8990 __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
8991 __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
// Add each half-sum to its lane-swapped copy to finish the cross-lane sum.
8992 __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
8993 __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
8994 __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
8995 __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
8996 __m256 sum1 = _mm256_add_ps(perm1, hsum5);
8997 __m256 sum2 = _mm256_add_ps(perm2, hsum6);
8998 __m256 sum3 = _mm256_add_ps(perm3, hsum7);
8999 __m256 sum4 = _mm256_add_ps(perm4, hsum8);
// Blend the 8 scalar sums into their destination lanes.
9000 __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
9001 __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
9002 __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
9005 template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
9007 Packet4d tmp0, tmp1;
9008 tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
9009 tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
9010 tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
9011 tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
9012 return _mm256_blend_pd(tmp0, tmp1, 0xC);
// Horizontal sum of one packet: fold the two 128-bit halves together and
// finish with the SSE predux.  predux_downto4 returns the folded half
// packet (pairwise sums across halves) without reducing further.
9014 template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
9016 return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
9018 template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
9020 return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
9022 template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
9024 return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
// Horizontal product / min / max: log2(N) tree reduction -- combine with
// the lane-swapped packet, then with successively smaller shuffles, and
// read the result from element 0.
9026 template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
9029 tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
9030 tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
9031 return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
9033 template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
9036 tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
9037 return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
9039 template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
9041 Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
9042 tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
9043 return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
9045 template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
9047 Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
9048 return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
9050 template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
9052 Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
9053 tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
9054 return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
9056 template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
9058 Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
9059 return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
// palign_impl: shift the concatenation [first | second] left by Offset
// elements into `first` (used by strided/unaligned kernels).  AVX has no
// full 256-bit byte-shift, so each Offset case blends in Offset elements
// of `second`, then rebuilds the rotated order with in-lane permutes and
// cross-lane permute2f128 + blend.  The per-Offset `if` guards are on
// lines elided from this view; the bodies below are Offset = 1..7 in order.
9061 template<int Offset>
9062 struct palign_impl<Offset,Packet8f>
9064 static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
9068 first = _mm256_blend_ps(first, second, 1);
9069 Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
9070 Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
9071 first = _mm256_blend_ps(tmp1, tmp2, 0x88);
9075 first = _mm256_blend_ps(first, second, 3);
9076 Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
9077 Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
9078 first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
9082 first = _mm256_blend_ps(first, second, 7);
9083 Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
9084 Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
9085 first = _mm256_blend_ps(tmp1, tmp2, 0xee);
// Offset == 4: a pure lane swap after blending 4 elements.
9089 first = _mm256_blend_ps(first, second, 15);
9090 Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
9091 Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
9092 first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
// Offsets 5..7: swap lanes first, then rotate within lanes.
9096 first = _mm256_blend_ps(first, second, 31);
9097 first = _mm256_permute2f128_ps(first, first, 1);
9098 Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
9099 first = _mm256_permute2f128_ps(tmp, tmp, 1);
9100 first = _mm256_blend_ps(tmp, first, 0x88);
9104 first = _mm256_blend_ps(first, second, 63);
9105 first = _mm256_permute2f128_ps(first, first, 1);
9106 Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
9107 first = _mm256_permute2f128_ps(tmp, tmp, 1);
9108 first = _mm256_blend_ps(tmp, first, 0xcc);
9112 first = _mm256_blend_ps(first, second, 127);
9113 first = _mm256_permute2f128_ps(first, first, 1);
9114 Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
9115 first = _mm256_permute2f128_ps(tmp, tmp, 1);
9116 first = _mm256_blend_ps(tmp, first, 0xee);
// palign_impl<Offset,Packet4d>: double-precision counterpart of the
// Packet8f version above; blend masks 1,3,7 correspond to Offset = 1..3.
// NOTE(review): braces and `if (Offset==k)` guard lines are omitted by
// this listing (embedded line numbers jump) — confirm upstream.
9120 template<int Offset>
9121 struct palign_impl<Offset,Packet4d>
9123 static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
// Offset 1: in-lane 64-bit swap (permute control 5) + lane swap, merged
// with blend mask 0xA.
9127 first = _mm256_blend_pd(first, second, 1);
9128 __m256d tmp = _mm256_permute_pd(first, 5);
9129 first = _mm256_permute2f128_pd(tmp, tmp, 1);
9130 first = _mm256_blend_pd(tmp, first, 0xA);
// Offset 2: a plain 128-bit lane swap suffices.
9134 first = _mm256_blend_pd(first, second, 3);
9135 first = _mm256_permute2f128_pd(first, first, 1);
// Offset 3: mirror of the Offset 1 case with blend mask 5.
9139 first = _mm256_blend_pd(first, second, 7);
9140 __m256d tmp = _mm256_permute_pd(first, 5);
9141 first = _mm256_permute2f128_pd(tmp, tmp, 1);
9142 first = _mm256_blend_pd(tmp, first, 5);
// In-register 8x8 transpose of eight Packet8f rows.
// Stage 1 (T*): interleave adjacent row pairs with unpacklo/unpackhi.
// Stage 2 (S*): gather 2-element groups across pairs with 4-wide shuffles.
// Stage 3: recombine 128-bit lanes (0x20 = both low halves,
// 0x31 = both high halves) to produce the transposed rows.
9146 EIGEN_DEVICE_FUNC inline void
9147 ptranspose(PacketBlock<Packet8f,8>& kernel) {
9148 __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
9149 __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
9150 __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
9151 __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
9152 __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
9153 __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
9154 __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
9155 __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
9156 __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
9157 __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
9158 __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
9159 __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
9160 __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
9161 __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
9162 __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
9163 __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
9164 kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
9165 kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
9166 kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
9167 kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
9168 kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
9169 kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
9170 kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
9171 kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
// Transpose of a 4x8 block (four Packet8f rows): same unpack/shuffle/lane
// recombination scheme restricted to four rows.
9173 EIGEN_DEVICE_FUNC inline void
9174 ptranspose(PacketBlock<Packet8f,4>& kernel) {
9175 __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
9176 __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
9177 __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
9178 __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
9179 __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
9180 __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
9181 __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
9182 __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
9183 kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
9184 kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
9185 kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
9186 kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
// 4x4 transpose of four Packet4d rows: shuffle_pd pairs elements within
// lanes (masks 15 = high pair, 0 = low pair), then permute2f128 (32 = low
// halves, 49 = high halves) recombines the lanes.
9188 EIGEN_DEVICE_FUNC inline void
9189 ptranspose(PacketBlock<Packet4d,4>& kernel) {
9190 __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
9191 __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
9192 __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
9193 __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
9194 kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
9195 kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
9196 kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
9197 kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
9199 template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
9200 const __m256 zero = _mm256_setzero_ps();
9201 const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
9202 __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
9203 return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
9205 template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
9206 const __m256d zero = _mm256_setzero_pd();
9207 const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
9208 __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
9209 return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
9211 template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
9213 return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
9215 template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
9217 return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
9219 template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
9221 return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
9223 template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
9225 return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
9230 // end #include "src/Core/arch/AVX/PacketMath.h"
9231 // #include "src/Core/arch/AVX/MathFunctions.h"
9232 #ifndef EIGEN_MATH_FUNCTIONS_AVX_H
9233 #define EIGEN_MATH_FUNCTIONS_AVX_H
9235 namespace internal {
// Per-element 32-bit shifts. AVX1 has no 256-bit integer shifts, so the
// fallback splits the vector into 128-bit halves, shifts each with SSE2
// intrinsics, and reassembles.
// NOTE(review): the `#else` / `#endif` and closing-brace lines are omitted
// by this listing (embedded numbers jump 9239 -> 9241); the two-halves
// path is the non-AVX2 fallback — confirm upstream.
9236 inline Packet8i pshiftleft(Packet8i v, int n)
9238 #ifdef EIGEN_VECTORIZE_AVX2
9239 return _mm256_slli_epi32(v, n);
9241 __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
9242 __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
9243 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
// Logical right shift of each float's bit pattern, with the shifted
// integers converted back to float (used by plog below to extract the
// exponent field).
9246 inline Packet8f pshiftright(Packet8f v, int n)
9248 #ifdef EIGEN_VECTORIZE_AVX2
9249 return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
9251 __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
9252 __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
9253 return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
// Vectorized sinf. Reduces the argument by multiples of pi (the three-part
// negative-pi constant gives extra precision in the reduction), evaluates
// one of two polynomials depending on where the reduced argument lands,
// and flips the sign for odd multiples of pi.
// NOTE(review): this listing omits some lines (e.g. embedded line 9259,
// presumably `Packet8f x = _x;` since `x` is used below without a visible
// declaration, and the trailing `return res;`) — confirm upstream.
9257 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
9258 psin<Packet8f>(const Packet8f& _x) {
9260 _EIGEN_DECLARE_CONST_Packet8i(one, 1);
9261 _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
9262 _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
9263 _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
9264 _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
9265 _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
9266 _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
9267 _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
9268 _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);
// shift = round-to-nearest multiple of pi contained in x (floor of
// x/pi + 1/4); subtracting shift*pi in three steps keeps precision.
9269 Packet8f z = pmul(x, p8f_one_over_pi);
9270 Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four));
9271 x = pmadd(shift, p8f_neg_pi_first, x);
9272 x = pmadd(shift, p8f_neg_pi_second, x);
9273 x = pmadd(shift, p8f_neg_pi_third, x);
9274 z = pmul(x, p8f_four_over_pi);
// Odd multiples of pi flip the sign: build a mask with the sign bit set
// where the integer shift is odd.
9275 Packet8i shift_ints = _mm256_cvtps_epi32(shift);
9276 Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
9277 Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
// ival_mask selects between the two polynomial branches (z > 1).
9278 Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
// "Right" polynomial: even powers of (z - 2).
9279 _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
9280 _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
9281 _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
9282 _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
9283 Packet8f z_minus_two = psub(z, p8f_two);
9284 Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
9285 Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
9286 right = pmadd(right, z_minus_two2, p8f_coeff_right_2);
9287 right = pmadd(right, z_minus_two2, p8f_coeff_right_0);
// "Left" polynomial: odd powers of z (final multiply by z below).
9288 _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
9289 _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
9290 _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
9291 _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
9292 Packet8f z2 = pmul(z, z);
9293 Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
9294 left = pmadd(left, z2, p8f_coeff_left_3);
9295 left = pmadd(left, z2, p8f_coeff_left_1);
9296 left = pmul(left, z);
// Merge the two branches with the interval mask, then apply the sign flip.
9297 left = _mm256_andnot_ps(ival_mask, left);
9298 right = _mm256_and_ps(ival_mask, right);
9299 Packet8f res = _mm256_or_ps(left, right);
9300 res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
// Vectorized natural logarithm, Cephes-style: split x into mantissa m in
// [sqrt(1/2), sqrt(2)) and exponent e, evaluate a degree-8 polynomial in
// (m - 1), then add e*ln(2) in two parts. x <= 0 produces NaN, x == 0
// produces -inf via the masks built up front.
// NOTE(review): this listing omits several body lines (embedded numbers
// jump at 9334, 9336, 9339, 9348, 9351-9355) including, presumably, the
// declaration of y/y1/y2 and the x/e adjustment and final combination
// steps — confirm against the upstream file.
9304 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
9305 plog<Packet8f>(const Packet8f& _x) {
9307 _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
9308 _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
9309 _EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);
9310 _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);
9311 _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
9312 _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);
9313 _EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
9314 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
9315 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
9316 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
9317 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
9318 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
9319 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
9320 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
9321 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
9322 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
9323 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
9324 _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);
// NaN/negative inputs (NGE, unordered) and exact zeros get special-cased
// in the final combination.
9325 Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ);
9326 Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
// Clamp denormals up to the smallest normalized float, then peel off the
// exponent bits via a 23-bit logical right shift.
9327 x = pmax(x, p8f_min_norm_pos);
9328 Packet8f emm0 = pshiftright(x,23);
9329 Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
// Keep only the mantissa and force it into [0.5, 1).
9330 x = _mm256_and_ps(x, p8f_inv_mant_mask);
9331 x = _mm256_or_ps(x, p8f_half);
// Where the mantissa is below sqrt(1/2), shift it up one octave and
// compensate the exponent.
9332 Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
9333 Packet8f tmp = _mm256_and_ps(x, mask);
9335 e = psub(e, _mm256_and_ps(p8f_1, mask));
9337 Packet8f x2 = pmul(x, x);
9338 Packet8f x3 = pmul(x2, x);
// Degree-8 polynomial evaluated as three interleaved Horner chains.
9340 y = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
9341 y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
9342 y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
9343 y = pmadd(y, x, p8f_cephes_log_p2);
9344 y1 = pmadd(y1, x, p8f_cephes_log_p5);
9345 y2 = pmadd(y2, x, p8f_cephes_log_p8);
9346 y = pmadd(y, x3, y1);
9347 y = pmadd(y, x3, y2);
// Add e*ln(2), split into q1 + q2 for precision.
9349 y1 = pmul(e, p8f_cephes_log_q1);
9350 tmp = pmul(x2, p8f_half);
9353 y2 = pmul(e, p8f_cephes_log_q2);
// Final select: -inf where x was zero, NaN (via invalid_mask) where x was
// negative or NaN, the computed value otherwise.
9356 return _mm256_or_ps(
9357 _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
9358 _mm256_and_ps(iszero_mask, p8f_minus_inf));
// Vectorized expf: clamp to the representable range, split x = m*ln(2) + r
// with |r| <= ln(2)/2, evaluate a degree-5 polynomial in r, and scale by
// 2^m via direct exponent-field construction.
// NOTE(review): the `#else`/`#endif` around the FMA path and some closing
// lines are omitted by this listing (embedded numbers jump at 9380, 9385,
// 9394, 9398-9399) — confirm upstream.
9361 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
9362 pexp<Packet8f>(const Packet8f& _x) {
9363 _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
9364 _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
9365 _EIGEN_DECLARE_CONST_Packet8f(127, 127.0f);
9366 _EIGEN_DECLARE_CONST_Packet8f(exp_hi, 88.3762626647950f);
9367 _EIGEN_DECLARE_CONST_Packet8f(exp_lo, -88.3762626647949f);
9368 _EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);
9369 _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
9370 _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
9371 _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
9372 _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
9373 _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
9374 _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);
// Clamp, then m = floor(x * log2(e) + 1/2).
9375 Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);
9376 Packet8f m = _mm256_floor_ps(pmadd(x, p8f_cephes_LOG2EF, p8f_half));
// r = x - m*ln(2): single FMA when available, otherwise a two-constant
// (C1 + C2) split of ln(2) for precision.
9377 #ifdef EIGEN_VECTORIZE_FMA
9378 _EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
9379 Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
9381 _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
9382 _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
9383 Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
9384 r = psub(r, pmul(m, p8f_cephes_exp_C2));
9386 Packet8f r2 = pmul(r, r);
// Horner evaluation of the degree-5 polynomial approximating e^r.
9387 Packet8f y = p8f_cephes_exp_p0;
9388 y = pmadd(y, r, p8f_cephes_exp_p1);
9389 y = pmadd(y, r, p8f_cephes_exp_p2);
9390 y = pmadd(y, r, p8f_cephes_exp_p3);
9391 y = pmadd(y, r, p8f_cephes_exp_p4);
9392 y = pmadd(y, r, p8f_cephes_exp_p5);
9393 y = pmadd(y, r2, r);
// Build 2^m by writing m+127 into the float exponent field; the final
// pmax with _x propagates the original value where it exceeds the result
// (handles the clamped upper range / NaN behavior).
9395 Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
9396 emm0 = pshiftleft(emm0, 23);
9397 return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
// Hyperbolic tangent: delegates to the shared float implementation in
// generic code.
9400 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
9401 ptanh<Packet8f>(const Packet8f& x) {
9402 return internal::generic_fast_tanh_float(x);
// Vectorized exp for doubles: same range reduction as the float version,
// but the reduced exponential is computed as a Pade-style rational
// approximation (px/qx) rather than a plain polynomial, and 2^fx is
// assembled 64-bit-lane by 64-bit-lane since AVX1 lacks 256-bit integer
// shifts.
// NOTE(review): this listing omits some lines (embedded numbers jump at
// 9424, 9430-9431, 9436, 9451-9453), including, presumably, the
// declarations of `tmp`/`fx` and the `x = psub(x, tmp); x = psub(x, z);`
// reduction steps — confirm upstream.
9405 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
9406 pexp<Packet4d>(const Packet4d& _x) {
9408 _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
9409 _EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
9410 _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
9411 _EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
9412 _EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
9413 _EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
9414 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
9415 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
9416 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
9417 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
9418 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
9419 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
9420 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
9421 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
9422 _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
9423 _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
// Clamp, then fx = floor(x*log2(e) + 1/2); C1 + C2 split ln(2) for the
// high-precision reduction.
9425 x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
9426 fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
9427 fx = _mm256_floor_pd(fx);
9428 tmp = pmul(fx, p4d_cephes_exp_C1);
9429 Packet4d z = pmul(fx, p4d_cephes_exp_C2);
// Rational approximation: e^r ~ 1 + 2*px/(qx - px) with px odd and qx
// even in x.
9432 Packet4d x2 = pmul(x, x);
9433 Packet4d px = p4d_cephes_exp_p0;
9434 px = pmadd(px, x2, p4d_cephes_exp_p1);
9435 px = pmadd(px, x2, p4d_cephes_exp_p2);
9437 Packet4d qx = p4d_cephes_exp_q0;
9438 qx = pmadd(qx, x2, p4d_cephes_exp_q1);
9439 qx = pmadd(qx, x2, p4d_cephes_exp_q2);
9440 qx = pmadd(qx, x2, p4d_cephes_exp_q3);
9441 x = _mm256_div_pd(px, psub(qx, px));
9442 x = pmadd(p4d_2, x, p4d_1);
// Build 2^fx: bias the exponents, spread the four 32-bit values into the
// two 64-bit exponent fields of each 128-bit half, and shift into place.
9443 __m128i emm0 = _mm256_cvtpd_epi32(fx);
9444 emm0 = _mm_add_epi32(emm0, p4i_1023);
9445 emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
9446 __m128i lo = _mm_slli_epi64(emm0, 52);
9447 __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
9448 __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
9449 e = _mm256_insertf128_si256(e, hi, 1);
9450 return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
// NOTE(review): psqrt<Packet8f> and prsqrt<Packet8f> each appear twice
// below, which would be duplicate specializations; the omitted lines
// (embedded numbers jump at 9453, 9464-9465, 9469-9470, 9475, 9493, 9499)
// presumably held `#if EIGEN_FAST_MATH` / `#else` / `#endif` guards
// selecting the rsqrt+Newton fast path versus the exact sqrt/div path —
// confirm upstream.
// Fast sqrt: one Newton-Raphson step on _mm256_rsqrt_ps, with denormal
// and zero inputs flushed to zero via denormal_mask.
9454 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
9455 psqrt<Packet8f>(const Packet8f& _x) {
9456 Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
9457 Packet8f denormal_mask = _mm256_and_ps(
9458 _mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
9460 _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
// Newton step: x *= 1.5 - 0.5*_x*x*x refines the reciprocal-sqrt estimate;
// multiplying by _x converts it to sqrt(_x).
9461 Packet8f x = _mm256_rsqrt_ps(_x);
9462 x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
9463 return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
// Exact-path sqrt via the hardware instruction.
9466 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
9467 Packet8f psqrt<Packet8f>(const Packet8f& x) {
9468 return _mm256_sqrt_ps(x);
// Double precision always uses the hardware sqrt (no rsqrt for doubles).
9471 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
9472 Packet4d psqrt<Packet4d>(const Packet4d& x) {
9473 return _mm256_sqrt_pd(x);
// Fast reciprocal sqrt: rsqrt estimate + one Newton step, with explicit
// fix-ups producing +inf for +0/denormals and NaN for negative inputs.
9476 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
9477 Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
9478 _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
9479 _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
9480 _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
9481 _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
9482 _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
9483 Packet8f neg_half = pmul(_x, p8f_minus_half);
// Inputs below the smallest normalized float get their estimate zeroed,
// then replaced by inf (x >= 0) or NaN (x < 0) via the masks below.
9484 Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
9485 Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
9486 Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
9487 Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
9488 Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
9489 _mm256_and_ps(zero_mask, p8f_inf));
9490 x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
9491 return _mm256_or_ps(x, infs_and_nans);
// Exact-path reciprocal sqrt: 1.0 / sqrt(x).
9494 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
9495 Packet8f prsqrt<Packet8f>(const Packet8f& x) {
9496 _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
9497 return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
9500 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
9501 Packet4d prsqrt<Packet4d>(const Packet4d& x) {
9502 _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
9503 return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
9508 // end #include "src/Core/arch/AVX/MathFunctions.h"
9509 // #include "src/Core/arch/AVX/Complex.h"
9510 #ifndef EIGEN_COMPLEX_AVX_H
9511 #define EIGEN_COMPLEX_AVX_H
9513 namespace internal {
// Packet4cf: four std::complex<float> values packed in one __m256 as
// interleaved (re, im) pairs.
// NOTE(review): the struct header (and, presumably, its `__m256 v;`
// member) plus most packet_traits enum entries are omitted by this
// listing — confirm upstream.
9516 EIGEN_STRONG_INLINE Packet4cf() {}
9517 EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
9520 template<> struct packet_traits<std::complex<float> > : default_packet_traits
9522 typedef Packet4cf type;
9523 typedef Packet2cf half;
9526 AlignedOnScalar = 1,
9541 template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
// Add/sub operate component-wise on the packed floats.
9542 template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
9543 template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
9544 template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
9546 return Packet4cf(pnegate(a.v));
// Conjugate: xor the sign bit of every imaginary (odd) lane.
9548 template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
9550 const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
9551 return Packet4cf(_mm256_xor_ps(a.v,mask));
// Complex multiply: tmp1 = re(a)*b, tmp2 = im(a)*swap(b); addsub merges
// them into (re*re - im*im, re*im + im*re) per pair.
9553 template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
9555 __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
9556 __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
9557 __m256 result = _mm256_addsub_ps(tmp1, tmp2);
9558 return Packet4cf(result);
// Bitwise ops forward to the float intrinsics on the raw register.
9560 template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
9561 template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
9562 template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
9563 template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
9564 template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
9565 template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
// Broadcast one complex: reinterpret the (re,im) pair as a double and
// use the 64-bit broadcast.
9566 template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
9568 return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
// ploaddup: load two complexes and duplicate each, yielding
// (a0, a0, a1, a1) across the two 128-bit halves.
9570 template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
9572 Packet2cf a = ploaddup<Packet2cf>(from);
9573 Packet2cf b = ploaddup<Packet2cf>(from+1);
9574 return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
// Aligned / unaligned stores forward to the float-packet stores on the
// underlying register.
9576 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
9577 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
// Strided gather of four complexes; _mm256_set_ps takes arguments
// high-to-low, hence element 3 first.
9578 template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, Index stride)
9580 return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
9581 std::imag(from[2*stride]), std::real(from[2*stride]),
9582 std::imag(from[1*stride]), std::real(from[1*stride]),
9583 std::imag(from[0*stride]), std::real(from[0*stride])));
// Strided scatter: extract each 128-bit half and peel off the four
// scalars with shuffles.
9585 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index stride)
9587 __m128 low = _mm256_extractf128_ps(from.v, 0);
9588 to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
9589 _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
9590 to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
9591 _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
9592 __m128 high = _mm256_extractf128_ps(from.v, 1);
9593 to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
9594 _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
9595 to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
9596 _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
// First complex element: delegate to the SSE half-packet implementation
// on the low 128 bits.
9598 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
9600 return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
// Reverse the order of the four complexes: swap the (re,im) pairs inside
// each 128-bit half, then exchange the halves.
9602 template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
9603 __m128 low = _mm256_extractf128_ps(a.v, 0);
9604 __m128 high = _mm256_extractf128_ps(a.v, 1);
9605 __m128d lowd = _mm_castps_pd(low);
9606 __m128d highd = _mm_castps_pd(high);
9607 low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
9608 high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
9609 __m256 result = _mm256_setzero_ps();
9610 result = _mm256_insertf128_ps(result, low, 1);
9611 result = _mm256_insertf128_ps(result, high, 0);
9612 return Packet4cf(result);
// Sum of the four complexes via the half-packet add + reduce.
9614 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
9616 return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
9617 Packet2cf(_mm256_extractf128_ps(a.v,1))));
// preduxp: reduce four input vectors at once, using hadd on re/im-grouped
// shuffles and a final cross-lane add.
9619 template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
9621 Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
9622 Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
9623 t0 = _mm256_hadd_ps(t0,t1);
9624 Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
9625 Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
9626 t2 = _mm256_hadd_ps(t2,t3);
9627 t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
9628 t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
9629 return Packet4cf(_mm256_add_ps(t1,t3));
// Product of the four complexes via the half-packet multiply + reduce.
9631 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
9633 return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
9634 Packet2cf(_mm256_extractf128_ps(a.v, 1))));
// palign for complexes: one complex is two floats, so forward to the
// Packet8f implementation with the offset doubled.
9636 template<int Offset>
9637 struct palign_impl<Offset,Packet4cf>
9639 static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
9641 if (Offset==0) return;
9642 palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
// conj_helper variants: apply conjugation to the flagged operand(s)
// before/after the plain complex multiply.
9645 template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
9647 EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
9648 { return padd(pmul(x,y),c); }
9649 EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
9651 return internal::pmul(a, pconj(b));
9654 template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
9656 EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
9657 { return padd(pmul(x,y),c); }
9658 EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
9660 return internal::pmul(pconj(a), b);
9663 template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
9665 EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
9666 { return padd(pmul(x,y),c); }
9667 EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
9669 return pconj(internal::pmul(a, b));
// Mixed real*complex helpers: multiply the packed floats directly.
9672 template<> struct conj_helper<Packet8f, Packet4cf, false,false>
9674 EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
9675 { return padd(c, pmul(x,y)); }
9676 EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
9677 { return Packet4cf(Eigen::internal::pmul(x, y.v)); }
9679 template<> struct conj_helper<Packet4cf, Packet8f, false,false>
9681 EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
9682 { return padd(c, pmul(x,y)); }
9683 EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
9684 { return Packet4cf(Eigen::internal::pmul(x.v, y)); }
// Complex division: a/b = a*conj(b) / |b|^2; the denominator duplicates
// re^2+im^2 into both lanes of each pair via the 0xB1 shuffle.
9686 template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
9688 Packet4cf num = pmul(a, pconj(b));
9689 __m256 tmp = _mm256_mul_ps(b.v, b.v);
9690 __m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
9691 __m256 denom = _mm256_add_ps(tmp, tmp2);
9692 return Packet4cf(_mm256_div_ps(num.v, denom));
// Swap real and imaginary parts of each complex.
9694 template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
9696 return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
// Packet2cd: two std::complex<double> values packed in one __m256d as
// interleaved (re, im) pairs.
// NOTE(review): the struct header (and, presumably, its `__m256d v;`
// member) plus most packet_traits enum entries are omitted by this
// listing — confirm upstream.
9700 EIGEN_STRONG_INLINE Packet2cd() {}
9701 EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
9704 template<> struct packet_traits<std::complex<double> > : default_packet_traits
9706 typedef Packet2cd type;
9707 typedef Packet1cd half;
9710 AlignedOnScalar = 0,
9725 template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
9726 template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
9727 template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
9728 template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
// Conjugate: xor the sign bit of each imaginary (odd) double.
9729 template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
9731 const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
9732 return Packet2cd(_mm256_xor_pd(a.v,mask));
// Complex multiply: even = re(a)*b, odd = im(a)*swap(b); addsub combines
// them into the complex product per pair.
9734 template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
9736 __m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
9737 __m256d even = _mm256_mul_pd(tmp1, b.v);
9738 __m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
9739 __m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
9740 __m256d odd = _mm256_mul_pd(tmp2, tmp3);
9741 return Packet2cd(_mm256_addsub_pd(even, odd));
9743 template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
9744 template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
9745 template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
9746 template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
9747 template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
9748 { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
9749 template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
9750 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
// Broadcast one complex: the (re,im) pair is one 128-bit value.
9751 template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
9753 return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
9755 template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
9756 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
9757 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
// Strided gather of two complexes (set_pd arguments are high-to-low).
9758 template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, Index stride)
9760 return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
9761 std::imag(from[0*stride]), std::real(from[0*stride])));
9763 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, Index stride)
9765 __m128d low = _mm256_extractf128_pd(from.v, 0);
9766 to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
9767 __m128d high = _mm256_extractf128_pd(from.v, 1);
9768 to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
9770 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
9772 __m128d low = _mm256_extractf128_pd(a.v, 0);
9773 EIGEN_ALIGN16 double res[2];
9774 _mm_store_pd(res, low);
9775 return std::complex<double>(res[0],res[1]);
9777 template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
9778 __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
9779 return Packet2cd(result);
9781 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
9783 return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
9784 Packet1cd(_mm256_extractf128_pd(a.v,1))));
9786 template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
9788 Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
9789 Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
9790 return Packet2cd(_mm256_add_pd(t0,t1));
9792 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
9794 return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
9795 Packet1cd(_mm256_extractf128_pd(a.v,1))));
// palign for Packet2cd: shifting by one complex lane equals shifting the
// underlying Packet4d by two doubles, hence Offset*2.
template<int Offset>
struct palign_impl<Offset,Packet2cd>
static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
if (Offset==0) return;
palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
// conj_helper specializations: the two boolean template flags select which
// operand(s) are conjugated before/after the complex product.
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
return internal::pmul(a, pconj(b));
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
return internal::pmul(pconj(a), b);
// conj(a)*conj(b) == conj(a*b): conjugate once after the product.
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
return pconj(internal::pmul(a, b));
// Mixed real*complex helpers: a raw double packet scales both the real and
// imaginary slots of the complex packet at once.
template<> struct conj_helper<Packet4d, Packet2cd, false,false>
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
{ return Packet2cd(Eigen::internal::pmul(x, y.v)); }
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
{ return Packet2cd(Eigen::internal::pmul(x.v, y)); }
// Complex division: a/b = a*conj(b) / |b|^2. The hadd duplicates re^2+im^2
// across each 128-bit half so a single vector divide finishes the job.
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
Packet2cd num = pmul(a, pconj(b));
__m256d tmp = _mm256_mul_pd(b.v, b.v);
__m256d denom = _mm256_hadd_pd(tmp, tmp);
return Packet2cd(_mm256_div_pd(num.v, denom));
// Swap real/imag inside each complex lane (shuffle mask 0x5 swaps within pairs).
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
// 4x4 transpose of complex<float> packets, done on double-sized elements so
// each (real,imag) pair moves as a single unit.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4cf,4>& kernel) {
__m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
__m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
__m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
__m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
__m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
__m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
__m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
__m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
// 2x2 transpose of complex<double> packets via 128-bit half exchange.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2cd,2>& kernel) {
__m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
kernel.packet[0].v = tmp;
// Replace the first/last complex lane: the blend mask selects the two scalar
// slots holding that lane's real and imaginary parts.
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
9898 // end #include "src/Core/arch/AVX/Complex.h"
9899 // #include "src/Core/arch/AVX/TypeCasting.h"
9900 #ifndef EIGEN_TYPE_CASTING_AVX_H
9901 #define EIGEN_TYPE_CASTING_AVX_H
9903 namespace internal {
9905 struct type_casting_traits<float, int> {
9913 struct type_casting_traits<int, float> {
// float <-> int conversions over full 8-lane AVX packets.
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
return _mm256_cvtps_epi32(a);
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
return _mm256_cvtepi32_ps(a);
9929 // end #include "src/Core/arch/AVX/TypeCasting.h"
9930 #elif defined EIGEN_VECTORIZE_SSE
9931 // #include "src/Core/arch/SSE/PacketMath.h"
9932 #ifndef EIGEN_PACKET_MATH_SSE_H
9933 #define EIGEN_PACKET_MATH_SSE_H
9935 namespace internal {
// ---- SSE backend configuration (user-overridable before including Eigen) ----
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
// 8 XMM registers in 32-bit mode, 16 in 64-bit mode (2 * pointer size).
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
// On old GCC ABIs, __m128/__m128i/__m128d may not be distinct overload types;
// wrap them in a thin struct so template specializations can tell them apart.
#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
template<typename T>
struct eigen_packet_wrapper
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
typedef eigen_packet_wrapper<__m128> Packet4f;
typedef eigen_packet_wrapper<__m128i> Packet4i;
typedef eigen_packet_wrapper<__m128d> Packet2d;
typedef __m128 Packet4f;
typedef __m128i Packet4i;
typedef __m128d Packet2d;
template<> struct is_arithmetic<__m128> { enum { value = true }; };
template<> struct is_arithmetic<__m128i> { enum { value = true }; };
template<> struct is_arithmetic<__m128d> { enum { value = true }; };
// Shuffle helpers: (p,q,r,s) pick the source lane for each destination slot.
#define vec4f_swizzle1(v,p,q,r,s) \
(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
#define vec4i_swizzle1(v,p,q,r,s) \
(_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
#define vec2d_swizzle1(v,p,q) \
(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
#define vec4f_swizzle2(a,b,p,q,r,s) \
(_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
#define vec4i_swizzle2(a,b,p,q,r,s) \
(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
// Declare a local packet constant broadcast from a scalar value.
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
const Packet2d p2d_##NAME = pset1<Packet2d>(X)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
#ifndef EIGEN_VECTORIZE_AVX
// packet_traits: advertise which vectorized ops exist per scalar type on SSE.
// NOTE(review): several enum/brace lines of these traits are not visible in
// this excerpt of the amalgamated header.
template<> struct packet_traits<float> : default_packet_traits
typedef Packet4f type;
typedef Packet4f half;
AlignedOnScalar = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
HasTanh = EIGEN_FAST_MATH,
#ifdef EIGEN_VECTORIZE_SSE4_1
template<> struct packet_traits<double> : default_packet_traits
typedef Packet2d type;
typedef Packet2d half;
AlignedOnScalar = 1,
#ifdef EIGEN_VECTORIZE_SSE4_1
template<> struct packet_traits<int> : default_packet_traits
typedef Packet4i type;
typedef Packet4i half;
AlignedOnScalar = 1,
// unpacket_traits: scalar type, lane count and required alignment per packet.
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
#ifndef EIGEN_VECTORIZE_AVX
template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
// MSVC 2008 (_MSC_VER==1500) path uses the explicit multi-argument set forms;
// the other path uses the single-scalar broadcast intrinsics.
#if EIGEN_COMP_MSVC==1500
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
// Dedicated pload1 for strict GCC without AVX -- NOTE(review): presumably a
// codegen workaround; the rationale is not visible in this excerpt.
#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
// plset: build {a, a+1, a+2, ...} for linearly spaced expressions.
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
// ---- Elementwise arithmetic: thin wrappers over the matching SSE intrinsics ----
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
// Float negation: flip the IEEE sign bit with an XOR mask.
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
return _mm_xor_ps(a,mask);
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
return _mm_xor_pd(a,mask);
// Integer negation: 0 - a.
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
// pconj is the identity for real (non-complex) packets.
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
// 32-bit integer multiply: native mullo on SSE4.1, otherwise emulated with two
// 32x32->64-bit multiplies plus lane shuffles.
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_mullo_epi32(a,b);
return vec4i_swizzle1(
_mm_mul_epu32(a,b),
_mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
vec4i_swizzle1(b,1,0,3,2)),
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
// Integer madd has no single instruction; compose multiply + add.
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
// FMA variants -- NOTE(review): the guarding #ifdef (__FMA__) is not visible
// in this excerpt of the amalgamated header.
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
// min/max: native for float/double; for int, native on SSE4.1 else compare+blend.
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_min_epi32(a,b);
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_max_epi32(a,b);
Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
// Rounding family requires the SSE4.1 round/ceil/floor instructions.
#ifdef EIGEN_VECTORIZE_SSE4_1
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
10151 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
10152 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
10153 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
10154 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
10155 template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
10156 template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
10157 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
10158 template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
10159 template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
10160 template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
10161 template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
10162 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
// ---- Aligned loads; MSVC gets a dedicated unaligned-load path below ----
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
#if EIGEN_COMP_MSVC
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
EIGEN_DEBUG_UNALIGNED_LOAD
#if (EIGEN_COMP_MSVC==1600)
// VS2010: assemble the packet from two 64-bit halves -- NOTE(review):
// compiler-specific workaround; rationale not visible in this excerpt.
__m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
res = _mm_loadh_pi(res, (const __m64*)(from+2));
return _mm_loadu_ps(from);
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
EIGEN_DEBUG_UNALIGNED_LOAD
return _mm_loadu_ps(from);
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
EIGEN_DEBUG_UNALIGNED_LOAD
return _mm_loadu_pd(from);
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
EIGEN_DEBUG_UNALIGNED_LOAD
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
// ploaddup: load size/2 scalars and duplicate each into adjacent lanes.
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
{ return pset1<Packet2d>(from[0]); }
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
return vec4i_swizzle1(tmp, 0, 0, 1, 1);
// Aligned and unaligned stores per scalar type.
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
// Strided gathers built from scalar loads (set intrinsics take args high-to-low).
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
return _mm_set_pd(from[1*stride], from[0*stride]);
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
// Strided scatters: extract each lane with a shuffle + scalar convert.
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
to[stride*0] = _mm_cvtss_f32(from);
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
to[stride*0] = _mm_cvtsd_f64(from);
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
to[stride*0] = _mm_cvtsi128_si32(from);
to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
// pstore1: broadcast a scalar into all lanes, then do a full aligned store.
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
Packet4f pa = _mm_set_ss(a);
pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
Packet2d pa = _mm_set_sd(a);
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
#ifndef EIGEN_VECTORIZE_AVX
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
// pfirst: extract lane 0. NOTE(review): the MSVC-specific forms (direct union
// member access on Win64, named temporaries otherwise) presumably work around
// compiler codegen issues; the rationale is not visible in this excerpt.
#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
#elif EIGEN_COMP_MSVC_STRICT
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
// preverse: reverse lane order (shuffle control 0x1B = 0,1,2,3 -> 3,2,1,0).
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
{ return _mm_shuffle_ps(a,a,0x1B); }
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
{ return _mm_shuffle_pd(a,a,0x1); }
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
{ return _mm_shuffle_epi32(a,0x1B); }
// pabs: clear the IEEE sign bit for floats/doubles; for ints use the SSSE3
// abs instruction or the classic (x ^ (x>>31)) - (x>>31) trick.
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
return _mm_and_ps(a,mask);
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
return _mm_and_pd(a,mask);
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
#ifdef EIGEN_VECTORIZE_SSSE3
return _mm_abs_epi32(a);
Packet4i aux = _mm_srai_epi32(a,31);
return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
// pbroadcast4: load 4 consecutive scalars and broadcast each into its own
// packet (a3 doubles as the scratch register in the float path).
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a,
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
a3 = pload<Packet4f>(a);
a0 = vec4f_swizzle1(a3, 0,0,0,0);
a1 = vec4f_swizzle1(a3, 1,1,1,1);
a2 = vec4f_swizzle1(a3, 2,2,2,2);
a3 = vec4f_swizzle1(a3, 3,3,3,3);
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet2d>(const double *a,
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
#ifdef EIGEN_VECTORIZE_SSE3
a0 = _mm_loaddup_pd(a+0);
a1 = _mm_loaddup_pd(a+1);
a2 = _mm_loaddup_pd(a+2);
a3 = _mm_loaddup_pd(a+3);
a1 = pload<Packet2d>(a);
a0 = vec2d_swizzle1(a1, 0,0);
a1 = vec2d_swizzle1(a1, 1,1);
a3 = pload<Packet2d>(a+2);
a2 = vec2d_swizzle1(a3, 0,0);
a3 = vec2d_swizzle1(a3, 1,1);
// punpackp: expand vecs[0] into four packets, each broadcasting one lane
// (vecs[0] itself is overwritten last so its lanes stay readable).
EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
// ---- Horizontal reductions. SSE3/SSSE3 provide hadd; otherwise use
// unpack/shuffle trees. ----
#ifdef EIGEN_VECTORIZE_SSE3
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
return _mm_hadd_pd(vecs[0], vecs[1]);
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
Packet4f tmp0, tmp1, tmp2;
tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
tmp0 = _mm_add_ps(tmp0, tmp1);
tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
tmp1 = _mm_add_ps(tmp1, tmp2);
tmp2 = _mm_movehl_ps(tmp1, tmp0);
tmp0 = _mm_movelh_ps(tmp0, tmp1);
return _mm_add_ps(tmp0, tmp2);
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
// predux: sum of all lanes, folded down pairwise.
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
#ifdef EIGEN_VECTORIZE_SSSE3
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
Packet4i tmp0 = _mm_hadd_epi32(a,a);
return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
Packet4i tmp0, tmp1, tmp2;
tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
tmp0 = _mm_add_epi32(tmp0, tmp1);
tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
tmp1 = _mm_add_epi32(tmp1, tmp2);
tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
return _mm_add_epi32(tmp0, tmp2);
// predux_mul: product of all lanes.
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
// Int version spills to an aligned buffer and multiplies scalars.
// NOTE(review): the pstore(aux, a) line appears to be missing from this
// excerpt; also note the stray double semicolon below (harmless).
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
EIGEN_ALIGN16 int aux[4];
return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
// predux_min / predux_max: fold lanes down with vector min/max, or (for int
// without SSE4.1) spill to memory and compare scalars.
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
#ifdef EIGEN_VECTORIZE_SSE4_1
Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
EIGEN_ALIGN16 int aux[4];
int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
return aux0<aux2 ? aux0 : aux2;
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
#ifdef EIGEN_VECTORIZE_SSE4_1
Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
EIGEN_ALIGN16 int aux[4];
int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
return aux0>aux2 ? aux0 : aux2;
// palign: shift `first` left by Offset lanes, refilling vacated lanes from
// `second`. With SSSE3 a single palignr suffices; otherwise each Offset case
// is handled with move/shuffle sequences.
#if EIGEN_COMP_GNUC
#ifdef EIGEN_VECTORIZE_SSSE3
template<int Offset>
struct palign_impl<Offset,Packet4f>
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
template<int Offset>
struct palign_impl<Offset,Packet4i>
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
first = _mm_alignr_epi8(second,first, Offset*4);
template<int Offset>
struct palign_impl<Offset,Packet2d>
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
// Non-SSSE3 fallbacks: per-Offset move/shuffle combinations.
// NOTE(review): the `if (Offset==1)` guards are not visible in this excerpt.
template<int Offset>
struct palign_impl<Offset,Packet4f>
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
first = _mm_move_ss(first,second);
first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
else if (Offset==2)
first = _mm_movehl_ps(first,first);
first = _mm_movelh_ps(first,second);
else if (Offset==3)
first = _mm_move_ss(first,second);
first = _mm_shuffle_ps(first,second,0x93);
template<int Offset>
struct palign_impl<Offset,Packet4i>
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
first = _mm_shuffle_epi32(first,0x39);
else if (Offset==2)
first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
else if (Offset==3)
first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
template<int Offset>
struct palign_impl<Offset,Packet2d>
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
10547 EIGEN_DEVICE_FUNC inline void
10548 ptranspose(PacketBlock<Packet4f,4>& kernel) {
10549 _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
10551 EIGEN_DEVICE_FUNC inline void
10552 ptranspose(PacketBlock<Packet2d,2>& kernel) {
10553 __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
10554 kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
10555 kernel.packet[1] = tmp;
10557 EIGEN_DEVICE_FUNC inline void
10558 ptranspose(PacketBlock<Packet4i,4>& kernel) {
10559 __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
10560 __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
10561 __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
10562 __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
10563 kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
10564 kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
10565 kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
10566 kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
10568 template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
10569 const __m128i zero = _mm_setzero_si128();
10570 const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
10571 __m128i false_mask = _mm_cmpeq_epi32(select, zero);
10572 #ifdef EIGEN_VECTORIZE_SSE4_1
10573 return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
10575 return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
10578 template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
10579 const __m128 zero = _mm_setzero_ps();
10580 const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
10581 __m128 false_mask = _mm_cmpeq_ps(select, zero);
10582 #ifdef EIGEN_VECTORIZE_SSE4_1
10583 return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
10585 return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
10588 template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
10589 const __m128d zero = _mm_setzero_pd();
10590 const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
10591 __m128d false_mask = _mm_cmpeq_pd(select, zero);
10592 #ifdef EIGEN_VECTORIZE_SSE4_1
10593 return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
10595 return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
10598 template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
10600 #ifdef EIGEN_VECTORIZE_SSE4_1
10601 return _mm_blend_ps(a,pset1<Packet4f>(b),1);
10603 return _mm_move_ss(a, _mm_load_ss(&b));
10606 template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
10608 #ifdef EIGEN_VECTORIZE_SSE4_1
10609 return _mm_blend_pd(a,pset1<Packet2d>(b),1);
10611 return _mm_move_sd(a, _mm_load_sd(&b));
10614 template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
10616 #ifdef EIGEN_VECTORIZE_SSE4_1
10617 return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
10619 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
10620 return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
10623 template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
10625 #ifdef EIGEN_VECTORIZE_SSE4_1
10626 return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
10628 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
10629 return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
10633 template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
10634 return ::fmaf(a,b,c);
10636 template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
10637 return ::fma(a,b,c);
10643 // end #include "src/Core/arch/SSE/PacketMath.h"
10644 // #include "src/Core/arch/SSE/MathFunctions.h"
10645 #ifndef EIGEN_MATH_FUNCTIONS_SSE_H
10646 #define EIGEN_MATH_FUNCTIONS_SSE_H
10648 namespace internal {
10649 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10650 Packet4f plog<Packet4f>(const Packet4f& _x)
10653 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
10654 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
10655 _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
10656 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
10657 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
10658 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
10659 _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
10660 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
10661 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
10662 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
10663 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
10664 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
10665 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
10666 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
10667 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
10668 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
10669 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
10670 _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
10672 Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps());
10673 Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
10674 x = pmax(x, p4f_min_norm_pos);
10675 emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
10676 x = _mm_and_ps(x, p4f_inv_mant_mask);
10677 x = _mm_or_ps(x, p4f_half);
10678 emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
10679 Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
10680 Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
10681 Packet4f tmp = pand(x, mask);
10682 x = psub(x, p4f_1);
10683 e = psub(e, pand(p4f_1, mask));
10685 Packet4f x2 = pmul(x,x);
10686 Packet4f x3 = pmul(x2,x);
10687 Packet4f y, y1, y2;
10688 y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
10689 y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
10690 y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
10691 y = pmadd(y , x, p4f_cephes_log_p2);
10692 y1 = pmadd(y1, x, p4f_cephes_log_p5);
10693 y2 = pmadd(y2, x, p4f_cephes_log_p8);
10694 y = pmadd(y, x3, y1);
10695 y = pmadd(y, x3, y2);
10697 y1 = pmul(e, p4f_cephes_log_q1);
10698 tmp = pmul(x2, p4f_half);
10701 y2 = pmul(e, p4f_cephes_log_q2);
10704 return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
10705 _mm_and_ps(iszero_mask, p4f_minus_inf));
10707 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10708 Packet4f pexp<Packet4f>(const Packet4f& _x)
10711 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
10712 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
10713 _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
10714 _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
10715 _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
10716 _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
10717 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
10718 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
10719 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
10720 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
10721 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
10722 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
10723 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
10724 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
10727 x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
10728 fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
10729 #ifdef EIGEN_VECTORIZE_SSE4_1
10730 fx = _mm_floor_ps(fx);
10732 emm0 = _mm_cvttps_epi32(fx);
10733 tmp = _mm_cvtepi32_ps(emm0);
10734 Packet4f mask = _mm_cmpgt_ps(tmp, fx);
10735 mask = _mm_and_ps(mask, p4f_1);
10736 fx = psub(tmp, mask);
10738 tmp = pmul(fx, p4f_cephes_exp_C1);
10739 Packet4f z = pmul(fx, p4f_cephes_exp_C2);
10743 Packet4f y = p4f_cephes_exp_p0;
10744 y = pmadd(y, x, p4f_cephes_exp_p1);
10745 y = pmadd(y, x, p4f_cephes_exp_p2);
10746 y = pmadd(y, x, p4f_cephes_exp_p3);
10747 y = pmadd(y, x, p4f_cephes_exp_p4);
10748 y = pmadd(y, x, p4f_cephes_exp_p5);
10749 y = pmadd(y, z, x);
10750 y = padd(y, p4f_1);
10751 emm0 = _mm_cvttps_epi32(fx);
10752 emm0 = _mm_add_epi32(emm0, p4i_0x7f);
10753 emm0 = _mm_slli_epi32(emm0, 23);
10754 return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
10756 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10757 Packet2d pexp<Packet2d>(const Packet2d& _x)
10760 _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
10761 _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
10762 _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
10763 _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
10764 _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
10765 _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
10766 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
10767 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
10768 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
10769 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
10770 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
10771 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
10772 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
10773 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
10774 _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
10775 static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
10778 x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
10779 fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
10780 #ifdef EIGEN_VECTORIZE_SSE4_1
10781 fx = _mm_floor_pd(fx);
10783 emm0 = _mm_cvttpd_epi32(fx);
10784 tmp = _mm_cvtepi32_pd(emm0);
10785 Packet2d mask = _mm_cmpgt_pd(tmp, fx);
10786 mask = _mm_and_pd(mask, p2d_1);
10787 fx = psub(tmp, mask);
10789 tmp = pmul(fx, p2d_cephes_exp_C1);
10790 Packet2d z = pmul(fx, p2d_cephes_exp_C2);
10793 Packet2d x2 = pmul(x,x);
10794 Packet2d px = p2d_cephes_exp_p0;
10795 px = pmadd(px, x2, p2d_cephes_exp_p1);
10796 px = pmadd(px, x2, p2d_cephes_exp_p2);
10798 Packet2d qx = p2d_cephes_exp_q0;
10799 qx = pmadd(qx, x2, p2d_cephes_exp_q1);
10800 qx = pmadd(qx, x2, p2d_cephes_exp_q2);
10801 qx = pmadd(qx, x2, p2d_cephes_exp_q3);
10802 x = pdiv(px,psub(qx,px));
10803 x = pmadd(p2d_2,x,p2d_1);
10804 emm0 = _mm_cvttpd_epi32(fx);
10805 emm0 = _mm_add_epi32(emm0, p4i_1023_0);
10806 emm0 = _mm_slli_epi32(emm0, 20);
10807 emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
10808 return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
10810 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10811 Packet4f psin<Packet4f>(const Packet4f& _x)
10814 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
10815 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
10816 _EIGEN_DECLARE_CONST_Packet4i(1, 1);
10817 _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
10818 _EIGEN_DECLARE_CONST_Packet4i(2, 2);
10819 _EIGEN_DECLARE_CONST_Packet4i(4, 4);
10820 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
10821 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
10822 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
10823 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
10824 _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
10825 _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
10826 _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
10827 _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
10828 _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
10829 _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
10830 _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
10831 Packet4f xmm1, xmm2, xmm3, sign_bit, y;
10832 Packet4i emm0, emm2;
10835 sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
10836 y = pmul(x, p4f_cephes_FOPI);
10837 emm2 = _mm_cvttps_epi32(y);
10838 emm2 = _mm_add_epi32(emm2, p4i_1);
10839 emm2 = _mm_and_si128(emm2, p4i_not1);
10840 y = _mm_cvtepi32_ps(emm2);
10841 emm0 = _mm_and_si128(emm2, p4i_4);
10842 emm0 = _mm_slli_epi32(emm0, 29);
10843 emm2 = _mm_and_si128(emm2, p4i_2);
10844 emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
10845 Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
10846 Packet4f poly_mask = _mm_castsi128_ps(emm2);
10847 sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
10848 xmm1 = pmul(y, p4f_minus_cephes_DP1);
10849 xmm2 = pmul(y, p4f_minus_cephes_DP2);
10850 xmm3 = pmul(y, p4f_minus_cephes_DP3);
10855 Packet4f z = _mm_mul_ps(x,x);
10856 y = pmadd(y, z, p4f_coscof_p1);
10857 y = pmadd(y, z, p4f_coscof_p2);
10860 Packet4f tmp = pmul(z, p4f_half);
10862 y = padd(y, p4f_1);
10863 Packet4f y2 = p4f_sincof_p0;
10864 y2 = pmadd(y2, z, p4f_sincof_p1);
10865 y2 = pmadd(y2, z, p4f_sincof_p2);
10869 y2 = _mm_and_ps(poly_mask, y2);
10870 y = _mm_andnot_ps(poly_mask, y);
10871 y = _mm_or_ps(y,y2);
10872 return _mm_xor_ps(y, sign_bit);
10874 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10875 Packet4f pcos<Packet4f>(const Packet4f& _x)
10878 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
10879 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
10880 _EIGEN_DECLARE_CONST_Packet4i(1, 1);
10881 _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
10882 _EIGEN_DECLARE_CONST_Packet4i(2, 2);
10883 _EIGEN_DECLARE_CONST_Packet4i(4, 4);
10884 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
10885 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
10886 _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
10887 _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
10888 _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
10889 _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
10890 _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
10891 _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
10892 _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
10893 _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
10894 Packet4f xmm1, xmm2, xmm3, y;
10895 Packet4i emm0, emm2;
10897 y = pmul(x, p4f_cephes_FOPI);
10898 emm2 = _mm_cvttps_epi32(y);
10899 emm2 = _mm_add_epi32(emm2, p4i_1);
10900 emm2 = _mm_and_si128(emm2, p4i_not1);
10901 y = _mm_cvtepi32_ps(emm2);
10902 emm2 = _mm_sub_epi32(emm2, p4i_2);
10903 emm0 = _mm_andnot_si128(emm2, p4i_4);
10904 emm0 = _mm_slli_epi32(emm0, 29);
10905 emm2 = _mm_and_si128(emm2, p4i_2);
10906 emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
10907 Packet4f sign_bit = _mm_castsi128_ps(emm0);
10908 Packet4f poly_mask = _mm_castsi128_ps(emm2);
10909 xmm1 = pmul(y, p4f_minus_cephes_DP1);
10910 xmm2 = pmul(y, p4f_minus_cephes_DP2);
10911 xmm3 = pmul(y, p4f_minus_cephes_DP3);
10916 Packet4f z = pmul(x,x);
10917 y = pmadd(y,z,p4f_coscof_p1);
10918 y = pmadd(y,z,p4f_coscof_p2);
10921 Packet4f tmp = _mm_mul_ps(z, p4f_half);
10923 y = padd(y, p4f_1);
10924 Packet4f y2 = p4f_sincof_p0;
10925 y2 = pmadd(y2, z, p4f_sincof_p1);
10926 y2 = pmadd(y2, z, p4f_sincof_p2);
10928 y2 = pmadd(y2, x, x);
10929 y2 = _mm_and_ps(poly_mask, y2);
10930 y = _mm_andnot_ps(poly_mask, y);
10931 y = _mm_or_ps(y,y2);
10932 return _mm_xor_ps(y, sign_bit);
10934 #if EIGEN_FAST_MATH
10935 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10936 Packet4f psqrt<Packet4f>(const Packet4f& _x)
10938 Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
10939 Packet4f denormal_mask = _mm_and_ps(
10940 _mm_cmpge_ps(_x, _mm_setzero_ps()),
10941 _mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
10942 Packet4f x = _mm_rsqrt_ps(_x);
10943 x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
10944 return _mm_andnot_ps(denormal_mask, pmul(_x,x));
10947 template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10948 Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
10950 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10951 Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
10952 #if EIGEN_FAST_MATH
10953 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10954 Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
10955 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
10956 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
10957 _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
10958 _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
10959 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
10960 Packet4f neg_half = pmul(_x, p4f_minus_half);
10961 Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
10962 Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
10963 Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
10964 Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
10965 Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
10966 _mm_and_ps(zero_mask, p4f_inf));
10967 x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
10968 return _mm_or_ps(x, infs_and_nans);
10971 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10972 Packet4f prsqrt<Packet4f>(const Packet4f& x) {
10973 return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
10976 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
10977 Packet2d prsqrt<Packet2d>(const Packet2d& x) {
10978 return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
10981 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
10982 ptanh<Packet4f>(const Packet4f& x) {
10983 return internal::generic_fast_tanh_float(x);
10988 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
10989 float sqrt(const float &x)
10991 return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
10994 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
10995 double sqrt(const double &x)
10997 #if EIGEN_COMP_GNUC_STRICT
10998 return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
11000 return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
11006 // end #include "src/Core/arch/SSE/MathFunctions.h"
11007 // #include "src/Core/arch/SSE/Complex.h"
11008 #ifndef EIGEN_COMPLEX_SSE_H
11009 #define EIGEN_COMPLEX_SSE_H
11011 namespace internal {
11014 EIGEN_STRONG_INLINE Packet2cf() {}
11015 EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
11018 #ifndef EIGEN_VECTORIZE_AVX
11019 template<> struct packet_traits<std::complex<float> > : default_packet_traits
11021 typedef Packet2cf type;
11022 typedef Packet2cf half;
11025 AlignedOnScalar = 1,
11042 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
11043 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
11044 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
11045 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
11047 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
11048 return Packet2cf(_mm_xor_ps(a.v,mask));
11050 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
11052 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
11053 return Packet2cf(_mm_xor_ps(a.v,mask));
11055 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
11057 #ifdef EIGEN_VECTORIZE_SSE3
11058 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
11059 _mm_mul_ps(_mm_movehdup_ps(a.v),
11060 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
11062 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
11063 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
11064 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
11065 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
11068 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
11069 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
11070 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
11071 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
11072 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
11073 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
11074 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
11077 #if EIGEN_GNUC_AT_MOST(4,2)
11078 res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
11079 #elif EIGEN_GNUC_AT_LEAST(4,6)
11080 #pragma GCC diagnostic push
11081 #pragma GCC diagnostic ignored "-Wuninitialized"
11082 res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
11083 #pragma GCC diagnostic pop
11085 res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
11087 return Packet2cf(_mm_movelh_ps(res.v,res.v));
11089 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
11090 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
11091 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
11092 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
11094 return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
11095 std::imag(from[0*stride]), std::real(from[0*stride])));
11097 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
11099 to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
11100 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
11101 to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
11102 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
11104 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
11105 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
11107 #if EIGEN_GNUC_AT_MOST(4,3)
11108 EIGEN_ALIGN16 std::complex<float> res[2];
11109 _mm_store_ps((float*)res, a.v);
11112 std::complex<float> res;
11113 _mm_storel_pi((__m64*)&res, a.v);
11117 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
11118 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
11120 return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
11122 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
11124 return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
11126 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
11128 return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
11130 template<int Offset>
11131 struct palign_impl<Offset,Packet2cf>
11133 static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
11137 first.v = _mm_movehl_ps(first.v, first.v);
11138 first.v = _mm_movelh_ps(first.v, second.v);
11142 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
11144 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
11145 { return padd(pmul(x,y),c); }
11146 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
11148 #ifdef EIGEN_VECTORIZE_SSE3
11149 return internal::pmul(a, pconj(b));
11151 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
11152 return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
11153 _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
11154 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
11158 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
11160 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
11161 { return padd(pmul(x,y),c); }
11162 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
11164 #ifdef EIGEN_VECTORIZE_SSE3
11165 return internal::pmul(pconj(a), b);
11167 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
11168 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
11169 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
11170 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
11174 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
11176 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
11177 { return padd(pmul(x,y),c); }
11178 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
11180 #ifdef EIGEN_VECTORIZE_SSE3
11181 return pconj(internal::pmul(a, b));
11183 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
11184 return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
11185 _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
11186 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
11190 template<> struct conj_helper<Packet4f, Packet2cf, false,false>
11192 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
11193 { return padd(c, pmul(x,y)); }
11194 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
11195 { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
11197 template<> struct conj_helper<Packet2cf, Packet4f, false,false>
11199 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
11200 { return padd(c, pmul(x,y)); }
11201 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
11202 { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
11204 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
11206 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
11207 __m128 s = _mm_mul_ps(b.v,b.v);
11208 return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
11210 EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
11212 return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
11216 EIGEN_STRONG_INLINE Packet1cd() {}
11217 EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
11220 #ifndef EIGEN_VECTORIZE_AVX
11221 template<> struct packet_traits<std::complex<double> > : default_packet_traits
11223 typedef Packet1cd type;
11224 typedef Packet1cd half;
11227 AlignedOnScalar = 0,
11243 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
11244 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
11245 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
11246 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
11247 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
11249 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
11250 return Packet1cd(_mm_xor_pd(a.v,mask));
11252 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
11254 #ifdef EIGEN_VECTORIZE_SSE3
11255 return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
11256 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
11257 vec2d_swizzle1(b.v, 1, 0))));
11259 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
11260 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
11261 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
11262 vec2d_swizzle1(b.v, 1, 0)), mask)));
11265 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
11266 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
11267 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
11268 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
11269 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
11270 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
11271 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
11272 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
11273 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
11274 { return ploadu<Packet1cd>(&from); }
11275 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
11276 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
11277 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
11278 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
11279 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
11281 EIGEN_ALIGN16 double res[2];
11282 _mm_store_pd(res, a.v);
11283 return std::complex<double>(res[0],res[1]);
11285 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
// Horizontal reductions and the alignment helper for Packet1cd.
// NOTE(review): the function bodies are elided in this dump (the embedded
// source line numbers jump, e.g. 11287-11289); only signatures remain here.
// For a one-element packet these reductions presumably return the single
// element and palign_impl::run is a no-op -- confirm against the full source.
11286 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
11290 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
11294 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
11298 template<int Offset>
11299 struct palign_impl<Offset,Packet1cd>
// run takes both arguments unnamed (unused).
11301 static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
11305 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
11307 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
11308 { return padd(pmul(x,y),c); }
11309 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
11311 #ifdef EIGEN_VECTORIZE_SSE3
11312 return internal::pmul(a, pconj(b));
11314 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
11315 return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
11316 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
11317 vec2d_swizzle1(b.v, 1, 0))));
11321 template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
11323 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
11324 { return padd(pmul(x,y),c); }
11325 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
11327 #ifdef EIGEN_VECTORIZE_SSE3
11328 return internal::pmul(pconj(a), b);
11330 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
11331 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
11332 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
11333 vec2d_swizzle1(b.v, 1, 0)), mask)));
11337 template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
11339 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
11340 { return padd(pmul(x,y),c); }
11341 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
11343 #ifdef EIGEN_VECTORIZE_SSE3
11344 return pconj(internal::pmul(a, b));
11346 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
11347 return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
11348 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
11349 vec2d_swizzle1(b.v, 1, 0))));
11353 template<> struct conj_helper<Packet2d, Packet1cd, false,false>
11355 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
11356 { return padd(c, pmul(x,y)); }
11357 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
11358 { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
11360 template<> struct conj_helper<Packet1cd, Packet2d, false,false>
11362 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
11363 { return padd(c, pmul(x,y)); }
11364 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
11365 { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
11367 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
11369 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
11370 __m128d s = _mm_mul_pd(b.v,b.v);
11371 return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
11373 EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
11375 return Packet1cd(preverse(Packet2d(x.v)));
11377 EIGEN_DEVICE_FUNC inline void
11378 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
11379 __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
11380 __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
11381 __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
11382 kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
11383 kernel.packet[1].v = tmp;
11385 template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
11386 __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
11387 return Packet2cf(_mm_castpd_ps(result));
11389 template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
11391 return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
11393 template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
11395 return pset1<Packet1cd>(b);
11397 template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
11399 return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
11401 template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
11403 return pset1<Packet1cd>(b);
11408 // end #include "src/Core/arch/SSE/Complex.h"
11409 // #include "src/Core/arch/SSE/TypeCasting.h"
11410 #ifndef EIGEN_TYPE_CASTING_SSE_H
11411 #define EIGEN_TYPE_CASTING_SSE_H
11413 namespace internal {
// Vectorized scalar conversions for SSE.
// NOTE(review): the `template<>` lines and enum closing braces of the
// type_casting_traits specializations are elided in this dump (embedded
// line numbers jump); code left byte-identical.
11415 struct type_casting_traits<float, int> {
11417 VectorizedCast = 1,
// float -> int uses truncation (cvtt), matching C++ cast semantics.
11422 template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
11423 return _mm_cvttps_epi32(a);
11426 struct type_casting_traits<int, float> {
11428 VectorizedCast = 1,
11433 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
11434 return _mm_cvtepi32_ps(a);
11437 struct type_casting_traits<double, float> {
11439 VectorizedCast = 1,
// Two double packets (2 lanes each) narrow into one float packet (4 lanes);
// the shuffle packs the two converted low halves together.
11444 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
11445 return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
11448 struct type_casting_traits<float, double> {
11450 VectorizedCast = 1,
// Only the two low floats are widened to doubles.
11455 template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
11456 return _mm_cvtps_pd(a);
11461 // end #include "src/Core/arch/SSE/TypeCasting.h"
11462 #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
11463 // #include "src/Core/arch/AltiVec/PacketMath.h"
11464 #ifndef EIGEN_PACKET_MATH_ALTIVEC_H
11465 #define EIGEN_PACKET_MATH_ALTIVEC_H
11467 namespace internal {
11468 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
11469 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
11471 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
11472 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
11474 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
11475 #define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
11477 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
11478 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
11480 typedef __vector float Packet4f;
11481 typedef __vector int Packet4i;
11482 typedef __vector unsigned int Packet4ui;
11483 typedef __vector __bool int Packet4bi;
11484 typedef __vector short int Packet8i;
11485 typedef __vector unsigned char Packet16uc;
11486 #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
11487 Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
11488 #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
11489 Packet4i p4i_##NAME = vec_splat_s32(X)
11490 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
11491 Packet4f p4f_##NAME = pset1<Packet4f>(X)
11492 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
11493 Packet4i p4i_##NAME = pset1<Packet4i>(X)
11494 #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
11495 Packet2d p2d_##NAME = pset1<Packet2d>(X)
11496 #define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
11497 Packet2l p2l_##NAME = pset1<Packet2l>(X)
11498 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
11499 const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
11501 #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
11502 static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
11503 static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
11504 static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
11505 static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
11506 static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
11507 static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
11509 static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
11511 static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
11512 static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
11513 static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
11514 static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
11516 #define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
11518 #define _EIGEN_MASK_ALIGNMENT 0xfffffff0
11520 #define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
11522 static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
11524 static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
11526 static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
11527 static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
11528 static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);
11530 static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
11531 static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
11532 static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
11533 static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
11534 static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8);
11536 static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);
11537 static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);
11538 static Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16;
11539 static Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16;
11540 static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8);
11542 static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);
11544 static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8);
11546 #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
11547 #define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR);
11549 #define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
11551 template<> struct packet_traits<float> : default_packet_traits
11553 typedef Packet4f type;
11554 typedef Packet4f half;
11557 AlignedOnScalar = 1,
11573 #if !EIGEN_COMP_CLANG
11589 template<> struct packet_traits<int> : default_packet_traits
11591 typedef Packet4i type;
11592 typedef Packet4i half;
11595 AlignedOnScalar = 1,
11605 template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
11606 template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
11607 inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
11611 unsigned char n[16];
11614 for (int i=0; i< 16; i++)
11615 s << (int)vt.n[i] << ", ";
11618 inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
11625 s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
11628 inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
11635 s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
11638 inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
11645 s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
11648 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
11650 EIGEN_DEBUG_ALIGNED_LOAD
11652 return vec_vsx_ld(0, from);
11654 return vec_ld(0, from);
11657 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
11659 EIGEN_DEBUG_ALIGNED_LOAD
11661 return vec_vsx_ld(0, from);
11663 return vec_ld(0, from);
11666 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
11668 EIGEN_DEBUG_ALIGNED_STORE
11670 vec_vsx_st(from, 0, to);
11672 vec_st(from, 0, to);
11675 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
11677 EIGEN_DEBUG_ALIGNED_STORE
11679 vec_vsx_st(from, 0, to);
11681 vec_st(from, 0, to);
11684 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
11685 Packet4f v = {from, from, from, from};
11688 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
11689 Packet4i v = {from, from, from, from};
11692 template<> EIGEN_STRONG_INLINE void
11693 pbroadcast4<Packet4f>(const float *a,
11694 Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
11696 a3 = pload<Packet4f>(a);
11697 a0 = vec_splat(a3, 0);
11698 a1 = vec_splat(a3, 1);
11699 a2 = vec_splat(a3, 2);
11700 a3 = vec_splat(a3, 3);
11702 template<> EIGEN_STRONG_INLINE void
11703 pbroadcast4<Packet4i>(const int *a,
11704 Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
11706 a3 = pload<Packet4i>(a);
11707 a0 = vec_splat(a3, 0);
11708 a1 = vec_splat(a3, 1);
11709 a2 = vec_splat(a3, 2);
11710 a3 = vec_splat(a3, 3);
11712 template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
11714 float EIGEN_ALIGN16 af[4];
11715 af[0] = from[0*stride];
11716 af[1] = from[1*stride];
11717 af[2] = from[2*stride];
11718 af[3] = from[3*stride];
11719 return pload<Packet4f>(af);
11721 template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
11723 int EIGEN_ALIGN16 ai[4];
11724 ai[0] = from[0*stride];
11725 ai[1] = from[1*stride];
11726 ai[2] = from[2*stride];
11727 ai[3] = from[3*stride];
11728 return pload<Packet4i>(ai);
11730 template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
11732 float EIGEN_ALIGN16 af[4];
11733 pstore<float>(af, from);
11734 to[0*stride] = af[0];
11735 to[1*stride] = af[1];
11736 to[2*stride] = af[2];
11737 to[3*stride] = af[3];
11739 template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
11741 int EIGEN_ALIGN16 ai[4];
11742 pstore<int>((int *)ai, from);
11743 to[0*stride] = ai[0];
11744 to[1*stride] = ai[1];
11745 to[2*stride] = ai[2];
11746 to[3*stride] = ai[3];
// Coefficient-wise arithmetic for AltiVec Packet4f / Packet4i.
// Code left byte-identical; this span mixes complete one-liners with an
// elided pdiv<Packet4f> (its braces, #ifndef __VSX__ arm and the vec_re
// initialization of y_0 are missing from this dump -- embedded numbers jump).
11748 template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; }
11749 template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return pset1<Packet4i>(a) + p4i_COUNTDOWN; }
11750 template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return a + b; }
11751 template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return a + b; }
11752 template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return a - b; }
11753 template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return a - b; }
11754 template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }
11755 template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }
// Conjugation is the identity for real packets.
11756 template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
11757 template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
// Multiply via fused multiply-add against -0.0 to preserve sign of zeros.
11758 template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); }
11759 template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; }
// Division: Newton-Raphson refinement of vec_re on VMX, native vec_div on VSX
// (the surrounding #ifndef __VSX__/#else/#endif lines are elided here).
11760 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
11763 Packet4f t, y_0, y_1;
11765 t = vec_nmsub(y_0, b, p4f_ONE);
11766 y_1 = vec_madd(y_0, t, y_0);
11767 return vec_madd(a, y_1, p4f_MZERO);
11769 return vec_div(a, b);
// Integer packet division is unsupported: asserts and returns zeros.
11772 template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& , const Packet4i& )
11773 { eigen_assert(false && "packet integer division are not supported by AltiVec");
11774 return pset1<Packet4i>(0);
// Fused multiply-add, min/max, and boolean operations.
11776 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
11777 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
11778 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
11779 template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
11780 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
11781 template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
11782 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
11783 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
11784 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
11785 template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
11786 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
11787 template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
// a & ~b, built from vec_nor(b,b) since there is no direct andnot.
11788 template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
11789 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
// Rounding family (float only).
11790 template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
11791 template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
11792 template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
11794 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
11796 EIGEN_DEBUG_ALIGNED_LOAD
11797 Packet16uc MSQ, LSQ;
11799 MSQ = vec_ld(0, (unsigned char *)from);
11800 LSQ = vec_ld(15, (unsigned char *)from);
11801 mask = vec_lvsl(0, from);
11802 return (Packet4f) vec_perm(MSQ, LSQ, mask);
11804 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
11806 EIGEN_DEBUG_ALIGNED_LOAD
11807 Packet16uc MSQ, LSQ;
11809 MSQ = vec_ld(0, (unsigned char *)from);
11810 LSQ = vec_ld(15, (unsigned char *)from);
11811 mask = vec_lvsl(0, from);
11812 return (Packet4i) vec_perm(MSQ, LSQ, mask);
11815 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
11817 EIGEN_DEBUG_UNALIGNED_LOAD
11818 return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
11820 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
11822 EIGEN_DEBUG_UNALIGNED_LOAD
11823 return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
11826 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
11829 if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
11830 else p = ploadu<Packet4f>(from);
11831 return vec_perm(p, p, p16uc_DUPLICATE32_HI);
11833 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
11836 if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
11837 else p = ploadu<Packet4i>(from);
11838 return vec_perm(p, p, p16uc_DUPLICATE32_HI);
11841 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
11843 EIGEN_DEBUG_UNALIGNED_STORE
11844 Packet16uc MSQ, LSQ, edges;
11845 Packet16uc edgeAlign, align;
11846 MSQ = vec_ld(0, (unsigned char *)to);
11847 LSQ = vec_ld(15, (unsigned char *)to);
11848 edgeAlign = vec_lvsl(0, to);
11849 edges=vec_perm(LSQ,MSQ,edgeAlign);
11850 align = vec_lvsr( 0, to );
11851 MSQ = vec_perm(edges,(Packet16uc)from,align);
11852 LSQ = vec_perm((Packet16uc)from,edges,align);
11853 vec_st( LSQ, 15, (unsigned char *)to );
11854 vec_st( MSQ, 0, (unsigned char *)to );
11856 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
11858 EIGEN_DEBUG_UNALIGNED_STORE
11859 Packet16uc MSQ, LSQ, edges;
11860 Packet16uc edgeAlign, align;
11861 MSQ = vec_ld(0, (unsigned char *)to);
11862 LSQ = vec_ld(15, (unsigned char *)to);
11863 edgeAlign = vec_lvsl(0, to);
11864 edges=vec_perm(LSQ, MSQ, edgeAlign);
11865 align = vec_lvsr( 0, to );
11866 MSQ = vec_perm(edges, (Packet16uc) from, align);
11867 LSQ = vec_perm((Packet16uc) from, edges, align);
11868 vec_st( LSQ, 15, (unsigned char *)to );
11869 vec_st( MSQ, 0, (unsigned char *)to );
11872 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
11874 EIGEN_DEBUG_ALIGNED_STORE
11875 vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to));
11877 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
11879 EIGEN_DEBUG_ALIGNED_STORE
11880 vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
11883 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); }
11884 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_PPC_PREFETCH(addr); }
11885 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
11886 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
11887 template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
11889 return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
11891 template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
11893 return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32)); }
11894 template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
11895 template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
11896 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
11899 b = vec_sld(a, a, 8);
11901 b = vec_sld(sum, sum, 4);
11903 return pfirst(sum);
11905 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
11907 Packet4f v[4], sum[4];
11908 v[0] = vec_mergeh(vecs[0], vecs[2]);
11909 v[1] = vec_mergel(vecs[0], vecs[2]);
11910 v[2] = vec_mergeh(vecs[1], vecs[3]);
11911 v[3] = vec_mergel(vecs[1], vecs[3]);
11912 sum[0] = vec_mergeh(v[0], v[2]);
11913 sum[1] = vec_mergel(v[0], v[2]);
11914 sum[2] = vec_mergeh(v[1], v[3]);
11915 sum[3] = vec_mergel(v[1], v[3]);
11916 sum[0] = sum[0] + sum[1];
11917 sum[1] = sum[2] + sum[3];
11918 sum[0] = sum[0] + sum[1];
11921 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
11924 sum = vec_sums(a, p4i_ZERO);
11926 sum = vec_sld(sum, p4i_ZERO, 12);
11928 sum = vec_sld(p4i_ZERO, sum, 4);
11930 return pfirst(sum);
11932 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
11934 Packet4i v[4], sum[4];
11935 v[0] = vec_mergeh(vecs[0], vecs[2]);
11936 v[1] = vec_mergel(vecs[0], vecs[2]);
11937 v[2] = vec_mergeh(vecs[1], vecs[3]);
11938 v[3] = vec_mergel(vecs[1], vecs[3]);
11939 sum[0] = vec_mergeh(v[0], v[2]);
11940 sum[1] = vec_mergel(v[0], v[2]);
11941 sum[2] = vec_mergeh(v[1], v[3]);
11942 sum[3] = vec_mergel(v[1], v[3]);
11943 sum[0] = sum[0] + sum[1];
11944 sum[1] = sum[2] + sum[3];
11945 sum[0] = sum[0] + sum[1];
11948 template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
11951 prod = pmul(a, vec_sld(a, a, 8));
11952 return pfirst(pmul(prod, vec_sld(prod, prod, 4)));
11954 template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
11956 EIGEN_ALIGN16 int aux[4];
11958 return aux[0] * aux[1] * aux[2] * aux[3];
11960 template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
11963 b = vec_min(a, vec_sld(a, a, 8));
11964 res = vec_min(b, vec_sld(b, b, 4));
11965 return pfirst(res);
11967 template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
11970 b = vec_min(a, vec_sld(a, a, 8));
11971 res = vec_min(b, vec_sld(b, b, 4));
11972 return pfirst(res);
11974 template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
11977 b = vec_max(a, vec_sld(a, a, 8));
11978 res = vec_max(b, vec_sld(b, b, 4));
11979 return pfirst(res);
11981 template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
11984 b = vec_max(a, vec_sld(a, a, 8));
11985 res = vec_max(b, vec_sld(b, b, 4));
11986 return pfirst(res);
11988 template<int Offset>
11989 struct palign_impl<Offset,Packet4f>
11991 static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
11994 switch (Offset % 4) {
11996 first = vec_sld(first, second, 4); break;
11998 first = vec_sld(first, second, 8); break;
12000 first = vec_sld(first, second, 12); break;
12003 switch (Offset % 4) {
12005 first = vec_sld(second, first, 12); break;
12007 first = vec_sld(second, first, 8); break;
12009 first = vec_sld(second, first, 4); break;
12014 template<int Offset>
12015 struct palign_impl<Offset,Packet4i>
12017 static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
12020 switch (Offset % 4) {
12022 first = vec_sld(first, second, 4); break;
12024 first = vec_sld(first, second, 8); break;
12026 first = vec_sld(first, second, 12); break;
12029 switch (Offset % 4) {
12031 first = vec_sld(second, first, 12); break;
12033 first = vec_sld(second, first, 8); break;
12035 first = vec_sld(second, first, 4); break;
12040 EIGEN_DEVICE_FUNC inline void
12041 ptranspose(PacketBlock<Packet4f,4>& kernel) {
12042 Packet4f t0, t1, t2, t3;
12043 t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
12044 t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
12045 t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
12046 t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
12047 kernel.packet[0] = vec_mergeh(t0, t2);
12048 kernel.packet[1] = vec_mergel(t0, t2);
12049 kernel.packet[2] = vec_mergeh(t1, t3);
12050 kernel.packet[3] = vec_mergel(t1, t3);
12052 EIGEN_DEVICE_FUNC inline void
12053 ptranspose(PacketBlock<Packet4i,4>& kernel) {
12054 Packet4i t0, t1, t2, t3;
12055 t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
12056 t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
12057 t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
12058 t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
12059 kernel.packet[0] = vec_mergeh(t0, t2);
12060 kernel.packet[1] = vec_mergel(t0, t2);
12061 kernel.packet[2] = vec_mergeh(t1, t3);
12062 kernel.packet[3] = vec_mergel(t1, t3);
12064 template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
12065 Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
12066 Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
12067 return vec_sel(elsePacket, thenPacket, mask);
12069 template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
12070 Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
12071 Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
12072 return vec_sel(elsePacket, thenPacket, mask);
12075 typedef __vector double Packet2d;
12076 typedef __vector unsigned long long Packet2ul;
12077 typedef __vector long long Packet2l;
12078 #if EIGEN_COMP_CLANG
12079 typedef Packet2ul Packet2bl;
12081 typedef __vector __bool long Packet2bl;
12083 static Packet2l p2l_ONE = { 1, 1 };
12084 static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
12085 static Packet2d p2d_ONE = { 1.0, 1.0 };
12086 static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
12087 static Packet2d p2d_MZERO = { -0.0, -0.0 };
12089 static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
12091 static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8));
12093 template<int index> Packet2d vec_splat_dbl(Packet2d& a);
12094 template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a)
12096 return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_HI));
12098 template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a)
12100 return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_LO));
12102 template<> struct packet_traits<double> : default_packet_traits
12104 typedef Packet2d type;
12105 typedef Packet2d half;
12108 AlignedOnScalar = 1,
12131 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
12132 inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
12139 s << vt.n[0] << ", " << vt.n[1];
12142 inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
12149 s << vt.n[0] << ", " << vt.n[1];
12152 template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
12154 EIGEN_DEBUG_ALIGNED_LOAD
12156 return vec_vsx_ld(0, from);
12158 return vec_ld(0, from);
12161 template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
12163 EIGEN_DEBUG_ALIGNED_STORE
12165 vec_vsx_st(from, 0, to);
12167 vec_st(from, 0, to);
12170 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
12171 Packet2d v = {from, from};
12174 template<> EIGEN_STRONG_INLINE void
12175 pbroadcast4<Packet2d>(const double *a,
12176 Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
12178 a1 = pload<Packet2d>(a);
12179 a0 = vec_splat_dbl<0>(a1);
12180 a1 = vec_splat_dbl<1>(a1);
12181 a3 = pload<Packet2d>(a+2);
12182 a2 = vec_splat_dbl<0>(a3);
12183 a3 = vec_splat_dbl<1>(a3);
12185 template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
12187 double EIGEN_ALIGN16 af[2];
12188 af[0] = from[0*stride];
12189 af[1] = from[1*stride];
12190 return pload<Packet2d>(af);
12192 template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
12194 double EIGEN_ALIGN16 af[2];
12195 pstore<double>(af, from);
12196 to[0*stride] = af[0];
12197 to[1*stride] = af[1];
// Element-wise Packet2d primitives. Arithmetic uses the compiler's native
// vector operators where available, AltiVec/VSX intrinsics otherwise.
// plset produces {a, a+1} via the p2d_COUNTDOWN constant (declared elsewhere).
12199 template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return pset1<Packet2d>(a) + p2d_COUNTDOWN; }
12200 template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return a + b; }
12201 template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
12202 template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
// Real packets: conjugation is the identity.
12203 template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
// pmul is expressed as fused multiply-add with a -0.0 addend.
12204 template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); }
12205 template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
12206 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
12207 template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
12208 template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
// Bitwise ops on the raw 128-bit lanes.
12209 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
12210 template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
12211 template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
// a & ~b: vec_nor(b,b) computes the bitwise NOT of b.
12212 template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
// Rounding family maps directly onto the vector rounding intrinsics.
12213 template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
12214 template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
12215 template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
// Unaligned load: vec_vsx_ld with the pointer split into an aligned base
// (_EIGEN_ALIGNED_PTR) plus the low-4-bit byte offset ((long)from & 15).
12216 template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
12218 EIGEN_DEBUG_ALIGNED_LOAD
12219 return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from));
// Load one double and duplicate it into both lanes, picking the aligned or
// unaligned load path based on the pointer's 16-byte alignment.
12221 template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
12224 if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
12225 else p = ploadu<Packet2d>(from);
12226 return vec_splat_dbl<0>(p);
// Unaligned store, mirror of ploadu. The cast to Packet4f/float* only
// reinterprets the 128-bit payload for the intrinsic's signature.
12228 template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)
12230 EIGEN_DEBUG_ALIGNED_STORE
12231 vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
12233 template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }
// pfirst: spill to an aligned scalar buffer and return lane 0.
12234 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore<double>(x, a); return x[0]; }
// preverse: swap the two 64-bit lanes via a byte permute.
12235 template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
12237 return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
12239 template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
// Horizontal sum: vec_sld by 8 bytes swaps the two 64-bit lanes, so
// a + shifted(a) has the total in both lanes; return lane 0.
12240 template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
12243 b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(a), reinterpret_cast<Packet4f>(a), 8));
12245 return pfirst<Packet2d>(sum);
// Reduce two input packets at once: v[i] holds the lane-sum of vecs[i] in
// both lanes; the final vec_sld merges one lane of each into the result.
// The two sum= lines are presumably the _BIG_ENDIAN / little-endian variants
// — the guard lines are elided in this dump.
12247 template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
12249 Packet2d v[2], sum;
12250 v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
12251 v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
12253 sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
12255 sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[1]), reinterpret_cast<Packet4f>(v[0]), 8));
// Horizontal product / min / max: same lane-swap trick as predux, combined
// with pmul / pmin / pmax respectively.
12259 template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
12261 return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
12263 template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
12265 return pfirst(pmin(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))))
;
12267 template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
12269 return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
// palign_impl<Offset,Packet2d>: shift the concatenation of (first,second) by
// Offset doubles (8 bytes each) via vec_sld. The two branches are presumably
// the big-/little-endian operand orders; the guard lines are elided here.
12271 template<int Offset>
12272 struct palign_impl<Offset,Packet2d>
12274 static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
12278 first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8));
12280 first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8));
// 2x2 in-register transpose: interleave the high halves and the low halves
// of the two packets with 64-bit permute masks.
12284 EIGEN_DEVICE_FUNC inline void
12285 ptranspose(PacketBlock<Packet2d,2>& kernel) {
12287 t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
12288 t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
12289 kernel.packet[0] = t0;
12290 kernel.packet[1] = t1;
// Per-lane select: build a 64-bit mask from the boolean selector (lane is
// all-ones where select[i]==1) and use vec_sel to pick then/else per lane.
12292 template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
12293 Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
12294 Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
12295 return vec_sel(elsePacket, thenPacket, mask);
12301 // end #include "src/Core/arch/AltiVec/PacketMath.h"
12302 // #include "src/Core/arch/AltiVec/MathFunctions.h"
12303 #ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
12304 #define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
12306 namespace internal {
// --- Packet4f constants for plog/pexp, from the Cephes math library's
// polynomial/rational approximations. The _FROM_INT variants build float
// packets from raw IEEE-754 bit patterns (masks, inf, NaN).
12307 static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
12308 static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
12309 static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
12310 static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
12311 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
12312 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
12313 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
12314 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
// Coefficients of the degree-8 log() polynomial and its correction terms.
12315 static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
12316 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
12317 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
12318 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
12319 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
12320 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
12321 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
12322 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
12323 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
12324 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
12325 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
12326 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
// exp() range limits and polynomial coefficients (single precision).
12327 static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
12328 static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
12329 static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
12330 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
12331 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
12332 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
12333 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
12334 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
12335 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
12336 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
12337 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
// --- Packet2d constants for the double-precision exp(): Pade-style
// rational approximation coefficients p0..p2 / q0..q3 plus the two-part
// split of ln(2) (C1+C2) used for argument reduction.
12339 static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
12340 static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
12341 static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
12342 static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
12343 static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
12344 static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
12345 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
12346 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
12347 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
12348 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
12349 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
12350 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
12351 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
12352 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
12353 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
// Exponent bias (1023) and mantissa width (52) of IEEE-754 double, used by
// pexp<Packet2d> to rebuild 2^n; only available as 64-bit vectors on POWER8.
12354 #ifdef __POWER8_VECTOR__
12355 static Packet2l p2l_1023 = { 1023, 1023 };
12356 static Packet2ul p2ul_52 = { 52, 52 };
// Vectorized natural log for 4 floats, Cephes-style:
//  1. split x into mantissa in [0.5,1) and integer exponent e;
//  2. evaluate a degree-8 polynomial on the reduced mantissa;
//  3. add back e*ln(2) via the split constants q1+q2;
//  4. patch lanes: x<=0 -> -inf (exact zero) or NaN (negative/invalid).
// Several body lines (declarations, intermediate padd/psub steps) are elided
// in this dump.
12359 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
12360 Packet4f plog<Packet4f>(const Packet4f& _x)
12364 Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
12365 Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
// Clamp denormals up to the smallest normalized float before exponent extraction.
12366 x = pmax(x, p4f_min_norm_pos);
// Shift right by 23 to pull the biased exponent out of the float bits.
12367 emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
12368 reinterpret_cast<Packet4ui>(p4i_23));
// Keep only the mantissa bits and force the value into [0.5, 1).
12369 x = pand(x, p4f_inv_mant_mask);
12370 x = por(x, p4f_half);
// Remove the exponent bias (0x7f) and convert to float.
12371 emm0 = psub(emm0, p4i_0x7f);
12372 Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
// If mantissa < sqrt(1/2), fold it into the next octave to keep the
// polynomial argument small (standard Cephes branch, done branchlessly).
12373 Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
12374 Packet4f tmp = pand(x, mask);
12375 x = psub(x, p4f_1);
12376 e = psub(e, pand(p4f_1, mask));
// Degree-8 polynomial, evaluated as three interleaved Horner chains (y, y1,
// y2) to expose instruction-level parallelism, then recombined via x^3.
12378 Packet4f x2 = pmul(x,x);
12379 Packet4f x3 = pmul(x2,x);
12380 Packet4f y, y1, y2;
12381 y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
12382 y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
12383 y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
12384 y = pmadd(y , x, p4f_cephes_log_p2);
12385 y1 = pmadd(y1, x, p4f_cephes_log_p5);
12386 y2 = pmadd(y2, x, p4f_cephes_log_p8);
12387 y = pmadd(y, x3, y1);
12388 y = pmadd(y, x3, y2);
// Add e*ln(2) using the two-part split (q1 tiny correction, q2 main part).
12390 y1 = pmul(e, p4f_cephes_log_q1);
12391 tmp = pmul(x2, p4f_half);
12394 y2 = pmul(e, p4f_cephes_log_q2);
// Special-case lanes: log(0) = -inf, log(x<0 or NaN) = NaN.
12397 x = vec_sel(x, p4f_minus_inf, iszero_mask);
12398 x = vec_sel(p4f_minus_nan, x, isvalid_mask);
// Vectorized exp for 4 floats, Cephes-style:
//  1. clamp x to [exp_lo, exp_hi];
//  2. argument reduction: n = round(x/ln2), r = x - n*ln2 (ln2 split into
//     C1 + C2 for extra precision);
//  3. degree-5 polynomial on r;
//  4. scale by 2^n by injecting n into the float exponent field;
//  5. NaN inputs propagate via the isnumber_mask select.
// Some body lines (declarations, the floor/round step) are elided here.
12401 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
12402 Packet4f pexp<Packet4f>(const Packet4f& _x)
12407 x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
// fx = x * log2(e) + 0.5 (the +0.5 biases the truncation toward rounding).
12408 fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
// Subtract fx*ln(2) from x in two steps (C1 coarse, C2 fine).
12410 tmp = pmul(fx, p4f_cephes_exp_C1);
12411 Packet4f z = pmul(fx, p4f_cephes_exp_C2);
// Horner evaluation of the degree-5 polynomial approximating e^r.
12415 Packet4f y = p4f_cephes_exp_p0;
12416 y = pmadd(y, x, p4f_cephes_exp_p1);
12417 y = pmadd(y, x, p4f_cephes_exp_p2);
12418 y = pmadd(y, x, p4f_cephes_exp_p3);
12419 y = pmadd(y, x, p4f_cephes_exp_p4);
12420 y = pmadd(y, x, p4f_cephes_exp_p5);
12421 y = pmadd(y, z, x);
12422 y = padd(y, p4f_1);
// Build 2^n: integer-convert fx, re-bias by 0x7f, shift into exponent bits.
12423 emm0 = vec_cts(fx, 0);
12424 emm0 = vec_add(emm0, p4i_0x7f);
12425 emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));
// x==x is false only for NaN lanes; keep the original input there.
12426 Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
12427 return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
// Reciprocal square root via the hardware estimate instruction.
// NOTE(review): guarded out for clang — presumably vec_rsqrt is unavailable
// or differs there; the matching #endif lines are elided in this dump.
12430 #ifndef EIGEN_COMP_CLANG
12431 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
12432 Packet4f prsqrt<Packet4f>(const Packet4f& x)
12434 return vec_rsqrt(x);
12438 #ifndef EIGEN_COMP_CLANG
12439 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
12440 Packet2d prsqrt<Packet2d>(const Packet2d& x)
12442 return vec_rsqrt(x);
// Full-precision vector square roots (VSX vec_sqrt), no clang guard needed.
12445 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
12446 Packet4f psqrt<Packet4f>(const Packet4f& x)
12448 return vec_sqrt(x);
12450 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
12451 Packet2d psqrt<Packet2d>(const Packet2d& x)
12453 return vec_sqrt(x);
// Convert a Packet2d to two signed 64-bit ints. Newer GCC exposes a correct
// 64-bit vec_cts; older compilers fall back to a scalar memcpy round-trip.
12455 static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
12456 #if EIGEN_GNUC_AT_LEAST(5, 4) || \
12457 (EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)
12458 return vec_cts(x, 0);
12461 memcpy(tmp, &x, sizeof(tmp));
12462 Packet2l l = { static_cast<long long>(tmp[0]),
12463 static_cast<long long>(tmp[1]) };
// Vectorized exp for 2 doubles: same reduce / approximate / rescale scheme
// as pexp<Packet4f>, but the core approximation is the Cephes rational form
// e^r ~= 1 + 2*px/(qx - px) with even polynomials px(x^2), qx(x^2).
12467 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
12468 Packet2d pexp<Packet2d>(const Packet2d& _x)
12473 x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
12474 fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half);
// Two-part subtraction of fx*ln(2) (C1 coarse, C2 fine correction).
12476 tmp = pmul(fx, p2d_cephes_exp_C1);
12477 Packet2d z = pmul(fx, p2d_cephes_exp_C2);
// Numerator px and denominator qx of the rational approximation, both
// evaluated in x^2 (Horner form).
12480 Packet2d x2 = pmul(x,x);
12481 Packet2d px = p2d_cephes_exp_p0;
12482 px = pmadd(px, x2, p2d_cephes_exp_p1);
12483 px = pmadd(px, x2, p2d_cephes_exp_p2);
12485 Packet2d qx = p2d_cephes_exp_q0;
12486 qx = pmadd(qx, x2, p2d_cephes_exp_q1);
12487 qx = pmadd(qx, x2, p2d_cephes_exp_q2);
12488 qx = pmadd(qx, x2, p2d_cephes_exp_q3);
12489 x = pdiv(px,psub(qx,px));
12490 x = pmadd(p2d_2,x,p2d_1);
// Rebuild 2^n in the double exponent field. POWER8 has 64-bit vector
// add/shift; older targets emulate it with 32-bit lanes and a permute that
// moves each biased exponent into the high word of its 64-bit lane (the
// endian-specific #if lines around the two perm variants are elided here).
12491 emm0 = ConvertToPacket2l(fx);
12492 #ifdef __POWER8_VECTOR__
12493 emm0 = vec_add(emm0, p2l_1023);
12494 emm0 = vec_sl(emm0, p2ul_52);
12496 _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
12497 _EIGEN_DECLARE_CONST_Packet4i(20, 20);
12498 Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
12499 emm04i = vec_add(emm04i, p4i_1023);
12500 emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
12501 static const Packet16uc perm = {
12502 0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
12503 0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
12505 emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
12507 emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
// NaN lanes of the input are passed through unchanged.
12510 Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
12511 return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
12518 // end #include "src/Core/arch/AltiVec/MathFunctions.h"
12519 // #include "src/Core/arch/AltiVec/Complex.h"
12520 #ifndef EIGEN_COMPLEX32_ALTIVEC_H
12521 #define EIGEN_COMPLEX32_ALTIVEC_H
12523 namespace internal {
// Sign-flip masks used to conjugate / cross-multiply complex packets:
// p4ui_CONJ_XOR flips the sign bit of the imaginary float lanes; the two
// Packet2ul variants flip one 64-bit half, with operand order swapped for
// big- vs little-endian.
12524 static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);
12526 #if defined(_BIG_ENDIAN)
12527 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);
12528 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);
12530 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);
12531 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);
// Packet2cf: two complex<float> stored as one Packet4f (re0,im0,re1,im1).
// The struct header line is elided in this dump.
12536 EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
12537 EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
// Trait registration: complex<float> vectorizes as Packet2cf (enum body
// largely elided here).
12540 template<> struct packet_traits<std::complex<float> > : default_packet_traits
12542 typedef Packet2cf type;
12543 typedef Packet2cf half;
12546 AlignedOnScalar = 1,
12564 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
// Broadcast one complex<float>: load its (re,im) pair — aligned if possible —
// then duplicate the low 64 bits into both halves of the register.
12565 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
12568 if((std::ptrdiff_t(&from) % 16) == 0)
12569 res.v = pload<Packet4f>((const float *)&from);
12571 res.v = ploadu<Packet4f>((const float *)&from);
12572 res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
// Loads/stores for Packet2cf simply delegate to the Packet4f versions on the
// reinterpreted float storage of complex<float>.
12575 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { return Packet2cf(pload<Packet4f>((const float *) from)); }
12576 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }
12577 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
12578 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
12579 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
// Strided gather/scatter through an aligned 2-element scratch buffer, same
// pattern as the real-valued versions above.
12580 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
12582 std::complex<float> EIGEN_ALIGN16 af[2];
12583 af[0] = from[0*stride];
12584 af[1] = from[1*stride];
12585 return pload<Packet2cf>(af);
12587 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
12589 std::complex<float> EIGEN_ALIGN16 af[2];
12590 pstore<std::complex<float> >((std::complex<float> *) af, from);
12591 to[0*stride] = af[0];
12592 to[1*stride] = af[1];
// Complex add/sub/negate act lane-wise on the underlying float vector;
// conjugation flips the imaginary-lane sign bits via p4ui_CONJ_XOR.
12594 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
12595 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
12596 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
12597 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
// Complex multiply: v1 = (re,re) lanes of a, v2 = (im,im) lanes of a;
// v1*b gives re_a*b, v2*b (sign-flipped on im lanes, then re/im swapped)
// gives the cross terms; their sum is the complex product.
12598 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
12601 v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
12602 v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
12603 v1 = vec_madd(v1, b.v, p4f_ZERO);
12604 v2 = vec_madd(v2, b.v, p4f_ZERO);
12605 v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
12606 v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
12607 return Packet2cf(padd<Packet4f>(v1, v2));
// Bitwise ops forward to the Packet4f implementations on the raw storage.
12609 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
12610 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
12611 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
12612 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); }
12613 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_PPC_PREFETCH(addr); }
// pfirst: spill both complex lanes to an aligned buffer, return lane 0.
12614 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
12616 std::complex<float> EIGEN_ALIGN16 res[2];
12617 pstore((float *)&res, a.v);
// Swap the two complex elements (each 64-bit pair stays intact).
12620 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
12623 rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
12624 return Packet2cf(rev_a);
// Horizontal sum: shift by 8 bytes to swap the complex halves, add, take
// element 0.
12626 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
12629 b = vec_sld(a.v, a.v, 8);
12630 b = padd<Packet4f>(a.v, b);
12631 return pfirst<Packet2cf>(Packet2cf(b));
// Reduce two complex packets at once. The duplicated vec_sld pairs are
// presumably the big-/little-endian operand orders; guard lines elided here.
12633 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
12637 b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
12638 b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
12640 b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
12641 b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
12643 b2 = vec_sld(b2, b2, 8);
12644 b2 = padd<Packet4f>(b1, b2);
12645 return Packet2cf(b2);
// Horizontal complex product: swap halves and use the full complex pmul.
12647 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
12651 b = vec_sld(a.v, a.v, 8);
12652 prod = pmul<Packet2cf>(a, Packet2cf(b));
12653 return pfirst<Packet2cf>(prod);
// Byte-shift alignment helper (endian-specific branches; guards elided).
12655 template<int Offset>
12656 struct palign_impl<Offset,Packet2cf>
12658 static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
12663 first.v = vec_sld(first.v, second.v, 8);
12665 first.v = vec_sld(second.v, first.v, 8);
// conj_helper<…,false,true>: multiply a by conj(b) (the template flags mark
// which operand is conjugated).
12670 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
12672 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
12673 { return padd(pmul(x,y),c); }
12674 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
12676 return internal::pmul(a, pconj(b));
// conj_helper<…,true,false>: conj(a) * b.
12679 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
12681 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
12682 { return padd(pmul(x,y),c); }
12683 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
12685 return internal::pmul(pconj(a), b);
// conj_helper<…,true,true>: conj(a*b) == conj(a)*conj(b).
12688 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
12690 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
12691 { return padd(pmul(x,y),c); }
12692 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
12694 return pconj(internal::pmul(a, b));
// Mixed real*complex helpers: the real packet scales both re/im lanes.
12697 template<> struct conj_helper<Packet4f, Packet2cf, false,false>
12699 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
12700 { return padd(c, pmul(x,y)); }
12701 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
12702 { return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
12704 template<> struct conj_helper<Packet2cf, Packet4f, false,false>
12706 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
12707 { return padd(c, pmul(x,y)); }
12708 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
12709 { return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
// Complex division: a/b = (a*conj(b)) / |b|^2. The denominator is built by
// summing b.v*b.v with its re/im-swapped copy so each lane holds re^2+im^2.
12711 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
12713 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
12714 Packet4f s = pmul<Packet4f>(b.v, b.v);
12715 return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
// Swap re and im within each complex element.
12717 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
12719 return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
// 2x2 transpose of complex packets via 64-bit hi/lo interleaves.
12721 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
12723 Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
12724 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
12725 kernel.packet[0].v = tmp;
// Blend reuses the 64-bit Packet2d pblend, since each complex<float> element
// occupies exactly one 64-bit lane.
12728 template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
12730 result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
// Packet1cd: a single complex<double> stored as one Packet2d (re, im).
// The struct header line is elided in this dump.
12737 EIGEN_STRONG_INLINE Packet1cd() {}
12738 EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
// Trait registration (enum body largely elided). AlignedOnScalar is 0
// because one packet equals one scalar here.
12741 template<> struct packet_traits<std::complex<double> > : default_packet_traits
12743 typedef Packet1cd type;
12744 typedef Packet1cd half;
12747 AlignedOnScalar = 0,
12762 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
// Loads/stores delegate to Packet2d on the reinterpreted double storage.
12763 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
12764 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
12765 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
12766 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
12767 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
12768 { return ploadu<Packet1cd>(&from); }
// Gather/scatter: note the scratch buffer has 2 entries but only af[0] can
// land in the single-element packet; af[1] is written but unused by pload.
12769 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
12771 std::complex<double> EIGEN_ALIGN16 af[2];
12772 af[0] = from[0*stride];
12773 af[1] = from[1*stride];
12774 return pload<Packet1cd>(af);
12776 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
12778 std::complex<double> EIGEN_ALIGN16 af[2];
12779 pstore<std::complex<double> >(af, from);
12780 to[0*stride] = af[0];
12781 to[1*stride] = af[1];
// Lane-wise arithmetic on the (re, im) pair; pconj flips the sign bit of the
// imaginary half via the endian-adjusted CONJ_XOR2 mask.
12783 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
12784 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
12785 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
12786 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
// Complex multiply for one complex<double>: broadcast re(a) and im(a),
// multiply each by b, swap halves of the im-product, flip one sign
// (CONJ_XOR1), and add — the standard (re*b) + i*(im*b) decomposition.
12787 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
12789 Packet2d a_re, a_im, v1, v2;
12790 a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
12791 a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
12792 v1 = vec_madd(a_re, b.v, p2d_ZERO);
12793 v2 = vec_madd(a_im, b.v, p2d_ZERO);
12794 v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
12795 v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
12796 return Packet1cd(padd<Packet2d>(v1, v2));
// Bitwise ops forward to the Packet2d implementations.
12798 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
12799 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
12800 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
12801 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); }
12802 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
12803 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_PPC_PREFETCH(addr); }
12804 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
12806 std::complex<double> EIGEN_ALIGN16 res[2];
12807 pstore<std::complex<double> >(res, a);
// With a single element per packet, reverse and all reductions are trivial.
12810 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
12811 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
12812 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
12813 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
// palign for a one-element packet is a no-op (nothing to shift across).
12814 template<int Offset>
12815 struct palign_impl<Offset,Packet1cd>
12817 static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
// conj_helper specializations, mirroring the Packet2cf ones: the two bool
// flags mark which operand is conjugated before multiplying.
12821 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
12823 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
12824 { return padd(pmul(x,y),c); }
12825 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
12827 return internal::pmul(a, pconj(b));
12830 template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
12832 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
12833 { return padd(pmul(x,y),c); }
12834 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
12836 return internal::pmul(pconj(a), b);
12839 template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
12841 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
12842 { return padd(pmul(x,y),c); }
12843 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
12845 return pconj(internal::pmul(a, b));
// Mixed real*complex helpers for double precision.
12848 template<> struct conj_helper<Packet2d, Packet1cd, false,false>
12850 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
12851 { return padd(c, pmul(x,y)); }
12852 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
12853 { return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
12855 template<> struct conj_helper<Packet1cd, Packet2d, false,false>
12857 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
12858 { return padd(c, pmul(x,y)); }
12859 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
12860 { return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
// Complex division a/b = (a*conj(b)) / |b|^2, with |b|^2 broadcast into both
// lanes by adding b.v*b.v to its lane-swapped copy.
12862 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
12864 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
12865 Packet2d s = pmul<Packet2d>(b.v, b.v);
12866 return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
// Swap re and im of the single complex element.
12868 EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
12870 return Packet1cd(preverse(Packet2d(x.v)));
// 2x2 transpose over two Packet1cd via 64-bit hi/lo interleaves.
12872 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
12874 Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
12875 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
12876 kernel.packet[0].v = tmp;
12882 // end #include "src/Core/arch/AltiVec/Complex.h"
12883 #elif defined EIGEN_VECTORIZE_NEON
12884 // #include "src/Core/arch/NEON/PacketMath.h"
12885 #ifndef EIGEN_PACKET_MATH_NEON_H
12886 #define EIGEN_PACKET_MATH_NEON_H
12888 namespace internal {
// Tunables and capability flags for the NEON backend.
12889 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
12890 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
12892 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
12893 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
12895 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
12896 #define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
// AArch64 has 32 NEON registers; 32-bit ARM has 16.
12898 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
12899 #if EIGEN_ARCH_ARM64
12900 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
12902 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
// Packet aliases over the ARM NEON vector types (2/4-lane float and int32).
12905 typedef float32x2_t Packet2f;
12906 typedef float32x4_t Packet4f;
12907 typedef int32x4_t Packet4i;
12908 typedef int32x2_t Packet2i;
12909 typedef uint32x4_t Packet4ui;
// Constant-declaration helpers; the _FROM_INT form builds a float vector
// from a raw bit pattern via vreinterpretq.
12910 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
12911 const Packet4f p4f_##NAME = pset1<Packet4f>(X)
12912 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
12913 const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
12914 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
12915 const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// Prefetch: best available mechanism per target — prfm asm on AArch64,
// __builtin_prefetch, __pld, pld asm on ARM32, or nothing.
12916 #if EIGEN_ARCH_ARM64
12917 #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
12918 #elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
12919 #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
12920 #elif defined __pld
12921 #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
12922 #elif EIGEN_ARCH_ARM32
12923 #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
12925 #define EIGEN_ARM_PREFETCH(ADDR)
12927 template<> struct packet_traits<float> : default_packet_traits
12929 typedef Packet4f type;
12930 typedef Packet4f half;
12933 AlignedOnScalar = 1,
12944 template<> struct packet_traits<int32_t> : default_packet_traits
12946 typedef Packet4i type;
12947 typedef Packet4i half;
12950 AlignedOnScalar = 1,
// Workaround for gcc <= 4.4 (excluding clang): its arm_neon.h intrinsics take
// float32_t* rather than float*, so provide float* overloads that cast and
// forward to the global intrinsics.
12955 #if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
12956 EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
12957 EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
12958 EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
12959 EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
12960 EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
// Reverse mapping packet -> scalar type, lane count and required alignment.
12962 template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
12963 template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
// pset1: broadcast one scalar to all four lanes.
12964 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
12965 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); }
// plset: linearly spaced packet {a, a+1, a+2, a+3}.
12966 template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
12968 const float f[] = {0, 1, 2, 3};
12969 Packet4f countdown = vld1q_f32(f);
12970 return vaddq_f32(pset1<Packet4f>(a), countdown);
12972 template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a)
12974 const int32_t i[] = {0, 1, 2, 3};
12975 Packet4i countdown = vld1q_s32(i);
12976 return vaddq_s32(pset1<Packet4i>(a), countdown);
// Element-wise arithmetic, each mapping directly to one NEON intrinsic.
// pconj is the identity for real packets.
12978 template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
12979 template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
12980 template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
12981 template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
12982 template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }
12983 template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }
12984 template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
12985 template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
12986 template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
12987 template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
// Packet division. AArch64 has a hardware vector divide; 32-bit NEON does
// not, so approximate: vrecpeq_f32 gives a reciprocal estimate and
// vrecpsq_f32 computes the Newton-Raphson correction factor (2 - b*inv),
// refining the estimate by one step before multiplying by a.
12988 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
12990 #if EIGEN_ARCH_ARM64
12991 return vdivq_f32(a,b);
12993 Packet4f inv, restep, div;
12994 inv = vrecpeq_f32(b);
12995 restep = vrecpsq_f32(b, inv);
12996 inv = vmulq_f32(restep, inv);
12997 div = vmulq_f32(a, inv);
// NOTE(review): the `return div;` / #endif tail of the ARM32 branch is not
// visible in this excerpt (stripped lines).
// NEON has no integer vector divide: assert in debug builds, return zeros.
13001 template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& , const Packet4i& )
13002 { eigen_assert(false && "packet integer division are not supported by NEON");
13003 return pset1<Packet4i>(0);
// pmadd(a,b,c) = a*b + c. Use fused vfmaq_f32 when FMA is available, except
// for clang on 32-bit ARM which gets an inline-asm vmla workaround below.
13005 #if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
13006 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
13008 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
// Clang/ARM32 path: emit vmla.f32 directly. NOTE(review): the asm statement's
// operand/constraint lines are not visible in this excerpt.
13009 #if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
13012 "vmla.f32 %q[r], %q[a], %q[b]"
13019 return vmlaq_f32(c,a,b);
// Integer multiply-accumulate and element-wise min/max.
13023 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); }
13024 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
13025 template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
13026 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
13027 template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
// Bitwise logic. Float packets have no direct bitwise intrinsics, so they
// round-trip through u32 via vreinterpretq (a bit-cast, no conversion).
13028 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
13030 return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
13032 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
13033 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
13035 return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
13037 template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
13038 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
13040 return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
13042 template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
// pandnot(a,b) = a & ~b, matching vbic's operand order (clear bits of b in a).
13043 template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
13045 return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
13047 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
// Loads and stores. Aligned and unaligned variants use the same vld1q/vst1q
// intrinsics (NEON vld1/vst1 do not require extra alignment); they differ
// only in the debug-counting macro.
13048 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
13049 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
13050 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
13051 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
// ploaddup: load 2 scalars and duplicate each -> {a0,a0,a1,a1}.
13052 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
13054 float32x2_t lo, hi;
13055 lo = vld1_dup_f32(from);
13056 hi = vld1_dup_f32(from+1);
13057 return vcombine_f32(lo, hi);
// NOTE(review): the `int32x2_t lo, hi;` declaration line of the integer
// variant is not visible in this excerpt (stripped line).
13059 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from)
13062 lo = vld1_dup_s32(from);
13063 hi = vld1_dup_s32(from+1);
13064 return vcombine_s32(lo, hi);
13066 template<> EIGEN_STRONG_INLINE void pstore<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
13067 template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
13068 template<> EIGEN_STRONG_INLINE void pstoreu<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
13069 template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
// Strided gather: read 4 scalars spaced `stride` elements apart and insert
// them lane by lane with vsetq_lane.
13070 template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
13072 Packet4f res = pset1<Packet4f>(0.f);
13073 res = vsetq_lane_f32(from[0*stride], res, 0);
13074 res = vsetq_lane_f32(from[1*stride], res, 1);
13075 res = vsetq_lane_f32(from[2*stride], res, 2);
13076 res = vsetq_lane_f32(from[3*stride], res, 3);
13079 template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride)
13081 Packet4i res = pset1<Packet4i>(0);
13082 res = vsetq_lane_s32(from[0*stride], res, 0);
13083 res = vsetq_lane_s32(from[1*stride], res, 1);
13084 res = vsetq_lane_s32(from[2*stride], res, 2);
13085 res = vsetq_lane_s32(from[3*stride], res, 3);
// Strided scatter: extract each lane with vgetq_lane and store it at the
// corresponding strided position. NOTE(review): the `return res;` lines of
// the gathers above are not visible in this excerpt (stripped lines).
13088 template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
13090 to[stride*0] = vgetq_lane_f32(from, 0);
13091 to[stride*1] = vgetq_lane_f32(from, 1);
13092 to[stride*2] = vgetq_lane_f32(from, 2);
13093 to[stride*3] = vgetq_lane_f32(from, 3);
13095 template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride)
13097 to[stride*0] = vgetq_lane_s32(from, 0);
13098 to[stride*1] = vgetq_lane_s32(from, 1);
13099 to[stride*2] = vgetq_lane_s32(from, 2);
13100 to[stride*3] = vgetq_lane_s32(from, 3);
// Prefetch hints (no-ops on platforms without a prefetch form, see macro).
13102 template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); }
13103 template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); }
// pfirst: spill the packet to an aligned stack buffer and return lane 0.
13104 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
13105 template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
// preverse: vrev64q swaps elements within each 64-bit half, then the halves
// themselves are swapped via vcombine -> full {a3,a2,a1,a0}.
// NOTE(review): the declaration of `a_r64` is not visible in this excerpt.
13106 template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
13107 float32x2_t a_lo, a_hi;
13109 a_r64 = vrev64q_f32(a);
13110 a_lo = vget_low_f32(a_r64);
13111 a_hi = vget_high_f32(a_r64);
13112 return vcombine_f32(a_hi, a_lo);
13114 template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
13115 int32x2_t a_lo, a_hi;
13117 a_r64 = vrev64q_s32(a);
13118 a_lo = vget_low_s32(a_r64);
13119 a_hi = vget_high_s32(a_r64);
13120 return vcombine_s32(a_hi, a_lo);
// Element-wise absolute value.
13122 template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
13123 template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
// Horizontal sum of the 4 lanes via two pairwise adds (vpadd).
13124 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
13126 float32x2_t a_lo, a_hi, sum;
13127 a_lo = vget_low_f32(a);
13128 a_hi = vget_high_f32(a);
13129 sum = vpadd_f32(a_lo, a_hi);
13130 sum = vpadd_f32(sum, sum);
13131 return vget_lane_f32(sum, 0);
// preduxp: sums 4 packets at once; vzip pairs build a 4x4 transpose so each
// resulting lane i is the horizontal sum of vecs[i].
// NOTE(review): the `return sum;` lines below are not visible (stripped).
13133 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
13135 float32x4x2_t vtrn1, vtrn2, res1, res2;
13136 Packet4f sum1, sum2, sum;
13137 vtrn1 = vzipq_f32(vecs[0], vecs[2]);
13138 vtrn2 = vzipq_f32(vecs[1], vecs[3]);
13139 res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
13140 res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
13141 sum1 = vaddq_f32(res1.val[0], res1.val[1]);
13142 sum2 = vaddq_f32(res2.val[0], res2.val[1]);
13143 sum = vaddq_f32(sum1, sum2);
13146 template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a)
13148 int32x2_t a_lo, a_hi, sum;
13149 a_lo = vget_low_s32(a);
13150 a_hi = vget_high_s32(a);
13151 sum = vpadd_s32(a_lo, a_hi);
13152 sum = vpadd_s32(sum, sum);
13153 return vget_lane_s32(sum, 0);
13155 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
13157 int32x4x2_t vtrn1, vtrn2, res1, res2;
13158 Packet4i sum1, sum2, sum;
13159 vtrn1 = vzipq_s32(vecs[0], vecs[2]);
13160 vtrn2 = vzipq_s32(vecs[1], vecs[3]);
13161 res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
13162 res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
13163 sum1 = vaddq_s32(res1.val[0], res1.val[1]);
13164 sum2 = vaddq_s32(res2.val[0], res2.val[1]);
13165 sum = vaddq_s32(sum1, sum2);
// Horizontal product: multiply the halves lane-wise, then multiply the pair
// with its vrev64-swapped self so lane 0 holds the product of all 4 lanes.
13168 template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
13170 float32x2_t a_lo, a_hi, prod;
13171 a_lo = vget_low_f32(a);
13172 a_hi = vget_high_f32(a);
13173 prod = vmul_f32(a_lo, a_hi);
13174 prod = vmul_f32(prod, vrev64_f32(prod));
13175 return vget_lane_f32(prod, 0);
13177 template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)
13179 int32x2_t a_lo, a_hi, prod;
13180 a_lo = vget_low_s32(a);
13181 a_hi = vget_high_s32(a);
13182 prod = vmul_s32(a_lo, a_hi);
13183 prod = vmul_s32(prod, vrev64_s32(prod));
13184 return vget_lane_s32(prod, 0);
// Horizontal min/max via two pairwise reductions (vpmin/vpmax).
13186 template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
13188 float32x2_t a_lo, a_hi, min;
13189 a_lo = vget_low_f32(a);
13190 a_hi = vget_high_f32(a);
13191 min = vpmin_f32(a_lo, a_hi);
13192 min = vpmin_f32(min, min);
13193 return vget_lane_f32(min, 0);
13195 template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)
13197 int32x2_t a_lo, a_hi, min;
13198 a_lo = vget_low_s32(a);
13199 a_hi = vget_high_s32(a);
13200 min = vpmin_s32(a_lo, a_hi);
13201 min = vpmin_s32(min, min);
13202 return vget_lane_s32(min, 0);
13204 template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
13206 float32x2_t a_lo, a_hi, max;
13207 a_lo = vget_low_f32(a);
13208 a_hi = vget_high_f32(a);
13209 max = vpmax_f32(a_lo, a_hi);
13210 max = vpmax_f32(max, max);
13211 return vget_lane_f32(max, 0);
13213 template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
13215 int32x2_t a_lo, a_hi, max;
13216 a_lo = vget_low_s32(a);
13217 a_hi = vget_high_s32(a);
13218 max = vpmax_s32(a_lo, a_hi);
13219 max = vpmax_s32(max, max);
13220 return vget_lane_s32(max, 0);
// PALIGN_NEON generates palign_impl<Offset,Type> specializations: shift the
// concatenation of (first,second) left by Offset lanes using vext. No
// comments may be inserted inside the macro (backslash continuations).
// NOTE(review): the macro's closing lines and the Offset==0 guard are not
// visible in this excerpt (stripped lines); code kept verbatim.
13222 #define PALIGN_NEON(Offset,Type,Command) \
13224 struct palign_impl<Offset,Type>\
13226 EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
13229 first = Command(first, second, Offset);\
13233 PALIGN_NEON(0,Packet4f,vextq_f32)
13234 PALIGN_NEON(1,Packet4f,vextq_f32)
13235 PALIGN_NEON(2,Packet4f,vextq_f32)
13236 PALIGN_NEON(3,Packet4f,vextq_f32)
13237 PALIGN_NEON(0,Packet4i,vextq_s32)
13238 PALIGN_NEON(1,Packet4i,vextq_s32)
13239 PALIGN_NEON(2,Packet4i,vextq_s32)
13240 PALIGN_NEON(3,Packet4i,vextq_s32)
// 4x4 in-register transpose: two vzip passes interleave rows pairwise, then
// low/high halves are recombined into the transposed rows.
13242 EIGEN_DEVICE_FUNC inline void
13243 ptranspose(PacketBlock<Packet4f,4>& kernel) {
13244 float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
13245 float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
13246 kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
13247 kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
13248 kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
13249 kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
// Same algorithm for the integer packet block.
13251 EIGEN_DEVICE_FUNC inline void
13252 ptranspose(PacketBlock<Packet4i,4>& kernel) {
13253 int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
13254 int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
13255 kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
13256 kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
13257 kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
13258 kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
// Double-precision (Packet2d) support, AArch64 only. Old Apple clang
// (< 6.1) had a broken float64x2_t, hence the version gate below.
13260 #ifdef __apple_build_version__
13261 #define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
13263 #define EIGEN_APPLE_DOUBLE_NEON_BUG 0
13265 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
// Bit-cast helpers between u64 and f64 vectors; presumably provided because
// some toolchains lacked these vreinterpret intrinsics -- TODO confirm.
13266 template <typename T>
13267 uint64x2_t vreinterpretq_u64_f64(T a)
13269 return (uint64x2_t) a;
13271 template <typename T>
13272 float64x2_t vreinterpretq_f64_u64(T a)
13274 return (float64x2_t) a;
13276 typedef float64x2_t Packet2d;
13277 typedef float64x1_t Packet1d;
// Traits: double is handled 2-at-a-time. NOTE(review): enum body only
// partially visible in this excerpt.
13278 template<> struct packet_traits<double> : default_packet_traits
13280 typedef Packet2d type;
13281 typedef Packet2d half;
13284 AlignedOnScalar = 1,
13295 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
// Broadcast and linear-spaced constructors ({a, a+1}).
13296 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
13297 template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
13299 const double countdown_raw[] = {0.0,1.0};
13300 const Packet2d countdown = vld1q_f64(countdown_raw);
13301 return vaddq_f64(pset1<Packet2d>(a), countdown);
// Packet2d element-wise arithmetic; AArch64 has a native f64 divide.
13303 template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
13304 template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); }
13305 template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); }
13306 template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
13307 template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); }
13308 template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }
// Fused multiply-add when available, plain multiply-accumulate otherwise.
13309 #ifdef __ARM_FEATURE_FMA
13310 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); }
13312 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vmlaq_f64(c,a,b); }
13314 template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }
13315 template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
// Bitwise logic via u64 bit-casts (no direct f64 bitwise intrinsics).
13316 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
13318 return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
13320 template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b)
13322 return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
13324 template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b)
13326 return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
13328 template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b)
13330 return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
// Loads/stores; ploaddup broadcasts a single double to both lanes.
13332 template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); }
13333 template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); }
13334 template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
13336 return vld1q_dup_f64(from);
13338 template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); }
13339 template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); }
// Strided gather/scatter for doubles (2 lanes).
// NOTE(review): the gather's `return res;` line is not visible (stripped).
13340 template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
13342 Packet2d res = pset1<Packet2d>(0.0);
13343 res = vsetq_lane_f64(from[0*stride], res, 0);
13344 res = vsetq_lane_f64(from[1*stride], res, 1);
13347 template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
13349 to[stride*0] = vgetq_lane_f64(from, 0);
13350 to[stride*1] = vgetq_lane_f64(from, 1);
13352 template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }
13353 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }
13354 template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
13355 template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
// Horizontal sum. Apple clang accepts `+` on float64x1_t but lacks
// vget_lane_f64 usable here, hence the two variants.
13356 #if EIGEN_COMP_CLANG && defined(__apple_build_version__)
13357 template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; }
13359 template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }
// preduxp: vzip1/vzip2 build a 2x2 transpose of the two inputs; adding the
// two transposed rows yields {sum(vecs[0]), sum(vecs[1])}.
13361 template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
13363 float64x2_t trn1, trn2;
13364 trn1 = vzip1q_f64(vecs[0], vecs[1]);
13365 trn2 = vzip2q_f64(vecs[0], vecs[1]);
13366 return vaddq_f64(trn1, trn2);
// Horizontal product (Apple-clang vs generic lane-access, as for predux).
13368 #if EIGEN_COMP_CLANG && defined(__apple_build_version__)
13369 template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; }
13371 template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }
// Horizontal min/max via the pairwise f64 reductions.
13373 template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); }
13374 template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); }
// Redefinition of PALIGN_NEON for the double packets (vextq_f64). No
// comments may be inserted inside the macro (backslash continuations);
// NOTE(review): the macro tail lines are not visible in this excerpt.
13375 #define PALIGN_NEON(Offset,Type,Command) \
13377 struct palign_impl<Offset,Type>\
13379 EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
13382 first = Command(first, second, Offset);\
13385 PALIGN_NEON(0,Packet2d,vextq_f64)
13386 PALIGN_NEON(1,Packet2d,vextq_f64)
// 2x2 transpose via vzip1/vzip2.
13388 EIGEN_DEVICE_FUNC inline void
13389 ptranspose(PacketBlock<Packet2d,2>& kernel) {
13390 float64x2_t trn1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);
13391 float64x2_t trn2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);
13392 kernel.packet[0] = trn1;
13393 kernel.packet[1] = trn2;
13399 // end #include "src/Core/arch/NEON/PacketMath.h"
13400 // #include "src/Core/arch/NEON/MathFunctions.h"
13401 #ifndef EIGEN_MATH_FUNCTIONS_NEON_H
13402 #define EIGEN_MATH_FUNCTIONS_NEON_H
13404 namespace internal {
// Vectorized expf, Cephes-style: clamp the argument, split x = m*ln(2) + r,
// evaluate a degree-5 polynomial for e^r, then scale by 2^m built directly
// in the float exponent bits. NOTE(review): the declarations of x, fx, tmp,
// mm and the opening/closing braces are not visible in this excerpt.
13405 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
13406 Packet4f pexp<Packet4f>(const Packet4f& _x)
13410 _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
13411 _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
13412 _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
// Overflow/underflow clamp bounds for single-precision exp.
13413 _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
13414 _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
// log2(e), and ln(2) split in two parts (C1+C2) for extra-precise reduction.
13415 _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
13416 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
13417 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
// Minimax polynomial coefficients for e^r on the reduced interval.
13418 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
13419 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
13420 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
13421 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
13422 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
13423 _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
13424 x = vminq_f32(x, p4f_exp_hi);
13425 x = vmaxq_f32(x, p4f_exp_lo);
// fx = round-to-nearest(x * log2(e)): add 0.5, truncate, then subtract 1
// where truncation rounded up (mask keeps the 1.0f bits per lane).
13426 fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF);
13427 tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
13428 Packet4ui mask = vcgtq_f32(tmp, fx);
13429 mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1));
13430 fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
// Range reduction: x -= fx*ln(2), using the two-part ln(2) for accuracy.
13431 tmp = vmulq_f32(fx, p4f_cephes_exp_C1);
13432 Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2);
13433 x = vsubq_f32(x, tmp);
13434 x = vsubq_f32(x, z);
// Horner evaluation of the degree-5 polynomial, then e^r = 1 + x + x^2*P(x).
13435 Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x);
13436 z = vmulq_f32(x, x);
13437 y = vaddq_f32(y, p4f_cephes_exp_p1);
13438 y = vmulq_f32(y, x);
13439 y = vaddq_f32(y, p4f_cephes_exp_p2);
13440 y = vmulq_f32(y, x);
13441 y = vaddq_f32(y, p4f_cephes_exp_p3);
13442 y = vmulq_f32(y, x);
13443 y = vaddq_f32(y, p4f_cephes_exp_p4);
13444 y = vmulq_f32(y, x);
13445 y = vaddq_f32(y, p4f_cephes_exp_p5);
13446 y = vmulq_f32(y, z);
13447 y = vaddq_f32(y, x);
13448 y = vaddq_f32(y, p4f_1);
// Build 2^fx by placing (fx + 127) into the float exponent field (bit 23+).
13450 mm = vcvtq_s32_f32(fx);
13451 mm = vaddq_s32(mm, p4i_0x7f);
13452 mm = vshlq_n_s32(mm, 23);
13453 Packet4f pow2n = vreinterpretq_f32_s32(mm);
13454 y = vmulq_f32(y, pow2n);
13461 // #include "src/Core/arch/NEON/Complex.h"
13462 #ifndef EIGEN_COMPLEX_NEON_H
13463 #define EIGEN_COMPLEX_NEON_H
13465 namespace internal {
// Sign-flip masks for complex conjugation: the 0x80000000 words target the
// imaginary lanes of (re,im) pairs. Clang gets a vector literal; other
// compilers load from a static table.
13466 inline uint32x4_t p4ui_CONJ_XOR() {
13467 #if EIGEN_COMP_CLANG
13468 uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
13471 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
13472 return vld1q_u32( conj_XOR_DATA );
// 64-bit (single complex) variant of the same mask.
13475 inline uint32x2_t p2ui_CONJ_XOR() {
13476 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
13477 return vld1_u32( conj_XOR_DATA );
// Packet2cf wraps a Packet4f holding two complex<float> as interleaved
// (re0,im0,re1,im1). NOTE(review): the `struct Packet2cf` header and the
// `Packet4f v;` member line are not visible in this excerpt.
13481 EIGEN_STRONG_INLINE Packet2cf() {}
13482 EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
// Traits: complex<float> vectorizes 2-at-a-time (enum body partially
// stripped from this excerpt).
13485 template<> struct packet_traits<std::complex<float> > : default_packet_traits
13487 typedef Packet2cf type;
13488 typedef Packet2cf half;
13491 AlignedOnScalar = 1,
13506 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
// pset1: load one complex as a 2-float pair and duplicate it in both halves.
// NOTE(review): the `float32x2_t r64;` declaration line is not visible here.
13507 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
13510 r64 = vld1_f32((float *)&from);
13511 return Packet2cf(vcombine_f32(r64, r64));
// Complex add/sub/negate act component-wise on the underlying float packet.
13513 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
13514 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
13515 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
// Conjugate: XOR the sign bit of the imaginary lanes only.
13516 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
13518 Packet4ui b = vreinterpretq_u32_f32(a.v);
13519 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
// Complex multiply: v1 = {re(a)} broadcast per pair, v2 = {im(a)} broadcast;
// v1*b gives re*re / re*im terms, v2*b (sign-flipped on im lanes, then pair-
// swapped with vrev64) gives the -im*im / im*re terms; sum is (a*b).
// NOTE(review): the `Packet4f v1, v2;` declaration is not visible here.
13521 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
13524 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
13525 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
13526 v1 = vmulq_f32(v1, b.v);
13527 v2 = vmulq_f32(v2, b.v);
13528 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
13529 v2 = vrev64q_f32(v2);
13530 return Packet2cf(vaddq_f32(v1, v2));
// Bitwise logic on the raw bits of the complex packet (u32 bit-casts).
13532 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
13534 return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
13536 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
13538 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
13540 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
13542 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
13544 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
13546 return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
// Loads/stores reuse the float packet ops on the reinterpreted pointer.
13548 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
13549 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
13550 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
13551 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
13552 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
// Strided gather: interleave real/imag of two strided complex values into
// the four float lanes.
13553 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
13555 Packet4f res = pset1<Packet4f>(0.f);
13556 res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
13557 res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
13558 res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
13559 res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
13560 return Packet2cf(res);
// Strided scatter: rebuild each complex from its two lanes.
13562 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
13564 to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
13565 to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
13567 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
// pfirst: spill to an aligned buffer of complex and return element 0.
// NOTE(review): the `return x[0];` line is not visible in this excerpt.
13568 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
13570 std::complex<float> EIGEN_ALIGN16 x[2];
13571 vst1q_f32((float *)x, a.v);
// preverse: swap the two complex values (the 64-bit halves); each (re,im)
// pair stays intact. NOTE(review): the `Packet4f a_r128;` declaration is
// not visible in this excerpt.
13574 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
13576 float32x2_t a_lo, a_hi;
13578 a_lo = vget_low_f32(a.v);
13579 a_hi = vget_high_f32(a.v);
13580 a_r128 = vcombine_f32(a_hi, a_lo);
13581 return Packet2cf(a_r128);
// pcplxflip: swap re<->im inside each pair (vrev64 within 64-bit lanes).
13583 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
13585 return Packet2cf(vrev64q_f32(a.v));
// Horizontal sum of the two complex values: add the halves pairwise and
// store the (re,im) result through a pointer cast into the scalar.
// NOTE(review): the `return s;` line is not visible in this excerpt.
13587 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
13589 float32x2_t a1, a2;
13590 std::complex<float> s;
13591 a1 = vget_low_f32(a.v);
13592 a2 = vget_high_f32(a.v);
13593 a2 = vadd_f32(a1, a2);
13594 vst1_f32((float *)&s, a2);
// preduxp for two complex packets: regroup halves so matching elements line
// up, then one vector add yields {sum(vecs[0]), sum(vecs[1])}.
13597 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
13599 Packet4f sum1, sum2, sum;
13600 sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
13601 sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
13602 sum = vaddq_f32(sum1, sum2);
13603 return Packet2cf(sum);
// Horizontal product: same broadcast/conj-xor/swap scheme as pmul, applied
// on the 64-bit halves. NOTE(review): `return s;` not visible here.
13605 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
13607 float32x2_t a1, a2, v1, v2, prod;
13608 std::complex<float> s;
13609 a1 = vget_low_f32(a.v);
13610 a2 = vget_high_f32(a.v);
13611 v1 = vdup_lane_f32(a1, 0);
13612 v2 = vdup_lane_f32(a1, 1);
13613 v1 = vmul_f32(v1, a2);
13614 v2 = vmul_f32(v2, a2);
13615 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
13616 v2 = vrev64_f32(v2);
13617 prod = vadd_f32(v1, v2);
13618 vst1_f32((float *)&s, prod);
// palign for Packet2cf: shifting by one complex value = vextq by 2 floats.
// NOTE(review): the `if (Offset==1)` guard line is not visible here.
13621 template<int Offset>
13622 struct palign_impl<Offset,Packet2cf>
13624 EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
13628 first.v = vextq_f32(first.v, second.v, 2);
// conj_helper variants used by the product kernels: conjugate the second
// operand, the first, or both before multiplying.
13632 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
13634 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
13635 { return padd(pmul(x,y),c); }
13636 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
13638 return internal::pmul(a, pconj(b));
13641 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
13643 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
13644 { return padd(pmul(x,y),c); }
13645 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
13647 return internal::pmul(pconj(a), b);
13650 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
13652 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
13653 { return padd(pmul(x,y),c); }
13654 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
13656 return pconj(internal::pmul(a, b));
// Complex division a/b = a*conj(b) / |b|^2: the conj_helper computes the
// numerator; s + rev64(s) (with s = b.v*b.v) puts re^2+im^2 in every lane of
// the denominator. NOTE(review): the `Packet4f s, rev_s;` declaration is not
// visible in this excerpt.
13659 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
13661 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
13663 s = vmulq_f32(b.v, b.v);
13664 rev_s = vrev64q_f32(s);
13665 return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
// 2x2 transpose of complex packets: exchange the high half of packet[0]
// with the low half of packet[1], keeping (re,im) pairs intact.
13667 EIGEN_DEVICE_FUNC inline void
13668 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
13669 Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
13670 kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
13671 kernel.packet[1].v = tmp;
// double-precision complex support (Packet1cd) — ARM64 only, and skipped on
// toolchains with the Apple double-NEON bug.
// p2ul_CONJ_XOR: sign-bit mask for the imaginary lane, used by pconj/pmul.
// Clang accepts a vector literal; other compilers build it with vld1q_u64.
// NOTE(review): the struct Packet1cd header line and the packet_traits enum
// body are elided in this listing; AlignedOnScalar = 0 (a 16-byte complex
// cannot be aligned per scalar) is the only visible enum entry.
13673 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
13674 #if EIGEN_COMP_CLANG
13675 static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
13677 const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
13678 static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
13682 EIGEN_STRONG_INLINE Packet1cd() {}
13683 EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
13686 template<> struct packet_traits<std::complex<double> > : default_packet_traits
13688 typedef Packet1cd type;
13689 typedef Packet1cd half;
13692 AlignedOnScalar = 0,
13707 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
// Packet1cd primitives: a single std::complex<double> stored in a Packet2d
// (lane 0 = real, lane 1 = imag). Loads/stores reinterpret the complex as two
// doubles; padd/psub/pnegate are plain element-wise double ops; pconj flips
// the sign bit of the imaginary lane via XOR with p2ul_CONJ_XOR.
13708 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
13709 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
13710 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
13711 { return ploadu<Packet1cd>(&from); }
13712 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
13713 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
13714 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
13715 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
// pmul<Packet1cd>: complex multiply (a.re + i*a.im) * b.
//   v1 = {a.re, a.re} * b = {a.re*b.re, a.re*b.im}
//   v2 = {a.im, a.im} * b = {a.im*b.re, a.im*b.im}
//   XOR with CONJ_XOR negates a.im*b.im, then preverse swaps the pair so that
//   the final add yields {a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re}.
// NOTE(review): the declarations of v1/v2 (Packet2d) are elided lines here.
13716 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
13719 v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
13720 v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
13721 v1 = vmulq_f64(v1, b.v);
13722 v2 = vmulq_f64(v2, b.v);
13723 v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
13724 v2 = preverse<Packet2d>(v2);
13725 return Packet1cd(vaddq_f64(v1, v2));
// Bitwise ops on Packet1cd: performed on the u64 reinterpretation of the
// double pair (AND / OR / XOR / AND-NOT via vbicq). Stores and prefetch
// forward to the double-precision primitives.
13727 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
13729 return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
13731 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
13733 return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
13735 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
13737 return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
13739 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
13741 return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
13743 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
13744 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
13745 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
13746 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
// pgather/pscatter for a 1-element complex packet: stride is irrelevant since
// only element 0 exists; real/imag are moved via lane set/get intrinsics.
// pfirst stores to an aligned stack temporary and reads it back.
// All reductions over a size-1 packet are identities (predux == pfirst,
// preverse == id, preduxp == vecs[0]).
13747 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
13749 Packet2d res = pset1<Packet2d>(0.0);
13750 res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
13751 res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
13752 return Packet1cd(res);
13754 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
13756 to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
13758 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
13760 std::complex<double> EIGEN_ALIGN16 res;
13761 pstore<std::complex<double> >(&res, a);
13764 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
13765 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
13766 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
13767 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
// palign_impl<Offset,Packet1cd>: no-op — a 1-element packet cannot be shifted.
// conj_helper<Packet1cd> trio mirrors the Packet2cf versions:
// <false,true> a*conj(b), <true,false> conj(a)*b, <true,true> conj(a*b).
// NOTE(review): struct braces / empty run() body lines are elided here.
13768 template<int Offset>
13769 struct palign_impl<Offset,Packet1cd>
13771 static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
13775 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
13777 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
13778 { return padd(pmul(x,y),c); }
13779 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
13781 return internal::pmul(a, pconj(b));
13784 template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
13786 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
13787 { return padd(pmul(x,y),c); }
13788 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
13790 return internal::pmul(pconj(a), b);
13793 template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
13795 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
13796 { return padd(pmul(x,y),c); }
13797 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
13799 return pconj(internal::pmul(a, b));
// pdiv<Packet1cd>: a/b = a*conj(b) / |b|^2; s holds {re^2, im^2}, rev_s the
// swapped pair, so s+rev_s replicates |b|^2 in both lanes.
// pcplxflip: swaps real and imaginary parts (preverse of the double pair).
// ptranspose(Packet1cd,2): 2x2 transpose by exchanging 64-bit halves.
13802 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
13804 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
13805 Packet2d s = pmul<Packet2d>(b.v, b.v);
13806 Packet2d rev_s = preverse<Packet2d>(s);
13807 return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
13809 EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
13811 return Packet1cd(preverse(Packet2d(x.v)));
13813 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
13815 Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
13816 kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
13817 kernel.packet[1].v = tmp;
13823 // end #include "src/Core/arch/NEON/Complex.h"
13824 #elif defined EIGEN_VECTORIZE_ZVECTOR
13825 // #include "src/Core/arch/ZVector/PacketMath.h"
13826 #ifndef EIGEN_PACKET_MATH_ZVECTOR_H
13827 #define EIGEN_PACKET_MATH_ZVECTOR_H
13828 #include <stdint.h>
13830 namespace internal {
13831 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
13832 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
13834 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
13835 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
13837 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
13838 #define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
13840 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
13841 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
// ZVector (IBM z/Architecture, z13+) packet types and shared constants.
// All packets are 16-byte __vector types. Note Packet2d is a native vector
// of 2 doubles; Packet4f (defined elsewhere in this header) is emulated as
// a struct of two Packet2d halves (.v4f[0]/.v4f[1]) since z13 lacks a
// 4 x float vector unit.
// NOTE(review): Packet8i as "__vector short int" looks misnamed (8 shorts,
// not ints) — kept as in upstream; confirm against current Eigen.
13843 typedef __vector int Packet4i;
13844 typedef __vector unsigned int Packet4ui;
13845 typedef __vector __bool int Packet4bi;
13846 typedef __vector short int Packet8i;
13847 typedef __vector unsigned char Packet16uc;
13848 typedef __vector double Packet2d;
13849 typedef __vector unsigned long long Packet2ul;
13850 typedef __vector long long Packet2l;
// FAST variants splat small immediates with vec_splat_s32/s64; the plain
// variants go through pset1 (arbitrary values).
13866 #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
13867 Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
13868 #define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
13869 Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
13870 #define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
13871 Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
13872 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
13873 Packet4i p4i_##NAME = pset1<Packet4i>(X)
13874 #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
13875 Packet2d p2d_##NAME = pset1<Packet2d>(X)
13876 #define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
13877 Packet2l p2l_##NAME = pset1<Packet2l>(X)
13878 static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1);
13879 static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
13880 static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
13881 static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
13882 static Packet2d p2d_ONE = { 1.0, 1.0 };
13883 static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
13884 static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
13885 static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
// COUNTDOWN for doubles: byte-shift concatenation of {0.0} and {1.0} halves.
13886 static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
// Byte-permutation masks for vec_perm (big-endian byte indices 0..15).
13887 static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
13888 static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
13889 #define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
13890 #define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
13891 static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
13892 static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
13893 static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
13894 static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
13895 static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
// Debug stream operators: print the lanes of each packet type, comma-separated.
// NOTE(review): each operator's body is partially elided here — the union
// declarations that alias the vector as scalar arrays (vt.i / vt.ui / vt.l /
// vt.ul / vt.d) and the `return s;` lines are missing from this listing.
13896 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
13897 inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
13901 s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
13904 inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
13908 s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
13911 inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
13915 s << vt.l[0] << ", " << vt.l[1];
13918 inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
13922 s << vt.ul[0] << ", " << vt.ul[1] ;
13925 inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
13929 s << vt.d[0] << ", " << vt.d[1];
// vec_splat_packet4f<element>: broadcast float lane `element` (0..3) of an
// emulated Packet4f to all four lanes. Lanes 0/1 live in v4f[0], lanes 2/3 in
// v4f[1]; the chosen half is splatted and copied into both halves.
// NOTE(review): the switch/case scaffolding and `return splat;` are elided
// lines in this listing — only the four case bodies are visible.
13932 template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
13937 splat.v4f[0] = vec_splat(from.v4f[0], 0);
13938 splat.v4f[1] = splat.v4f[0];
13941 splat.v4f[0] = vec_splat(from.v4f[0], 1);
13942 splat.v4f[1] = splat.v4f[0];
13945 splat.v4f[0] = vec_splat(from.v4f[1], 0);
13946 splat.v4f[1] = splat.v4f[0];
13949 splat.v4f[0] = vec_splat(from.v4f[1], 1);
13950 splat.v4f[1] = splat.v4f[0];
// palign_impl: shift the concatenation (first|second) left by Offset elements.
// Packet4i uses vec_sld with 4-byte granularity; Packet4f (emulated as two
// Packet2d halves) shifts the halves manually, 8 bytes per element-pair;
// Packet2d shifts by 8 bytes (one double).
// NOTE(review): the `case N:` labels, braces and `break`s of each switch are
// elided lines — the visible statements correspond to Offsets 1/2/3 in order.
13955 template<int Offset>
13956 struct palign_impl<Offset,Packet4i>
13958 static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
13960 switch (Offset % 4) {
13962 first = vec_sld(first, second, 4); break;
13964 first = vec_sld(first, second, 8); break;
13966 first = vec_sld(first, second, 12); break;
13970 template<int Offset>
13971 struct palign_impl<Offset,Packet4f>
13973 static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
13975 switch (Offset % 4) {
13977 first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
13978 first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
13981 first.v4f[0] = first.v4f[1];
13982 first.v4f[1] = second.v4f[0];
13985 first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
13986 first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
13991 template<int Offset>
13992 struct palign_impl<Offset,Packet2d>
13994 static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
13997 first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
// Aligned loads/stores. int/double packets go through a pointer reinterpret;
// the emulated float packet loads/stores each Packet2d half via vec_ld2f /
// vec_st2f (two floats widened to/narrowed from doubles).
// pset1<Packet4f> splats by widening the float to double and duplicating it
// in both halves.
// NOTE(review): the local `Packet *vfrom/vto` declarations, the dereference/
// assignment lines and `return` statements are elided in this listing.
14000 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
14002 EIGEN_DEBUG_ALIGNED_LOAD
14004 vfrom = (Packet *) from;
14007 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
14009 EIGEN_DEBUG_ALIGNED_LOAD
14011 vfrom.v4f[0] = vec_ld2f(&from[0]);
14012 vfrom.v4f[1] = vec_ld2f(&from[2]);
14015 template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
14017 EIGEN_DEBUG_ALIGNED_LOAD
14019 vfrom = (Packet *) from;
14022 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
14024 EIGEN_DEBUG_ALIGNED_STORE
14026 vto = (Packet *) to;
14029 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
14031 EIGEN_DEBUG_ALIGNED_STORE
14032 vec_st2f(from.v4f[0], &to[0]);
14033 vec_st2f(from.v4f[1], &to[2]);
14035 template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
14037 EIGEN_DEBUG_ALIGNED_STORE
14039 vto = (Packet *) to;
14042 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
14044 return vec_splats(from);
14046 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
14047 return vec_splats(from);
14049 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
14052 to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
14053 to.v4f[1] = to.v4f[0];
// pbroadcast4: load 4 consecutive scalars from `a` and splat each into its
// own packet (a0..a3). Packet4i/Packet4f do one load then four splats;
// Packet2d needs two loads since each packet holds only 2 doubles.
14056 template<> EIGEN_STRONG_INLINE void
14057 pbroadcast4<Packet4i>(const int *a,
14058 Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
14060 a3 = pload<Packet4i>(a);
14061 a0 = vec_splat(a3, 0);
14062 a1 = vec_splat(a3, 1);
14063 a2 = vec_splat(a3, 2);
14064 a3 = vec_splat(a3, 3);
14066 template<> EIGEN_STRONG_INLINE void
14067 pbroadcast4<Packet4f>(const float *a,
14068 Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
14070 a3 = pload<Packet4f>(a);
14071 a0 = vec_splat_packet4f<0>(a3);
14072 a1 = vec_splat_packet4f<1>(a3);
14073 a2 = vec_splat_packet4f<2>(a3);
14074 a3 = vec_splat_packet4f<3>(a3);
14076 template<> EIGEN_STRONG_INLINE void
14077 pbroadcast4<Packet2d>(const double *a,
14078 Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
14080 a1 = pload<Packet2d>(a);
14081 a0 = vec_splat(a1, 0);
14082 a1 = vec_splat(a1, 1);
14083 a3 = pload<Packet2d>(a+2);
14084 a2 = vec_splat(a3, 0);
14085 a3 = vec_splat(a3, 1);
// Strided gather/scatter: implemented via an aligned stack buffer — gather
// copies strided scalars into the buffer then does one aligned packet load;
// scatter does one aligned packet store then copies scalars out with stride.
14087 template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
14089 int EIGEN_ALIGN16 ai[4];
14090 ai[0] = from[0*stride];
14091 ai[1] = from[1*stride];
14092 ai[2] = from[2*stride];
14093 ai[3] = from[3*stride];
14094 return pload<Packet4i>(ai);
14096 template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
14098 float EIGEN_ALIGN16 ai[4];
14099 ai[0] = from[0*stride];
14100 ai[1] = from[1*stride];
14101 ai[2] = from[2*stride];
14102 ai[3] = from[3*stride];
14103 return pload<Packet4f>(ai);
14105 template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
14107 double EIGEN_ALIGN16 af[2];
14108 af[0] = from[0*stride];
14109 af[1] = from[1*stride];
14110 return pload<Packet2d>(af);
14112 template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
14114 int EIGEN_ALIGN16 ai[4];
14115 pstore<int>((int *)ai, from);
14116 to[0*stride] = ai[0];
14117 to[1*stride] = ai[1];
14118 to[2*stride] = ai[2];
14119 to[3*stride] = ai[3];
14121 template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
14123 float EIGEN_ALIGN16 ai[4];
14124 pstore<float>((float *)ai, from);
14125 to[0*stride] = ai[0];
14126 to[1*stride] = ai[1];
14127 to[2*stride] = ai[2];
14128 to[3*stride] = ai[3];
14130 template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
14132 double EIGEN_ALIGN16 af[2];
14133 pstore<double>(af, from);
14134 to[0*stride] = af[0];
14135 to[1*stride] = af[1];
// Element-wise arithmetic. Packet4i/Packet2d use native vector operators;
// Packet4f applies the operation to each emulated Packet2d half. pconj is the
// identity for all real types. pmadd maps to vec_madd (fused multiply-add)
// for floating point, and to mul+add for integers.
// NOTE(review): the `Packet4f c;`/`Packet4f res;` declarations and
// `return c;`/`return res;` lines are elided in this listing.
14137 template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
14138 template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
14141 c.v4f[0] = a.v4f[0] + b.v4f[0];
14142 c.v4f[1] = a.v4f[1] + b.v4f[1];
14145 template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
14146 template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
14147 template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
14150 c.v4f[0] = a.v4f[0] - b.v4f[0];
14151 c.v4f[1] = a.v4f[1] - b.v4f[1];
14154 template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
14155 template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
14156 template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
14159 c.v4f[0] = a.v4f[0] * b.v4f[0];
14160 c.v4f[1] = a.v4f[1] * b.v4f[1];
14163 template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
14164 template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
14165 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
14168 c.v4f[0] = a.v4f[0] / b.v4f[0];
14169 c.v4f[1] = a.v4f[1] / b.v4f[1];
14172 template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
14173 template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
14174 template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
14177 c.v4f[0] = -a.v4f[0];
14178 c.v4f[1] = -a.v4f[1];
14181 template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
14182 template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
14183 template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
14184 template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
14185 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
14186 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
14189 res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
14190 res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
14193 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
// plset: {a, a+1, a+2, ...} via splat + COUNTDOWN constant.
// pmin/pmax: native vec_min/vec_max for Packet4i/Packet2d; per-half for the
// emulated Packet4f.
14194 template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
14195 template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
14196 template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
14197 template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
14198 template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
14199 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
14202 res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
14203 res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
14206 template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
14207 template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
14208 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
14211 res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
14212 res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
// Bitwise AND (all types) and bitwise OR (Packet4i/Packet2d) via vec_and /
// vec_or; the emulated Packet4f AND operates per Packet2d half.
14215 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
14216 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
14217 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
14220 res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
14221 res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
14224 template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
14225 template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
14226 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
14229 res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
14230 res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
// Bitwise XOR for the native packet types via vec_xor.
14233 template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
14234 template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
14235 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
14238 res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
14239 res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
// pandnot(a,b) = a & ~b, built from vec_nor(b,b) as the complement.
// pround/pceil/pfloor map to vec_round/vec_ceil/vec_floor, per half for the
// emulated Packet4f. ploadu forwards to pload (loads are assumed alignable
// on this target).
14242 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
14243 template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
14244 template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
14247 res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
14248 res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
14251 template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
14254 res.v4f[0] = vec_round(a.v4f[0]);
14255 res.v4f[1] = vec_round(a.v4f[1]);
14258 template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
14259 template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
14262 res.v4f[0] = vec_ceil(a.v4f[0]);
14263 res.v4f[1] = vec_ceil(a.v4f[1]);
14266 template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
14267 template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
14270 res.v4f[0] = vec_floor(a.v4f[0]);
14271 res.v4f[1] = vec_floor(a.v4f[1]);
14274 template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
14275 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
14276 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { return pload<Packet4f>(from); }
14277 template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
// ploaddup: load 2 scalars and duplicate each -> {a,a,b,b} (via permute mask
// for ints, per-half splats for floats, PSET64_HI permute for doubles).
// pstoreu forwards to pstore; prefetch uses the target's prefetch macro.
// pfirst extracts lane 0 through an aligned stack store.
14278 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
14280 Packet4i p = pload<Packet4i>(from);
14281 return vec_perm(p, p, p16uc_DUPLICATE32_HI);
14283 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
14285 Packet4f p = pload<Packet4f>(from);
14286 p.v4f[1] = vec_splat(p.v4f[0], 1);
14287 p.v4f[0] = vec_splat(p.v4f[0], 0);
14290 template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
14292 Packet2d p = pload<Packet2d>(from);
14293 return vec_perm(p, p, p16uc_PSET64_HI);
14295 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
14296 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
14297 template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
14298 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
14299 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
14300 template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
14301 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
14302 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
14303 template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
// preverse: reverse lane order using the REVERSE32/REVERSE64 byte-permute
// masks; the emulated Packet4f reverses each half AND swaps the halves.
// pabs: vec_abs, per half for Packet4f.
14304 template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
14306 return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
14308 template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
14310 return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
14312 template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
14315 rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
14316 rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
14319 template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
14320 template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
14321 template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
14324 res.v4f[0] = pabs(a.v4f[0]);
14325 res.v4f[1] = pabs(a.v4f[1]);
// predux: horizontal sum. Packet4i reduces log2-style with two byte-shifts
// and adds; Packet2d with one 8-byte shift; Packet4f sums its two double
// halves then reduces the resulting Packet2d and narrows to float.
14328 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
14331 b = vec_sld(a, a, 8);
14332 sum = padd<Packet4i>(a, b);
14333 b = vec_sld(sum, sum, 4);
14334 sum = padd<Packet4i>(sum, b);
14335 return pfirst(sum);
14337 template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
14340 b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
14341 sum = padd<Packet2d>(a, b);
14342 return pfirst(sum);
14344 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
14347 sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
14348 double first = predux<Packet2d>(sum);
14349 return static_cast<float>(first);
// preduxp: reduce 4 (or 2) packets into one packet of their horizontal sums.
// Packet4i transposes with merge-high/low pairs then adds columns; Packet2d
// sums each packet's two lanes and packs the results; Packet4f goes through
// the generic 4x4 ptranspose then adds rows.
// NOTE(review): the `return sum...;` lines are elided in this listing.
14351 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
14353 Packet4i v[4], sum[4];
14354 v[0] = vec_mergeh(vecs[0], vecs[2]);
14355 v[1] = vec_mergel(vecs[0], vecs[2]);
14356 v[2] = vec_mergeh(vecs[1], vecs[3]);
14357 v[3] = vec_mergel(vecs[1], vecs[3]);
14358 sum[0] = vec_mergeh(v[0], v[2]);
14359 sum[1] = vec_mergel(v[0], v[2]);
14360 sum[2] = vec_mergeh(v[1], v[3]);
14361 sum[3] = vec_mergel(v[1], v[3]);
14362 sum[0] = padd<Packet4i>(sum[0], sum[1]);
14363 sum[1] = padd<Packet4i>(sum[2], sum[3]);
14364 sum[0] = padd<Packet4i>(sum[0], sum[1]);
14367 template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
14369 Packet2d v[2], sum;
14370 v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8)));
14371 v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8)));
14372 sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8));
14375 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
14377 PacketBlock<Packet4f,4> transpose;
14378 transpose.packet[0] = vecs[0];
14379 transpose.packet[1] = vecs[1];
14380 transpose.packet[2] = vecs[2];
14381 transpose.packet[3] = vecs[3];
14382 ptranspose(transpose);
14383 Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
14384 sum = padd(sum, transpose.packet[2]);
14385 sum = padd(sum, transpose.packet[3]);
// Horizontal product / min / max reductions. Packet4i product spills to an
// aligned array and multiplies scalars; min/max reduce log2-style with
// vec_sld shifts. Packet4f folds its two double halves first, reduces the
// resulting Packet2d, and narrows to float.
// NOTE(review): the pstore of `a` into aux (before line 14392) and local
// declarations are elided in this listing.
14388 template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
14390 EIGEN_ALIGN16 int aux[4];
14392 return aux[0] * aux[1] * aux[2] * aux[3];
14394 template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
14396 return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
14398 template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
14400 return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
14402 template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
14405 b = pmin<Packet4i>(a, vec_sld(a, a, 8));
14406 res = pmin<Packet4i>(b, vec_sld(b, b, 4));
14407 return pfirst(res);
14409 template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
14411 return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
14413 template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
14416 b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
14417 res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
14418 return static_cast<float>(pfirst(res));
14420 template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
14423 b = pmax<Packet4i>(a, vec_sld(a, a, 8));
14424 res = pmax<Packet4i>(b, vec_sld(b, b, 4));
14425 return pfirst(res);
14427 template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
14429 return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
14431 template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
14434 b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
14435 res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
14436 return static_cast<float>(pfirst(res));
// In-register transposes. Packet4i 4x4: classic two-stage merge-high/low
// butterfly. Packet2d 2x2: one permute pair with the TRANSPOSE64 masks
// (declared elsewhere in this header).
14438 EIGEN_DEVICE_FUNC inline void
14439 ptranspose(PacketBlock<Packet4i,4>& kernel) {
14440 Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
14441 Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
14442 Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
14443 Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
14444 kernel.packet[0] = vec_mergeh(t0, t2);
14445 kernel.packet[1] = vec_mergel(t0, t2);
14446 kernel.packet[2] = vec_mergeh(t1, t3);
14447 kernel.packet[3] = vec_mergel(t1, t3);
14449 EIGEN_DEVICE_FUNC inline void
14450 ptranspose(PacketBlock<Packet2d,2>& kernel) {
14451 Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
14452 Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
14453 kernel.packet[0] = t0;
14454 kernel.packet[1] = t1;
// 4x4 float transpose for the emulated Packet4f: decomposes the four packets
// into four 2x2 double blocks (t0..t3), transposes each 2x2 block, and
// reassembles the halves.
// NOTE(review): the four `ptranspose(tN);` calls (original lines 14467-14470)
// are elided in this listing — without them the reassembly below would not
// be a transpose; verify against upstream Eigen.
14456 EIGEN_DEVICE_FUNC inline void
14457 ptranspose(PacketBlock<Packet4f,4>& kernel) {
14458 PacketBlock<Packet2d,2> t0,t1,t2,t3;
14459 t0.packet[0] = kernel.packet[0].v4f[0];
14460 t0.packet[1] = kernel.packet[1].v4f[0];
14461 t1.packet[0] = kernel.packet[0].v4f[1];
14462 t1.packet[1] = kernel.packet[1].v4f[1];
14463 t2.packet[0] = kernel.packet[2].v4f[0];
14464 t2.packet[1] = kernel.packet[3].v4f[0];
14465 t3.packet[0] = kernel.packet[2].v4f[1];
14466 t3.packet[1] = kernel.packet[3].v4f[1];
14471 kernel.packet[0].v4f[0] = t0.packet[0];
14472 kernel.packet[0].v4f[1] = t2.packet[0];
14473 kernel.packet[1].v4f[0] = t0.packet[1];
14474 kernel.packet[1].v4f[1] = t2.packet[1];
14475 kernel.packet[2].v4f[0] = t1.packet[0];
14476 kernel.packet[2].v4f[1] = t3.packet[0];
14477 kernel.packet[3].v4f[0] = t1.packet[1];
14478 kernel.packet[3].v4f[1] = t3.packet[1];
14480 template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
14481 Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
14482 Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
14483 return vec_sel(elsePacket, thenPacket, mask);
14485 template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
14486 Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
14487 Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
14488 Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
14489 Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
14491 result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
14492 result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
14495 template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
14496 Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
14497 Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
14498 return vec_sel(elsePacket, thenPacket, mask);
14503 // end #include "src/Core/arch/ZVector/PacketMath.h"
14504 // #include "src/Core/arch/ZVector/MathFunctions.h"
14505 #ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
14506 #define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
14508 namespace internal {
// ---------------------------------------------------------------------------
// ZVector vectorized math functions (exp/sqrt/rsqrt).
// Packet-wide constants for the Cephes-style exp() rational approximation:
// exp_hi/exp_lo clamp the argument to the non-overflow/underflow range of
// double, LOG2EF = log2(e), p0..p2/q0..q3 are the P/Q polynomial
// coefficients, and C1+C2 split ln(2) in two for an exact-ish reduction.
14509 static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
14510 static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
14511 static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
14512 static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
14513 static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
14514 static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
14515 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
14516 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
14517 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
14518 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
14519 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
14520 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
14521 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
14522 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
14523 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
// pexp<Packet2d>: exp(x) = 2^n * exp(r) with n = round(x*log2(e)) and
// r = x - n*ln(2); exp(r) approximated by a rational P/Q polynomial, then
// scaled by constructing 2^n directly in the exponent bits.
// NOTE(review): local declarations (x, tmp, fx, emm0 — original lines
// 14526-14529) and the reduction subtraction lines were dropped in extraction.
14524 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
14525 Packet2d pexp<Packet2d>(const Packet2d& _x)
14530 x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
14531 fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
14532 fx = vec_floor(fx);
14533 tmp = pmul(fx, p2d_cephes_exp_C1);
14534 Packet2d z = pmul(fx, p2d_cephes_exp_C2);
14537 Packet2d x2 = pmul(x,x);
14538 Packet2d px = p2d_cephes_exp_p0;
14539 px = pmadd(px, x2, p2d_cephes_exp_p1);
14540 px = pmadd(px, x2, p2d_cephes_exp_p2);
14542 Packet2d qx = p2d_cephes_exp_q0;
14543 qx = pmadd(qx, x2, p2d_cephes_exp_q1);
14544 qx = pmadd(qx, x2, p2d_cephes_exp_q2);
14545 qx = pmadd(qx, x2, p2d_cephes_exp_q3);
14546 x = pdiv(px,psub(qx,px));
14547 x = pmadd(p2d_2,x,p2d_1);
// Build 2^fx: add IEEE-754 double bias (1023) and shift into the exponent.
14548 emm0 = vec_ctsl(fx, 0);
14549 static const Packet2l p2l_1023 = { 1023, 1023 };
14550 static const Packet2ul p2ul_52 = { 52, 52 };
14551 emm0 = emm0 + p2l_1023;
14552 emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
// NaN lanes (x != x) propagate the input unchanged via the select below.
14553 Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
14554 return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
// pexp<Packet4f>: delegate to the double kernel on each emulated half.
14557 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
14558 Packet4f pexp<Packet4f>(const Packet4f& x)
14561 res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
14562 res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
// psqrt<Packet2d>: single hardware instruction via the s390 vector builtin.
14565 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
14566 Packet2d psqrt<Packet2d>(const Packet2d& x)
14568 return __builtin_s390_vfsqdb(x);
14570 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
14571 Packet4f psqrt<Packet4f>(const Packet4f& x)
14574 res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
14575 res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
// prsqrt: no reciprocal-sqrt estimate used here — full-precision 1/sqrt(x).
14578 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
14579 Packet2d prsqrt<Packet2d>(const Packet2d& x) {
14580 return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
14582 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
14583 Packet4f prsqrt<Packet4f>(const Packet4f& x) {
14585 res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
14586 res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
14592 // end #include "src/Core/arch/ZVector/MathFunctions.h"
14593 // #include "src/Core/arch/ZVector/Complex.h"
14594 #ifndef EIGEN_COMPLEX32_ALTIVEC_H
14595 #define EIGEN_COMPLEX32_ALTIVEC_H
14597 namespace internal {
// ---------------------------------------------------------------------------
// ZVector complex packets: Packet1cd = one std::complex<double>,
// Packet2cf = two std::complex<float> (stored in the emulated Packet4f).
// Sign masks for conjugation: XOR1 flips the sign bit of the imaginary
// (second) lane, XOR2 of the real (first) lane, built by byte-shifting a
// negative-zero pattern against zero.
14598 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);
14599 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);
// Wrapper structs (struct headers/members lost in extraction; ctors remain).
14602 EIGEN_STRONG_INLINE Packet1cd() {}
14603 EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
14608 EIGEN_STRONG_INLINE Packet2cf() {}
14609 EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
// packet_traits: advertise the vectorizable ops for each complex scalar.
// NOTE(review): the enum bodies (Vectorizable, size, Has* flags) were dropped
// in extraction — only the typedefs and AlignedOnScalar survive here.
14615 template<> struct packet_traits<std::complex<float> > : default_packet_traits
14617 typedef Packet2cf type;
14618 typedef Packet2cf half;
14621 AlignedOnScalar = 1,
14637 template<> struct packet_traits<std::complex<double> > : default_packet_traits
14639 typedef Packet1cd type;
14640 typedef Packet1cd half;
14643 AlignedOnScalar = 1,
// unpacket_traits: scalar type, packet size and required alignment.
14658 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
14659 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
// ---------------------------------------------------------------------------
// Complex load/store/broadcast/gather/scatter: all forward to the underlying
// real-valued Packet4f/Packet2d primitives by reinterpreting the memory as
// interleaved (real, imag) scalars.
14660 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
14661 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
14662 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
14663 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
14664 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
14665 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
14666 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
14667 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
14668 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
// pset1: Packet1cd holds exactly one value, so broadcast == unaligned load.
14669 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
14670 { return ploadu<Packet1cd>(&from); }
// Packet2cf broadcast: load once into the first half, duplicate to the second.
14671 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
14674 res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
14675 res.cd[1] = res.cd[0];
// Strided gather/scatter go through a small aligned stack buffer.
14678 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
14680 std::complex<float> EIGEN_ALIGN16 af[2];
14681 af[0] = from[0*stride];
14682 af[1] = from[1*stride];
14683 return pload<Packet2cf>(af);
// Single-element packet: stride is irrelevant, plain load/store suffice.
14685 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
14687 return pload<Packet1cd>(from);
14689 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
14691 std::complex<float> EIGEN_ALIGN16 af[2];
14692 pstore<std::complex<float> >((std::complex<float> *) af, from);
14693 to[0*stride] = af[0];
14694 to[1*stride] = af[1];
14696 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
14698 pstore<std::complex<double> >(to, from);
// ---------------------------------------------------------------------------
// Complex arithmetic. add/sub/negate act lane-wise on the raw vector; conj
// XORs the imaginary sign bit; pmul implements (a.re + i*a.im)*(b.re + i*b.im)
// with broadcasts, two multiplies, a swap and a sign flip.
14700 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
14701 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
14702 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
14703 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
14704 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
14705 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
// Conjugate: flip the sign of the imaginary lane via the CONJ_XOR2 mask.
14706 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
14707 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
14710 res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
14711 res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
// Complex multiply: v1 = a.re*b, v2 = a.im*b; swap v2's lanes (8-byte shift),
// negate one lane of v2 (CONJ_XOR1), then v1+v2 = (re*re-im*im, re*im+im*re).
14714 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
14716 Packet2d a_re, a_im, v1, v2;
14717 a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
14718 a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
14719 v1 = vec_madd(a_re, b.v, p2d_ZERO);
14720 v2 = vec_madd(a_im, b.v, p2d_ZERO);
14721 v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
14722 v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
14723 return Packet1cd(v1 + v2);
// Packet2cf multiply: apply the Packet1cd kernel to each half.
14725 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
14728 res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
14729 res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
// Bitwise ops on the raw vector bits; andnot(a,b) = a & ~b via vec_nor(b,b).
14732 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
14733 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
14734 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
14735 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
14736 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
14737 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
14738 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
14739 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
14740 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
14741 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
14742 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
14743 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
// ---------------------------------------------------------------------------
// Complex first-element extraction, reversal, reductions and alignment glue.
// pfirst: round-trip through an aligned stack slot to read element 0.
14744 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
14746 std::complex<double> EIGEN_ALIGN16 res;
14747 pstore<std::complex<double> >(&res, a);
14750 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
14752 std::complex<float> EIGEN_ALIGN16 res[2];
14753 pstore<std::complex<float> >(res, a);
// preverse: a single-element packet is its own reverse; the two-element
// packet swaps its Packet1cd halves.
14756 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
14757 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
14760 res.cd[0] = a.cd[1];
14761 res.cd[1] = a.cd[0];
// predux/predux_mul: horizontal sum/product; for Packet2cf combine the two
// halves with the packet op and spill the scalar via vec_st2f.
14764 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
14768 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
14770 std::complex<float> res;
14771 Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
14772 vec_st2f(b.v, (float*)&res);
// preduxp: transpose-then-add reduction of an array of packets.
14775 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
14779 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
14781 PacketBlock<Packet2cf,2> transpose;
14782 transpose.packet[0] = vecs[0];
14783 transpose.packet[1] = vecs[1];
14784 ptranspose(transpose);
14785 return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
14787 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
14791 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
14793 std::complex<float> res;
14794 Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
14795 vec_st2f(b.v, (float*)&res);
// palign: shift `second` into `first` by Offset elements. Packet1cd has one
// element, so its run() is a no-op; Packet2cf shifts by one when Offset==1
// (the guard condition line was lost in extraction — presumably
// `if (Offset==1)`; confirm against upstream).
14798 template<int Offset>
14799 struct palign_impl<Offset,Packet1cd>
14801 static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
14805 template<int Offset>
14806 struct palign_impl<Offset,Packet2cf>
14808 static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
14811 first.cd[0] = first.cd[1];
14812 first.cd[1] = second.cd[0];
// ---------------------------------------------------------------------------
// conj_helper<Lhs,Rhs,ConjLhs,ConjRhs>: multiply with optional conjugation of
// either operand, used by complex dot/gemm kernels. The two bools select
// which side is conjugated; <true,true> conjugates the product instead
// (conj(a)*conj(b) == conj(a*b)). pmadd is always pmul followed by padd.
// --- Packet1cd: conjugate rhs only.
14816 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
14818 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
14819 { return padd(pmul(x,y),c); }
14820 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
14822 return internal::pmul(a, pconj(b));
// --- Packet1cd: conjugate lhs only.
14825 template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
14827 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
14828 { return padd(pmul(x,y),c); }
14829 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
14831 return internal::pmul(pconj(a), b);
// --- Packet1cd: conjugate both sides.
14834 template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
14836 EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
14837 { return padd(pmul(x,y),c); }
14838 EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
14840 return pconj(internal::pmul(a, b));
// --- Packet2cf: same three variants.
14843 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
14845 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
14846 { return padd(pmul(x,y),c); }
14847 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
14849 return internal::pmul(a, pconj(b));
14852 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
14854 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
14855 { return padd(pmul(x,y),c); }
14856 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
14858 return internal::pmul(pconj(a), b);
14861 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
14863 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
14864 { return padd(pmul(x,y),c); }
14865 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
14867 return pconj(internal::pmul(a, b));
// ---------------------------------------------------------------------------
// Complex division, real/imag swap, transpose and blend.
// pdiv: a/b = (a*conj(b)) / |b|^2. The numerator comes from the
// conjugating helper; |b|^2 is b.v*b.v summed across the (re,im) lanes by
// adding the lane-swapped copy produced with p16uc_REVERSE64.
14870 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
14872 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
14873 Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
14874 return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
14876 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
14879 res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
14880 res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
// pcplxflip: swap real and imaginary parts of each complex element.
14883 EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
14885 return Packet1cd(preverse(Packet2d(x.v)));
14887 EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
14890 res.cd[0] = pcplxflip(x.cd[0]);
14891 res.cd[1] = pcplxflip(x.cd[1]);
// 2x2 complex transpose: for Packet1cd permute 64-bit lanes across the two
// packets; for Packet2cf just swap the off-diagonal Packet1cd halves.
14894 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
14896 Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
14897 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
14898 kernel.packet[0].v = tmp;
14900 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
14902 Packet1cd tmp = kernel.packet[0].cd[1];
14903 kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
14904 kernel.packet[1].cd[0] = tmp;
// pblend: widen the 2-lane complex selector to a 4-lane float selector
// (each complex element occupies two float lanes) and reuse the float blend.
14906 template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
14908 const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
14909 result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
14915 // end #include "src/Core/arch/ZVector/Complex.h"
14917 #if defined EIGEN_VECTORIZE_CUDA
14918 // #include "src/Core/arch/CUDA/PacketMath.h"
14919 #ifndef EIGEN_PACKET_MATH_CUDA_H
14920 #define EIGEN_PACKET_MATH_CUDA_H
14922 namespace internal {
14923 #if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
// ---------------------------------------------------------------------------
// CUDA packet types: float4 (4 floats) and double2 (2 doubles) are treated
// as Eigen packets on the device.
14924 template<> struct is_arithmetic<float4> { enum { value = true }; };
14925 template<> struct is_arithmetic<double2> { enum { value = true }; };
// packet_traits specializations — the enum bodies listing the Has* feature
// flags were dropped in extraction; typedefs and AlignedOnScalar remain.
14926 template<> struct packet_traits<float> : default_packet_traits
14928 typedef float4 type;
14929 typedef float4 half;
14932 AlignedOnScalar = 1,
14954 template<> struct packet_traits<double> : default_packet_traits
14956 typedef double2 type;
14957 typedef double2 half;
14960 AlignedOnScalar = 1,
// Scalar type, packet width and 16-byte alignment of each packet.
14980 template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
14981 template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
// ---------------------------------------------------------------------------
// CUDA element-wise packet primitives: broadcast, linear ramp, and the
// arithmetic/min/max ops, all spelled out per component with make_float4 /
// make_double2. min/max use the C math fmin(f)/fmax(f) semantics.
14982 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
14983 return make_float4(from, from, from, from);
14985 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
14986 return make_double2(from, from);
// plset: ramp a, a+1, ... used to vectorize index sequences.
14988 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
14989 return make_float4(a, a+1, a+2, a+3);
14991 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
14992 return make_double2(a, a+1);
14994 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
14995 return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
14997 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
14998 return make_double2(a.x+b.x, a.y+b.y);
15000 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
15001 return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
15003 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
15004 return make_double2(a.x-b.x, a.y-b.y);
15006 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
15007 return make_float4(-a.x, -a.y, -a.z, -a.w);
15009 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
15010 return make_double2(-a.x, -a.y);
// pconj on real packets is the identity.
15012 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
15013 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
15014 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
15015 return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
15017 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
15018 return make_double2(a.x*b.x, a.y*b.y);
15020 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
15021 return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
15023 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
15024 return make_double2(a.x/b.x, a.y/b.y);
15026 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
15027 return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
15029 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
15030 return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
15032 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
15033 return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
15035 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
15036 return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
// ---------------------------------------------------------------------------
// CUDA loads/stores. Aligned forms go through a vector-typed pointer cast
// (one 128-bit transaction); unaligned forms rebuild the packet component by
// component. ploadt_ro uses the read-only data cache (__ldg) on sm_35+.
15038 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
15039 return *reinterpret_cast<const float4*>(from);
15041 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
15042 return *reinterpret_cast<const double2*>(from);
15044 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
15045 return make_float4(from[0], from[1], from[2], from[3]);
15047 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
15048 return make_double2(from[0], from[1]);
// ploaddup: duplicate each source element to fill the packet.
15050 template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
15051 return make_float4(from[0], from[0], from[1], from[1]);
15053 template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
15054 return make_double2(from[0], from[0]);
15056 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
15057 *reinterpret_cast<float4*>(to) = from;
15059 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
15060 *reinterpret_cast<double2*>(to) = from;
// NOTE(review): the component-wise store bodies of pstoreu (original lines
// 15063-15067 and 15069-15071) were lost in extraction.
15062 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
15068 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
// Read-only loads: __ldg on compute capability >= 3.5, plain loads otherwise.
15073 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
15074 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
15075 return __ldg((const float4*)from);
15077 return make_float4(from[0], from[1], from[2], from[3]);
15081 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
15082 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
15083 return __ldg((const double2*)from);
15085 return make_double2(from[0], from[1]);
// Unaligned read-only variants issue one scalar __ldg per component.
15089 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
15090 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
15091 return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
15093 return make_float4(from[0], from[1], from[2], from[3]);
15097 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
15098 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
15099 return make_double2(__ldg(from+0), __ldg(from+1));
15101 return make_double2(from[0], from[1]);
// Strided gather/scatter: one scalar access per component at from/to[i*stride].
15104 template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
15105 return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
15107 template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
15108 return make_double2(from[0*stride], from[1*stride]);
15110 template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
15111 to[stride*0] = from.x;
15112 to[stride*1] = from.y;
15113 to[stride*2] = from.z;
15114 to[stride*3] = from.w;
15116 template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
15117 to[stride*0] = from.x;
15118 to[stride*1] = from.y;
// ---------------------------------------------------------------------------
// CUDA horizontal reductions, absolute value and in-register transposes.
// pfirst bodies (return a.x) were single lines lost in extraction.
15120 template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
15123 template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
// Sum / max / min / product across the packet, spelled out per component.
15126 template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
15127 return a.x + a.y + a.z + a.w;
15129 template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
15132 template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
15133 return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
15135 template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
15136 return fmax(a.x, a.y);
15138 template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
15139 return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
15141 template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
15142 return fmin(a.x, a.y);
15144 template<> EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
15145 return a.x * a.y * a.z * a.w;
15147 template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
// Component-wise absolute value.
15150 template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
15151 return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
15153 template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
15154 return make_double2(fabs(a.x), fabs(a.y));
// 4x4 float transpose: swap each pair of off-diagonal elements in place.
15156 EIGEN_DEVICE_FUNC inline void
15157 ptranspose(PacketBlock<float4,4>& kernel) {
15158 float tmp = kernel.packet[0].y;
15159 kernel.packet[0].y = kernel.packet[1].x;
15160 kernel.packet[1].x = tmp;
15161 tmp = kernel.packet[0].z;
15162 kernel.packet[0].z = kernel.packet[2].x;
15163 kernel.packet[2].x = tmp;
15164 tmp = kernel.packet[0].w;
15165 kernel.packet[0].w = kernel.packet[3].x;
15166 kernel.packet[3].x = tmp;
15167 tmp = kernel.packet[1].z;
15168 kernel.packet[1].z = kernel.packet[2].y;
15169 kernel.packet[2].y = tmp;
15170 tmp = kernel.packet[1].w;
15171 kernel.packet[1].w = kernel.packet[3].y;
15172 kernel.packet[3].y = tmp;
15173 tmp = kernel.packet[2].w;
15174 kernel.packet[2].w = kernel.packet[3].z;
15175 kernel.packet[3].z = tmp;
// 2x2 double transpose: single off-diagonal swap.
15177 EIGEN_DEVICE_FUNC inline void
15178 ptranspose(PacketBlock<double2,2>& kernel) {
15179 double tmp = kernel.packet[0].y;
15180 kernel.packet[0].y = kernel.packet[1].x;
15181 kernel.packet[1].x = tmp;
15187 // end #include "src/Core/arch/CUDA/PacketMath.h"
15188 // #include "src/Core/arch/CUDA/MathFunctions.h"
15189 #ifndef EIGEN_MATH_FUNCTIONS_CUDA_H
15190 #define EIGEN_MATH_FUNCTIONS_CUDA_H
15192 namespace internal {
15193 #if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
// ---------------------------------------------------------------------------
// CUDA vectorized math functions: each packet op just applies the matching
// CUDA device math function (float "f"-suffixed variant for float4, double
// variant for double2) to every component.
15194 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15195 float4 plog<float4>(const float4& a)
15197 return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
15199 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15200 double2 plog<double2>(const double2& a)
15203 return make_double2(log(a.x), log(a.y));
// log1p: accurate log(1+x) for small x.
15205 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15206 float4 plog1p<float4>(const float4& a)
15208 return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));
15210 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15211 double2 plog1p<double2>(const double2& a)
15213 return make_double2(log1p(a.x), log1p(a.y));
15215 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15216 float4 pexp<float4>(const float4& a)
15218 return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
15220 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15221 double2 pexp<double2>(const double2& a)
15224 return make_double2(exp(a.x), exp(a.y));
15226 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15227 float4 psqrt<float4>(const float4& a)
15229 return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
15231 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15232 double2 psqrt<double2>(const double2& a)
15235 return make_double2(sqrt(a.x), sqrt(a.y));
// rsqrt(f): CUDA's hardware-accelerated reciprocal square root intrinsics.
15237 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15238 float4 prsqrt<float4>(const float4& a)
15240 return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
15242 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
15243 double2 prsqrt<double2>(const double2& a)
15245 return make_double2(rsqrt(a.x), rsqrt(a.y));
15251 // end #include "src/Core/arch/CUDA/MathFunctions.h"
15253 // #include "src/Core/arch/Default/Settings.h"
15254 #ifndef EIGEN_DEFAULT_SETTINGS_H
15255 #define EIGEN_DEFAULT_SETTINGS_H
15256 #ifndef EIGEN_UNROLLING_LIMIT
15257 #define EIGEN_UNROLLING_LIMIT 100
15259 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
15260 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
15262 #ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
15263 #define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
15265 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
15266 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
15269 // end #include "src/Core/arch/Default/Settings.h"
15270 // #include "src/Core/functors/BinaryFunctors.h"
15271 #ifndef EIGEN_BINARY_FUNCTORS_H
15272 #define EIGEN_BINARY_FUNCTORS_H
15274 namespace internal {
// ---------------------------------------------------------------------------
// Scalar binary functors used by coefficient-wise expressions.
// binary_op_base: records the argument types (std::binary_function-style).
15275 template<typename Arg1, typename Arg2>
15276 struct binary_op_base
15278 typedef Arg1 first_argument_type;
15279 typedef Arg2 second_argument_type;
// scalar_sum_op: a + b. Result type comes from ScalarBinaryOpTraits so
// mixed LhsScalar/RhsScalar sums resolve correctly; a user plugin may inject
// a custom constructor body via EIGEN_SCALAR_BINARY_OP_PLUGIN.
15281 template<typename LhsScalar,typename RhsScalar>
15282 struct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar>
15284 typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_sum_op>::ReturnType result_type;
15285 #ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
15286 EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
15289 EIGEN_SCALAR_BINARY_OP_PLUGIN
15292 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
// Packet-wise form and the matching horizontal reduction.
15293 template<typename Packet>
15294 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
15295 { return internal::padd(a,b); }
15296 template<typename Packet>
15297 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
15298 { return internal::predux(a); }
// Cost model: average of the two scalar AddCosts; vectorizable only when
// both sides share a type with packet HasAdd support.
15300 template<typename LhsScalar,typename RhsScalar>
15301 struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
15303 Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
15304 PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
// bool+bool sums as int to avoid bool arithmetic pitfalls.
15307 template<> struct scalar_sum_op<bool,bool> : scalar_sum_op<int,int> {
// scalar_product_op: a * b, mirroring scalar_sum_op with pmul/predux_mul.
15311 template<typename LhsScalar,typename RhsScalar>
15312 struct scalar_product_op : binary_op_base<LhsScalar,RhsScalar>
15314 typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;
15315 #ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
15316 EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
15318 scalar_product_op() {
15319 EIGEN_SCALAR_BINARY_OP_PLUGIN
15322 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
15323 template<typename Packet>
15324 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
15325 { return internal::pmul(a,b); }
15326 template<typename Packet>
15327 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
15328 { return internal::predux_mul(a); }
15330 template<typename LhsScalar,typename RhsScalar>
15331 struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
15333 Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2,
15334 PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
// Binary functor computing conj(a) * b; the left operand is conjugated
// only when LhsScalar is complex (Conj flag), via conj_helper.
15337 template<typename LhsScalar,typename RhsScalar>
15338 struct scalar_conj_product_op : binary_op_base<LhsScalar,RhsScalar>
15341 Conj = NumTraits<LhsScalar>::IsComplex
15343 typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
15344 EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
15345 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
15346 { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
15347 template<typename Packet>
15348 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
15349 { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
15351 template<typename LhsScalar,typename RhsScalar>
15352 struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
15354 Cost = NumTraits<LhsScalar>::MulCost,
15355 PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
// Binary functor returning the smaller of (a, b) via numext::mini /
// packet pmin, with predux_min as its reduction.
15358 template<typename LhsScalar,typename RhsScalar>
15359 struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>
15361 typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
15362 EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
15363 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
15364 template<typename Packet>
15365 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
15366 { return internal::pmin(a,b); }
15367 template<typename Packet>
15368 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
15369 { return internal::predux_min(a); }
15371 template<typename LhsScalar,typename RhsScalar>
15372 struct functor_traits<scalar_min_op<LhsScalar,RhsScalar> > {
15374 Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
15375 PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMin
// Binary functor returning the larger of (a, b); exact mirror of
// scalar_min_op using maxi / pmax / predux_max.
15378 template<typename LhsScalar,typename RhsScalar>
15379 struct scalar_max_op : binary_op_base<LhsScalar,RhsScalar>
15381 typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
15382 EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
15383 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
15384 template<typename Packet>
15385 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
15386 { return internal::pmax(a,b); }
15387 template<typename Packet>
15388 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
15389 { return internal::predux_max(a); }
15391 template<typename LhsScalar,typename RhsScalar>
15392 struct functor_traits<scalar_max_op<LhsScalar,RhsScalar> > {
15394 Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
15395 PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMax
// Family of comparison functors, one partial specialization per
// ComparisonName; all return bool and are scalar-only (no packet path).
15398 template<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_op;
15399 template<typename LhsScalar, typename RhsScalar, ComparisonName cmp>
15400 struct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {
15402 Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
15403 PacketAccess = false
// result_of specialization so expression templates can deduce the bool
// result type of any comparison.
15406 template<ComparisonName Cmp, typename LhsScalar, typename RhsScalar>
15407 struct result_of<scalar_cmp_op<LhsScalar, RhsScalar, Cmp>(LhsScalar,RhsScalar)> {
// a == b
15410 template<typename LhsScalar, typename RhsScalar>
15411 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>
15413 typedef bool result_type;
15414 EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
15415 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
// a < b
15417 template<typename LhsScalar, typename RhsScalar>
15418 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>
15420 typedef bool result_type;
15421 EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
15422 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}
// a <= b
15424 template<typename LhsScalar, typename RhsScalar>
15425 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>
15427 typedef bool result_type;
15428 EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
15429 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}
// a > b
15431 template<typename LhsScalar, typename RhsScalar>
15432 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>
15434 typedef bool result_type;
15435 EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
15436 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}
// a >= b
15438 template<typename LhsScalar, typename RhsScalar>
15439 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>
15441 typedef bool result_type;
15442 EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
15443 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}
// Unordered test: !(a<=b || b<=a) is true exactly when the two values
// cannot be ordered, i.e. at least one of them is NaN.
15445 template<typename LhsScalar, typename RhsScalar>
15446 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>
15448 typedef bool result_type;
15449 EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
15450 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}
// a != b
15452 template<typename LhsScalar, typename RhsScalar>
15453 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>
15455 typedef bool result_type;
15456 EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
15457 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}
// Overflow/underflow-avoiding hypot(x, y): rewrites sqrt(x^2 + y^2) as
// p * sqrt(1 + (q/p)^2) with p the larger magnitude. NOTE(review): the
// lines computing p and qp (original lines 15466-15476) are elided in
// this dump; only the prologue and the return expression are visible.
15459 template<typename Scalar>
15460 struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
15462 EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
15463 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
15465 EIGEN_USING_STD_MATH(sqrt)
15477 return p * sqrt(Scalar(1) + qp*qp);
// Cost model: 3 adds, 2 muls and 2 divisions; no packet path.
15480 template<typename Scalar>
15481 struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
15484 Cost = 3 * NumTraits<Scalar>::AddCost +
15485 2 * NumTraits<Scalar>::MulCost +
15486 2 * scalar_div_cost<Scalar,false>::value,
15487 PacketAccess = false
// Binary functor computing pow(a, b) with possibly distinct base and
// exponent types; delegates to numext::pow. Scalar-only (PacketAccess
// is false below).
15490 template<typename Scalar, typename Exponent>
15491 struct scalar_pow_op : binary_op_base<Scalar,Exponent>
15493 typedef typename ScalarBinaryOpTraits<Scalar,Exponent,scalar_pow_op>::ReturnType result_type;
15494 #ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
15495 EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op)
// These typedefs are consumed by the user plugin hook below.
15498 typedef Scalar LhsScalar;
15499 typedef Exponent RhsScalar;
15500 EIGEN_SCALAR_BINARY_OP_PLUGIN
15504 inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); }
15506 template<typename Scalar, typename Exponent>
15507 struct functor_traits<scalar_pow_op<Scalar,Exponent> > {
15508 enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
// Binary functor computing a - b; packet path uses psub. Structure
// mirrors scalar_sum_op (plugin hook, ScalarBinaryOpTraits result type).
15510 template<typename LhsScalar,typename RhsScalar>
15511 struct scalar_difference_op : binary_op_base<LhsScalar,RhsScalar>
15513 typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_difference_op>::ReturnType result_type;
15514 #ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
15515 EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
15517 scalar_difference_op() {
15518 EIGEN_SCALAR_BINARY_OP_PLUGIN
15521 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
15522 template<typename Packet>
15523 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
15524 { return internal::psub(a,b); }
15526 template<typename LhsScalar,typename RhsScalar>
15527 struct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {
15529 Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
15530 PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub
// Binary functor computing a / b; packet path uses pdiv.
15533 template<typename LhsScalar,typename RhsScalar>
15534 struct scalar_quotient_op : binary_op_base<LhsScalar,RhsScalar>
15536 typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_quotient_op>::ReturnType result_type;
15537 #ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
15538 EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
15540 scalar_quotient_op() {
15541 EIGEN_SCALAR_BINARY_OP_PLUGIN
15544 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
15545 template<typename Packet>
15546 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
15547 { return internal::pdiv(a,b); }
// Division cost is looked up from scalar_div_cost, parameterized on
// whether the vectorized path is available.
15549 template<typename LhsScalar,typename RhsScalar>
15550 struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
15551 typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
15553 PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
15554 Cost = scalar_div_cost<result_type,PacketAccess>::value
// Logical AND on bools (a && b); scalar-only.
15557 struct scalar_boolean_and_op {
15558 EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
15559 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
15561 template<> struct functor_traits<scalar_boolean_and_op> {
15563 Cost = NumTraits<bool>::AddCost,
15564 PacketAccess = false
// Logical OR on bools (a || b); scalar-only.
15567 struct scalar_boolean_or_op {
15568 EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
15569 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
15571 template<> struct functor_traits<scalar_boolean_or_op> {
15573 Cost = NumTraits<bool>::AddCost,
15574 PacketAccess = false
// Logical XOR on bools (bitwise ^ on bool operands); scalar-only.
15577 struct scalar_boolean_xor_op {
15578 EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
15579 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
15581 template<> struct functor_traits<scalar_boolean_xor_op> {
15583 Cost = NumTraits<bool>::AddCost,
15584 PacketAccess = false
// Adapts a binary functor into a unary one by fixing its FIRST argument
// to a stored value (used for scalar-on-the-left expressions); the
// packet path broadcasts the stored scalar with pset1.
15587 template<typename BinaryOp> struct bind1st_op : BinaryOp {
15588 typedef typename BinaryOp::first_argument_type first_argument_type;
15589 typedef typename BinaryOp::second_argument_type second_argument_type;
15590 typedef typename BinaryOp::result_type result_type;
15591 bind1st_op(const first_argument_type &val) : m_value(val) {}
15592 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); }
15593 template<typename Packet>
15594 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const
15595 { return BinaryOp::packetOp(internal::pset1<Packet>(m_value), b); }
// The bound value; copied into the functor.
15596 first_argument_type m_value;
// Costs are inherited unchanged from the wrapped binary functor.
15598 template<typename BinaryOp> struct functor_traits<bind1st_op<BinaryOp> > : functor_traits<BinaryOp> {};
// Same adapter but fixing the SECOND argument (scalar on the right).
15599 template<typename BinaryOp> struct bind2nd_op : BinaryOp {
15600 typedef typename BinaryOp::first_argument_type first_argument_type;
15601 typedef typename BinaryOp::second_argument_type second_argument_type;
15602 typedef typename BinaryOp::result_type result_type;
15603 bind2nd_op(const second_argument_type &val) : m_value(val) {}
15604 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); }
15605 template<typename Packet>
15606 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
15607 { return BinaryOp::packetOp(a,internal::pset1<Packet>(m_value)); }
15608 second_argument_type m_value;
15610 template<typename BinaryOp> struct functor_traits<bind2nd_op<BinaryOp> > : functor_traits<BinaryOp> {};
15614 // end #include "src/Core/functors/BinaryFunctors.h"
15615 // #include "src/Core/functors/UnaryFunctors.h"
15616 #ifndef EIGEN_UNARY_FUNCTORS_H
15617 #define EIGEN_UNARY_FUNCTORS_H
15619 namespace internal {
// Unary functor computing -a; packet path uses pnegate.
15620 template<typename Scalar> struct scalar_opposite_op {
15621 EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
15622 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
15623 template<typename Packet>
15624 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
15625 { return internal::pnegate(a); }
15627 template<typename Scalar>
15628 struct functor_traits<scalar_opposite_op<Scalar> >
15630 Cost = NumTraits<Scalar>::AddCost,
15631 PacketAccess = packet_traits<Scalar>::HasNegate };
// Unary functor computing |a|; for complex Scalar the result type is the
// underlying real type (NumTraits<Scalar>::Real).
15633 template<typename Scalar> struct scalar_abs_op {
15634 EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
15635 typedef typename NumTraits<Scalar>::Real result_type;
15636 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); }
15637 template<typename Packet>
15638 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
15639 { return internal::pabs(a); }
15641 template<typename Scalar>
15642 struct functor_traits<scalar_abs_op<Scalar> >
15645 Cost = NumTraits<Scalar>::AddCost,
15646 PacketAccess = packet_traits<Scalar>::HasAbs
// Pivot-scoring functor for decompositions: same as abs, but the
// Score_is_abs tag lets abs_knowing_score below skip recomputation.
15649 template<typename Scalar> struct scalar_score_coeff_op : scalar_abs_op<Scalar>
15651 typedef void Score_is_abs;
15653 template<typename Scalar>
15654 struct functor_traits<scalar_score_coeff_op<Scalar> > : functor_traits<scalar_abs_op<Scalar> > {};
// Generic case: the score tells us nothing, so compute |a| directly.
15655 template<typename Scalar, typename=void> struct abs_knowing_score
15657 EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
15658 typedef typename NumTraits<Scalar>::Real result_type;
15659 template<typename Score>
15660 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); }
// SFINAE specialization selected when the score op defines Score_is_abs:
// the already-computed score IS |a|, so just return it.
15662 template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>
15664 EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
15665 typedef typename NumTraits<Scalar>::Real result_type;
15666 template<typename Scal>
15667 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; }
// Unary functor computing the squared magnitude |a|^2; the packet path
// is a plain a*a, which matches numext::abs2 for real packets.
15669 template<typename Scalar> struct scalar_abs2_op {
15670 EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
15671 typedef typename NumTraits<Scalar>::Real result_type;
15673 EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
15674 template<typename Packet>
15675 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
15676 { return internal::pmul(a,a); }
15678 template<typename Scalar>
15679 struct functor_traits<scalar_abs2_op<Scalar> >
15680 { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
// Unary functor computing conj(a); a no-op for real scalars, hence the
// zero cost for non-complex types in the traits below.
15681 template<typename Scalar> struct scalar_conjugate_op {
15682 EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
15684 EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
15685 template<typename Packet>
15686 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
15688 template<typename Scalar>
15689 struct functor_traits<scalar_conjugate_op<Scalar> >
15692 Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
15693 PacketAccess = packet_traits<Scalar>::HasConj
// Unary functor computing the argument (phase angle) of a, as a real.
15696 template<typename Scalar> struct scalar_arg_op {
15697 EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
15698 typedef typename NumTraits<Scalar>::Real result_type;
15699 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); }
15700 template<typename Packet>
15701 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
15702 { return internal::parg(a); }
15704 template<typename Scalar>
15705 struct functor_traits<scalar_arg_op<Scalar> >
15708 Cost = NumTraits<Scalar>::IsComplex ? 5 * NumTraits<Scalar>::MulCost : NumTraits<Scalar>::AddCost,
15709 PacketAccess = packet_traits<Scalar>::HasArg
// Unary functor converting Scalar to NewType via internal::cast; free
// when the two types are identical (see Cost below). Scalar-only.
15712 template<typename Scalar, typename NewType>
15713 struct scalar_cast_op {
15714 EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
15715 typedef NewType result_type;
15716 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
15718 template<typename Scalar, typename NewType>
15719 struct functor_traits<scalar_cast_op<Scalar,NewType> >
15720 { enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
// Unary functor extracting the real part of a (by value).
15721 template<typename Scalar>
15722 struct scalar_real_op {
15723 EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
15724 typedef typename NumTraits<Scalar>::Real result_type;
15726 EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
15728 template<typename Scalar>
15729 struct functor_traits<scalar_real_op<Scalar> >
15730 { enum { Cost = 0, PacketAccess = false }; };
// Unary functor extracting the imaginary part of a (by value).
15731 template<typename Scalar>
15732 struct scalar_imag_op {
15733 EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
15734 typedef typename NumTraits<Scalar>::Real result_type;
15736 EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
15738 template<typename Scalar>
15739 struct functor_traits<scalar_imag_op<Scalar> >
15740 { enum { Cost = 0, PacketAccess = false }; };
// Mutable-reference variant: returns an lvalue reference into the stored
// scalar so .real() can appear on the left-hand side of an assignment.
// The const_cast strips the constness imposed by the functor interface.
15741 template<typename Scalar>
15742 struct scalar_real_ref_op {
15743 EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
15744 typedef typename NumTraits<Scalar>::Real result_type;
15746 EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
15748 template<typename Scalar>
15749 struct functor_traits<scalar_real_ref_op<Scalar> >
15750 { enum { Cost = 0, PacketAccess = false }; };
// Same mutable-reference idea for the imaginary part.
15751 template<typename Scalar>
15752 struct scalar_imag_ref_op {
15753 EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
15754 typedef typename NumTraits<Scalar>::Real result_type;
15756 EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
15758 template<typename Scalar>
15759 struct functor_traits<scalar_imag_ref_op<Scalar> >
15760 { enum { Cost = 0, PacketAccess = false }; };
// Unary functor computing exp(a); packet path uses pexp.
15761 template<typename Scalar> struct scalar_exp_op {
15762 EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
15763 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); }
15764 template <typename Packet>
15765 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
// Cost model counts the add/mul/div operations of the vectorized exp
// kernel, with cheaper figures when FMA is available; the float
// (sizeof==4) and double cases are modeled separately.
15767 template <typename Scalar>
15768 struct functor_traits<scalar_exp_op<Scalar> > {
15770 PacketAccess = packet_traits<Scalar>::HasExp,
15771 #ifdef EIGEN_VECTORIZE_FMA
15773 (sizeof(Scalar) == 4
15774 ? (8 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost)
15775 : (14 * NumTraits<Scalar>::AddCost +
15776 6 * NumTraits<Scalar>::MulCost +
15777 scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
15780 (sizeof(Scalar) == 4
15781 ? (21 * NumTraits<Scalar>::AddCost + 13 * NumTraits<Scalar>::MulCost)
15782 : (23 * NumTraits<Scalar>::AddCost +
15783 12 * NumTraits<Scalar>::MulCost +
15784 scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
// Unary functor computing the natural logarithm; packet path uses plog.
15788 template<typename Scalar> struct scalar_log_op {
15789 EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
15790 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); }
15791 template <typename Packet>
15792 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
// Cost figures again differ with/without FMA and by scalar size.
15794 template <typename Scalar>
15795 struct functor_traits<scalar_log_op<Scalar> > {
15797 PacketAccess = packet_traits<Scalar>::HasLog,
15800 #ifdef EIGEN_VECTORIZE_FMA
15801 ? (20 * NumTraits<Scalar>::AddCost + 7 * NumTraits<Scalar>::MulCost)
15803 ? (36 * NumTraits<Scalar>::AddCost + 14 * NumTraits<Scalar>::MulCost)
15805 : sizeof(Scalar)==4 ? 40 : 85)
// Unary functor computing log(1 + a), accurate for small a; its cost is
// taken to be the same as plain log.
15808 template<typename Scalar> struct scalar_log1p_op {
15809 EIGEN_EMPTY_STRUCT_CTOR(scalar_log1p_op)
15810 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log1p(a); }
15811 template <typename Packet>
15812 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog1p(a); }
15814 template <typename Scalar>
15815 struct functor_traits<scalar_log1p_op<Scalar> > {
15817 PacketAccess = packet_traits<Scalar>::HasLog1p,
15818 Cost = functor_traits<scalar_log_op<Scalar> >::Cost
// Unary functor computing the base-10 logarithm via std::log10 (pulled
// in by EIGEN_USING_STD_MATH so ADL/device overloads still apply).
15821 template<typename Scalar> struct scalar_log10_op {
15822 EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
15823 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
15824 template <typename Packet>
15825 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
15827 template<typename Scalar>
15828 struct functor_traits<scalar_log10_op<Scalar> >
15829 { enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog10 }; };
// Unary functor computing sqrt(a); packet path uses psqrt.
15830 template<typename Scalar> struct scalar_sqrt_op {
15831 EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
15832 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); }
15833 template <typename Packet>
15834 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
// Under EIGEN_FAST_MATH the float sqrt is a polished rsqrt estimate,
// hence the add/mul-based cost; otherwise fixed latency estimates are
// used (28 cycles for double, 14 for float).
15836 template <typename Scalar>
15837 struct functor_traits<scalar_sqrt_op<Scalar> > {
15839 #if EIGEN_FAST_MATH
15840 Cost = (sizeof(Scalar) == 8 ? 28
15841 : (3 * NumTraits<Scalar>::AddCost +
15842 5 * NumTraits<Scalar>::MulCost)),
15844 Cost = (sizeof(Scalar) == 8 ? 28 : 14),
15846 PacketAccess = packet_traits<Scalar>::HasSqrt
// Unary functor computing 1/sqrt(a); the scalar fallback divides, the
// packet path uses the dedicated prsqrt kernel.
15849 template<typename Scalar> struct scalar_rsqrt_op {
15850 EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
15851 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(1)/numext::sqrt(a); }
15852 template <typename Packet>
15853 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
15855 template<typename Scalar>
15856 struct functor_traits<scalar_rsqrt_op<Scalar> >
15858 Cost = 5 * NumTraits<Scalar>::MulCost,
15859 PacketAccess = packet_traits<Scalar>::HasRsqrt
// Trigonometric unary functors. Each follows the same pattern: scalar
// path through numext::, packet path through the matching internal::p*
// kernel, and a coarse 5*MulCost estimate gated on the corresponding
// packet_traits Has* flag.
// cos(a)
15862 template<typename Scalar> struct scalar_cos_op {
15863 EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
15864 EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::cos(a); }
15865 template <typename Packet>
15866 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
15868 template<typename Scalar>
15869 struct functor_traits<scalar_cos_op<Scalar> >
15872 Cost = 5 * NumTraits<Scalar>::MulCost,
15873 PacketAccess = packet_traits<Scalar>::HasCos
// sin(a)
15876 template<typename Scalar> struct scalar_sin_op {
15877 EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
15878 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sin(a); }
15879 template <typename Packet>
15880 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
15882 template<typename Scalar>
15883 struct functor_traits<scalar_sin_op<Scalar> >
15886 Cost = 5 * NumTraits<Scalar>::MulCost,
15887 PacketAccess = packet_traits<Scalar>::HasSin
// tan(a)
15890 template<typename Scalar> struct scalar_tan_op {
15891 EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
15892 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tan(a); }
15893 template <typename Packet>
15894 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
15896 template<typename Scalar>
15897 struct functor_traits<scalar_tan_op<Scalar> >
15900 Cost = 5 * NumTraits<Scalar>::MulCost,
15901 PacketAccess = packet_traits<Scalar>::HasTan
// acos(a)
15904 template<typename Scalar> struct scalar_acos_op {
15905 EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
15906 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::acos(a); }
15907 template <typename Packet>
15908 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
15910 template<typename Scalar>
15911 struct functor_traits<scalar_acos_op<Scalar> >
15914 Cost = 5 * NumTraits<Scalar>::MulCost,
15915 PacketAccess = packet_traits<Scalar>::HasACos
// asin(a)
15918 template<typename Scalar> struct scalar_asin_op {
15919 EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
15920 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::asin(a); }
15921 template <typename Packet>
15922 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
15924 template<typename Scalar>
15925 struct functor_traits<scalar_asin_op<Scalar> >
15928 Cost = 5 * NumTraits<Scalar>::MulCost,
15929 PacketAccess = packet_traits<Scalar>::HasASin
// atan(a)
15932 template<typename Scalar> struct scalar_atan_op {
15933 EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
15934 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::atan(a); }
15935 template <typename Packet>
15936 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
15938 template<typename Scalar>
15939 struct functor_traits<scalar_atan_op<Scalar> >
15942 Cost = 5 * NumTraits<Scalar>::MulCost,
15943 PacketAccess = packet_traits<Scalar>::HasATan
// Unary functor computing tanh(a); packet path uses the dedicated ptanh
// kernel (found by unqualified lookup).
15946 template <typename Scalar>
15947 struct scalar_tanh_op {
15948 EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
15949 EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); }
15950 template <typename Packet>
15951 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); }
// Cost: the fast-math float path uses a rational approximation (cheaper
// still with FMA); the generic path is modeled as exp-based
// (tanh = (e^x - e^-x)/(e^x + e^-x)), hence the added exp cost.
15953 template <typename Scalar>
15954 struct functor_traits<scalar_tanh_op<Scalar> > {
15956 PacketAccess = packet_traits<Scalar>::HasTanh,
15957 Cost = ( (EIGEN_FAST_MATH && is_same<Scalar,float>::value)
15958 #ifdef EIGEN_VECTORIZE_FMA
15959 ? (2 * NumTraits<Scalar>::AddCost +
15960 6 * NumTraits<Scalar>::MulCost +
15961 scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
15963 ? (11 * NumTraits<Scalar>::AddCost +
15964 11 * NumTraits<Scalar>::MulCost +
15965 scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
15967 : (6 * NumTraits<Scalar>::AddCost +
15968 3 * NumTraits<Scalar>::MulCost +
15969 2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
15970 functor_traits<scalar_exp_op<Scalar> >::Cost))
// sinh(a); same pattern as the trig functors above.
15973 template<typename Scalar> struct scalar_sinh_op {
15974 EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)
15975 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sinh(a); }
15976 template <typename Packet>
15977 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
15979 template<typename Scalar>
15980 struct functor_traits<scalar_sinh_op<Scalar> >
15983 Cost = 5 * NumTraits<Scalar>::MulCost,
15984 PacketAccess = packet_traits<Scalar>::HasSinh
// cosh(a)
15987 template<typename Scalar> struct scalar_cosh_op {
15988 EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)
15989 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::cosh(a); }
15990 template <typename Packet>
15991 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
15993 template<typename Scalar>
15994 struct functor_traits<scalar_cosh_op<Scalar> >
15997 Cost = 5 * NumTraits<Scalar>::MulCost,
15998 PacketAccess = packet_traits<Scalar>::HasCosh
// Unary functor computing 1/a; the packet path divides a broadcast 1 by
// the packet, so vectorization is gated on HasDiv.
16001 template<typename Scalar>
16002 struct scalar_inverse_op {
16003 EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
16004 EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
16005 template<typename Packet>
16006 EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
16007 { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
16009 template<typename Scalar>
16010 struct functor_traits<scalar_inverse_op<Scalar> >
16011 { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
// Unary functor computing a*a (one multiply).
16012 template<typename Scalar>
16013 struct scalar_square_op {
16014 EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
16015 EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
16016 template<typename Packet>
16017 EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
16018 { return internal::pmul(a,a); }
16020 template<typename Scalar>
16021 struct functor_traits<scalar_square_op<Scalar> >
16022 { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
// Unary functor computing a*a*a (two multiplies).
16023 template<typename Scalar>
16024 struct scalar_cube_op {
16025 EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
16026 EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
16027 template<typename Packet>
16028 EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
16029 { return internal::pmul(a,pmul(a,a)); }
16031 template<typename Scalar>
16032 struct functor_traits<scalar_cube_op<Scalar> >
16033 { enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
// Rounding unary functors; each delegates to the matching numext routine
// (scalar) and internal packet kernel, gated on its packet_traits flag.
// round(a): round to nearest integer value.
16034 template<typename Scalar> struct scalar_round_op {
16035 EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op)
16036 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); }
16037 template <typename Packet>
16038 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); }
16040 template<typename Scalar>
16041 struct functor_traits<scalar_round_op<Scalar> >
16044 Cost = NumTraits<Scalar>::MulCost,
16045 PacketAccess = packet_traits<Scalar>::HasRound
// floor(a): round toward negative infinity.
16048 template<typename Scalar> struct scalar_floor_op {
16049 EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op)
16050 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); }
16051 template <typename Packet>
16052 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }
16054 template<typename Scalar>
16055 struct functor_traits<scalar_floor_op<Scalar> >
16058 Cost = NumTraits<Scalar>::MulCost,
16059 PacketAccess = packet_traits<Scalar>::HasFloor
// ceil(a): round toward positive infinity.
16062 template<typename Scalar> struct scalar_ceil_op {
16063 EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
16064 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
16065 template <typename Packet>
16066 EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
16068 template<typename Scalar>
16069 struct functor_traits<scalar_ceil_op<Scalar> >
16072 Cost = NumTraits<Scalar>::MulCost,
16073 PacketAccess = packet_traits<Scalar>::HasCeil
// --- Floating-point classification functors ---------------------------------
// Scalar -> bool predicates wrapping numext::isnan/isinf/isfinite. They are
// parenthesized "(numext::isnan)(a)" to defeat any same-named function macro.
// All three are scalar-only: PacketAccess = false in their traits.
16076 template<typename Scalar> struct scalar_isnan_op {
16077 EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)
16078 typedef bool result_type;
16079 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); }
// NOTE(review): struct closers and the "{ enum {" / "}; };" framing of each
// traits specialization below are missing lines in this extract.
16081 template<typename Scalar>
16082 struct functor_traits<scalar_isnan_op<Scalar> >
16085 Cost = NumTraits<Scalar>::MulCost,
16086 PacketAccess = false
// True when the coefficient is +/- infinity.
16089 template<typename Scalar> struct scalar_isinf_op {
16090 EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)
16091 typedef bool result_type;
16092 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); }
16094 template<typename Scalar>
16095 struct functor_traits<scalar_isinf_op<Scalar> >
16098 Cost = NumTraits<Scalar>::MulCost,
16099 PacketAccess = false
// True when the coefficient is neither NaN nor infinite.
16102 template<typename Scalar> struct scalar_isfinite_op {
16103 EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)
16104 typedef bool result_type;
16105 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); }
16107 template<typename Scalar>
16108 struct functor_traits<scalar_isfinite_op<Scalar> >
16111 Cost = NumTraits<Scalar>::MulCost,
16112 PacketAccess = false
// Functor computing the logical negation of a boolean coefficient.
// Note the call operator takes "const bool&" regardless of the Scalar
// template parameter; no packet path is provided (PacketAccess = false).
16115 template<typename Scalar> struct scalar_boolean_not_op {
16116 EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_not_op)
16117 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a) const { return !a; }
// NOTE(review): struct closer (original line 16118) and the traits "enum {"
// opener / closers are elided lines in this extract.
16119 template<typename Scalar>
16120 struct functor_traits<scalar_boolean_not_op<Scalar> > {
16122 Cost = NumTraits<bool>::AddCost,
16123 PacketAccess = false
// --- scalar_sign_op: coefficient-wise sign ----------------------------------
// Primary template dispatches on whether Scalar is complex.
16126 template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op;
// Real case: returns -1, 0 or +1 via the classic branch-free comparison trick
// (a>0) - (a<0), each comparison yielding 0 or 1.
16127 template<typename Scalar>
16128 struct scalar_sign_op<Scalar,false> {
16129 EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
16130 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
16132 return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
// Complex case: returns a/|a| (a point on the unit circle), computed as
// (re(a)*1/|a|, im(a)*1/|a|).
16135 template<typename Scalar>
16136 struct scalar_sign_op<Scalar,true> {
16137 EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
16138 EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
16140 typedef typename NumTraits<Scalar>::Real real_type;
16141 real_type aa = numext::abs(a);
16142 if (aa==real_type(0))
// NOTE(review): the body of the aa==0 branch (original line 16143, presumably
// "return Scalar(0);" to avoid the division below) is missing from this
// extract — confirm against upstream.
16144 aa = real_type(1)/aa;
16145 return Scalar(real(a)*aa, imag(a)*aa );
// Traits: complex sign needs an abs + division (~8 multiplies); real sign is
// two comparisons and a subtraction (~3 additions).
16148 template<typename Scalar>
16149 struct functor_traits<scalar_sign_op<Scalar> >
16152 NumTraits<Scalar>::IsComplex
16153 ? ( 8*NumTraits<Scalar>::MulCost )
16154 : ( 3*NumTraits<Scalar>::AddCost),
16155 PacketAccess = packet_traits<Scalar>::HasSign
// --- scalar_logistic_op: sigmoid 1 / (1 + exp(-x)) --------------------------
// Generic implementation; a faster float specialization follows below.
16159 template <typename T>
16160 struct scalar_logistic_op {
16161 EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
16162 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
16163 const T one = T(1);
16164 return one / (one + numext::exp(-x));
// Vectorized path: same formula built from packet primitives.
16167 template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
16168 Packet packetOp(const Packet& x) const {
16169 const Packet one = pset1<Packet>(T(1));
16170 return pdiv(one, padd(one, pexp(pnegate(x))));
// Vectorizable only when every primitive used above (add, div, negate, exp)
// has packet support.
16173 template <typename T>
16174 struct functor_traits<scalar_logistic_op<T> > {
16176 Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
16177 PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
16178 packet_traits<T>::HasNegate && packet_traits<T>::HasExp
// Float specialization of scalar_logistic_op. The scalar path keeps the exact
// 1/(1+exp(-x)) formula; packetOp instead evaluates a rational (odd/even
// polynomial) minimax-style approximation, avoiding pexp entirely.
// NOTE(review): the "template <>" introducer line (original 16182) is missing
// from this extract.
16183 struct scalar_logistic_op<float> {
16184 EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
16185 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator()(const float& x) const {
16186 const float one = 1.0f;
16187 return one / (one + numext::exp(-x));
16190 template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
16191 Packet packetOp(const Packet& _x) const {
16192 // Clamp the inputs to the range [-18, 18] since anything outside
16193 // this range is 0.0f or 1.0f in single-precision.
// (Double literals below are converted to float by pset1<Packet> — harmless
// for these constants.)
16194 const Packet x = pmax(pmin(_x, pset1<Packet>(18.0)), pset1<Packet>(-18.0));
16196 // The monomial coefficients of the numerator polynomial (odd).
16197 const Packet alpha_1 = pset1<Packet>(2.48287947061529e-01);
16198 const Packet alpha_3 = pset1<Packet>(8.51377133304701e-03);
16199 const Packet alpha_5 = pset1<Packet>(6.08574864600143e-05);
16200 const Packet alpha_7 = pset1<Packet>(1.15627324459942e-07);
16201 const Packet alpha_9 = pset1<Packet>(4.37031012579801e-11);
16203 // The monomial coefficients of the denominator polynomial (even).
16204 const Packet beta_0 = pset1<Packet>(9.93151921023180e-01);
16205 const Packet beta_2 = pset1<Packet>(1.16817656904453e-01);
16206 const Packet beta_4 = pset1<Packet>(1.70198817374094e-03);
16207 const Packet beta_6 = pset1<Packet>(6.29106785017040e-06);
16208 const Packet beta_8 = pset1<Packet>(5.76102136993427e-09);
16209 const Packet beta_10 = pset1<Packet>(6.10247389755681e-13);
16211 // Since the polynomials are odd/even, we need x^2.
16212 const Packet x2 = pmul(x, x);
16214 // Evaluate the numerator polynomial p.
// Horner's scheme in x^2 using fused multiply-add.
16215 Packet p = pmadd(x2, alpha_9, alpha_7);
16216 p = pmadd(x2, p, alpha_5);
16217 p = pmadd(x2, p, alpha_3);
16218 p = pmadd(x2, p, alpha_1);
// NOTE(review): a final "p = pmul(x, p);" making the numerator odd in x
// (original lines ~16219-16220) appears to be missing from this extract —
// confirm against upstream.
16221 // Evaluate the denominator polynomial p.
16222 Packet q = pmadd(x2, beta_10, beta_8);
16223 q = pmadd(x2, q, beta_6);
16224 q = pmadd(x2, q, beta_4);
16225 q = pmadd(x2, q, beta_2);
16226 q = pmadd(x2, q, beta_0);
16228 // Divide the numerator by the denominator and shift it up.
// p/q approximates sigmoid(x) - 0.5; result is clamped to [0, 1].
16229 return pmax(pmin(padd(pdiv(p, q), pset1<Packet>(0.5)), pset1<Packet>(1.0)),
16230 pset1<Packet>(0.0));
16236 // end #include "src/Core/functors/UnaryFunctors.h"
16237 // #include "src/Core/functors/NullaryFunctors.h"
16238 #ifndef EIGEN_NULLARY_FUNCTORS_H
16239 #define EIGEN_NULLARY_FUNCTORS_H
16241 namespace internal {
// Nullary functor returning a fixed value for every coefficient
// (backs MatrixBase::Constant/Zero/Ones). Stores the value, so it has a real
// copy constructor instead of EIGEN_EMPTY_STRUCT_CTOR.
16242 template<typename Scalar>
16243 struct scalar_constant_op {
16244 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
16245 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
16246 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; }
16247 template<typename PacketType>
16248 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1<PacketType>(m_other); }
16249 const Scalar m_other;
// IsRepeatable: calling the functor twice for the same coefficient is safe.
16251 template<typename Scalar>
16252 struct functor_traits<scalar_constant_op<Scalar> >
16253 { enum { Cost = 0 ,
16254 PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
// Binary-indexed nullary functor producing the identity matrix pattern:
// 1 on the diagonal (row==col), 0 elsewhere. No packet path.
16255 template<typename Scalar> struct scalar_identity_op {
16256 EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
16257 template<typename IndexType>
16258 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
// NOTE(review): struct closer (original line 16259) missing from this extract.
16260 template<typename Scalar>
16261 struct functor_traits<scalar_identity_op<Scalar> >
16262 { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
// --- linspaced_op_impl: evenly spaced sequences (LinSpaced) -----------------
// Dispatches on IsInteger; this is the floating-point specialization.
16263 template <typename Scalar, typename Packet, bool IsInteger> struct linspaced_op_impl;
16264 template <typename Scalar, typename Packet>
16265 struct linspaced_op_impl<Scalar,Packet,false>
16267 linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
// m_size1 = num_steps-1; m_step = (high-low)/(num_steps-1); num_steps==1 is
// special-cased to avoid division by zero.
16268 m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
// m_flip: when |high| < |low|, walk backward from high for better accuracy.
16269 m_flip(numext::abs(high)<numext::abs(low))
16271 template<typename IndexType>
16272 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
16273 typedef typename NumTraits<Scalar>::Real RealScalar;
// NOTE(review): the "if(m_flip)" / "else" lines selecting between the two
// returns below (original 16274, 16276) are missing from this extract; the
// first return is the flipped (from-high) branch, the second the normal one.
// The endpoints are returned exactly (i==0 -> low, i==m_size1 -> high).
16275 return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
16277 return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
16279 template<typename IndexType>
16280 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
// Vectorized path: plset produces {i, i+1, ...}; endpoints are patched in
// exactly with pinsertfirst/pinsertlast. The m_flip branch framing lines are
// likewise missing from this extract.
16284 Packet pi = plset<Packet>(Scalar(i-m_size1));
16285 Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
16287 res = pinsertfirst(res, m_low);
16292 Packet pi = plset<Packet>(Scalar(i));
16293 Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
16294 if(i==m_size1-unpacket_traits<Packet>::size+1)
16295 res = pinsertlast(res, m_high);
16299 const Scalar m_low;
16300 const Scalar m_high;
16301 const Index m_size1;
16302 const Scalar m_step;
// Integer specialization of linspaced_op_impl: produces an exact integer
// progression either by multiplying (i*multiplier) or, when the requested
// number of steps exceeds the value range, by dividing (i/divisor) so that
// values repeat evenly.
16305 template <typename Scalar, typename Packet>
16306 struct linspaced_op_impl<Scalar,Packet,true>
16308 linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
// NOTE(review): the "m_low(low)," initializer line (original 16309) is
// missing from this extract.
16310 m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
// Divisor chosen so the sequence lands on high at the last step; the inner
// ?: guards against a zero denominator when the range wraps.
16311 m_divisor(convert_index<Scalar>((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),
16312 m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)<num_steps)
16314 template<typename IndexType>
16315 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
16316 const Scalar operator() (IndexType i) const
16318 if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;
16319 else return m_low + convert_index<Scalar>(i)*m_multiplier;
16321 const Scalar m_low;
16322 const Scalar m_multiplier;
16323 const Scalar m_divisor;
16324 const bool m_use_divisor;
// Public linspaced functor: thin wrapper delegating to the IsInteger-selected
// linspaced_op_impl. Vectorizable only for non-integer scalars with
// SetLinear and Blend packet support.
16326 template <typename Scalar, typename PacketType> struct linspaced_op;
16327 template <typename Scalar, typename PacketType> struct functor_traits< linspaced_op<Scalar,PacketType> >
16332 PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,
16333 IsRepeatable = true
16336 template <typename Scalar, typename PacketType> struct linspaced_op
16338 linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
// num_steps==1 degenerates to a constant sequence equal to high.
16339 : impl((num_steps==1 ? high : low),high,num_steps)
16341 template<typename IndexType>
16342 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); }
16343 template<typename Packet,typename IndexType>
16344 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
16345 const linspaced_op_impl<Scalar,PacketType,NumTraits<Scalar>::IsInteger> impl;
// A nullary functor supports linear (single-index) access unless it exposes a
// binary (row,col) operator, as scalar_identity_op does.
16347 template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
// Workaround for compilers whose SFINAE-based has_*_operator detection is
// unreliable (old MSVC/GCC/ICC): hard-code the answers for the three nullary
// functors defined in this header.
16348 #if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))
// scalar_constant_op: nullary only.
16349 template<typename Scalar,typename IndexType>
16350 struct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };
16351 template<typename Scalar,typename IndexType>
16352 struct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
16353 template<typename Scalar,typename IndexType>
16354 struct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
// scalar_identity_op: binary (row,col) only.
16355 template<typename Scalar,typename IndexType>
16356 struct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
16357 template<typename Scalar,typename IndexType>
16358 struct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
16359 template<typename Scalar,typename IndexType>
16360 struct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };
// linspaced_op: unary (index) only.
16361 template<typename Scalar, typename PacketType,typename IndexType>
16362 struct has_nullary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
16363 template<typename Scalar, typename PacketType,typename IndexType>
16364 struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
16365 template<typename Scalar, typename PacketType,typename IndexType>
16366 struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
// scalar_random_op (declared elsewhere in this file): nullary only.
16367 template<typename Scalar,typename IndexType>
16368 struct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };
16369 template<typename Scalar,typename IndexType>
16370 struct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
16371 template<typename Scalar,typename IndexType>
16372 struct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
16377 // end #include "src/Core/functors/NullaryFunctors.h"
16378 // #include "src/Core/functors/AssignmentFunctors.h"
16379 #ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
16380 #define EIGEN_ASSIGNMENT_FUNCTORS_H
16382 namespace internal {
// --- Assignment functors -----------------------------------------------------
// Each functor provides assignCoeff (scalar a OP= b) and assignPacket
// (load-modify-store with the requested Alignment). They are the kernels used
// by Eigen's expression-assignment machinery (operator=, +=, -=, *=, /=).
16383 template<typename DstScalar,typename SrcScalar> struct assign_op {
16384 EIGEN_EMPTY_STRUCT_CTOR(assign_op)
16385 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
16386 template<int Alignment, typename Packet>
16387 EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
16388 { internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
// Empty specialization: assigning from a void source is meaningless and kept
// only so the type exists (used to disable certain paths).
16390 template<typename DstScalar> struct assign_op<DstScalar,void> {};
// Plain assignment vectorizes only for matching, vectorizable scalar types.
16391 template<typename DstScalar,typename SrcScalar>
16392 struct functor_traits<assign_op<DstScalar,SrcScalar> > {
16394 Cost = NumTraits<DstScalar>::ReadCost,
16395 PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable
// a += b
16398 template<typename DstScalar,typename SrcScalar> struct add_assign_op {
16399 EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
16400 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
16401 template<int Alignment, typename Packet>
16402 EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
16403 { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
16405 template<typename DstScalar,typename SrcScalar>
16406 struct functor_traits<add_assign_op<DstScalar,SrcScalar> > {
16408 Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
16409 PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd
// a -= b
16412 template<typename DstScalar,typename SrcScalar> struct sub_assign_op {
16413 EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
16414 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }
16415 template<int Alignment, typename Packet>
16416 EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
16417 { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
16419 template<typename DstScalar,typename SrcScalar>
16420 struct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {
16422 Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
16423 PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub
// a *= b (SrcScalar defaults to DstScalar here, unlike add/sub above)
16426 template<typename DstScalar, typename SrcScalar=DstScalar>
16427 struct mul_assign_op {
16428 EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
16429 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }
16430 template<int Alignment, typename Packet>
16431 EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
16432 { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
16434 template<typename DstScalar, typename SrcScalar>
16435 struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
16437 Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
16438 PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
// a /= b
16441 template<typename DstScalar, typename SrcScalar=DstScalar> struct div_assign_op {
16442 EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
16443 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; }
16444 template<int Alignment, typename Packet>
16445 EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
16446 { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
16448 template<typename DstScalar, typename SrcScalar>
16449 struct functor_traits<div_assign_op<DstScalar,SrcScalar> > {
16451 Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
16452 PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv
// swap(a,b): note b is received const& and cast away — the "source" is
// mutated, which is unique to this functor.
16455 template<typename Scalar> struct swap_assign_op {
16456 EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
16457 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
// NOTE(review): two alternative bodies survive below (manual three-step swap
// vs std swap); the preprocessor lines selecting between them — presumably a
// CUDA-device guard — are missing from this extract.
16460 Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
16463 swap(a,const_cast<Scalar&>(b));
16467 template<typename Scalar>
16468 struct functor_traits<swap_assign_op<Scalar> > {
16470 Cost = 3 * NumTraits<Scalar>::ReadCost,
16471 PacketAccess = packet_traits<Scalar>::Vectorizable
16478 // #include "src/Core/DenseCoeffsBase.h"
16479 #ifndef EIGEN_DENSECOEFFSBASE_H
16480 #define EIGEN_DENSECOEFFSBASE_H
16482 namespace internal {
// Helper: leave arithmetic types as-is (they are returned by value), but make
// the pointee const for reference/pointer-like types (packet return types).
16483 template<typename T> struct add_const_on_value_type_if_arithmetic
16485 typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
// --- DenseCoeffsBase<Derived,ReadOnlyAccessors> ------------------------------
// Lowest layer of the dense-expression class hierarchy: read-only coefficient
// access (coeff/operator()/operator[]/packet). Internal "coeff" accessors
// skip the eigen_assert range check (only eigen_internal_assert); the public
// operator()/operator[] forms check ranges.
// NOTE(review): many framing lines ("{ public:", closing braces, #if blocks)
// are elided from this extract; tokens below are kept verbatim.
16488 template<typename Derived>
16489 class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
16492 typedef typename internal::traits<Derived>::StorageKind StorageKind;
16493 typedef typename internal::traits<Derived>::Scalar Scalar;
16494 typedef typename internal::packet_traits<Scalar>::type PacketScalar;
// CoeffReturnType is Scalar& for lvalue expressions, by-value (or const
// Scalar) otherwise.
16495 typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
16497 typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
16498 >::type CoeffReturnType;
16499 typedef typename internal::add_const_on_value_type_if_arithmetic<
16500 typename internal::packet_traits<Scalar>::type
16501 >::type PacketReturnType;
16502 typedef EigenBase<Derived> Base;
16506 using Base::derived;
// Map (outer,inner) storage-order coordinates back to a row index, resolving
// at compile time for vectors / known storage order.
16508 EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
16510 return int(Derived::RowsAtCompileTime) == 1 ? 0
16511 : int(Derived::ColsAtCompileTime) == 1 ? inner
16512 : int(Derived::Flags)&RowMajorBit ? outer
// Same mapping for the column index.
16516 EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
16518 return int(Derived::ColsAtCompileTime) == 1 ? 0
16519 : int(Derived::RowsAtCompileTime) == 1 ? inner
16520 : int(Derived::Flags)&RowMajorBit ? inner
// Unchecked (row,col) access via a freshly constructed evaluator.
16524 EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
16526 eigen_internal_assert(row >= 0 && row < rows()
16527 && col >= 0 && col < cols());
16528 return internal::evaluator<Derived>(derived()).coeff(row,col);
16531 EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
16533 return coeff(rowIndexByOuterInner(outer, inner),
16534 colIndexByOuterInner(outer, inner));
// Range-checked (row,col) access.
16537 EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
16539 eigen_assert(row >= 0 && row < rows()
16540 && col >= 0 && col < cols());
16541 return coeff(row, col);
// Unchecked linear access; requires LinearAccessBit on the evaluator.
16544 EIGEN_STRONG_INLINE CoeffReturnType
16545 coeff(Index index) const
16547 EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
16548 THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
16549 eigen_internal_assert(index >= 0 && index < size());
16550 return internal::evaluator<Derived>(derived()).coeff(index);
// operator[] is vectors-only; operator() accepts one index for any linear-
// accessible expression.
16553 EIGEN_STRONG_INLINE CoeffReturnType
16554 operator[](Index index) const
16556 EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
16557 THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
16558 eigen_assert(index >= 0 && index < size());
16559 return coeff(index);
16562 EIGEN_STRONG_INLINE CoeffReturnType
16563 operator()(Index index) const
16565 eigen_assert(index >= 0 && index < size());
16566 return coeff(index);
// Named accessors x()/y()/z()/w(); each y/z/w statically checks the vector is
// long enough. NOTE(review): the bodies of y/z/w (original 16576, 16583,
// 16590) are missing from this extract.
16569 EIGEN_STRONG_INLINE CoeffReturnType
16570 x() const { return (*this)[0]; }
16572 EIGEN_STRONG_INLINE CoeffReturnType
16575 EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
16579 EIGEN_STRONG_INLINE CoeffReturnType
16582 EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
16586 EIGEN_STRONG_INLINE CoeffReturnType
16589 EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
// Packet (SIMD) loads, (row,col) and linear forms.
16592 template<int LoadMode>
16593 EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
16595 typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
16596 eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
16597 return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
16599 template<int LoadMode>
16600 EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const
16602 return packet<LoadMode>(rowIndexByOuterInner(outer, inner),
16603 colIndexByOuterInner(outer, inner));
16605 template<int LoadMode>
16606 EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
16608 EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
16609 THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
16610 typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
16611 eigen_internal_assert(index >= 0 && index < size());
16612 return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
// Deliberately undefined private members: make write/stride operations on a
// read-only base a link/compile error rather than silently resolving.
16616 void coeffRefByOuterInner();
16617 void writePacket();
16618 void writePacketByOuterInner();
16620 void copyCoeffByOuterInner();
16622 void copyPacketByOuterInner();
16624 void innerStride();
16625 void outerStride();
// --- DenseCoeffsBase<Derived,WriteAccessors> ---------------------------------
// Adds mutable coefficient access (coeffRef and non-const operator()/[]/x..w)
// on top of the read-only base. Same checked/unchecked split as the base:
// coeffRef uses eigen_internal_assert, the operator forms use eigen_assert.
16629 template<typename Derived>
16630 class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
16633 typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
16634 typedef typename internal::traits<Derived>::StorageKind StorageKind;
16635 typedef typename internal::traits<Derived>::Scalar Scalar;
16636 typedef typename internal::packet_traits<Scalar>::type PacketScalar;
16637 typedef typename NumTraits<Scalar>::Real RealScalar;
16642 using Base::derived;
16643 using Base::rowIndexByOuterInner;
16644 using Base::colIndexByOuterInner;
// Re-expose the const overloads so they are not hidden by the non-const ones
// declared below.
16645 using Base::operator[];
16646 using Base::operator();
// Unchecked mutable (row,col) access via a freshly constructed evaluator.
16652 EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
16654 eigen_internal_assert(row >= 0 && row < rows()
16655 && col >= 0 && col < cols());
16656 return internal::evaluator<Derived>(derived()).coeffRef(row,col);
16659 EIGEN_STRONG_INLINE Scalar&
16660 coeffRefByOuterInner(Index outer, Index inner)
16662 return coeffRef(rowIndexByOuterInner(outer, inner),
16663 colIndexByOuterInner(outer, inner));
// Range-checked mutable (row,col) access.
16666 EIGEN_STRONG_INLINE Scalar&
16667 operator()(Index row, Index col)
16669 eigen_assert(row >= 0 && row < rows()
16670 && col >= 0 && col < cols());
16671 return coeffRef(row, col);
// Linear mutable access; requires LinearAccessBit.
16674 EIGEN_STRONG_INLINE Scalar&
16675 coeffRef(Index index)
16677 EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
16678 THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
16679 eigen_internal_assert(index >= 0 && index < size());
16680 return internal::evaluator<Derived>(derived()).coeffRef(index);
16683 EIGEN_STRONG_INLINE Scalar&
16684 operator[](Index index)
16686 EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
16687 THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
16688 eigen_assert(index >= 0 && index < size());
16689 return coeffRef(index);
16692 EIGEN_STRONG_INLINE Scalar&
16693 operator()(Index index)
16695 eigen_assert(index >= 0 && index < size());
16696 return coeffRef(index);
// Mutable x()/y()/z()/w(). NOTE(review): as in the read-only base, the y/z/w
// bodies (original 16706, 16713, 16720) are missing from this extract.
16699 EIGEN_STRONG_INLINE Scalar&
16700 x() { return (*this)[0]; }
16702 EIGEN_STRONG_INLINE Scalar&
16705 EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
16709 EIGEN_STRONG_INLINE Scalar&
16712 EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
16716 EIGEN_STRONG_INLINE Scalar&
16719 EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
// --- DenseCoeffsBase<Derived,DirectAccessors> --------------------------------
// Read-only base plus stride information, for expressions with direct
// (pointer + strides) storage access. All strides delegate to Derived.
16723 template<typename Derived>
16724 class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
16727 typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
16728 typedef typename internal::traits<Derived>::Scalar Scalar;
16729 typedef typename NumTraits<Scalar>::Real RealScalar;
16733 using Base::derived;
// Pointer increment between two consecutive coefficients within a slice.
16735 inline Index innerStride() const
16737 return derived().innerStride();
// Pointer increment between two consecutive rows/columns (the outer slices).
16740 inline Index outerStride() const
16742 return derived().outerStride();
// For vectors: the single meaningful stride.
16744 inline Index stride() const
16746 return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
// Row/column strides resolved through the storage order.
16749 inline Index rowStride() const
16751 return Derived::IsRowMajor ? outerStride() : innerStride();
16754 inline Index colStride() const
16756 return Derived::IsRowMajor ? innerStride() : outerStride();
// --- DenseCoeffsBase<Derived,DirectWriteAccessors> ---------------------------
// Writable variant of the direct-access layer: identical stride interface,
// but derives from the WriteAccessors base so coefficients are mutable.
16759 template<typename Derived>
16760 class DenseCoeffsBase<Derived, DirectWriteAccessors>
16761 : public DenseCoeffsBase<Derived, WriteAccessors>
16764 typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
16765 typedef typename internal::traits<Derived>::Scalar Scalar;
16766 typedef typename NumTraits<Scalar>::Real RealScalar;
16770 using Base::derived;
// Stride accessors: same semantics as the DirectAccessors specialization.
16772 inline Index innerStride() const
16774 return derived().innerStride();
16777 inline Index outerStride() const
16779 return derived().outerStride();
16781 inline Index stride() const
16783 return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
16786 inline Index rowStride() const
16788 return Derived::IsRowMajor ? outerStride() : innerStride();
16791 inline Index colStride() const
16793 return Derived::IsRowMajor ? innerStride() : outerStride();
16796 namespace internal {
// --- first_aligned helpers ---------------------------------------------------
// first_aligned_impl: JustReturnZero=true case returns 0 without touching the
// expression (its "return 0;" body line is elided in this extract).
16797 template<int Alignment, typename Derived, bool JustReturnZero>
16798 struct first_aligned_impl
16800 static inline Index run(const Derived&)
// Real case: query the data pointer for the first coefficient whose address
// meets the requested Alignment.
16803 template<int Alignment, typename Derived>
16804 struct first_aligned_impl<Alignment, Derived, false>
16806 static inline Index run(const Derived& m)
16808 return internal::first_aligned<Alignment>(m.data(), m.size());
// Index of the first coefficient of m aligned to Alignment bytes; statically
// short-circuits to 0 when the expression is already sufficiently aligned or
// has no direct access.
16811 template<int Alignment, typename Derived>
16812 static inline Index first_aligned(const DenseBase<Derived>& m)
16814 enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
16815 return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
// Convenience overload using the default packet type's alignment.
16817 template<typename Derived>
16818 static inline Index first_default_aligned(const DenseBase<Derived>& m)
16820 typedef typename Derived::Scalar Scalar;
16821 typedef typename packet_traits<Scalar>::type DefaultPacketType;
16822 return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);
// Compile-time inner/outer strides; the no-direct-access fallbacks' bodies
// (presumably "enum { ret = 0 };", original 16831-16832 / 16841-16842) are
// elided in this extract.
16824 template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
16825 struct inner_stride_at_compile_time
16827 enum { ret = traits<Derived>::InnerStrideAtCompileTime };
16829 template<typename Derived>
16830 struct inner_stride_at_compile_time<Derived, false>
16834 template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
16835 struct outer_stride_at_compile_time
16837 enum { ret = traits<Derived>::OuterStrideAtCompileTime };
16839 template<typename Derived>
16840 struct outer_stride_at_compile_time<Derived, false>
16847 // end #include "src/Core/DenseCoeffsBase.h"
16848 // #include "src/Core/DenseBase.h"
16849 #ifndef EIGEN_DENSEBASE_H
16850 #define EIGEN_DENSEBASE_H
16852 namespace internal {
// Compile-time guard: DenseIndex must be a signed integer type (negative
// indices/strides are used internally). Fails with a readable static assert.
16853 static inline void check_DenseIndex_is_signed() {
16854 EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE);
16857 template<typename Derived> class DenseBase
16858 #ifndef EIGEN_PARSED_BY_DOXYGEN
16859 : public DenseCoeffsBase<Derived>
16861 : public DenseCoeffsBase<Derived,DirectWriteAccessors>
16865 typedef Eigen::InnerIterator<Derived> InnerIterator;
16866 typedef typename internal::traits<Derived>::StorageKind StorageKind;
16867 typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
16868 typedef typename internal::traits<Derived>::Scalar Scalar;
16869 typedef Scalar value_type;
16870 typedef typename NumTraits<Scalar>::Real RealScalar;
16871 typedef DenseCoeffsBase<Derived> Base;
16872 using Base::derived;
16873 using Base::const_cast_derived;
16877 using Base::rowIndexByOuterInner;
16878 using Base::colIndexByOuterInner;
16880 using Base::coeffByOuterInner;
16881 using Base::operator();
16882 using Base::operator[];
16887 using Base::stride;
16888 using Base::innerStride;
16889 using Base::outerStride;
16890 using Base::rowStride;
16891 using Base::colStride;
// NOTE(review): this region is a line-numbered, elided listing of Eigen's
// DenseBase; structural lines (enum header, braces) are absent between the
// embedded numbers, so code is left byte-identical and only comments added.
16892 typedef typename Base::CoeffReturnType CoeffReturnType;
// Compile-time geometry constants, all mirrored from internal::traits<Derived>.
// Dynamic dimensions are encoded by the Eigen `Dynamic` sentinel in the traits.
16894 RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
16895 ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
16896 SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
16897 internal::traits<Derived>::ColsAtCompileTime>::ret),
16898 MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
16899 MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
16900 MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
16901 internal::traits<Derived>::MaxColsAtCompileTime>::ret),
// A vector expression is one whose max row count or max col count is fixed at 1.
16902 IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
16903 || internal::traits<Derived>::MaxColsAtCompileTime == 1,
16904 Flags = internal::traits<Derived>::Flags,
16905 IsRowMajor = int(Flags) & RowMajorBit,
// Inner size = size along the storage-contiguous direction: whole size for
// vectors, else cols for row-major and rows for col-major.
16906 InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
16907 : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
16908 InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
16909 OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
16911 typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;
16912 enum { IsPlainObjectBase = 0 };
// Plain (owning) types with the same scalar, sizes and storage order as Derived.
16913 typedef Matrix<typename internal::traits<Derived>::Scalar,
16914 internal::traits<Derived>::RowsAtCompileTime,
16915 internal::traits<Derived>::ColsAtCompileTime,
16916 AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
16917 internal::traits<Derived>::MaxRowsAtCompileTime,
16918 internal::traits<Derived>::MaxColsAtCompileTime
16920 typedef Array<typename internal::traits<Derived>::Scalar,
16921 internal::traits<Derived>::RowsAtCompileTime,
16922 internal::traits<Derived>::ColsAtCompileTime,
16923 AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
16924 internal::traits<Derived>::MaxRowsAtCompileTime,
16925 internal::traits<Derived>::MaxColsAtCompileTime
// PlainObject selects PlainMatrix for matrix expressions, PlainArray otherwise.
16927 typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
16928 PlainMatrix, PlainArray>::type PlainObject;
// For a dense expression every coefficient counts as "non-zero".
16930 inline Index nonZeros() const { return size(); }
// Number of outer slices: 1 for vectors, else rows for row-major / cols for
// col-major (the dimension orthogonal to storage order).
16932 Index outerSize() const
16934 return IsVectorAtCompileTime ? 1
16935 : int(IsRowMajor) ? this->rows() : this->cols();
// Length of one inner slice: full size for vectors, else cols for row-major /
// rows for col-major.
16938 Index innerSize() const
16940 return IsVectorAtCompileTime ? this->size()
16941 : int(IsRowMajor) ? this->cols() : this->rows();
// DenseBase cannot reallocate: these resize() overloads only assert that the
// requested size already matches, so generic code can call resize() uniformly.
16944 void resize(Index newSize)
16946 EIGEN_ONLY_USED_FOR_DEBUG(newSize);
16947 eigen_assert(newSize == this->size()
16948 && "DenseBase::resize() does not actually allow to resize.");
16951 void resize(Index rows, Index cols)
16953 EIGEN_ONLY_USED_FOR_DEBUG(rows);
16954 EIGEN_ONLY_USED_FOR_DEBUG(cols);
16955 eigen_assert(rows == this->rows() && cols == this->cols()
16956 && "DenseBase::resize() does not actually allow to resize.");
16958 #ifndef EIGEN_PARSED_BY_DOXYGEN
// Expression types returned by the static builders below (declared out of line).
16959 typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
16960 typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
16961 typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
16962 typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
// Assignment operators; definitions live elsewhere in the amalgamation.
16964 template<typename OtherDerived>
16965 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
16966 Derived& operator=(const DenseBase<OtherDerived>& other);
16967 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
16968 Derived& operator=(const DenseBase& other);
16969 template<typename OtherDerived>
16971 Derived& operator=(const EigenBase<OtherDerived> &other);
16972 template<typename OtherDerived>
16974 Derived& operator+=(const EigenBase<OtherDerived> &other);
16975 template<typename OtherDerived>
16977 Derived& operator-=(const EigenBase<OtherDerived> &other);
16978 template<typename OtherDerived>
16980 Derived& operator=(const ReturnByValue<OtherDerived>& func);
// lazyAssign: assignment without the automatic evaluation/aliasing analysis.
16981 template<typename OtherDerived>
16983 Derived& lazyAssign(const DenseBase<OtherDerived>& other);
// Comma-initializer entry points: m << s, ...;
16985 CommaInitializer<Derived> operator<< (const Scalar& s);
// flagged<Added,Removed>(): kept for API compatibility; simply returns derived().
16986 template<unsigned int Added,unsigned int Removed>
16988 const Derived& flagged() const
16989 { return derived(); }
16990 template<typename OtherDerived>
16992 CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
// Transposition: lightweight view types plus an in-place variant.
16993 typedef Transpose<Derived> TransposeReturnType;
16995 TransposeReturnType transpose();
16996 typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
16998 ConstTransposeReturnType transpose() const;
17000 void transposeInPlace();
// Static expression builders. The (rows, cols) / (size) / () overload triple
// covers dynamic 2-D, dynamic 1-D, and fully fixed-size types respectively.
17001 EIGEN_DEVICE_FUNC static const ConstantReturnType
17002 Constant(Index rows, Index cols, const Scalar& value);
17003 EIGEN_DEVICE_FUNC static const ConstantReturnType
17004 Constant(Index size, const Scalar& value);
17005 EIGEN_DEVICE_FUNC static const ConstantReturnType
17006 Constant(const Scalar& value);
// LinSpaced: evenly spaced values in [low, high]; the Sequential_t overloads
// select the sequential-access variant.
17007 EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
17008 LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
17009 EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
17010 LinSpaced(Index size, const Scalar& low, const Scalar& high);
17011 EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
17012 LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
17013 EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
17014 LinSpaced(const Scalar& low, const Scalar& high);
// NullaryExpr: coefficients generated by a user-supplied functor.
17015 template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
17016 static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
17017 NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
17018 template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
17019 static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
17020 NullaryExpr(Index size, const CustomNullaryOp& func);
17021 template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
17022 static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
17023 NullaryExpr(const CustomNullaryOp& func);
17024 EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
17025 EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
17026 EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
17027 EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
17028 EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
17029 EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
// In-place mutators mirroring the builders above; all return *this as Derived&.
17030 EIGEN_DEVICE_FUNC void fill(const Scalar& value);
17031 EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
17032 EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
17033 EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
17034 EIGEN_DEVICE_FUNC Derived& setZero();
17035 EIGEN_DEVICE_FUNC Derived& setOnes();
17036 EIGEN_DEVICE_FUNC Derived& setRandom();
// Fuzzy comparisons; `prec` defaults to the scalar type's dummy_precision().
17037 template<typename OtherDerived> EIGEN_DEVICE_FUNC
17038 bool isApprox(const DenseBase<OtherDerived>& other,
17039 const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
17041 bool isMuchSmallerThan(const RealScalar& other,
17042 const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
17043 template<typename OtherDerived> EIGEN_DEVICE_FUNC
17044 bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
17045 const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
17046 EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
17047 EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
17048 EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
17049 EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
17050 inline bool hasNaN() const;
17051 inline bool allFinite() const;
// Scalar compound assignment.
17052 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
17053 Derived& operator*=(const Scalar& other);
17054 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
17055 Derived& operator/=(const Scalar& other);
// eval(): force evaluation of the expression into internal::eval<Derived>'s
// chosen type (a plain object, or the expression itself when already cheap).
17056 typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
17058 EIGEN_STRONG_INLINE EvalReturnType eval() const
17060 return typename internal::eval<Derived>::type(derived());
// swap with a writable expression; the static assert rejects swapping with a
// read-only (non-lvalue) expression. Delegates to the swap_assign_op kernel.
17062 template<typename OtherDerived>
17064 void swap(const DenseBase<OtherDerived>& other)
17066 EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
17067 eigen_assert(rows()==other.rows() && cols()==other.cols());
17068 call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
// swap with a plain (owning) object; sizes must already agree.
17070 template<typename OtherDerived>
17072 void swap(PlainObjectBase<OtherDerived>& other)
17074 eigen_assert(rows()==other.rows() && cols()==other.cols());
17075 call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
// nestByValue: wrap so the expression is nested by value inside larger trees.
17077 EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
// forceAlignedAccess / forceAlignedAccessIf<Enable>: aligned-access wrappers.
17078 EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
17079 EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
17080 template<bool Enable> EIGEN_DEVICE_FUNC
17081 inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
17082 template<bool Enable> EIGEN_DEVICE_FUNC
17083 inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
// Full reductions (defined out of line).
17084 EIGEN_DEVICE_FUNC Scalar sum() const;
17085 EIGEN_DEVICE_FUNC Scalar mean() const;
17086 EIGEN_DEVICE_FUNC Scalar trace() const;
17087 EIGEN_DEVICE_FUNC Scalar prod() const;
17088 EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
17089 EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
// Variants reporting the location of the extremum via out-parameters.
17090 template<typename IndexType> EIGEN_DEVICE_FUNC
17091 typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
17092 template<typename IndexType> EIGEN_DEVICE_FUNC
17093 typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
17094 template<typename IndexType> EIGEN_DEVICE_FUNC
17095 typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
17096 template<typename IndexType> EIGEN_DEVICE_FUNC
17097 typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
// Generic reduction with a user-supplied binary functor, and coefficient visitor.
17098 template<typename BinaryOp>
17100 Scalar redux(const BinaryOp& func) const;
17101 template<typename Visitor>
17103 void visit(Visitor& func) const;
// Pretty-printing helper: wraps the expression with an IOFormat.
17104 inline const WithFormat<Derived> format(const IOFormat& fmt) const
17106 return WithFormat<Derived>(derived(), fmt);
// value(): the unique coefficient of a 1x1 expression (statically enforced).
17109 CoeffReturnType value() const
17111 EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
17112 eigen_assert(this->rows() == 1 && this->cols() == 1);
17113 return derived().coeff(0,0);
// Boolean reductions.
17115 EIGEN_DEVICE_FUNC bool all() const;
17116 EIGEN_DEVICE_FUNC bool any() const;
17117 EIGEN_DEVICE_FUNC Index count() const;
// Partial (per-row / per-column) reduction proxies.
17118 typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
17119 typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
17120 typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
17121 typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
17122 EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
17123 return ConstRowwiseReturnType(derived());
17125 EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
17126 EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
17127 return ConstColwiseReturnType(derived());
17129 EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
// Random builders (same overload triple as Constant/Zero/Ones above).
17130 typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
17131 static const RandomReturnType Random(Index rows, Index cols);
17132 static const RandomReturnType Random(Index size);
17133 static const RandomReturnType Random();
// Coefficient-wise ternary select: (*this) chooses between then/else operands;
// scalar overloads promote the scalar to a constant expression.
17134 template<typename ThenDerived,typename ElseDerived>
17135 const Select<Derived,ThenDerived,ElseDerived>
17136 select(const DenseBase<ThenDerived>& thenMatrix,
17137 const DenseBase<ElseDerived>& elseMatrix) const;
17138 template<typename ThenDerived>
17139 inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
17140 select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
17141 template<typename ElseDerived>
17142 inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
17143 select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
17144 template<int p> RealScalar lpNorm() const;
// replicate: tile the expression; fixed factors via template args, dynamic below.
17145 template<int RowFactor, int ColFactor>
17147 const Replicate<Derived,RowFactor,ColFactor> replicate() const;
17149 const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
17151 return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
// reverse: view with both directions reversed, plus an in-place variant.
17153 typedef Reverse<Derived, BothDirections> ReverseReturnType;
17154 typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
17155 EIGEN_DEVICE_FUNC ReverseReturnType reverse();
17156 EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
17158 return ConstReverseReturnType(derived());
17160 EIGEN_DEVICE_FUNC void reverseInPlace();
// Inlined "../plugins/BlockMethods.h". The two DOC macros are documentation
// placeholders (expanded empty here) and are #undef'd after the plugin.
17161 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
17162 #define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17163 #define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
17164 // # include "../plugins/BlockMethods.h"
17165 #ifndef EIGEN_PARSED_BY_DOXYGEN
// Block view typedefs. The last Block template argument marks an "inner panel"
// (contiguous in storage): a column is one for col-major (!IsRowMajor), a row
// for row-major (IsRowMajor).
17166 typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr;
17167 typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ConstColXpr;
17168 typedef Block<Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr;
17169 typedef const Block<const Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowXpr;
17170 typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr;
17171 typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ConstColsBlockXpr;
17172 typedef Block<Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr;
17173 typedef const Block<const Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowsBlockXpr;
// Fixed-count panel types, parameterized by N rows/cols.
17174 template<int N> struct NColsBlockXpr { typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
17175 template<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
17176 template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
17177 template<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
// General dynamic block and fixed-size block types.
17178 typedef Block<Derived> BlockXpr;
17179 typedef const Block<const Derived> ConstBlockXpr;
17180 template<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; };
17181 template<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; };
// 1-D segment types (vector expressions only).
17182 typedef VectorBlock<Derived> SegmentReturnType;
17183 typedef const VectorBlock<const Derived> ConstSegmentReturnType;
17184 template<int Size> struct FixedSegmentReturnType { typedef VectorBlock<Derived, Size> Type; };
17185 template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBlock<const Derived, Size> Type; };
// Dynamic-size general block at (startRow, startCol) of blockRows x blockCols.
17187 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17189 inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols)
17191 return BlockXpr(derived(), startRow, startCol, blockRows, blockCols);
17194 inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
17196 return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols);
// Top-right corner: anchored at row 0, column cols() - cCols.
17198 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17200 inline BlockXpr topRightCorner(Index cRows, Index cCols)
17202 return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
17205 inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const
17207 return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
// Fully fixed-size variant: dimensions come from the template arguments.
17209 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17210 template<int CRows, int CCols>
17212 inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner()
17214 return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
17216 template<int CRows, int CCols>
17218 inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const
17220 return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
// Mixed variant: fixed types, runtime sizes (useful when one side is Dynamic).
17222 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17223 template<int CRows, int CCols>
17224 inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols)
17226 return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
17228 template<int CRows, int CCols>
17229 inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const
17231 return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
// Top-left corner: anchored at (0, 0). Same dynamic/fixed/mixed overload set.
17233 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17235 inline BlockXpr topLeftCorner(Index cRows, Index cCols)
17237 return BlockXpr(derived(), 0, 0, cRows, cCols);
17240 inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const
17242 return ConstBlockXpr(derived(), 0, 0, cRows, cCols);
17244 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17245 template<int CRows, int CCols>
17247 inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner()
17249 return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
17251 template<int CRows, int CCols>
17253 inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const
17255 return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
17257 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17258 template<int CRows, int CCols>
17259 inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols)
17261 return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols)
17263 template<int CRows, int CCols>
17264 inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const
17266 return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
// Bottom-right corner: anchored at (rows() - cRows, cols() - cCols).
17268 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17270 inline BlockXpr bottomRightCorner(Index cRows, Index cCols)
17272 return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
17275 inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const
17277 return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
// Fixed-size variant.
17279 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17280 template<int CRows, int CCols>
17282 inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner()
17284 return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
17286 template<int CRows, int CCols>
17288 inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const
17290 return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
// Mixed fixed-type / runtime-size variant.
17292 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17293 template<int CRows, int CCols>
17294 inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols)
17296 return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
17298 template<int CRows, int CCols>
17299 inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const
17301 return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
// Bottom-left corner: anchored at (rows() - cRows, 0); same overload set.
17303 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17305 inline BlockXpr bottomLeftCorner(Index cRows, Index cCols)
17307 return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
17310 inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const
17312 return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
17314 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17315 template<int CRows, int CCols>
17317 inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner()
17319 return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
17321 template<int CRows, int CCols>
17323 inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const
17325 return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
17327 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17328 template<int CRows, int CCols>
17329 inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols)
17331 return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
17333 template<int CRows, int CCols>
17334 inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const
17336 return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
// Row panels: n whole rows starting at the top, bottom, or a given start row.
// Inner panel (contiguous) when the expression is row-major.
17338 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
17340 inline RowsBlockXpr topRows(Index n)
17342 return RowsBlockXpr(derived(), 0, 0, n, cols());
17345 inline ConstRowsBlockXpr topRows(Index n) const
17347 return ConstRowsBlockXpr(derived(), 0, 0, n, cols());
// Fixed-count variants: N from the template argument; runtime n defaults to N.
17349 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
17352 inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
17354 return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
17358 inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
17360 return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
// bottomRows: the last n rows, starting at rows() - n.
17362 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
17364 inline RowsBlockXpr bottomRows(Index n)
17366 return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
17369 inline ConstRowsBlockXpr bottomRows(Index n) const
17371 return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols());
17373 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
17376 inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
17378 return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
17382 inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
17384 return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
// middleRows: n rows starting at startRow.
17386 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
17388 inline RowsBlockXpr middleRows(Index startRow, Index n)
17390 return RowsBlockXpr(derived(), startRow, 0, n, cols());
17393 inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const
17395 return ConstRowsBlockXpr(derived(), startRow, 0, n, cols());
17397 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
17400 inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
17402 return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
17406 inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const
17408 return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
// Column panels: mirror of the row-panel family; inner panel when col-major.
17410 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
17412 inline ColsBlockXpr leftCols(Index n)
17414 return ColsBlockXpr(derived(), 0, 0, rows(), n);
17417 inline ConstColsBlockXpr leftCols(Index n) const
17419 return ConstColsBlockXpr(derived(), 0, 0, rows(), n);
// Fixed-count variants with runtime override defaulting to N.
17421 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
17424 inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
17426 return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
17430 inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
17432 return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
// rightCols: the last n columns, starting at cols() - n.
17434 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
17436 inline ColsBlockXpr rightCols(Index n)
17438 return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
17441 inline ConstColsBlockXpr rightCols(Index n) const
17443 return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n);
17445 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
17448 inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
17450 return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
17454 inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
17456 return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
// middleCols: numCols columns starting at startCol.
17458 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
17460 inline ColsBlockXpr middleCols(Index startCol, Index numCols)
17462 return ColsBlockXpr(derived(), 0, startCol, rows(), numCols);
17465 inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
17467 return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols);
17469 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
17472 inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
17474 return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
17478 inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const
17480 return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
// Fixed-size block<NRows,NCols>(startRow, startCol): compile-time dimensions.
17482 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17483 template<int NRows, int NCols>
17485 inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol)
17487 return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
17489 template<int NRows, int NCols>
17491 inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const
17493 return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
// Mixed variant: fixed block type, runtime sizes.
17495 EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17496 template<int NRows, int NCols>
17497 inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
17498 Index blockRows, Index blockCols)
17500 return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
17502 template<int NRows, int NCols>
17503 inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
17504 Index blockRows, Index blockCols) const
17506 return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
// Single column / single row views.
17508 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
17510 inline ColXpr col(Index i)
17512 return ColXpr(derived(), i);
17515 inline ConstColXpr col(Index i) const
17517 return ConstColXpr(derived(), i);
17519 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
17521 inline RowXpr row(Index i)
17523 return RowXpr(derived(), i);
17526 inline ConstRowXpr row(Index i) const
17528 return ConstRowXpr(derived(), i);
// 1-D accessors, valid only for vector expressions (statically enforced).
// segment(start, n): n coefficients starting at `start`.
17531 inline SegmentReturnType segment(Index start, Index n)
17533 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17534 return SegmentReturnType(derived(), start, n);
17537 inline ConstSegmentReturnType segment(Index start, Index n) const
17539 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17540 return ConstSegmentReturnType(derived(), start, n);
// head(n): the first n coefficients.
17543 inline SegmentReturnType head(Index n)
17545 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17546 return SegmentReturnType(derived(), 0, n);
17549 inline ConstSegmentReturnType head(Index n) const
17551 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17552 return ConstSegmentReturnType(derived(), 0, n);
// tail(n): the last n coefficients, starting at size() - n.
17555 inline SegmentReturnType tail(Index n)
17557 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17558 return SegmentReturnType(derived(), this->size() - n, n);
17561 inline ConstSegmentReturnType tail(Index n) const
17563 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17564 return ConstSegmentReturnType(derived(), this->size() - n, n);
// Fixed-size variants: segment length N at compile time; runtime n defaults to N.
17568 inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
17570 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17571 return typename FixedSegmentReturnType<N>::Type(derived(), start, n);
17575 inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
17577 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17578 return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);
17582 inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
17584 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17585 return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);
17589 inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
17591 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17592 return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);
// Fixed tail passes only the start offset: the length is the compile-time N
// baked into the VectorBlock type (hence the single-argument constructor call).
17596 inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
17598 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17599 return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
17603 inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const
17605 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
17606 return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n);
17608 // end # include "../plugins/BlockMethods.h"
// User extension hook: projects may inject members via EIGEN_DENSEBASE_PLUGIN.
17609 # ifdef EIGEN_DENSEBASE_PLUGIN
17610 # include EIGEN_DENSEBASE_PLUGIN
17612 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
17613 #undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
17614 #undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
// evalTo must never be instantiated for dense objects: the static assert is a
// compile-time trap (the is_same<Dest,void> condition can never hold for a
// real destination type).
17615 template<typename Dest>
17617 inline void evalTo(Dest& ) const
17619 EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
// Default ctor: in internal-debugging builds, statically checks that 1xN /
// Nx1 expressions use the storage order consistent with their vector shape.
17622 EIGEN_DEVICE_FUNC DenseBase()
17624 #ifdef EIGEN_INTERNAL_DEBUGGING
17625 EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
17626 && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
17627 INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
// Declared-but-undefined constructors: block accidental construction from
// sizes or from another expression (DenseBase is an interface base).
17631 EIGEN_DEVICE_FUNC explicit DenseBase(int);
17632 EIGEN_DEVICE_FUNC DenseBase(int,int);
17633 template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
17637 // end #include "src/Core/DenseBase.h"
17638 // #include "src/Core/MatrixBase.h"
17639 #ifndef EIGEN_MATRIXBASE_H
17640 #define EIGEN_MATRIXBASE_H
// MatrixBase: base class for all dense matrix (linear-algebra) expressions.
// NOTE(review): this class continues past the end of this chunk; only its
// leading typedefs and using-declarations are visible here.
17642 template<typename Derived> class MatrixBase
17643 : public DenseBase<Derived>
17646 #ifndef EIGEN_PARSED_BY_DOXYGEN
17647 typedef MatrixBase StorageBaseType;
17648 typedef typename internal::traits<Derived>::StorageKind StorageKind;
17649 typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
17650 typedef typename internal::traits<Derived>::Scalar Scalar;
17651 typedef typename internal::packet_traits<Scalar>::type PacketScalar;
17652 typedef typename NumTraits<Scalar>::Real RealScalar;
17653 typedef DenseBase<Derived> Base;
// Re-export the DenseBase compile-time constants and members into this scope.
17654 using Base::RowsAtCompileTime;
17655 using Base::ColsAtCompileTime;
17656 using Base::SizeAtCompileTime;
17657 using Base::MaxRowsAtCompileTime;
17658 using Base::MaxColsAtCompileTime;
17659 using Base::MaxSizeAtCompileTime;
17660 using Base::IsVectorAtCompileTime;
17662 using Base::derived;
17663 using Base::const_cast_derived;
17668 using Base::coeffRef;
17669 using Base::lazyAssign;
17671 using Base::operator+=;
17672 using Base::operator-=;
17673 using Base::operator*=;
17674 using Base::operator/=;
17675 typedef typename Base::CoeffReturnType CoeffReturnType;
17676 typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
17677 typedef typename Base::RowXpr RowXpr;
17678 typedef typename Base::ColXpr ColXpr;
17680 #ifndef EIGEN_PARSED_BY_DOXYGEN
// Square matrix type whose side is the larger of the two dimensions.
17681 typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),
17682 EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
// Length of the main diagonal: min(rows, cols).
17685 inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
17686 typedef typename Base::PlainObject PlainObject;
17687 #ifndef EIGEN_PARSED_BY_DOXYGEN
17688 typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
// adjoint(): conjugate-transpose for complex scalars, plain transpose otherwise.
17689 typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
17690 CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
17691 ConstTransposeReturnType
17692 >::type AdjointReturnType;
17693 typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
17694 typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
// Type of Unit(i)/UnitX()...: a column sliced out of an identity expression.
17695 typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
17696 internal::traits<Derived>::RowsAtCompileTime,
17697 internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;
// Inlined "../plugins/CommonCwiseUnaryOps.h": coefficient-wise unary views.
17699 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
17700 #define EIGEN_DOC_UNARY_ADDONS(X,Y)
17701 // # include "../plugins/CommonCwiseUnaryOps.h"
17702 #ifndef EIGEN_PARSED_BY_DOXYGEN
// For real scalars conjugate/real are identity views (the conditional picks
// the plain Derived); the Cwise wrappers apply only in the complex case.
17703 typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
17704 const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
17706 >::type ConjugateReturnType;
17707 typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
17708 const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
17710 >::type RealReturnType;
17711 typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
17712 CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
17714 >::type NonConstRealReturnType;
17715 typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;
17716 typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
17717 typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
// Unary minus: lazy coefficient-wise negation view.
17719 EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
17721 inline const NegativeReturnType
17722 operator-() const { return NegativeReturnType(derived()); }
// cast<NewType>(): coefficient-wise scalar conversion view.
17723 template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
17724 EIGEN_DOC_UNARY_ADDONS(cast,conversion function)
17725 template<typename NewType>
17727 typename CastXpr<NewType>::Type
17730 return typename CastXpr<NewType>::Type(derived());
17732 EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
17734 inline ConjugateReturnType
17737 return ConjugateReturnType(derived());
// real()/imag(): views on the real and imaginary parts.
17739 EIGEN_DOC_UNARY_ADDONS(real,real part function)
17741 inline RealReturnType
17742 real() const { return RealReturnType(derived()); }
17743 EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
17745 inline const ImagReturnType
17746 imag() const { return ImagReturnType(derived()); }
// unaryExpr / unaryViewExpr: apply an arbitrary user functor coefficient-wise;
// the View variant yields an lvalue-capable CwiseUnaryView.
17747 EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)
17748 template<typename CustomUnaryOp>
17750 inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
17751 unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
17753 return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
17755 EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)
17756 template<typename CustomViewOp>
17758 inline const CwiseUnaryView<CustomViewOp, const Derived>
17759 unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
17761 return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
// Writable real()/imag() views (complex scalars only for real()'s ref variant).
17763 EIGEN_DOC_UNARY_ADDONS(real,real part function)
17765 inline NonConstRealReturnType
17766 real() { return NonConstRealReturnType(derived()); }
17767 EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
17769 inline NonConstImagReturnType
17770 imag() { return NonConstImagReturnType(derived()); }
17771 // end # include "../plugins/CommonCwiseUnaryOps.h"
// --- Inlined plugin: coefficient-wise binary operators shared by Matrix/Array.
17772 // # include "../plugins/CommonCwiseBinaryOps.h"
// operator+/- between expressions: expanded by macro into lazy CwiseBinaryOp.
17773 EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)
17774 EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)
// binaryExpr: combine two expressions with a user-supplied binary functor.
17775 template<typename CustomBinaryOp, typename OtherDerived>
17777 EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
17778 binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
17780 return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);
// Scalar multiply/divide; the declarations below exist only for documentation
// purposes (the macro generates the real ones).
17782 #ifndef EIGEN_PARSED_BY_DOXYGEN
17783 EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)
17785 template<typename T>
17786 const CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;
17787 template<typename T> friend
17788 const CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);
17790 #ifndef EIGEN_PARSED_BY_DOXYGEN
17791 EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
17793 template<typename T>
17794 const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
// Logical && / || are restricted (by static assert) to bool-valued expressions,
// to avoid silently treating numeric matrices as truth values.
17796 template<typename OtherDerived>
17798 inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
17799 operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
17801 EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
17802 THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
17803 return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
17805 template<typename OtherDerived>
17807 inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
17808 operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
17810 EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
17811 THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
17812 return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
17814 // end # include "../plugins/CommonCwiseBinaryOps.h"
// --- Inlined plugin: matrix-specific coefficient-wise unary operations.
// All return lazy expressions; nothing is evaluated at the call site.
17815 // # include "../plugins/MatrixCwiseUnaryOps.h"
17816 typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
17817 typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
17818 typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
17819 typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
17820 typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
// |a_ij| per coefficient.
17821 EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)
17823 EIGEN_STRONG_INLINE const CwiseAbsReturnType
17824 cwiseAbs() const { return CwiseAbsReturnType(derived()); }
// |a_ij|^2 per coefficient.
17825 EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)
17827 EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
17828 cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
17829 EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)
17831 inline const CwiseSqrtReturnType
17832 cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
17833 EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)
17835 inline const CwiseSignReturnType
17836 cwiseSign() const { return CwiseSignReturnType(derived()); }
// 1/a_ij per coefficient (cwise inverse, not the matrix inverse).
17837 EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)
17839 inline const CwiseInverseReturnType
17840 cwiseInverse() const { return CwiseInverseReturnType(derived()); }
17841 // end # include "../plugins/MatrixCwiseUnaryOps.h"
// --- Inlined plugin: matrix-specific coefficient-wise binary operations.
17842 // # include "../plugins/MatrixCwiseBinaryOps.h"
// Hadamard (element-by-element) product — distinct from operator* which is
// the mathematical matrix product on MatrixBase.
17843 template<typename OtherDerived>
17845 EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
17846 cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
17848 return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
// Per-coefficient equality/inequality tests (exact ==, no tolerance).
17850 template<typename OtherDerived>
17852 inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
17853 cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
17855 return CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
17857 template<typename OtherDerived>
17859 inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
17860 cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
17862 return CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
// Per-coefficient min/max against another expression or a scalar; the scalar
// overloads delegate by building a same-sized Constant expression.
17864 template<typename OtherDerived>
17866 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>
17867 cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
17869 return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
17872 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
17873 cwiseMin(const Scalar &other) const
17875 return cwiseMin(Derived::Constant(rows(), cols(), other));
17877 template<typename OtherDerived>
17879 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>
17880 cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
17882 return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
17885 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
17886 cwiseMax(const Scalar &other) const
17888 return cwiseMax(Derived::Constant(rows(), cols(), other));
// Per-coefficient division.
17890 template<typename OtherDerived>
17892 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
17893 cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
17895 return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
// cwiseEqual against a single scalar, via a cmp_EQ comparison functor.
17897 typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
17899 inline const CwiseScalarEqualReturnType
17900 cwiseEqual(const Scalar& s) const
17902 return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());
17904 // end # include "../plugins/MatrixCwiseBinaryOps.h"
// User extension point: a project can inject extra members into MatrixBase by
// defining EIGEN_MATRIXBASE_PLUGIN to a header path.
17905 # ifdef EIGEN_MATRIXBASE_PLUGIN
17906 # include EIGEN_MATRIXBASE_PLUGIN
17908 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
17909 #undef EIGEN_DOC_UNARY_ADDONS
// --- Assignment operators (definitions live elsewhere in the file).
17910 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
17911 Derived& operator=(const MatrixBase& other);
17912 template <typename OtherDerived>
17913 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
17914 Derived& operator=(const DenseBase<OtherDerived>& other);
17915 template <typename OtherDerived>
17917 Derived& operator=(const EigenBase<OtherDerived>& other);
17918 template<typename OtherDerived>
17920 Derived& operator=(const ReturnByValue<OtherDerived>& other);
17921 template<typename OtherDerived>
17922 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
17923 Derived& operator+=(const MatrixBase<OtherDerived>& other);
17924 template<typename OtherDerived>
17925 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
17926 Derived& operator-=(const MatrixBase<OtherDerived>& other);
// --- Matrix product. One of the two operator* overloads is selected by the
// preprocessor (the surrounding #if lines are elided in this dump): either the
// lazy-product form defined inline here, or the declared Product form.
17928 template<typename OtherDerived>
17930 const Product<Derived,OtherDerived,LazyProduct>
17931 operator*(const MatrixBase<OtherDerived> &other) const
17932 { return this->lazyProduct(other); }
17934 template<typename OtherDerived>
17935 const Product<Derived,OtherDerived>
17936 operator*(const MatrixBase<OtherDerived> &other) const;
// lazyProduct: product expression evaluated coefficient-by-coefficient
// (no temporary), as opposed to the default cache-optimized product.
17938 template<typename OtherDerived>
17940 const Product<Derived,OtherDerived,LazyProduct>
17941 lazyProduct(const MatrixBase<OtherDerived> &other) const;
17942 template<typename OtherDerived>
17943 Derived& operator*=(const EigenBase<OtherDerived>& other);
// In-place application of a transformation from the left/right.
17944 template<typename OtherDerived>
17945 void applyOnTheLeft(const EigenBase<OtherDerived>& other);
17946 template<typename OtherDerived>
17947 void applyOnTheRight(const EigenBase<OtherDerived>& other);
// Product with a diagonal matrix is always lazy (cheap per-coefficient).
17948 template<typename DiagonalDerived>
17950 const Product<Derived, DiagonalDerived, LazyProduct>
17951 operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
// dot(): scalar type is promoted via ScalarBinaryOpTraits of both operands.
17952 template<typename OtherDerived>
17954 typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
17955 dot(const MatrixBase<OtherDerived>& other) const;
// --- Norms and normalization. stableNorm/blueNorm/hypotNorm are
// overflow/underflow-resistant variants of norm().
17956 EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
17957 EIGEN_DEVICE_FUNC RealScalar norm() const;
17958 RealScalar stableNorm() const;
17959 RealScalar blueNorm() const;
17960 RealScalar hypotNorm() const;
17961 EIGEN_DEVICE_FUNC const PlainObject normalized() const;
17962 EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
17963 EIGEN_DEVICE_FUNC void normalize();
17964 EIGEN_DEVICE_FUNC void stableNormalize();
17965 EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
17966 EIGEN_DEVICE_FUNC void adjointInPlace();
// --- Diagonal accessors: main diagonal, compile-time offset, runtime offset.
17967 typedef Diagonal<Derived> DiagonalReturnType;
17969 DiagonalReturnType diagonal();
17970 typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
17972 ConstDiagonalReturnType diagonal() const;
17973 template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
17974 template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
17975 template<int Index>
17977 typename DiagonalIndexReturnType<Index>::Type diagonal();
17978 template<int Index>
17980 typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
17981 typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
17982 typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
17984 DiagonalDynamicIndexReturnType diagonal(Index index);
17986 ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
// --- Triangular and self-adjoint views (Mode/UpLo are compile-time flags).
17987 template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
17988 template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
17989 template<unsigned int Mode>
17991 typename TriangularViewReturnType<Mode>::Type triangularView();
17992 template<unsigned int Mode>
17994 typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
17995 template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
17996 template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
17997 template<unsigned int UpLo>
17999 typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
18000 template<unsigned int UpLo>
18002 typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
// Sparse view: treats coefficients within m_epsilon of m_reference as zeros.
18003 const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
18004 const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
// --- Static generators: identity and canonical basis vectors.
18005 EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
18006 EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
18007 EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
18008 EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
18009 EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
18010 EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
18011 EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
18012 EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
// Wrap a vector as a diagonal matrix / an index vector as a permutation.
18014 const DiagonalWrapper<const Derived> asDiagonal() const;
18015 const PermutationWrapper<const Derived> asPermutation() const;
18017 Derived& setIdentity();
18019 Derived& setIdentity(Index rows, Index cols);
// --- Fuzzy structural predicates; prec is the comparison tolerance.
18020 bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
18021 bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
18022 bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
18023 bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
18024 template<typename OtherDerived>
18025 bool isOrthogonal(const MatrixBase<OtherDerived>& other,
18026 const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
18027 bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
// Exact (non-fuzzy) equality: all/any over the coefficient-wise comparisons.
18028 template<typename OtherDerived>
18029 EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
18030 { return cwiseEqual(other).all(); }
18031 template<typename OtherDerived>
18032 EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
18033 { return cwiseNotEqual(other).any(); }
// noalias(): assignment target wrapper that skips the aliasing temporary.
18034 NoAlias<Derived,Eigen::MatrixBase > noalias();
// forceAlignedAccess[If] are pass-throughs here (no-op on this code path).
18035 inline const Derived& forceAlignedAccess() const { return derived(); }
18036 inline Derived& forceAlignedAccess() { return derived(); }
18037 template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
18038 template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
18039 EIGEN_DEVICE_FUNC Scalar trace() const;
18040 template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
// matrix()/array(): switch between matrix-world and array-world semantics
// without copying (array() wraps the expression).
18041 EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
18042 EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
18043 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
18044 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
18040 template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
18041 EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
18042 EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
18043 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
18044 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
18045 inline const FullPivLU<PlainObject> fullPivLu() const;
18046 inline const PartialPivLU<PlainObject> partialPivLu() const;
18047 inline const PartialPivLU<PlainObject> lu() const;
18048 inline const Inverse<Derived> inverse() const;
18049 template<typename ResultType>
18050 inline void computeInverseAndDetWithCheck(
18051 ResultType& inverse,
18052 typename ResultType::Scalar& determinant,
18054 const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
18056 template<typename ResultType>
18057 inline void computeInverseWithCheck(
18058 ResultType& inverse,
18060 const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
18062 Scalar determinant() const;
18063 inline const LLT<PlainObject> llt() const;
18064 inline const LDLT<PlainObject> ldlt() const;
18065 inline const HouseholderQR<PlainObject> householderQr() const;
18066 inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
18067 inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
18068 inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
18069 inline EigenvaluesReturnType eigenvalues() const;
18070 inline RealScalar operatorNorm() const;
18071 inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
18072 inline BDCSVD<PlainObject> bdcSvd(unsigned int computationOptions = 0) const;
18073 #ifndef EIGEN_PARSED_BY_DOXYGEN
18074 template<typename OtherDerived> struct cross_product_return_type {
18075 typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
18076 typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
18079 template<typename OtherDerived>
18081 #ifndef EIGEN_PARSED_BY_DOXYGEN
18082 inline typename cross_product_return_type<OtherDerived>::type
18086 cross(const MatrixBase<OtherDerived>& other) const;
18087 template<typename OtherDerived>
18089 inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
18091 inline PlainObject unitOrthogonal(void) const;
18093 inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
18094 enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
18095 : ColsAtCompileTime==1 ? Vertical : Horizontal };
18096 typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
18098 inline HomogeneousReturnType homogeneous() const;
18100 SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
18102 typedef Block<const Derived,
18103 internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
18104 internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
18105 typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
18107 inline const HNormalizedReturnType hnormalized() const;
18108 void makeHouseholderInPlace(Scalar& tau, RealScalar& beta);
18109 template<typename EssentialPart>
18110 void makeHouseholder(EssentialPart& essential,
18111 Scalar& tau, RealScalar& beta) const;
18112 template<typename EssentialPart>
18113 void applyHouseholderOnTheLeft(const EssentialPart& essential,
18115 Scalar* workspace);
18116 template<typename EssentialPart>
18117 void applyHouseholderOnTheRight(const EssentialPart& essential,
18119 Scalar* workspace);
18120 template<typename OtherScalar>
18121 void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
18122 template<typename OtherScalar>
18123 void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
18124 template<typename OtherDerived>
18125 EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
18126 cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
18128 return other.cwiseProduct(derived());
18130 typedef typename internal::stem_function<Scalar>::type StemFunction;
18131 const MatrixExponentialReturnValue<Derived> exp() const;
18132 const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
18133 const MatrixFunctionReturnValue<Derived> cosh() const;
18134 const MatrixFunctionReturnValue<Derived> sinh() const;
18135 const MatrixFunctionReturnValue<Derived> cos() const;
18136 const MatrixFunctionReturnValue<Derived> sin() const;
18137 const MatrixSquareRootReturnValue<Derived> sqrt() const;
18138 const MatrixLogarithmReturnValue<Derived> log() const;
18139 const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
18140 const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
18142 EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
18144 EIGEN_DEVICE_FUNC explicit MatrixBase(int);
18145 EIGEN_DEVICE_FUNC MatrixBase(int,int);
18146 template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
18148 template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
18149 {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
18150 template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
18151 {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
// --- Out-of-class definitions. In-place transformation application is
// delegated to the *other* object's applyThisOnTheLeft/Right, so special
// types (permutations, rotations, ...) can implement it efficiently.
// operator*=(EigenBase) == applyOnTheRight: *this = *this * other.
18153 template<typename Derived>
18154 template<typename OtherDerived>
18156 MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
18158 other.derived().applyThisOnTheRight(derived());
18161 template<typename Derived>
18162 template<typename OtherDerived>
18163 inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
18165 other.derived().applyThisOnTheRight(derived());
// applyOnTheLeft: *this = other * *this.
18167 template<typename Derived>
18168 template<typename OtherDerived>
18169 inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
18171 other.derived().applyThisOnTheLeft(derived());
18175 // end #include "src/Core/MatrixBase.h"
18176 // #include "src/Core/EigenBase.h"
18177 #ifndef EIGEN_EIGENBASE_H
18178 #define EIGEN_EIGENBASE_H
// EigenBase<Derived>: CRTP root of the whole expression hierarchy. Anything
// convertible to a dense/sparse object derives from it; derived() casts the
// base reference down to the concrete expression type.
18180 template<typename Derived> struct EigenBase
18182 typedef Eigen::Index Index;
18183 typedef typename internal::traits<Derived>::StorageKind StorageKind;
18185 Derived& derived() { return *static_cast<Derived*>(this); }
18187 const Derived& derived() const { return *static_cast<const Derived*>(this); }
// Escape hatch: strips constness of the derived object (use with care).
18189 inline Derived& const_cast_derived() const
18190 { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
18192 inline const Derived& const_derived() const
18193 { return *static_cast<const Derived*>(this); }
// Geometry queries forwarded to the derived expression.
18195 inline Index rows() const { return derived().rows(); }
18197 inline Index cols() const { return derived().cols(); }
18199 inline Index size() const { return rows() * cols(); }
// evalTo: write this expression into dst (derived classes may specialize).
18200 template<typename Dest>
18202 inline void evalTo(Dest& dst) const
18203 { derived().evalTo(dst); }
// addTo/subTo: generic fallbacks that evaluate into a temporary `res` first;
// the lines that fold `res` into dst are elided in this dump (presumably
// dst += res / dst -= res — confirm against the full header).
18204 template<typename Dest>
18206 inline void addTo(Dest& dst) const
18208 typename Dest::PlainObject res(rows(),cols());
18212 template<typename Dest>
18214 inline void subTo(Dest& dst) const
18216 typename Dest::PlainObject res(rows(),cols());
// applyThisOnTheRight/Left: generic fallbacks using a full product; special
// types override these for cheaper in-place application.
18220 template<typename Dest>
18221 EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
18223 dst = dst * this->derived();
18225 template<typename Dest>
18226 EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
18228 dst = this->derived() * dst;
// DenseBase assignment/compound-assignment from an arbitrary EigenBase:
// all three route through the unified call_assignment mechanism, the
// compound forms passing the appropriate assign-functor.
18231 template<typename Derived>
18232 template<typename OtherDerived>
18234 Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
18236 call_assignment(derived(), other.derived());
18239 template<typename Derived>
18240 template<typename OtherDerived>
18242 Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
18244 call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
18247 template<typename Derived>
18248 template<typename OtherDerived>
18250 Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
18252 call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
18257 // end #include "src/Core/EigenBase.h"
18258 // #include "src/Core/Product.h"
18259 #ifndef EIGEN_PRODUCT_H
18260 #define EIGEN_PRODUCT_H
18262 template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
18263 namespace internal {
// traits of the Product expression: scalar type, storage kind and index type
// are promoted from both operands; sizes come from Lhs rows x Rhs cols.
18264 template<typename Lhs, typename Rhs, int Option>
18265 struct traits<Product<Lhs, Rhs, Option> >
18267 typedef typename remove_all<Lhs>::type LhsCleaned;
18268 typedef typename remove_all<Rhs>::type RhsCleaned;
18269 typedef traits<LhsCleaned> LhsTraits;
18270 typedef traits<RhsCleaned> RhsTraits;
18271 typedef MatrixXpr XprKind;
18272 typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
18273 typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
18274 typename RhsTraits::StorageKind,
18275 internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
18276 typedef typename promote_index_type<typename LhsTraits::StorageIndex,
18277 typename RhsTraits::StorageIndex>::type StorageIndex;
18279 RowsAtCompileTime = LhsTraits::RowsAtCompileTime,
18280 ColsAtCompileTime = RhsTraits::ColsAtCompileTime,
18281 MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
18282 MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
// Inner (contraction) dimension: Lhs cols == Rhs rows.
18283 InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
// Storage-order flag of the result: row vectors are row-major, column
// vectors column-major; otherwise follow an operand with a preference.
18284 Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
18285 : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
18286 : ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
18287 || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
18288 : NoPreferredStorageOrderBit
// Product<Lhs,Rhs,Option>: lazy expression representing lhs * rhs; the base
// class is selected from the promoted storage kind and product category.
18292 template<typename _Lhs, typename _Rhs, int Option>
18293 class Product : public ProductImpl<_Lhs,_Rhs,Option,
18294 typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
18295 typename internal::traits<_Rhs>::StorageKind,
18296 internal::product_type<_Lhs,_Rhs>::ret>::ret>
18301 typedef typename ProductImpl<
18303 typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
18304 typename internal::traits<Rhs>::StorageKind,
18305 internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
18306 EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
// Operands are stored by value or reference as ref_selector decides.
18307 typedef typename internal::ref_selector<Lhs>::type LhsNested;
18308 typedef typename internal::ref_selector<Rhs>::type RhsNested;
18309 typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
18310 typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
// Runtime check that the inner dimensions agree (debug builds only).
18311 EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
18313 eigen_assert(lhs.cols() == rhs.rows()
18314 && "invalid matrix product"
18315 && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
18317 EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
18318 EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
18319 EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
18320 EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
18325 namespace internal {
// dense_product_base: base of dense Product expressions; the primary template
// is just the generic dense xpr base.
18326 template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
18327 class dense_product_base
18328 : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
// Specialization for inner products (1x1 result): adds an implicit
// conversion to Scalar by evaluating coefficient (0,0).
18330 template<typename Lhs, typename Rhs, int Option>
18331 class dense_product_base<Lhs, Rhs, Option, InnerProduct>
18332 : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
18334 typedef Product<Lhs,Rhs,Option> ProductXpr;
18335 typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
18337 using Base::derived;
18338 typedef typename Base::Scalar Scalar;
18339 operator const Scalar() const
18341 return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
// Generic ProductImpl: picks its base from the storage kind.
18345 template<typename Lhs, typename Rhs, int Option, typename StorageKind>
18346 class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
18349 typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
// Dense specialization: coefficient access is only legal for inner products
// (1x1) or lazy products — a cache-optimized product has no cheap coeff().
18351 template<typename Lhs, typename Rhs, int Option>
18352 class ProductImpl<Lhs,Rhs,Option,Dense>
18353 : public internal::dense_product_base<Lhs,Rhs,Option>
18355 typedef Product<Lhs, Rhs, Option> Derived;
18357 typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
18358 EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
// EnableCoeff: statically possible when the product can be 1x1, or always
// for LazyProduct; the runtime assert still checks the actual sizes.
18361 IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) &&
18362 (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
18363 EnableCoeff = IsOneByOne || Option==LazyProduct
18366 EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
18368 EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
18369 eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
// NOTE: a fresh evaluator is built per coefficient access — fine for 1x1 or
// occasional lazy access, expensive if misused in a loop.
18370 return internal::evaluator<Derived>(derived()).coeff(row,col);
18372 EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
18374 EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
18375 eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
18376 return internal::evaluator<Derived>(derived()).coeff(i);
18381 // end #include "src/Core/Product.h"
18382 // #include "src/Core/CoreEvaluators.h"
18383 #ifndef EIGEN_COREEVALUATORS_H
18384 #define EIGEN_COREEVALUATORS_H
18386 namespace internal {
// Map a StorageKind to an evaluator kind; by default everything is
// index-based (coefficients addressed by (row,col)/linear index).
18387 template<typename StorageKind>
18388 struct storage_kind_to_evaluator_kind {
18389 typedef IndexBased Kind;
// Map each storage kind to the "shape" tag used for evaluator dispatch.
18391 template<typename StorageKind> struct storage_kind_to_shape;
18392 template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; };
18393 template<> struct storage_kind_to_shape<SolverStorage> { typedef SolverShape Shape; };
18394 template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; };
18395 template<> struct storage_kind_to_shape<TranspositionsStorage> { typedef TranspositionsShape Shape; };
// Forward declarations of the n-ary evaluator templates, parameterized on
// each operand's kind and scalar so specializations can dispatch on them.
18396 template< typename T,
18397 typename Arg1Kind = typename evaluator_traits<typename T::Arg1>::Kind,
18398 typename Arg2Kind = typename evaluator_traits<typename T::Arg2>::Kind,
18399 typename Arg3Kind = typename evaluator_traits<typename T::Arg3>::Kind,
18400 typename Arg1Scalar = typename traits<typename T::Arg1>::Scalar,
18401 typename Arg2Scalar = typename traits<typename T::Arg2>::Scalar,
18402 typename Arg3Scalar = typename traits<typename T::Arg3>::Scalar> struct ternary_evaluator;
18403 template< typename T,
18404 typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind,
18405 typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind,
18406 typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
18407 typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator;
18408 template< typename T,
18409 typename Kind = typename evaluator_traits<typename T::NestedExpression>::Kind,
18410 typename Scalar = typename T::Scalar> struct unary_evaluator;
// evaluator_traits: per-expression Kind/Shape, derived from the traits'
// StorageKind via the two maps above; customizable by specializing it.
18411 template<typename T>
18412 struct evaluator_traits_base
18414 typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
18415 typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
18417 template<typename T>
18418 struct evaluator_traits : public evaluator_traits_base<T>
// evaluator_assume_aliasing: opt-in flag (default false) telling the
// assignment machinery to assume the expression may alias its destination.
18421 template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
18422 struct evaluator_assume_aliasing {
18423 static const bool value = false;
// evaluator<T>: public entry point; by default forwards to unary_evaluator,
// and const T evaluates exactly like T.
18425 template<typename T>
18426 struct evaluator : public unary_evaluator<T>
18428 typedef unary_evaluator<T> Base;
18429 EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {}
18431 template<typename T>
18432 struct evaluator<const T>
18436 explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
// evaluator_base: common (non-copyable) base carrying the expression traits.
18438 template<typename ExpressionType>
18439 struct evaluator_base : public noncopyable
18441 typedef traits<ExpressionType> ExpressionTraits;
18446 template<typename Derived>
18447 struct evaluator<PlainObjectBase<Derived> >
18448 : evaluator_base<Derived>
18450 typedef PlainObjectBase<Derived> PlainObjectType;
18451 typedef typename PlainObjectType::Scalar Scalar;
18452 typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
18454 IsRowMajor = PlainObjectType::IsRowMajor,
18455 IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,
18456 RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
18457 ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
18458 CoeffReadCost = NumTraits<Scalar>::ReadCost,
18459 Flags = traits<Derived>::EvaluatorFlags,
18460 Alignment = traits<Derived>::Alignment
18462 EIGEN_DEVICE_FUNC evaluator()
18464 m_outerStride(IsVectorAtCompileTime ? 0
18465 : int(IsRowMajor) ? ColsAtCompileTime
18466 : RowsAtCompileTime)
18468 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
18470 EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m)
18471 : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride())
18473 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
18475 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18476 CoeffReturnType coeff(Index row, Index col) const
18479 return m_data[row * m_outerStride.value() + col];
18481 return m_data[row + col * m_outerStride.value()];
18483 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18484 CoeffReturnType coeff(Index index) const
18486 return m_data[index];
18488 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18489 Scalar& coeffRef(Index row, Index col)
18492 return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
18494 return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
18496 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18497 Scalar& coeffRef(Index index)
18499 return const_cast<Scalar*>(m_data)[index];
18501 template<int LoadMode, typename PacketType>
18502 EIGEN_STRONG_INLINE
18503 PacketType packet(Index row, Index col) const
18506 return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col);
18508 return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value());
18510 template<int LoadMode, typename PacketType>
18511 EIGEN_STRONG_INLINE
18512 PacketType packet(Index index) const
18514 return ploadt<PacketType, LoadMode>(m_data + index);
18516 template<int StoreMode,typename PacketType>
18517 EIGEN_STRONG_INLINE
18518 void writePacket(Index row, Index col, const PacketType& x)
18521 return pstoret<Scalar, PacketType, StoreMode>
18522 (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
18524 return pstoret<Scalar, PacketType, StoreMode>
18525 (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
18527 template<int StoreMode, typename PacketType>
18528 EIGEN_STRONG_INLINE
18529 void writePacket(Index index, const PacketType& x)
18531 return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
18534 const Scalar *m_data;
18535 variable_if_dynamic<Index, IsVectorAtCompileTime ? 0
18536 : int(IsRowMajor) ? ColsAtCompileTime
18537 : RowsAtCompileTime> m_outerStride;
18539 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
18540 struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
18541 : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
18543 typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
18544 EIGEN_DEVICE_FUNC evaluator() {}
18545 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
18546 : evaluator<PlainObjectBase<XprType> >(m)
18549 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
18550 struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
18551 : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
18553 typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
18554 EIGEN_DEVICE_FUNC evaluator() {}
18555 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
18556 : evaluator<PlainObjectBase<XprType> >(m)
18559 template<typename ArgType>
18560 struct unary_evaluator<Transpose<ArgType>, IndexBased>
18561 : evaluator_base<Transpose<ArgType> >
18563 typedef Transpose<ArgType> XprType;
18565 CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
18566 Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
18567 Alignment = evaluator<ArgType>::Alignment
18569 EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
18570 typedef typename XprType::Scalar Scalar;
18571 typedef typename XprType::CoeffReturnType CoeffReturnType;
18572 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18573 CoeffReturnType coeff(Index row, Index col) const
18575 return m_argImpl.coeff(col, row);
18577 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18578 CoeffReturnType coeff(Index index) const
18580 return m_argImpl.coeff(index);
18582 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18583 Scalar& coeffRef(Index row, Index col)
18585 return m_argImpl.coeffRef(col, row);
18587 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18588 typename XprType::Scalar& coeffRef(Index index)
18590 return m_argImpl.coeffRef(index);
18592 template<int LoadMode, typename PacketType>
18593 EIGEN_STRONG_INLINE
18594 PacketType packet(Index row, Index col) const
18596 return m_argImpl.template packet<LoadMode,PacketType>(col, row);
18598 template<int LoadMode, typename PacketType>
18599 EIGEN_STRONG_INLINE
18600 PacketType packet(Index index) const
18602 return m_argImpl.template packet<LoadMode,PacketType>(index);
18604 template<int StoreMode, typename PacketType>
18605 EIGEN_STRONG_INLINE
18606 void writePacket(Index row, Index col, const PacketType& x)
18608 m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
18610 template<int StoreMode, typename PacketType>
18611 EIGEN_STRONG_INLINE
18612 void writePacket(Index index, const PacketType& x)
18614 m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
18617 evaluator<ArgType> m_argImpl;
18619 template<typename Scalar,typename NullaryOp,
18620 bool has_nullary = has_nullary_operator<NullaryOp>::value,
18621 bool has_unary = has_unary_operator<NullaryOp>::value,
18622 bool has_binary = has_binary_operator<NullaryOp>::value>
18623 struct nullary_wrapper
18625 template <typename IndexType>
18626 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }
18627 template <typename IndexType>
18628 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
18629 template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }
18630 template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
18632 template<typename Scalar,typename NullaryOp>
18633 struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
18635 template <typename IndexType>
18636 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
18637 template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
18639 template<typename Scalar,typename NullaryOp>
18640 struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
18642 template <typename IndexType>
18643 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
18644 template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
18646 template<typename Scalar,typename NullaryOp>
18647 struct nullary_wrapper<Scalar,NullaryOp,false,true,false>
18649 template <typename IndexType>
18650 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
18651 eigen_assert(i==0 || j==0);
18654 template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
18655 eigen_assert(i==0 || j==0);
18656 return op.template packetOp<T>(i+j);
18658 template <typename IndexType>
18659 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
18660 template <typename T, typename IndexType>
18661 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
18663 template<typename Scalar,typename NullaryOp>
18664 struct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};
#if 0 && EIGEN_COMP_MSVC>0
// Disabled MSVC workaround, kept for reference only (the issue is handled
// elsewhere, in traits<Ref>::match). It re-evaluates the has_*_operator
// traits with a proxy index type to defeat spurious early instantiations of
// those traits, then dispatches to the correctly-specialized wrapper.
template<typename T> struct nullary_wrapper_workaround_msvc {
  nullary_wrapper_workaround_msvc(const T&);
  operator T()const;
};

template<typename Scalar,typename NullaryOp>
struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
{
  template <typename IndexType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
    return nullary_wrapper<Scalar,NullaryOp,
    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);
  }
  template <typename IndexType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {
    return nullary_wrapper<Scalar,NullaryOp,
    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);
  }

  template <typename T, typename IndexType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
    return nullary_wrapper<Scalar,NullaryOp,
    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);
  }
  template <typename T, typename IndexType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {
    return nullary_wrapper<Scalar,NullaryOp,
    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);
  }
};
#endif // MSVC workaround
18703 template<typename NullaryOp, typename PlainObjectType>
18704 struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
18705 : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
18707 typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
18708 typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
18710 CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
18711 Flags = (evaluator<PlainObjectTypeCleaned>::Flags
18713 | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
18714 | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
18715 | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
18716 Alignment = AlignedMax
18718 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
18719 : m_functor(n.functor()), m_wrapper()
18721 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
18723 typedef typename XprType::CoeffReturnType CoeffReturnType;
18724 template <typename IndexType>
18725 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18726 CoeffReturnType coeff(IndexType row, IndexType col) const
18728 return m_wrapper(m_functor, row, col);
18730 template <typename IndexType>
18731 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18732 CoeffReturnType coeff(IndexType index) const
18734 return m_wrapper(m_functor,index);
18736 template<int LoadMode, typename PacketType, typename IndexType>
18737 EIGEN_STRONG_INLINE
18738 PacketType packet(IndexType row, IndexType col) const
18740 return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
18742 template<int LoadMode, typename PacketType, typename IndexType>
18743 EIGEN_STRONG_INLINE
18744 PacketType packet(IndexType index) const
18746 return m_wrapper.template packetOp<PacketType>(m_functor, index);
18749 const NullaryOp m_functor;
18750 const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
18752 template<typename UnaryOp, typename ArgType>
18753 struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
18754 : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> >
18756 typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
18758 CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
18759 Flags = evaluator<ArgType>::Flags
18760 & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
18761 Alignment = evaluator<ArgType>::Alignment
18763 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18764 explicit unary_evaluator(const XprType& op)
18765 : m_functor(op.functor()),
18766 m_argImpl(op.nestedExpression())
18768 EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
18769 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
18771 typedef typename XprType::CoeffReturnType CoeffReturnType;
18772 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18773 CoeffReturnType coeff(Index row, Index col) const
18775 return m_functor(m_argImpl.coeff(row, col));
18777 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18778 CoeffReturnType coeff(Index index) const
18780 return m_functor(m_argImpl.coeff(index));
18782 template<int LoadMode, typename PacketType>
18783 EIGEN_STRONG_INLINE
18784 PacketType packet(Index row, Index col) const
18786 return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
18788 template<int LoadMode, typename PacketType>
18789 EIGEN_STRONG_INLINE
18790 PacketType packet(Index index) const
18792 return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
18795 const UnaryOp m_functor;
18796 evaluator<ArgType> m_argImpl;
18798 template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
18799 struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
18800 : public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
18802 typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
18803 typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
18804 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
18806 template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
18807 struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>
18808 : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
18810 typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
18812 CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
18813 Arg1Flags = evaluator<Arg1>::Flags,
18814 Arg2Flags = evaluator<Arg2>::Flags,
18815 Arg3Flags = evaluator<Arg3>::Flags,
18816 SameType = is_same<typename Arg1::Scalar,typename Arg2::Scalar>::value && is_same<typename Arg1::Scalar,typename Arg3::Scalar>::value,
18817 StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit),
18818 Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & (
18820 | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) &
18821 ( (StorageOrdersAgree ? LinearAccessBit : 0)
18822 | (functor_traits<TernaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
18826 Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit),
18827 Alignment = EIGEN_PLAIN_ENUM_MIN(
18828 EIGEN_PLAIN_ENUM_MIN(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),
18829 evaluator<Arg3>::Alignment)
18831 EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr)
18832 : m_functor(xpr.functor()),
18833 m_arg1Impl(xpr.arg1()),
18834 m_arg2Impl(xpr.arg2()),
18835 m_arg3Impl(xpr.arg3())
18837 EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost);
18838 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
18840 typedef typename XprType::CoeffReturnType CoeffReturnType;
18841 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18842 CoeffReturnType coeff(Index row, Index col) const
18844 return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col));
18846 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18847 CoeffReturnType coeff(Index index) const
18849 return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
18851 template<int LoadMode, typename PacketType>
18852 EIGEN_STRONG_INLINE
18853 PacketType packet(Index row, Index col) const
18855 return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col),
18856 m_arg2Impl.template packet<LoadMode,PacketType>(row, col),
18857 m_arg3Impl.template packet<LoadMode,PacketType>(row, col));
18859 template<int LoadMode, typename PacketType>
18860 EIGEN_STRONG_INLINE
18861 PacketType packet(Index index) const
18863 return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index),
18864 m_arg2Impl.template packet<LoadMode,PacketType>(index),
18865 m_arg3Impl.template packet<LoadMode,PacketType>(index));
18868 const TernaryOp m_functor;
18869 evaluator<Arg1> m_arg1Impl;
18870 evaluator<Arg2> m_arg2Impl;
18871 evaluator<Arg3> m_arg3Impl;
18873 template<typename BinaryOp, typename Lhs, typename Rhs>
18874 struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
18875 : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
18877 typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
18878 typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
18879 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
18881 template<typename BinaryOp, typename Lhs, typename Rhs>
18882 struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>
18883 : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
18885 typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
18887 CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
18888 LhsFlags = evaluator<Lhs>::Flags,
18889 RhsFlags = evaluator<Rhs>::Flags,
18890 SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
18891 StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),
18892 Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
18894 | (int(LhsFlags) & int(RhsFlags) &
18895 ( (StorageOrdersAgree ? LinearAccessBit : 0)
18896 | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
18900 Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
18901 Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
18903 EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
18904 : m_functor(xpr.functor()),
18905 m_lhsImpl(xpr.lhs()),
18906 m_rhsImpl(xpr.rhs())
18908 EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
18909 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
18911 typedef typename XprType::CoeffReturnType CoeffReturnType;
18912 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18913 CoeffReturnType coeff(Index row, Index col) const
18915 return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
18917 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18918 CoeffReturnType coeff(Index index) const
18920 return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
18922 template<int LoadMode, typename PacketType>
18923 EIGEN_STRONG_INLINE
18924 PacketType packet(Index row, Index col) const
18926 return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
18927 m_rhsImpl.template packet<LoadMode,PacketType>(row, col));
18929 template<int LoadMode, typename PacketType>
18930 EIGEN_STRONG_INLINE
18931 PacketType packet(Index index) const
18933 return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
18934 m_rhsImpl.template packet<LoadMode,PacketType>(index));
18937 const BinaryOp m_functor;
18938 evaluator<Lhs> m_lhsImpl;
18939 evaluator<Rhs> m_rhsImpl;
18941 template<typename UnaryOp, typename ArgType>
18942 struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
18943 : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
18945 typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
18947 CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
18948 Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
18951 EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
18952 : m_unaryOp(op.functor()),
18953 m_argImpl(op.nestedExpression())
18955 EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
18956 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
18958 typedef typename XprType::Scalar Scalar;
18959 typedef typename XprType::CoeffReturnType CoeffReturnType;
18960 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18961 CoeffReturnType coeff(Index row, Index col) const
18963 return m_unaryOp(m_argImpl.coeff(row, col));
18965 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18966 CoeffReturnType coeff(Index index) const
18968 return m_unaryOp(m_argImpl.coeff(index));
18970 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18971 Scalar& coeffRef(Index row, Index col)
18973 return m_unaryOp(m_argImpl.coeffRef(row, col));
18975 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
18976 Scalar& coeffRef(Index index)
18978 return m_unaryOp(m_argImpl.coeffRef(index));
18981 const UnaryOp m_unaryOp;
18982 evaluator<ArgType> m_argImpl;
18984 template<typename Derived, typename PlainObjectType>
18985 struct mapbase_evaluator;
18986 template<typename Derived, typename PlainObjectType>
18987 struct mapbase_evaluator : evaluator_base<Derived>
18989 typedef Derived XprType;
18990 typedef typename XprType::PointerType PointerType;
18991 typedef typename XprType::Scalar Scalar;
18992 typedef typename XprType::CoeffReturnType CoeffReturnType;
18994 IsRowMajor = XprType::RowsAtCompileTime,
18995 ColsAtCompileTime = XprType::ColsAtCompileTime,
18996 CoeffReadCost = NumTraits<Scalar>::ReadCost
18998 EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
18999 : m_data(const_cast<PointerType>(map.data())),
19000 m_innerStride(map.innerStride()),
19001 m_outerStride(map.outerStride())
19003 EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
19004 PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
19005 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
19007 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19008 CoeffReturnType coeff(Index row, Index col) const
19010 return m_data[col * colStride() + row * rowStride()];
19012 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19013 CoeffReturnType coeff(Index index) const
19015 return m_data[index * m_innerStride.value()];
19017 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19018 Scalar& coeffRef(Index row, Index col)
19020 return m_data[col * colStride() + row * rowStride()];
19022 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19023 Scalar& coeffRef(Index index)
19025 return m_data[index * m_innerStride.value()];
19027 template<int LoadMode, typename PacketType>
19028 EIGEN_STRONG_INLINE
19029 PacketType packet(Index row, Index col) const
19031 PointerType ptr = m_data + row * rowStride() + col * colStride();
19032 return internal::ploadt<PacketType, LoadMode>(ptr);
19034 template<int LoadMode, typename PacketType>
19035 EIGEN_STRONG_INLINE
19036 PacketType packet(Index index) const
19038 return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
19040 template<int StoreMode, typename PacketType>
19041 EIGEN_STRONG_INLINE
19042 void writePacket(Index row, Index col, const PacketType& x)
19044 PointerType ptr = m_data + row * rowStride() + col * colStride();
19045 return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
19047 template<int StoreMode, typename PacketType>
19048 EIGEN_STRONG_INLINE
19049 void writePacket(Index index, const PacketType& x)
19051 internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
19055 inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
19057 inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
19058 PointerType m_data;
19059 const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
19060 const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
19062 template<typename PlainObjectType, int MapOptions, typename StrideType>
19063 struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
19064 : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
19066 typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
19067 typedef typename XprType::Scalar Scalar;
19068 typedef typename packet_traits<Scalar>::type PacketScalar;
19070 InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
19071 ? int(PlainObjectType::InnerStrideAtCompileTime)
19072 : int(StrideType::InnerStrideAtCompileTime),
19073 OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
19074 ? int(PlainObjectType::OuterStrideAtCompileTime)
19075 : int(StrideType::OuterStrideAtCompileTime),
19076 HasNoInnerStride = InnerStrideAtCompileTime == 1,
19077 HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
19078 HasNoStride = HasNoInnerStride && HasNoOuterStride,
19079 IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
19080 PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit),
19081 LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit),
19082 Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask),
19083 Alignment = int(MapOptions)&int(AlignedMask)
19085 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
19086 : mapbase_evaluator<XprType, PlainObjectType>(map)
19089 template<typename PlainObjectType, int RefOptions, typename StrideType>
19090 struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
19091 : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
19093 typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
19095 Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
19096 Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
19098 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
19099 : mapbase_evaluator<XprType, PlainObjectType>(ref)
19102 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
19103 bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;
19104 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
19105 struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
19106 : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
19108 typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
19109 typedef typename XprType::Scalar Scalar;
19110 typedef typename packet_traits<Scalar>::type PacketScalar;
19112 CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
19113 RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
19114 ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,
19115 MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,
19116 MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,
19117 ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0,
19118 IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
19119 : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
19120 : ArgTypeIsRowMajor,
19121 HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor),
19122 InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
19123 InnerStrideAtCompileTime = HasSameStorageOrderAsArgType
19124 ? int(inner_stride_at_compile_time<ArgType>::ret)
19125 : int(outer_stride_at_compile_time<ArgType>::ret),
19126 OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
19127 ? int(outer_stride_at_compile_time<ArgType>::ret)
19128 : int(inner_stride_at_compile_time<ArgType>::ret),
19129 MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
19130 FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
19131 FlagsRowMajorBit = XprType::Flags&RowMajorBit,
19132 Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
19134 MaskPacketAccessBit),
19135 Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
19136 PacketAlignment = unpacket_traits<PacketScalar>::alignment,
19137 Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
19138 Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
19140 typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
19141 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block)
19143 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
19146 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
19147 struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, false>
19148 : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
19150 typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
19151 EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
19152 : unary_evaluator<XprType>(block)
// Index-based evaluator for Block expressions: every coefficient/packet
// access is forwarded to the nested expression's evaluator, shifted by the
// block's (startRow, startCol) offset.  The offsets are stored in
// variable_if_dynamic so they compile down to the constant 0 when the block
// is known at compile time to start at the beginning of a row/column vector.
19155 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
19156 struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>
19157 : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
19159 typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
19160 EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
19161 : m_argImpl(block.nestedExpression()),
19162 m_startRow(block.startRow()),
19163 m_startCol(block.startCol())
19165 typedef typename XprType::Scalar Scalar;
19166 typedef typename XprType::CoeffReturnType CoeffReturnType;
19168 RowsAtCompileTime = XprType::RowsAtCompileTime
// 2D access: translate block-local (row, col) to the argument's coordinates.
19170 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19171 CoeffReturnType coeff(Index row, Index col) const
19173 return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
// Linear access: a linearly-addressable block is a single row or column, so
// the 1D index is mapped onto the non-unit axis.
19175 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19176 CoeffReturnType coeff(Index index) const
19178 return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
19180 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19181 Scalar& coeffRef(Index row, Index col)
19183 return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
19185 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19186 Scalar& coeffRef(Index index)
19188 return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
// Packet loads/stores are likewise offset-translated and forwarded.
19190 template<int LoadMode, typename PacketType>
19191 EIGEN_STRONG_INLINE
19192 PacketType packet(Index row, Index col) const
19194 return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
19196 template<int LoadMode, typename PacketType>
19197 EIGEN_STRONG_INLINE
19198 PacketType packet(Index index) const
19200 return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
19201 RowsAtCompileTime == 1 ? index : 0);
19203 template<int StoreMode, typename PacketType>
19204 EIGEN_STRONG_INLINE
19205 void writePacket(Index row, Index col, const PacketType& x)
19207 return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
19209 template<int StoreMode, typename PacketType>
19210 EIGEN_STRONG_INLINE
19211 void writePacket(Index index, const PacketType& x)
19213 return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
19214 RowsAtCompileTime == 1 ? index : 0,
// Members: offsets are compile-time 0 when the argument and the block are
// both single-row (resp. single-column), avoiding any runtime storage.
19218 evaluator<ArgType> m_argImpl;
19219 const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
19220 const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
// Direct-access block_evaluator: when the block has direct access to memory
// it is evaluated like a Map, reusing mapbase_evaluator.  The assertion
// checks that the block's data pointer satisfies the alignment this
// evaluator advertises.
19222 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
19223 struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, true>
19224 : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
19225 typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
19227 typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
19228 typedef typename XprType::Scalar Scalar;
19229 EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
19230 : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
19232 eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
// Evaluator for Select expressions (cond.select(then, else)): per-coefficient
// ternary choice between two expressions driven by a condition expression.
// The cost of reading a coefficient is the condition's cost plus the worse of
// the two branches, since either branch may be taken at runtime.
19235 template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
19236 struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
19237 : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
19239 typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
19241 CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
19242 + EIGEN_PLAIN_ENUM_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
19243 evaluator<ElseMatrixType>::CoeffReadCost),
// Only flags common to both branches (restricted to HereditaryBits) survive.
19244 Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
19245 Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
19247 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
19248 : m_conditionImpl(select.conditionMatrix()),
19249 m_thenImpl(select.thenMatrix()),
19250 m_elseImpl(select.elseMatrix())
19252 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
19254 typedef typename XprType::CoeffReturnType CoeffReturnType;
19255 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19256 CoeffReturnType coeff(Index row, Index col) const
19258 if (m_conditionImpl.coeff(row, col))
19259 return m_thenImpl.coeff(row, col);
19261 return m_elseImpl.coeff(row, col);
19263 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19264 CoeffReturnType coeff(Index index) const
19266 if (m_conditionImpl.coeff(index))
19267 return m_thenImpl.coeff(index);
19269 return m_elseImpl.coeff(index);
19272 evaluator<ConditionMatrixType> m_conditionImpl;
19273 evaluator<ThenMatrixType> m_thenImpl;
19274 evaluator<ElseMatrixType> m_elseImpl;
// Evaluator for Replicate expressions (tiling an expression RowFactor x
// ColFactor times).  Coordinates in the replicated expression are folded back
// into the argument via modulo with the argument's run-time rows/cols; the
// modulo is skipped entirely when the corresponding factor is 1 or the
// argument has a single row/column at compile time.
19276 template<typename ArgType, int RowFactor, int ColFactor>
19277 struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
19278 : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> >
19280 typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
19281 typedef typename XprType::CoeffReturnType CoeffReturnType;
19283 Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
// The argument is nested with a cost multiplier of Factor: a heavy argument
// read Factor times is evaluated into a temporary first (nested_eval).
19285 typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;
19286 typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
19288 CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
19289 LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,
19290 Flags = (evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
19291 Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
// NOTE(review): the m_argImpl initializer line is absent from this extract;
// in the original it is initialized from m_arg between these two lines.
19293 EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
19294 : m_arg(replicate.nestedExpression()),
19296 m_rows(replicate.nestedExpression().rows()),
19297 m_cols(replicate.nestedExpression().cols())
19299 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19300 CoeffReturnType coeff(Index row, Index col) const
19302 const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
19303 : RowFactor==1 ? row
19304 : row % m_rows.value();
19305 const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
19306 : ColFactor==1 ? col
19307 : col % m_cols.value();
19308 return m_argImpl.coeff(actual_row, actual_col);
// Linear access is only advertised for vector expressions (see
// LinearAccessMask above), so folding a single index is sufficient here.
19310 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19311 CoeffReturnType coeff(Index index) const
19313 const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
19314 ? (ColFactor==1 ? index : index%m_cols.value())
19315 : (RowFactor==1 ? index : index%m_rows.value());
19316 return m_argImpl.coeff(actual_index);
19318 template<int LoadMode, typename PacketType>
19319 EIGEN_STRONG_INLINE
19320 PacketType packet(Index row, Index col) const
19322 const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
19323 : RowFactor==1 ? row
19324 : row % m_rows.value();
19325 const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
19326 : ColFactor==1 ? col
19327 : col % m_cols.value();
19328 return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);
19330 template<int LoadMode, typename PacketType>
19331 EIGEN_STRONG_INLINE
19332 PacketType packet(Index index) const
19334 const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
19335 ? (ColFactor==1 ? index : index%m_cols.value())
19336 : (RowFactor==1 ? index : index%m_rows.value());
19337 return m_argImpl.template packet<LoadMode,PacketType>(actual_index);
// m_arg keeps the (possibly temporary) nested argument alive for m_argImpl.
19340 const ArgTypeNested m_arg;
19341 evaluator<ArgTypeNestedCleaned> m_argImpl;
19342 const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
19343 const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
19345 template< typename ArgType, typename MemberOp, int Direction>
19346 struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
19347 : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
19349 typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
19350 typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
19351 typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
19352 typedef typename ArgType::Scalar InputScalar;
19353 typedef typename XprType::Scalar Scalar;
19355 TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
19357 typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
19359 CoeffReadCost = TraversalSize==Dynamic ? HugeCost
19360 : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
19361 Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
19364 EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
19365 : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
19367 EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value));
19368 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
19370 typedef typename XprType::CoeffReturnType CoeffReturnType;
19371 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19372 const Scalar coeff(Index i, Index j) const
19374 if (Direction==Vertical)
19375 return m_functor(m_arg.col(j));
19377 return m_functor(m_arg.row(i));
19379 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19380 const Scalar coeff(Index index) const
19382 if (Direction==Vertical)
19383 return m_functor(m_arg.col(index));
19385 return m_functor(m_arg.row(index));
19388 typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
19389 const MemberOp m_functor;
// Common base for evaluators of thin wrapper expressions (ArrayWrapper,
// MatrixWrapper): every access — coeff, coeffRef, packet, writePacket — is
// forwarded verbatim to the wrapped expression's evaluator, and all traits
// (cost, flags, alignment) are inherited unchanged.
19391 template<typename XprType>
19392 struct evaluator_wrapper_base
19393 : evaluator_base<XprType>
19395 typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
19397 CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
19398 Flags = evaluator<ArgType>::Flags,
19399 Alignment = evaluator<ArgType>::Alignment
19401 EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
19402 typedef typename ArgType::Scalar Scalar;
19403 typedef typename ArgType::CoeffReturnType CoeffReturnType;
19404 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19405 CoeffReturnType coeff(Index row, Index col) const
19407 return m_argImpl.coeff(row, col);
19409 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19410 CoeffReturnType coeff(Index index) const
19412 return m_argImpl.coeff(index);
19414 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19415 Scalar& coeffRef(Index row, Index col)
19417 return m_argImpl.coeffRef(row, col);
19419 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19420 Scalar& coeffRef(Index index)
19422 return m_argImpl.coeffRef(index);
19424 template<int LoadMode, typename PacketType>
19425 EIGEN_STRONG_INLINE
19426 PacketType packet(Index row, Index col) const
19428 return m_argImpl.template packet<LoadMode,PacketType>(row, col);
19430 template<int LoadMode, typename PacketType>
19431 EIGEN_STRONG_INLINE
19432 PacketType packet(Index index) const
19434 return m_argImpl.template packet<LoadMode,PacketType>(index);
// NOTE(review): writePacket forwards with only <StoreMode> while packet()
// forwards with <LoadMode,PacketType>; PacketType is deduced from x here.
19436 template<int StoreMode, typename PacketType>
19437 EIGEN_STRONG_INLINE
19438 void writePacket(Index row, Index col, const PacketType& x)
19440 m_argImpl.template writePacket<StoreMode>(row, col, x);
19442 template<int StoreMode, typename PacketType>
19443 EIGEN_STRONG_INLINE
19444 void writePacket(Index index, const PacketType& x)
19446 m_argImpl.template writePacket<StoreMode>(index, x);
19449 evaluator<ArgType> m_argImpl;
// Evaluator for ArrayWrapper: purely a pass-through to the wrapped
// expression's evaluator via evaluator_wrapper_base above.
19451 template<typename TArgType>
19452 struct unary_evaluator<ArrayWrapper<TArgType> >
19453 : evaluator_wrapper_base<ArrayWrapper<TArgType> >
19455 typedef ArrayWrapper<TArgType> XprType;
19456 EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
19457 : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
// Helper selecting between preverse (packet reversal) and a no-op, depending
// on whether packets themselves must be reversed (defined elsewhere).
19460 template<typename PacketType, bool ReversePacket> struct reverse_packet_cond;
// Evaluator for Reverse expressions.  Coefficient (row, col) maps to
// (rows-1-row, cols-1-col) on the reversed axes; m_rows/m_cols are fixed to 1
// on axes that are not reversed so the mirrored formula degenerates to the
// identity there.
19461 template<typename ArgType, int Direction>
19462 struct unary_evaluator<Reverse<ArgType, Direction> >
19463 : evaluator_base<Reverse<ArgType, Direction> >
19465 typedef Reverse<ArgType, Direction> XprType;
19466 typedef typename XprType::Scalar Scalar;
19467 typedef typename XprType::CoeffReturnType CoeffReturnType;
19469 IsRowMajor = XprType::IsRowMajor,
19470 IsColMajor = !IsRowMajor,
19471 ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
19472 ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
// Packets must be internally reversed whenever the reversal runs along the
// storage (inner) direction.
19473 ReversePacket = (Direction == BothDirections)
19474 || ((Direction == Vertical) && IsColMajor)
19475 || ((Direction == Horizontal) && IsRowMajor),
19476 CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
19477 Flags0 = evaluator<ArgType>::Flags,
19478 LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
19479 || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))
19480 ? LinearAccessBit : 0,
19481 Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
19484 EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
19485 : m_argImpl(reverse.nestedExpression()),
19486 m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1),
19487 m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
19489 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19490 CoeffReturnType coeff(Index row, Index col) const
19492 return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
19493 ReverseCol ? m_cols.value() - col - 1 : col);
// Linear access mirrors the flattened index over the whole size; on
// non-reversed axes m_rows/m_cols were initialized to 1 above.
19495 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19496 CoeffReturnType coeff(Index index) const
19498 return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
19500 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19501 Scalar& coeffRef(Index row, Index col)
19503 return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
19504 ReverseCol ? m_cols.value() - col - 1 : col);
19506 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19507 Scalar& coeffRef(Index index)
19509 return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
// Packet access: when reversing along the inner (storage) direction the
// load address must step back a whole packet, hence the PacketSize offset;
// the loaded packet is then element-reversed via reverse_packet_cond.
19511 template<int LoadMode, typename PacketType>
19512 EIGEN_STRONG_INLINE
19513 PacketType packet(Index row, Index col) const
19516 PacketSize = unpacket_traits<PacketType>::size,
19517 OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
19518 OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
19520 typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
19521 return reverse_packet::run(m_argImpl.template packet<LoadMode,PacketType>(
19522 ReverseRow ? m_rows.value() - row - OffsetRow : row,
19523 ReverseCol ? m_cols.value() - col - OffsetCol : col));
19525 template<int LoadMode, typename PacketType>
19526 EIGEN_STRONG_INLINE
19527 PacketType packet(Index index) const
19529 enum { PacketSize = unpacket_traits<PacketType>::size };
19530 return preverse(m_argImpl.template packet<LoadMode,PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
// NOTE(review): the store paths reuse the template parameter name LoadMode
// even though they perform writes — this mirrors the original naming.
19532 template<int LoadMode, typename PacketType>
19533 EIGEN_STRONG_INLINE
19534 void writePacket(Index row, Index col, const PacketType& x)
19537 PacketSize = unpacket_traits<PacketType>::size,
19538 OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
19539 OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
19541 typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
19542 m_argImpl.template writePacket<LoadMode>(
19543 ReverseRow ? m_rows.value() - row - OffsetRow : row,
19544 ReverseCol ? m_cols.value() - col - OffsetCol : col,
19545 reverse_packet::run(x));
19547 template<int LoadMode, typename PacketType>
19548 EIGEN_STRONG_INLINE
19549 void writePacket(Index index, const PacketType& x)
19551 enum { PacketSize = unpacket_traits<PacketType>::size };
19552 m_argImpl.template writePacket<LoadMode>
19553 (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
19556 evaluator<ArgType> m_argImpl;
19557 const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 1> m_rows;
19558 const variable_if_dynamic<Index, ReverseCol ? ArgType::ColsAtCompileTime : 1> m_cols;
// Evaluator for Diagonal expressions: element i of the DiagIndex-th diagonal
// of the argument.  rowOffset/colOffset translate the 1D diagonal index into
// 2D coordinates: a positive index selects a super-diagonal (column shifted
// right), a negative one a sub-diagonal (row shifted down).
19560 template<typename ArgType, int DiagIndex>
19561 struct evaluator<Diagonal<ArgType, DiagIndex> >
19562 : evaluator_base<Diagonal<ArgType, DiagIndex> >
19564 typedef Diagonal<ArgType, DiagIndex> XprType;
19566 CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
19567 Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit,
19570 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
19571 : m_argImpl(diagonal.nestedExpression()),
19572 m_index(diagonal.index())
19574 typedef typename XprType::Scalar Scalar;
19575 typedef typename XprType::CoeffReturnType CoeffReturnType;
// A diagonal is a vector: only the row argument is meaningful in 2D access.
19577 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19578 CoeffReturnType coeff(Index row, Index) const
19579 return m_argImpl.coeff(row + rowOffset(), row + colOffset());
19581 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19582 CoeffReturnType coeff(Index index) const
19584 return m_argImpl.coeff(index + rowOffset(), index + colOffset());
19586 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19587 Scalar& coeffRef(Index row, Index)
19589 return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
19591 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
19592 Scalar& coeffRef(Index index)
19594 return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
19597 evaluator<ArgType> m_argImpl;
// variable_if_dynamicindex stores nothing when DiagIndex is a compile-time
// constant.
19598 const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
19600 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
19601 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
// EvalToTemp<ArgType>: an expression wrapper marking its argument for
// evaluation into a temporary; its traits are simply inherited from the
// argument's traits.  The wrapper stores only a reference to the argument,
// so it must not outlive the wrapped expression.
// NOTE(review): several lines of this class (the class head, constructor
// body, and the rows()/cols() signatures) are absent from this extract.
19603 template<typename ArgType> class EvalToTemp;
19604 template<typename ArgType>
19605 struct traits<EvalToTemp<ArgType> >
19606 : public traits<ArgType>
19608 template<typename ArgType>
19610 : public dense_xpr_base<EvalToTemp<ArgType> >::type
19613 typedef typename dense_xpr_base<EvalToTemp>::type Base;
19614 EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
19615 explicit EvalToTemp(const ArgType& arg)
19618 const ArgType& arg() const
19624 return m_arg.rows();
19628 return m_arg.cols();
19631 const ArgType& m_arg;
// Evaluator for EvalToTemp: eagerly evaluates the argument into m_result (a
// PlainObject) and then behaves as a plain evaluator over that temporary.
// The placement-new re-runs the base-class constructor so its cached pointers
// refer to m_result, which is only constructed after the base subobject.
19633 template<typename ArgType>
19634 struct evaluator<EvalToTemp<ArgType> >
19635 : public evaluator<typename ArgType::PlainObject>
19637 typedef EvalToTemp<ArgType> XprType;
19638 typedef typename ArgType::PlainObject PlainObject;
19639 typedef evaluator<PlainObject> Base;
19640 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
19641 : m_result(xpr.arg())
19643 ::new (static_cast<Base*>(this)) Base(m_result);
// Overload taking the bare argument directly, bypassing the wrapper.
19645 EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)
19648 ::new (static_cast<Base*>(this)) Base(m_result);
19651 PlainObject m_result;
19656 // end #include "src/Core/CoreEvaluators.h"
19657 // #include "src/Core/AssignEvaluator.h"
19658 #ifndef EIGEN_ASSIGN_EVALUATOR_H
19659 #define EIGEN_ASSIGN_EVALUATOR_H
19661 namespace internal {
19662 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
19663 struct copy_using_evaluator_traits
19665 typedef typename DstEvaluator::XprType Dst;
19666 typedef typename Dst::Scalar DstScalar;
19668 DstFlags = DstEvaluator::Flags,
19669 SrcFlags = SrcEvaluator::Flags
19673 DstAlignment = DstEvaluator::Alignment,
19674 SrcAlignment = SrcEvaluator::Alignment,
19675 DstHasDirectAccess = DstFlags & DirectAccessBit,
19676 JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
19680 InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
19681 : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
19682 : int(Dst::RowsAtCompileTime),
19683 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
19684 : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
19685 : int(Dst::MaxRowsAtCompileTime),
19686 OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
19687 MaxSizeAtCompileTime = Dst::SizeAtCompileTime
19689 typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
19690 typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
19692 LinearPacketSize = unpacket_traits<LinearPacketType>::size,
19693 InnerPacketSize = unpacket_traits<InnerPacketType>::size
19697 LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
19698 InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
19702 DstIsRowMajor = DstFlags&RowMajorBit,
19703 SrcIsRowMajor = SrcFlags&RowMajorBit,
19704 StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
19705 MightVectorize = bool(StorageOrdersAgree)
19706 && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
19707 && bool(functor_traits<AssignFunc>::PacketAccess),
19708 MayInnerVectorize = MightVectorize
19709 && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
19710 && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
19711 && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
19712 MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
19713 MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
19714 && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
19715 MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
19716 && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
19720 Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
19721 : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
19722 : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
19723 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
19724 : int(MayLinearize) ? int(LinearTraversal)
19725 : int(DefaultTraversal),
19726 Vectorized = int(Traversal) == InnerVectorizedTraversal
19727 || int(Traversal) == LinearVectorizedTraversal
19728 || int(Traversal) == SliceVectorizedTraversal
19730 typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
19733 ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
19734 : Vectorized ? InnerPacketSize
19736 UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
19737 MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
19738 && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
19739 MayUnrollInner = int(InnerSize) != Dynamic
19740 && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
19744 Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
19746 int(MayUnrollCompletely) ? int(CompleteUnrolling)
19747 : int(MayUnrollInner) ? int(InnerUnrolling)
19750 : int(Traversal) == int(LinearVectorizedTraversal)
19751 ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
19752 ? int(CompleteUnrolling)
19753 : int(NoUnrolling) )
19754 : int(Traversal) == int(LinearTraversal)
19755 ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
19756 : int(NoUnrolling) )
19757 #if EIGEN_UNALIGNED_VECTORIZE
19758 : int(Traversal) == int(SliceVectorizedTraversal)
19759 ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
19760 : int(NoUnrolling) )
19764 #ifdef EIGEN_DEBUG_ASSIGN
19765 static void debug()
19767 std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
19768 std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
19769 std::cerr.setf(std::ios::hex, std::ios::basefield);
19770 std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
19771 std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
19772 std::cerr.unsetf(std::ios::hex);
19773 EIGEN_DEBUG_VAR(DstAlignment)
19774 EIGEN_DEBUG_VAR(SrcAlignment)
19775 EIGEN_DEBUG_VAR(LinearRequiredAlignment)
19776 EIGEN_DEBUG_VAR(InnerRequiredAlignment)
19777 EIGEN_DEBUG_VAR(JointAlignment)
19778 EIGEN_DEBUG_VAR(InnerSize)
19779 EIGEN_DEBUG_VAR(InnerMaxSize)
19780 EIGEN_DEBUG_VAR(LinearPacketSize)
19781 EIGEN_DEBUG_VAR(InnerPacketSize)
19782 EIGEN_DEBUG_VAR(ActualPacketSize)
19783 EIGEN_DEBUG_VAR(StorageOrdersAgree)
19784 EIGEN_DEBUG_VAR(MightVectorize)
19785 EIGEN_DEBUG_VAR(MayLinearize)
19786 EIGEN_DEBUG_VAR(MayInnerVectorize)
19787 EIGEN_DEBUG_VAR(MayLinearVectorize)
19788 EIGEN_DEBUG_VAR(MaySliceVectorize)
19789 std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
19790 EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
19791 EIGEN_DEBUG_VAR(UnrollingLimit)
19792 EIGEN_DEBUG_VAR(MayUnrollCompletely)
19793 EIGEN_DEBUG_VAR(MayUnrollInner)
19794 std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
19795 std::cerr << std::endl;
// Compile-time-unrolled coefficient-wise copy: assigns flat coefficient
// 'Index' (decomposed into outer/inner) then recurses on Index+1; the
// Stop==Index specialization terminates the recursion with a no-op.
19799 template<typename Kernel, int Index, int Stop>
19800 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
19802 typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
19803 typedef typename DstEvaluatorType::XprType DstXprType;
19805 outer = Index / DstXprType::InnerSizeAtCompileTime,
19806 inner = Index % DstXprType::InnerSizeAtCompileTime
19808 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
19810 kernel.assignCoeffByOuterInner(outer, inner);
19811 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
// Recursion terminator.
19814 template<typename Kernel, int Stop>
19815 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
19817 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
// Unrolls only the inner loop: the outer index arrives at runtime, the inner
// index Index_ is a compile-time counter advanced by recursion.
19819 template<typename Kernel, int Index_, int Stop>
19820 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
19822 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
19824 kernel.assignCoeffByOuterInner(outer, Index_);
19825 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
// Recursion terminator.
19828 template<typename Kernel, int Stop>
19829 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
19831 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
// Fully unrolled linear (flat-index) coefficient copy; terminator below.
19833 template<typename Kernel, int Index, int Stop>
19834 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
19836 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
19838 kernel.assignCoeff(Index);
19839 copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
19842 template<typename Kernel, int Stop>
19843 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
19845 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
// Fully unrolled packet-wise copy: assigns one packet at flat index 'Index'
// and recurses with the index advanced by the packet size.
19847 template<typename Kernel, int Index, int Stop>
19848 struct copy_using_evaluator_innervec_CompleteUnrolling
19850 typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
19851 typedef typename DstEvaluatorType::XprType DstXprType;
19852 typedef typename Kernel::PacketType PacketType;
19854 outer = Index / DstXprType::InnerSizeAtCompileTime,
19855 inner = Index % DstXprType::InnerSizeAtCompileTime,
19856 SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
19857 DstAlignment = Kernel::AssignmentTraits::DstAlignment
19859 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
19861 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
19862 enum { NextIndex = Index + unpacket_traits<PacketType>::size };
19863 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
// Recursion terminator.
19866 template<typename Kernel, int Stop>
19867 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
19869 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
// Packet-wise copy with only the inner loop unrolled; the outer index is a
// runtime parameter, Index_ steps by the packet size each recursion.
19871 template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
19872 struct copy_using_evaluator_innervec_InnerUnrolling
19874 typedef typename Kernel::PacketType PacketType;
19875 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
19877 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
19878 enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
19879 copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
// Recursion terminator.
19882 template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
19883 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
19885 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
// Primary template for the assignment loops; specialized below for each
// (Traversal, Unrolling) combination computed by copy_using_evaluator_traits.
19887 template<typename Kernel,
19888 int Traversal = Kernel::AssignmentTraits::Traversal,
19889 int Unrolling = Kernel::AssignmentTraits::Unrolling>
19890 struct dense_assignment_loop;
// Plain double loop over outer/inner dimensions, one coefficient at a time.
19891 template<typename Kernel>
19892 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
19894 EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
19896 for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
19897 for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
19898 kernel.assignCoeffByOuterInner(outer, inner);
// Fully unrolled coefficient-wise assignment (size known at compile time).
19903 template<typename Kernel>
19904 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
19906 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
19908 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
19909 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
// Runtime outer loop, compile-time-unrolled inner loop.
19912 template<typename Kernel>
19913 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
19915 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
19917 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
19918 const Index outerSize = kernel.outerSize();
19919 for(Index outer = 0; outer < outerSize; ++outer)
19920 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
// Scalar head/tail handler for the linear-vectorized path: when the
// destination is already aligned (IsAligned == true, the primary template)
// there is nothing to peel and run() is a no-op; the <false> specialization
// copies coefficients [start, end) one by one.
// NOTE(review): the MSVC-specific non-inlined overload's parameter lines are
// absent from this extract.
19923 template <bool IsAligned = false>
19924 struct unaligned_dense_assignment_loop
19926 template <typename Kernel>
19927 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
19930 struct unaligned_dense_assignment_loop<false>
19932 #if EIGEN_COMP_MSVC
19933 template <typename Kernel>
19934 static EIGEN_DONT_INLINE void run(Kernel &kernel,
19938 template <typename Kernel>
19939 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
19944 for (Index index = start; index < end; ++index)
19945 kernel.assignCoeff(index);
// Linear vectorized assignment: peel unaligned head coefficients with the
// scalar loop, copy the aligned middle with packets, then finish the tail
// with the scalar loop again.
19948 template<typename Kernel>
19949 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
19951 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
19953 const Index size = kernel.size();
19954 typedef typename Kernel::Scalar Scalar;
19955 typedef typename Kernel::PacketType PacketType;
19957 requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
19958 packetSize = unpacket_traits<PacketType>::size,
19959 dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
19960 dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
19961 : int(Kernel::AssignmentTraits::DstAlignment),
19962 srcAlignment = Kernel::AssignmentTraits::JointAlignment
// first_aligned finds where packet-aligned stores can begin.
19964 const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
19965 const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
19966 unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
19967 for(Index index = alignedStart; index < alignedEnd; index += packetSize)
19968 kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
19969 unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
// Linear vectorized traversal with complete unrolling: sizes are known at
// compile time, so both the packet part and the scalar remainder are emitted
// as fully unrolled meta-programs.
19972 template<typename Kernel>
19973 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
19975 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
19977 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
19978 typedef typename Kernel::PacketType PacketType;
// alignedSize = largest multiple of packetSize that is <= size.
19979 enum { size = DstXprType::SizeAtCompileTime,
19980 packetSize =unpacket_traits<PacketType>::size,
19981 alignedSize = (size/packetSize)*packetSize };
// Unrolled packet copies for [0, alignedSize) ...
19982 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
// ... then unrolled scalar copies for the remainder [alignedSize, size).
19983 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
// Inner vectorized traversal, no unrolling: packet loop along the inner
// dimension for every outer slice. The loop has no remainder handling, so the
// traversal-selection logic (elsewhere) must guarantee that innerSize is a
// multiple of packetSize with suitable alignment.
19986 template<typename Kernel>
19987 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
19989 typedef typename Kernel::PacketType PacketType;
// Alignments come straight from the assignment traits.
19991 SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
19992 DstAlignment = Kernel::AssignmentTraits::DstAlignment
19994 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
19996 const Index innerSize = kernel.innerSize();
19997 const Index outerSize = kernel.outerSize();
19998 const Index packetSize = unpacket_traits<PacketType>::size;
// One packet store per inner step.
19999 for(Index outer = 0; outer < outerSize; ++outer)
20000 for(Index inner = 0; inner < innerSize; inner+=packetSize)
20001 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
// Inner vectorized traversal, complete unrolling: one fully unrolled
// packet-copy meta-program over the whole compile-time size.
20004 template<typename Kernel>
20005 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
20007 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
20009 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
20010 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
// Inner vectorized traversal with inner unrolling: runtime loop over the outer
// dimension, fully unrolled packet copies along the (compile-time) inner one.
20013 template<typename Kernel>
20014 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
20016 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
20018 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
20019 typedef typename Kernel::AssignmentTraits Traits;
20020 const Index outerSize = kernel.outerSize();
// Alignment modes are forwarded from the traits into the unroller.
20021 for(Index outer = 0; outer < outerSize; ++outer)
20022 copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
20023 Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
// Plain linear traversal, no vectorization, no unrolling: a simple scalar loop
// over the flattened expression.
20026 template<typename Kernel>
20027 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
20029 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
20031 const Index size = kernel.size();
20032 for(Index i = 0; i < size; ++i)
20033 kernel.assignCoeff(i);
// Linear traversal, complete unrolling: scalar copies emitted by a fully
// unrolled meta-program (size known at compile time).
20036 template<typename Kernel>
20037 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
20039 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
20041 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
20042 copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
// Slice vectorized traversal: the inner size is not a packet multiple (or the
// data is not suitably aligned), so each inner slice is split into a scalar
// head, a packet middle, and a scalar tail.
20045 template<typename Kernel>
20046 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
20048 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
20050 typedef typename Kernel::Scalar Scalar;
20051 typedef typename Kernel::PacketType PacketType;
20053 packetSize = unpacket_traits<PacketType>::size,
20054 requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
// "alignable": the requested alignment can be reached by skipping a whole
// number of scalars at the start of each slice.
20055 alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
20056 dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
20057 dstAlignment = alignable ? int(requestedAlignment)
20058 : int(Kernel::AssignmentTraits::DstAlignment)
20060 const Scalar *dst_ptr = kernel.dstDataPtr();
// If the destination is not even scalar-aligned, vectorization is hopeless:
// fall back to the plain coefficient-wise default loop.
20061 if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
20063 return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
20065 const Index packetAlignedMask = packetSize - 1;
20066 const Index innerSize = kernel.innerSize();
20067 const Index outerSize = kernel.outerSize();
// How far the aligned start shifts from one outer slice to the next,
// determined by the outer stride modulo the packet size.
20068 const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
20069 Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
20070 for(Index outer = 0; outer < outerSize; ++outer)
20072 const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
// Scalar head of this inner slice.
20073 for(Index inner = 0; inner<alignedStart ; ++inner)
20074 kernel.assignCoeffByOuterInner(outer, inner);
// Packet middle; the source is loaded Unaligned since only the destination's
// alignment is tracked here.
20075 for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
20076 kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
// Scalar tail.
20077 for(Index inner = alignedEnd; inner<innerSize ; ++inner)
20078 kernel.assignCoeffByOuterInner(outer, inner);
// Advance the aligned offset for the next slice (clamped to innerSize).
20079 alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
20083 #if EIGEN_UNALIGNED_VECTORIZE
// Slice vectorized traversal with inner unrolling (only compiled when
// unaligned packet access is enabled): per outer slice, unrolled packet copies
// over the largest packet multiple, then unrolled scalar copies for the rest.
20084 template<typename Kernel>
20085 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
20087 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
20089 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
20090 typedef typename Kernel::PacketType PacketType;
20091 enum { size = DstXprType::InnerSizeAtCompileTime,
20092 packetSize =unpacket_traits<PacketType>::size,
20093 vectorizableSize = (size/packetSize)*packetSize };
20094 for(Index outer = 0; outer < kernel.outerSize(); ++outer)
// The two trailing 0 template arguments request unaligned load and store.
20096 copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
20097 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
// Kernel object tying together a destination evaluator, a source evaluator and
// an assignment functor. The dense_assignment_loop implementations above drive
// the whole copy exclusively through this interface (assignCoeff /
// assignPacket plus the size/index helpers).
20102 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
20103 class generic_dense_assignment_kernel
20106 typedef typename DstEvaluatorTypeT::XprType DstXprType;
20107 typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
20109 typedef DstEvaluatorTypeT DstEvaluatorType;
20110 typedef SrcEvaluatorTypeT SrcEvaluatorType;
20111 typedef typename DstEvaluatorType::Scalar Scalar;
20112 typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
20113 typedef typename AssignmentTraits::PacketType PacketType;
// Stores references only: the evaluators, functor and destination expression
// must all outlive the kernel.
20114 EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
20115 : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
20117 #ifdef EIGEN_DEBUG_ASSIGN
20118 AssignmentTraits::debug();
// Geometry accessors, all forwarded to the destination expression.
20121 EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
20122 EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
20123 EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
20124 EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
20125 EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
20126 EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
20127 EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
20128 EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
// Scalar assignment: the functor decides the actual operation (=, +=, ...).
20129 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
20131 m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
// Linear-index variant of the scalar assignment.
20133 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
20135 m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
// Outer/inner variant: translate to (row, col) first.
20137 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
20139 Index row = rowIndexByOuterInner(outer, inner);
20140 Index col = colIndexByOuterInner(outer, inner);
20141 assignCoeff(row, col);
// Packet (SIMD) assignment variants, parameterized on store/load alignment.
20143 template<int StoreMode, int LoadMode, typename PacketType>
20144 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
20146 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
20148 template<int StoreMode, int LoadMode, typename PacketType>
20149 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
20151 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
20153 template<int StoreMode, int LoadMode, typename PacketType>
20154 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
20156 Index row = rowIndexByOuterInner(outer, inner);
20157 Index col = colIndexByOuterInner(outer, inner);
20158 assignPacket<StoreMode,LoadMode,PacketType>(row, col);
// Map (outer, inner) to a row index: collapses the row-/column-vector cases
// and otherwise honors the storage order. (The final ':' branch of the
// conditional chain is on a line missing from this extract.)
20160 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
20162 typedef typename DstEvaluatorType::ExpressionTraits Traits;
20163 return int(Traits::RowsAtCompileTime) == 1 ? 0
20164 : int(Traits::ColsAtCompileTime) == 1 ? inner
20165 : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
// Same mapping for the column index.
20168 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
20170 typedef typename DstEvaluatorType::ExpressionTraits Traits;
20171 return int(Traits::ColsAtCompileTime) == 1 ? 0
20172 : int(Traits::RowsAtCompileTime) == 1 ? inner
20173 : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
// Raw pointer to the destination data, consumed by the alignment logic in the
// vectorized loops above.
20176 EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
20178 return m_dstExpr.data();
20181 DstEvaluatorType& m_dst;
20182 const SrcEvaluatorType& m_src;
20183 const Functor &m_functor;
// The destination expression itself is kept as well: the evaluators alone do
// not expose its geometry and data pointer.
20184 DstXprType& m_dstExpr;
// Generic functor case: resizing is NOT allowed; only assert that the shapes
// already agree (compound assignments like += must never resize).
20186 template<typename DstXprType,typename SrcXprType, typename Functor>
20187 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20188 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &)
// Parameters are only read by the assert; silence unused warnings in release.
20190 EIGEN_ONLY_USED_FOR_DEBUG(dst);
20191 EIGEN_ONLY_USED_FOR_DEBUG(src);
20192 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
// Plain assignment (assign_op): the destination may be resized to match the
// source's geometry before copying.
20194 template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
20195 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20196 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &)
20198 Index dstRows = src.rows();
20199 Index dstCols = src.cols();
20200 if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
20201 dst.resize(dstRows, dstCols);
// resize() can be a no-op (e.g. fixed-size dst); the assert catches mismatches.
20202 eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
// Entry point of the dense copy machinery: build the two evaluators, wrap them
// in a kernel, and let dense_assignment_loop pick the traversal/unrolling.
20204 template<typename DstXprType, typename SrcXprType, typename Functor>
20205 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
20207 typedef evaluator<DstXprType> DstEvaluatorType;
20208 typedef evaluator<SrcXprType> SrcEvaluatorType;
20209 SrcEvaluatorType srcEvaluator(src);
// Order matters: the source evaluator is constructed first, the destination is
// resized next, and only then is the destination evaluator built --
// presumably so a resize cannot invalidate state the src evaluator captured
// from an aliasing expression; TODO(review): confirm against upstream Eigen.
20210 resize_if_allowed(dst, src, func);
20211 DstEvaluatorType dstEvaluator(dst);
20212 typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
20213 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
20214 dense_assignment_loop<Kernel>::run(kernel);
// Convenience overload: plain operator= semantics via a default assign_op.
20216 template<typename DstXprType, typename SrcXprType>
20217 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
20219 call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
// Dispatch machinery: AssignmentKind maps a (dst shape, src shape) pair to a
// tag type. Dense-to-dense gets its own fast path; every other combination
// falls back to the generic EigenBase-to-EigenBase path.
20221 template<typename DstShape, typename SrcShape> struct AssignmentKind;
20222 struct Dense2Dense {};
20223 struct EigenBase2EigenBase {};
20224 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
20225 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
// Primary Assignment template, specialized below per Kind; the trailing
// EnableIf parameter leaves room for SFINAE-based refinements.
20226 template< typename DstXprType, typename SrcXprType, typename Functor,
20227 typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
20228 typename EnableIf = void>
// call_assignment: aliasing-aware entry point. This overload defaults the
// functor to plain assignment.
20230 template<typename Dst, typename Src>
20231 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20232 void call_assignment(Dst& dst, const Src& src)
20234 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
// const-Dst overload -- presumably for expression types that are semantically
// writable yet passed as const (write-through proxies); TODO(review): confirm.
20236 template<typename Dst, typename Src>
20237 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20238 void call_assignment(const Dst& dst, const Src& src)
20240 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
// Aliasing-assumed path: when the source's evaluator declares possible
// aliasing, evaluate the source into a plain temporary first, then assign
// alias-free from the temporary.
20242 template<typename Dst, typename Src, typename Func>
20243 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20244 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
20246 typename plain_matrix_type<Src>::type tmp(src);
20247 call_assignment_no_alias(dst, tmp, func);
// No assumed aliasing: assign directly, no temporary.
20249 template<typename Dst, typename Src, typename Func>
20250 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20251 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
20253 call_assignment_no_alias(dst, src, func);
// dst.noalias() = src: the NoAlias wrapper explicitly opts out of the
// protective temporary, so forward straight to the no-alias path.
20255 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
20256 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20257 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
20259 call_assignment_no_alias(dst.expression(), src, func);
// Core no-alias assignment: optionally view the destination as transposed so
// that a row vector can be assigned from a column-vector expression (and vice
// versa), run the compile-time compatibility checks, then dispatch on the
// Assignment<> specialization selected by shape.
20261 template<typename Dst, typename Src, typename Func>
20262 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20263 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
// Transpose only when exactly one side is a compile-time row vector and the
// other a column vector, and the destination is not 1x1.
20266 NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
20267 || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
20268 ) && int(Dst::SizeAtCompileTime) != 1
20270 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
20271 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
20272 ActualDstType actualDst(dst);
// Static checks: dst must be writable, sizes must be compatible, and the
// functor must accept the (dst scalar, src scalar) pair.
20273 EIGEN_STATIC_ASSERT_LVALUE(Dst)
20274 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
20275 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
20276 Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
// Default-functor overload of call_assignment_no_alias.
20278 template<typename Dst, typename Src>
20279 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20280 void call_assignment_no_alias(Dst& dst, const Src& src)
20282 call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
// Like call_assignment_no_alias but without the vector auto-transpose logic:
// run the same static checks and dispatch directly on Dst as-is.
20284 template<typename Dst, typename Src, typename Func>
20285 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20286 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
20288 EIGEN_STATIC_ASSERT_LVALUE(Dst)
20289 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
20290 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
20291 Assignment<Dst,Src,Func>::run(dst, src, func);
// Default-functor overload of call_assignment_no_alias_no_transpose.
20293 template<typename Dst, typename Src>
20294 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
20295 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
20297 call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
// Forward declaration; the runtime aliasing check is defined elsewhere.
20299 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
// Dense-to-dense Assignment specialization: optional debug-time aliasing
// check, then the dense copy loop.
20300 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
20301 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
20304 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
// The aliasing check is compiled out when EIGEN_NO_DEBUG is defined.
20306 #ifndef EIGEN_NO_DEBUG
20307 internal::check_for_aliasing(dst, src);
20309 call_dense_assignment_loop(dst, src, func);
// Generic EigenBase-to-EigenBase Assignment: resize the destination to the
// source's geometry, then let the source evaluate itself into it.
// NOTE(review): the actual evaluation calls (presumably src.evalTo / addTo /
// subTo) sit on lines missing from this extract -- confirm against upstream.
20312 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
20313 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
// Plain assignment: resize dst before evaluation.
20316 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
20318 Index dstRows = src.rows();
20319 Index dstCols = src.cols();
20320 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
20321 dst.resize(dstRows, dstCols);
20322 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
// Compound add-assignment variant (dst += src); templated on the source
// scalar, presumably so it is only instantiated when actually used.
20325 template<typename SrcScalarType>
20327 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &)
20329 Index dstRows = src.rows();
20330 Index dstCols = src.cols();
20331 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
20332 dst.resize(dstRows, dstCols);
20333 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
// Compound sub-assignment variant (dst -= src).
20336 template<typename SrcScalarType>
20338 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &)
20340 Index dstRows = src.rows();
20341 Index dstCols = src.cols();
20342 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
20343 dst.resize(dstRows, dstCols);
20344 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
20351 // end #include "src/Core/AssignEvaluator.h"
20352 #ifndef EIGEN_PARSED_BY_DOXYGEN
20353 // #include "src/Core/Assign.h"
20354 #ifndef EIGEN_ASSIGN_H
20355 #define EIGEN_ASSIGN_H
// DenseBase::lazyAssign: assignment without aliasing protection (no temporary
// is introduced). Scalar types must match exactly -- mixing types requires an
// explicit cast, enforced by the static assert below.
20357 template<typename Derived>
20358 template<typename OtherDerived>
20359 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
20360 ::lazyAssign(const DenseBase<OtherDerived>& other)
20363 SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
20365 EIGEN_STATIC_ASSERT_LVALUE(Derived)
20366 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
20367 EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
// Runtime shape check, then the alias-free assignment path.
20368 eigen_assert(rows() == other.rows() && cols() == other.cols());
20369 internal::call_assignment_no_alias(derived(),other.derived());
// DenseBase::operator= from another dense expression: full aliasing-aware
// assignment via internal::call_assignment.
20372 template<typename Derived>
20373 template<typename OtherDerived>
20375 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
20377 internal::call_assignment(derived(), other.derived());
// Same-type copy assignment; routed through call_assignment so it shares the
// aliasing/resizing logic with the templated overloads.
20380 template<typename Derived>
20382 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
20384 internal::call_assignment(derived(), other.derived());
// MatrixBase same-type copy assignment; identical forwarding pattern.
20387 template<typename Derived>
20389 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
20391 internal::call_assignment(derived(), other.derived());
// MatrixBase::operator= from an arbitrary dense expression.
20394 template<typename Derived>
20395 template <typename OtherDerived>
20397 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
20399 internal::call_assignment(derived(), other.derived());
// MatrixBase::operator= from a generic EigenBase expression (dispatches to the
// EigenBase2EigenBase Assignment specialization when shapes are not dense).
20402 template<typename Derived>
20403 template <typename OtherDerived>
20405 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
20407 internal::call_assignment(derived(), other.derived());
// MatrixBase::operator= from a ReturnByValue expression: such expressions know
// how to write themselves into a destination, so delegate to evalTo().
20410 template<typename Derived>
20411 template<typename OtherDerived>
20413 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
20415 other.derived().evalTo(derived());
20420 // end #include "src/Core/Assign.h"
20422 // #include "src/Core/ArrayBase.h"
20423 #ifndef EIGEN_ARRAYBASE_H
20424 #define EIGEN_ARRAYBASE_H
20426 template<typename Derived> class ArrayBase
20427 : public DenseBase<Derived>
20430 #ifndef EIGEN_PARSED_BY_DOXYGEN
20431 typedef ArrayBase StorageBaseType;
20432 typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
20433 typedef typename internal::traits<Derived>::StorageKind StorageKind;
20434 typedef typename internal::traits<Derived>::Scalar Scalar;
20435 typedef typename internal::packet_traits<Scalar>::type PacketScalar;
20436 typedef typename NumTraits<Scalar>::Real RealScalar;
20437 typedef DenseBase<Derived> Base;
20438 using Base::RowsAtCompileTime;
20439 using Base::ColsAtCompileTime;
20440 using Base::SizeAtCompileTime;
20441 using Base::MaxRowsAtCompileTime;
20442 using Base::MaxColsAtCompileTime;
20443 using Base::MaxSizeAtCompileTime;
20444 using Base::IsVectorAtCompileTime;
20446 using Base::derived;
20447 using Base::const_cast_derived;
20452 using Base::coeffRef;
20453 using Base::lazyAssign;
20454 using Base::operator=;
20455 using Base::operator+=;
20456 using Base::operator-=;
20457 using Base::operator*=;
20458 using Base::operator/=;
20459 typedef typename Base::CoeffReturnType CoeffReturnType;
20461 #ifndef EIGEN_PARSED_BY_DOXYGEN
20462 typedef typename Base::PlainObject PlainObject;
20463 typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
20465 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
20466 #define EIGEN_DOC_UNARY_ADDONS(X,Y)
20467 // # include "../plugins/CommonCwiseUnaryOps.h"
20468 #ifndef EIGEN_PARSED_BY_DOXYGEN
20469 typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
20470 const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
20472 >::type ConjugateReturnType;
20473 typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
20474 const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
20476 >::type RealReturnType;
20477 typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
20478 CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
20480 >::type NonConstRealReturnType;
20481 typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;
20482 typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
20483 typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
20485 EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
20487 inline const NegativeReturnType
20488 operator-() const { return NegativeReturnType(derived()); }
20489 template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
20490 EIGEN_DOC_UNARY_ADDONS(cast,conversion function)
20491 template<typename NewType>
20493 typename CastXpr<NewType>::Type
20496 return typename CastXpr<NewType>::Type(derived());
20498 EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
20500 inline ConjugateReturnType
20503 return ConjugateReturnType(derived());
20505 EIGEN_DOC_UNARY_ADDONS(real,real part function)
20507 inline RealReturnType
20508 real() const { return RealReturnType(derived()); }
20509 EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
20511 inline const ImagReturnType
20512 imag() const { return ImagReturnType(derived()); }
20513 EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)
20514 template<typename CustomUnaryOp>
20516 inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
20517 unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
20519 return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
20521 EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)
20522 template<typename CustomViewOp>
20524 inline const CwiseUnaryView<CustomViewOp, const Derived>
20525 unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
20527 return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
20529 EIGEN_DOC_UNARY_ADDONS(real,real part function)
20531 inline NonConstRealReturnType
20532 real() { return NonConstRealReturnType(derived()); }
20533 EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
20535 inline NonConstImagReturnType
20536 imag() { return NonConstImagReturnType(derived()); }
20537 // end # include "../plugins/CommonCwiseUnaryOps.h"
20538 // # include "../plugins/MatrixCwiseUnaryOps.h"
20539 typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
20540 typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
20541 typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
20542 typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
20543 typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
20544 EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)
20546 EIGEN_STRONG_INLINE const CwiseAbsReturnType
20547 cwiseAbs() const { return CwiseAbsReturnType(derived()); }
20548 EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)
20550 EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
20551 cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
20552 EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)
20554 inline const CwiseSqrtReturnType
20555 cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
20556 EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)
20558 inline const CwiseSignReturnType
20559 cwiseSign() const { return CwiseSignReturnType(derived()); }
20560 EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)
20562 inline const CwiseInverseReturnType
20563 cwiseInverse() const { return CwiseInverseReturnType(derived()); }
20564 // end # include "../plugins/MatrixCwiseUnaryOps.h"
20565 // # include "../plugins/ArrayCwiseUnaryOps.h"
20566 typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> AbsReturnType;
20567 typedef CwiseUnaryOp<internal::scalar_arg_op<Scalar>, const Derived> ArgReturnType;
20568 typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> Abs2ReturnType;
20569 typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> SqrtReturnType;
20570 typedef CwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived> RsqrtReturnType;
20571 typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> SignReturnType;
20572 typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> InverseReturnType;
20573 typedef CwiseUnaryOp<internal::scalar_boolean_not_op<Scalar>, const Derived> BooleanNotReturnType;
20574 typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType;
20575 typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType;
20576 typedef CwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> Log1pReturnType;
20577 typedef CwiseUnaryOp<internal::scalar_log10_op<Scalar>, const Derived> Log10ReturnType;
20578 typedef CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived> CosReturnType;
20579 typedef CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived> SinReturnType;
20580 typedef CwiseUnaryOp<internal::scalar_tan_op<Scalar>, const Derived> TanReturnType;
20581 typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturnType;
20582 typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;
20583 typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
20584 typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
20585 typedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType;
20586 typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
20587 typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
20588 typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
20589 typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
20590 typedef CwiseUnaryOp<internal::scalar_round_op<Scalar>, const Derived> RoundReturnType;
20591 typedef CwiseUnaryOp<internal::scalar_floor_op<Scalar>, const Derived> FloorReturnType;
20592 typedef CwiseUnaryOp<internal::scalar_ceil_op<Scalar>, const Derived> CeilReturnType;
20593 typedef CwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived> IsNaNReturnType;
20594 typedef CwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived> IsInfReturnType;
20595 typedef CwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived> IsFiniteReturnType;
20597 EIGEN_STRONG_INLINE const AbsReturnType
20600 return AbsReturnType(derived());
20603 EIGEN_STRONG_INLINE const ArgReturnType
20606 return ArgReturnType(derived());
20609 EIGEN_STRONG_INLINE const Abs2ReturnType
20612 return Abs2ReturnType(derived());
20615 inline const ExpReturnType
20618 return ExpReturnType(derived());
20621 inline const LogReturnType
20624 return LogReturnType(derived());
20627 inline const Log1pReturnType
20630 return Log1pReturnType(derived());
20633 inline const Log10ReturnType
20636 return Log10ReturnType(derived());
20639 inline const SqrtReturnType
20642 return SqrtReturnType(derived());
20645 inline const RsqrtReturnType
20648 return RsqrtReturnType(derived());
20651 inline const SignReturnType
20654 return SignReturnType(derived());
20657 inline const CosReturnType
20660 return CosReturnType(derived());
20663 inline const SinReturnType
20666 return SinReturnType(derived());
20669 inline const TanReturnType
20672 return TanReturnType(derived());
20675 inline const AtanReturnType
20678 return AtanReturnType(derived());
20681 inline const AcosReturnType
20684 return AcosReturnType(derived());
20687 inline const AsinReturnType
20690 return AsinReturnType(derived());
20693 inline const TanhReturnType
20696 return TanhReturnType(derived());
20699 inline const SinhReturnType
20702 return SinhReturnType(derived());
20705 inline const CoshReturnType
20708 return CoshReturnType(derived());
20711 inline const LogisticReturnType
20714 return LogisticReturnType(derived());
20717 inline const InverseReturnType
20720 return InverseReturnType(derived());
20723 inline const SquareReturnType
20726 return SquareReturnType(derived());
20729 inline const CubeReturnType
20732 return CubeReturnType(derived());
20735 inline const RoundReturnType
20738 return RoundReturnType(derived());
20741 inline const FloorReturnType
20744 return FloorReturnType(derived());
20747 inline const CeilReturnType
20750 return CeilReturnType(derived());
20753 inline const IsNaNReturnType
20756 return IsNaNReturnType(derived());
20759 inline const IsInfReturnType
20762 return IsInfReturnType(derived());
20765 inline const IsFiniteReturnType
20768 return IsFiniteReturnType(derived());
20771 inline const BooleanNotReturnType
20774 EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value),
20775 THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
20776 return BooleanNotReturnType(derived());
20778 typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;
20779 typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
20780 typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
20781 typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
20783 inline const LgammaReturnType
20786 return LgammaReturnType(derived());
20789 inline const DigammaReturnType
20792 return DigammaReturnType(derived());
20795 inline const ErfReturnType
20798 return ErfReturnType(derived());
20801 inline const ErfcReturnType
20804 return ErfcReturnType(derived());
20806 // end # include "../plugins/ArrayCwiseUnaryOps.h"
20807 // # include "../plugins/CommonCwiseBinaryOps.h"
20808 EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)
20809 EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)
20810 template<typename CustomBinaryOp, typename OtherDerived>
20812 EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
20813 binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
20815 return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);
20817 #ifndef EIGEN_PARSED_BY_DOXYGEN
20818 EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)
20820 template<typename T>
20821 const CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;
20822 template<typename T> friend
20823 const CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);
20825 #ifndef EIGEN_PARSED_BY_DOXYGEN
20826 EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
20828 template<typename T>
20829 const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
20831 template<typename OtherDerived>
20833 inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
20834 operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20836 EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
20837 THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
20838 return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
20840 template<typename OtherDerived>
20842 inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
20843 operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20845 EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
20846 THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
20847 return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
20849 // end # include "../plugins/CommonCwiseBinaryOps.h"
20850 // # include "../plugins/MatrixCwiseBinaryOps.h"
20851 template<typename OtherDerived>
20853 EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
20854 cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20856 return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
20858 template<typename OtherDerived>
20860 inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
20861 cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20863 return CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
20865 template<typename OtherDerived>
20867 inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
20868 cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20870 return CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
20872 template<typename OtherDerived>
20874 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>
20875 cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20877 return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
20880 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
20881 cwiseMin(const Scalar &other) const
20883 return cwiseMin(Derived::Constant(rows(), cols(), other));
20885 template<typename OtherDerived>
20887 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>
20888 cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20890 return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
20893 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
20894 cwiseMax(const Scalar &other) const
20896 return cwiseMax(Derived::Constant(rows(), cols(), other));
20898 template<typename OtherDerived>
20900 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
20901 cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20903 return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
20905 typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
20907 inline const CwiseScalarEqualReturnType
20908 cwiseEqual(const Scalar& s) const
20910 return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());
20912 // end # include "../plugins/MatrixCwiseBinaryOps.h"
20913 // # include "../plugins/ArrayCwiseBinaryOps.h"
20914 template<typename OtherDerived>
20916 EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
20917 operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20919 return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
20921 template<typename OtherDerived>
20923 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>
20924 operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
20926 return CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
20928 EIGEN_MAKE_CWISE_BINARY_OP(min,min)
20930 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived,
20931 const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
20932 #ifdef EIGEN_PARSED_BY_DOXYGEN
20937 (const Scalar &other) const
20939 return (min)(Derived::PlainObject::Constant(rows(), cols(), other));
20941 EIGEN_MAKE_CWISE_BINARY_OP(max,max)
20943 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived,
20944 const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
20945 #ifdef EIGEN_PARSED_BY_DOXYGEN
20950 (const Scalar &other) const
20952 return (max)(Derived::PlainObject::Constant(rows(), cols(), other));
20954 EIGEN_MAKE_CWISE_BINARY_OP(pow,pow)
20955 #ifndef EIGEN_PARSED_BY_DOXYGEN
20956 EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow)
20958 template<typename T>
20959 const CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;
20961 #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
20962 template<typename OtherDerived> \
20963 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
20964 OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
20966 return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
20968 typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \
20969 typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \
20970 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \
20971 OP(const Scalar& s) const { \
20972 return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \
20974 EIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE const RCmp ## COMPARATOR ## ReturnType \
20975 OP(const Scalar& s, const Derived& d) { \
20976 return Derived::PlainObject::Constant(d.rows(), d.cols(), s).OP(d); \
20978 #define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \
20979 template<typename OtherDerived> \
20980 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived> \
20981 OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
20983 return CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived>(other.derived(), derived()); \
20985 EIGEN_DEVICE_FUNC \
20986 inline const RCmp ## RCOMPARATOR ## ReturnType \
20987 OP(const Scalar& s) const { \
20988 return Derived::PlainObject::Constant(rows(), cols(), s).R_OP(*this); \
20990 friend inline const Cmp ## RCOMPARATOR ## ReturnType \
20991 OP(const Scalar& s, const Derived& d) { \
20992 return d.R_OP(Derived::PlainObject::Constant(d.rows(), d.cols(), s)); \
20994 EIGEN_MAKE_CWISE_COMP_OP(operator<, LT)
20995 EIGEN_MAKE_CWISE_COMP_OP(operator<=, LE)
20996 EIGEN_MAKE_CWISE_COMP_R_OP(operator>, operator<, LT)
20997 EIGEN_MAKE_CWISE_COMP_R_OP(operator>=, operator<=, LE)
20998 EIGEN_MAKE_CWISE_COMP_OP(operator==, EQ)
20999 EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ)
21000 #undef EIGEN_MAKE_CWISE_COMP_OP
21001 #undef EIGEN_MAKE_CWISE_COMP_R_OP
21002 #ifndef EIGEN_PARSED_BY_DOXYGEN
21003 EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum)
21005 template<typename T>
21006 const CwiseBinaryOp<internal::scalar_sum_op<Scalar,T>,Derived,Constant<T> > operator+(const T& scalar) const;
21007 template<typename T> friend
21008 const CwiseBinaryOp<internal::scalar_sum_op<T,Scalar>,Constant<T>,Derived> operator+(const T& scalar, const StorageBaseType& expr);
21010 #ifndef EIGEN_PARSED_BY_DOXYGEN
21011 EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference)
21013 template<typename T>
21014 const CwiseBinaryOp<internal::scalar_difference_op<Scalar,T>,Derived,Constant<T> > operator-(const T& scalar) const;
21015 template<typename T> friend
21016 const CwiseBinaryOp<internal::scalar_difference_op<T,Scalar>,Constant<T>,Derived> operator-(const T& scalar, const StorageBaseType& expr);
21018 #ifndef EIGEN_PARSED_BY_DOXYGEN
21019 EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient)
21021 template<typename T> friend
21022 inline const CwiseBinaryOp<internal::scalar_quotient_op<T,Scalar>,Constant<T>,Derived>
21023 operator/(const T& s,const StorageBaseType& a);
21025 template<typename OtherDerived>
21027 inline const CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>
21028 operator^(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
21030 EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
21031 THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
21032 return CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>(derived(),other.derived());
21035 template<typename DerivedN>
21036 inline const CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>
21037 polygamma(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedN> &n) const
21039 return CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>(n.derived(), this->derived());
21042 template<typename DerivedQ>
21043 inline const CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>
21044 zeta(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedQ> &q) const
21046 return CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>(this->derived(), q.derived());
21048 // end # include "../plugins/ArrayCwiseBinaryOps.h"
21049 # ifdef EIGEN_ARRAYBASE_PLUGIN
21050 # include EIGEN_ARRAYBASE_PLUGIN
21052 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
21053 #undef EIGEN_DOC_UNARY_ADDONS
21054 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
21055 Derived& operator=(const ArrayBase& other)
21057 internal::call_assignment(derived(), other.derived());
21060 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
21061 Derived& operator=(const Scalar &value)
21062 { Base::setConstant(value); return derived(); }
21063 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
21064 Derived& operator+=(const Scalar& scalar);
21065 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
21066 Derived& operator-=(const Scalar& scalar);
21067 template<typename OtherDerived>
21068 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
21069 Derived& operator+=(const ArrayBase<OtherDerived>& other);
21070 template<typename OtherDerived>
21071 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
21072 Derived& operator-=(const ArrayBase<OtherDerived>& other);
21073 template<typename OtherDerived>
21074 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
21075 Derived& operator*=(const ArrayBase<OtherDerived>& other);
21076 template<typename OtherDerived>
21077 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
21078 Derived& operator/=(const ArrayBase<OtherDerived>& other);
21081 ArrayBase<Derived>& array() { return *this; }
21083 const ArrayBase<Derived>& array() const { return *this; }
21086 ArrayBase() : Base() {}
21088 explicit ArrayBase(Index);
21089 ArrayBase(Index,Index);
21090 template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
21092 template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
21093 {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
21094 template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
21095 {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
21097 template<typename Derived>
21098 template<typename OtherDerived>
21099 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
21100 ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
21102 call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
21105 template<typename Derived>
21106 template<typename OtherDerived>
21107 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
21108 ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
21110 call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
21113 template<typename Derived>
21114 template<typename OtherDerived>
21115 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
21116 ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
21118 call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
21121 template<typename Derived>
21122 template<typename OtherDerived>
21123 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
21124 ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
21126 call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
21131 // end #include "src/Core/ArrayBase.h"
21132 // #include "src/Core/util/BlasUtil.h"
21133 #ifndef EIGEN_BLASUTIL_H
21134 #define EIGEN_BLASUTIL_H
21136 namespace internal {
21137 template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
21138 struct gebp_kernel;
21139 template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
21140 struct gemm_pack_rhs;
21141 template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
21142 struct gemm_pack_lhs;
21145 typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
21146 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
21147 int ResStorageOrder>
21148 struct general_matrix_matrix_product;
21149 template<typename Index,
21150 typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
21151 typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
21152 struct general_matrix_vector_product;
21153 template<bool Conjugate> struct conj_if;
21154 template<> struct conj_if<true> {
21155 template<typename T>
21156 inline T operator()(const T& x) const { return numext::conj(x); }
21157 template<typename T>
21158 inline T pconj(const T& x) const { return internal::pconj(x); }
21160 template<> struct conj_if<false> {
21161 template<typename T>
21162 inline const T& operator()(const T& x) const { return x; }
21163 template<typename T>
21164 inline const T& pconj(const T& x) const { return x; }
21166 template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
21169 typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
21170 EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
21171 { return padd(c, pmul(x,y)); }
21172 EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
21173 { return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
21175 template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
21177 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
21178 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
21180 template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
21182 typedef std::complex<RealScalar> Scalar;
21183 EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
21184 { return c + pmul(x,y); }
21185 EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
21186 { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
21188 template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
21190 typedef std::complex<RealScalar> Scalar;
21191 EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
21192 { return c + pmul(x,y); }
21193 EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
21194 { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
21196 template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
21198 typedef std::complex<RealScalar> Scalar;
21199 EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
21200 { return c + pmul(x,y); }
21201 EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
21202 { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
21204 template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
21206 typedef std::complex<RealScalar> Scalar;
21207 EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
21208 { return padd(c, pmul(x,y)); }
21209 EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
21210 { return conj_if<Conj>()(x)*y; }
21212 template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
21214 typedef std::complex<RealScalar> Scalar;
21215 EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
21216 { return padd(c, pmul(x,y)); }
21217 EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
21218 { return x*conj_if<Conj>()(y); }
21220 template<typename From,typename To> struct get_factor {
21221 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
21223 template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
21225 static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
21227 template<typename Scalar, typename Index>
21228 class BlasVectorMapper {
21230 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
21231 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
21234 template <typename Packet, int AlignmentType>
21235 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
21236 return ploadt<Packet, AlignmentType>(m_data + i);
21238 template <typename Packet>
21239 EIGEN_DEVICE_FUNC bool aligned(Index i) const {
21240 return (UIntPtr(m_data+i)%sizeof(Packet))==0;
21245 template<typename Scalar, typename Index, int AlignmentType>
21246 class BlasLinearMapper {
21248 typedef typename packet_traits<Scalar>::type Packet;
21249 typedef typename packet_traits<Scalar>::half HalfPacket;
21250 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
21251 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
21252 internal::prefetch(&operator()(i));
21254 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
21257 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
21258 return ploadt<Packet, AlignmentType>(m_data + i);
21260 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
21261 return ploadt<HalfPacket, AlignmentType>(m_data + i);
21263 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
21264 pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
21269 template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned>
21270 class blas_data_mapper {
21272 typedef typename packet_traits<Scalar>::type Packet;
21273 typedef typename packet_traits<Scalar>::half HalfPacket;
21274 typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
21275 typedef BlasVectorMapper<Scalar, Index> VectorMapper;
21276 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
21277 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
21278 getSubMapper(Index i, Index j) const {
21279 return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
21281 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
21282 return LinearMapper(&operator()(i, j));
21284 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
21285 return VectorMapper(&operator()(i, j));
21288 EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
21289 return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
21291 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
21292 return ploadt<Packet, AlignmentType>(&operator()(i, j));
21294 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
21295 return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
21297 template<typename SubPacket>
21298 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
21299 pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
21301 template<typename SubPacket>
21302 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
21303 return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
21305 EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
21306 EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
21307 EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
21308 if (UIntPtr(m_data)%sizeof(Scalar)) {
21311 return internal::first_default_aligned(m_data, size);
21314 Scalar* EIGEN_RESTRICT m_data;
21315 const Index m_stride;
21317 template<typename Scalar, typename Index, int StorageOrder>
21318 class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
21320 EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
21321 EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
21322 return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
21325 template<typename XprType> struct blas_traits
21327 typedef typename traits<XprType>::Scalar Scalar;
21328 typedef const XprType& ExtractType;
21329 typedef XprType _ExtractType;
21331 IsComplex = NumTraits<Scalar>::IsComplex,
21332 IsTransposed = false,
21333 NeedToConjugate = false,
21334 HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit)
21335 && ( bool(XprType::IsVectorAtCompileTime)
21336 || int(inner_stride_at_compile_time<XprType>::ret) == 1)
21339 typedef typename conditional<bool(HasUsableDirectAccess),
21341 typename _ExtractType::PlainObject
21342 >::type DirectLinearAccessType;
21343 static inline ExtractType extract(const XprType& x) { return x; }
21344 static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
21346 template<typename Scalar, typename NestedXpr>
21347 struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
21348 : blas_traits<NestedXpr>
21350 typedef blas_traits<NestedXpr> Base;
21351 typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
21352 typedef typename Base::ExtractType ExtractType;
21354 IsComplex = NumTraits<Scalar>::IsComplex,
21355 NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
21357 static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
21358 static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
21360 template<typename Scalar, typename NestedXpr, typename Plain>
21361 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
21362 : blas_traits<NestedXpr>
21364 typedef blas_traits<NestedXpr> Base;
21365 typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
21366 typedef typename Base::ExtractType ExtractType;
21367 static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
21368 static inline Scalar extractScalarFactor(const XprType& x)
21369 { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
21371 template<typename Scalar, typename NestedXpr, typename Plain>
21372 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
21373 : blas_traits<NestedXpr>
21375 typedef blas_traits<NestedXpr> Base;
21376 typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
21377 typedef typename Base::ExtractType ExtractType;
21378 static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
21379 static inline Scalar extractScalarFactor(const XprType& x)
21380 { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
21382 template<typename Scalar, typename Plain1, typename Plain2>
21383 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
21384 const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
21385 : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
21387 template<typename Scalar, typename NestedXpr>
21388 struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
21389 : blas_traits<NestedXpr>
21391 typedef blas_traits<NestedXpr> Base;
21392 typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
21393 typedef typename Base::ExtractType ExtractType;
21394 static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
21395 static inline Scalar extractScalarFactor(const XprType& x)
21396 { return - Base::extractScalarFactor(x.nestedExpression()); }
21398 template<typename NestedXpr>
21399 struct blas_traits<Transpose<NestedXpr> >
21400 : blas_traits<NestedXpr>
21402 typedef typename NestedXpr::Scalar Scalar;
21403 typedef blas_traits<NestedXpr> Base;
21404 typedef Transpose<NestedXpr> XprType;
21405 typedef Transpose<const typename Base::_ExtractType> ExtractType;
21406 typedef Transpose<const typename Base::_ExtractType> _ExtractType;
21407 typedef typename conditional<bool(Base::HasUsableDirectAccess),
21409 typename ExtractType::PlainObject
21410 >::type DirectLinearAccessType;
21412 IsTransposed = Base::IsTransposed ? 0 : 1
21414 static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
21415 static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
21417 template<typename T>
21418 struct blas_traits<const T>
21421 template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
21422 struct extract_data_selector {
21423 static const typename T::Scalar* run(const T& m)
21425 return blas_traits<T>::extract(m).data();
21428 template<typename T>
21429 struct extract_data_selector<T,false> {
21430 static typename T::Scalar* run(const T&) { return 0; }
21432 template<typename T> const typename T::Scalar* extract_data(const T& m)
21434 return extract_data_selector<T>::run(m);
21439 // end #include "src/Core/util/BlasUtil.h"
21440 // #include "src/Core/DenseStorage.h"
21441 #ifndef EIGEN_MATRIXSTORAGE_H
21442 #define EIGEN_MATRIXSTORAGE_H
21443 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
21444 #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
21446 #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
21449 namespace internal {
21450 template<typename T>
21451 T generic_fast_tanh_float(const T& a_x)
21453 // Clamp the inputs to the range [-9, 9] since anything outside
21454 // this range is +/-1.0f in single-precision.
21455 const T plus_9 = pset1<T>(9.f);
21456 const T minus_9 = pset1<T>(-9.f);
21457 const T x = pmax(pmin(a_x, plus_9), minus_9);
21458 // The monomial coefficients of the numerator polynomial (odd).
21459 const T alpha_1 = pset1<T>(4.89352455891786e-03f);
21460 const T alpha_3 = pset1<T>(6.37261928875436e-04f);
21461 const T alpha_5 = pset1<T>(1.48572235717979e-05f);
21462 const T alpha_7 = pset1<T>(5.12229709037114e-08f);
21463 const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
21464 const T alpha_11 = pset1<T>(2.00018790482477e-13f);
21465 const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
21467 // The monomial coefficients of the denominator polynomial (even).
21468 const T beta_0 = pset1<T>(4.89352518554385e-03f);
21469 const T beta_2 = pset1<T>(2.26843463243900e-03f);
21470 const T beta_4 = pset1<T>(1.18534705686654e-04f);
21471 const T beta_6 = pset1<T>(1.19825839466702e-06f);
21473 // Since the polynomials are odd/even, we need x^2.
21474 const T x2 = pmul(x, x);
21476 // Evaluate the numerator polynomial p.
21477 T p = pmadd(x2, alpha_13, alpha_11);
21478 p = pmadd(x2, p, alpha_9);
21479 p = pmadd(x2, p, alpha_7);
21480 p = pmadd(x2, p, alpha_5);
21481 p = pmadd(x2, p, alpha_3);
21482 p = pmadd(x2, p, alpha_1);
21485 // Evaluate the denominator polynomial p.
21486 T q = pmadd(x2, beta_6, beta_4);
21487 q = pmadd(x2, q, beta_2);
21488 q = pmadd(x2, q, beta_0);
21490 // Divide the numerator by the denominator.
21494 struct constructor_without_unaligned_array_assert {};
// Compile-time guard: a statically allocated array of Size elements of T must
// not exceed EIGEN_STACK_ALLOCATION_LIMIT (checked only when that limit is
// nonzero).  Called from every plain_array constructor below.
21495 template<typename T, int Size>
21497 void check_static_allocation_size()
21499 #if EIGEN_STACK_ALLOCATION_LIMIT
21500 EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
// plain_array: trivial fixed-size array wrapper used as the backing store of
// fixed-size matrices/arrays.  Alignment defaults to 0 when the DontAlign
// option is set, otherwise to the default alignment computed for (T, Size).
// NOTE(review): several lines of this primary template (struct header, the
// array member, constructor bodies) are elided from this excerpt.
21503 template <typename T, int Size, int MatrixOrArrayOptions,
21504 int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
21505 : compute_default_alignment<T,Size>::value >
21512 check_static_allocation_size<T,Size>();
21515 plain_array(constructor_without_unaligned_array_assert)
21517 check_static_allocation_size<T,Size>();
// EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask): runtime check that `array`
// (the member of the enclosing plain_array) satisfies the required alignment,
// i.e. (address & sizemask) == 0, with an error message pointing the user at
// the "unaligned array assert" documentation page.  Three variants:
//  - expands to nothing when EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT is defined;
//  - on GCC >= 4.7 the pointer is passed through a no-op identity function
//    (presumably to keep the compiler from optimizing the check away -- the
//    function name marks it as a GCC 4.7 workaround);
//  - otherwise the address is tested directly.
21520 #if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
21521 #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
21522 #elif EIGEN_GNUC_AT_LEAST(4,7)
21523 template<typename PtrType>
21524 EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
21525 #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
21526 eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
21527 && "this assertion is explained here: " \
21528 "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
21529 " **** READ THIS WEB PAGE !!! ****");
21531 #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
21532 eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
21533 && "this assertion is explained here: " \
21534 "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
21535 " **** READ THIS WEB PAGE !!! ****");
// plain_array specialization, 8-byte alignment: the array is over-aligned and
// the default constructor asserts at runtime that its address is 8-byte
// aligned (mask 7); the tag-constructor variant skips that assertion.
21537 template <typename T, int Size, int MatrixOrArrayOptions>
21538 struct plain_array<T, Size, MatrixOrArrayOptions, 8>
21540 EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
21544 EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
21545 check_static_allocation_size<T,Size>();
21548 plain_array(constructor_without_unaligned_array_assert)
21550 check_static_allocation_size<T,Size>();
// plain_array specialization, 16-byte alignment (mask 15); same structure as
// the 8-byte variant above.
21553 template <typename T, int Size, int MatrixOrArrayOptions>
21554 struct plain_array<T, Size, MatrixOrArrayOptions, 16>
21556 EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
21560 EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
21561 check_static_allocation_size<T,Size>();
21564 plain_array(constructor_without_unaligned_array_assert)
21566 check_static_allocation_size<T,Size>();
// plain_array specialization, 32-byte alignment (mask 31); same structure as
// the 8-byte variant above.
21569 template <typename T, int Size, int MatrixOrArrayOptions>
21570 struct plain_array<T, Size, MatrixOrArrayOptions, 32>
21572 EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
21576 EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
21577 check_static_allocation_size<T,Size>();
21580 plain_array(constructor_without_unaligned_array_assert)
21582 check_static_allocation_size<T,Size>();
// plain_array specialization, 64-byte alignment (mask 63); same structure as
// the 8-byte variant above.
21585 template <typename T, int Size, int MatrixOrArrayOptions>
21586 struct plain_array<T, Size, MatrixOrArrayOptions, 64>
21588 EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
21592 EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
21593 check_static_allocation_size<T,Size>();
21596 plain_array(constructor_without_unaligned_array_assert)
21598 check_static_allocation_size<T,Size>();
// plain_array specialization for Size==0: no storage at all; both
// constructors are no-ops (no alignment or stack-size checks needed).
21601 template <typename T, int MatrixOrArrayOptions, int Alignment>
21602 struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
21605 EIGEN_DEVICE_FUNC plain_array() {}
21606 EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
21609 template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
// DenseStorage, fully fixed-size case: Size, _Rows and _Cols are all known at
// compile time, so the coefficients live in an in-object plain_array and
// rows()/cols() are static.  resize/conservativeResize are no-ops (the shape
// cannot change).  Each constructor runs the user CTOR plugin hook.
21610 template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
21612 internal::plain_array<T,Size,_Options> m_data;
21614 EIGEN_DEVICE_FUNC DenseStorage() {
21615 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
21618 explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
21619 : m_data(internal::constructor_without_unaligned_array_assert()) {}
21621 DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
21622 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
21625 DenseStorage& operator=(const DenseStorage& other)
21627 if (this != &other) m_data = other.m_data;
// Runtime-size constructor: the sizes must agree with the compile-time ones.
21630 EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
21631 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
21632 eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
21633 EIGEN_UNUSED_VARIABLE(size);
21634 EIGEN_UNUSED_VARIABLE(rows);
21635 EIGEN_UNUSED_VARIABLE(cols);
21637 EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
21638 EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
21639 EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
21640 EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
21641 EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
21642 EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
21643 EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
// DenseStorage, empty case (Size==0 with fixed rows/cols): stores nothing,
// data() returns a null pointer, and every mutating operation is a no-op.
21645 template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
21648 EIGEN_DEVICE_FUNC DenseStorage() {}
21649 EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
21650 EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
21651 EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
21652 EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
21653 EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
21654 EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
21655 EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
21656 EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
21657 EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
21658 EIGEN_DEVICE_FUNC const T *data() const { return 0; }
21659 EIGEN_DEVICE_FUNC T *data() { return 0; }
// Empty storage with one or both dimensions Dynamic: all three variants reuse
// the Size==0, 0x0 implementation above via inheritance.
21661 template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
21662 : public DenseStorage<T, 0, 0, 0, _Options> { };
21663 template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
21664 : public DenseStorage<T, 0, 0, 0, _Options> { };
21665 template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
21666 : public DenseStorage<T, 0, 0, 0, _Options> { };
// DenseStorage, fixed capacity but dynamic rows and cols: the coefficients
// live in a fixed-capacity plain_array while the current rows/cols are stored
// in runtime members (their declarations are on lines elided here).
// resize/conservativeResize only record the new sizes -- the buffer never moves.
21667 template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
21669 internal::plain_array<T,Size,_Options> m_data;
21673 EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
21674 EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
21675 : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
21676 EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
21677 EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
21679 if (this != &other)
21681 m_data = other.m_data;
21682 m_rows = other.m_rows;
21683 m_cols = other.m_cols;
21687 EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
21688 EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
21689 { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
21690 EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
21691 EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
21692 EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
21693 EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
21694 EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
21695 EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
// DenseStorage, fixed capacity, dynamic rows, fixed cols: only m_rows is a
// runtime member (declaration elided here); cols() returns the compile-time
// _Cols.  Resizing only updates m_rows.
21697 template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
21699 internal::plain_array<T,Size,_Options> m_data;
21702 EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
21703 EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
21704 : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
21705 EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
21706 EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
21708 if (this != &other)
21710 m_data = other.m_data;
21711 m_rows = other.m_rows;
21715 EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
21716 EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
21717 EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
21718 EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
21719 EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
21720 EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
21721 EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
21722 EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
// DenseStorage, fixed capacity, fixed rows, dynamic cols: mirror of the
// previous specialization with m_cols as the runtime member (declaration
// elided here); rows() returns the compile-time _Rows.
21724 template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
21726 internal::plain_array<T,Size,_Options> m_data;
21729 EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
21730 EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
21731 : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
21732 EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
21733 EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
21735 if (this != &other)
21737 m_data = other.m_data;
21738 m_cols = other.m_cols;
21742 EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
21743 EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
21744 EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
21745 EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
21746 void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
21747 void resize(Index, Index, Index cols) { m_cols = cols; }
21748 EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
21749 EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
// DenseStorage, fully dynamic: heap-allocated buffer (aligned unless the
// DontAlign option is set) plus runtime m_rows/m_cols (member declarations
// elided here).  Copy construction deep-copies via smart_copy; copy
// assignment uses copy-and-swap (the swap statement is on a line elided
// here); move operations (guarded by EIGEN_HAS_RVALUE_REFERENCES) steal the
// pointer.  resize() reallocates only when the total size changes.
21751 template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
21757 EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
21758 EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
21759 : m_data(0), m_rows(0), m_cols(0) {}
21760 EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
21761 : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
21763 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
21764 eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
21766 EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
21767 : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
21768 , m_rows(other.m_rows)
21769 , m_cols(other.m_cols)
21771 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)
21772 internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
21774 EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
21776 if (this != &other)
21778 DenseStorage tmp(other);
21783 #if EIGEN_HAS_RVALUE_REFERENCES
21785 DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
21786 : m_data(std::move(other.m_data))
21787 , m_rows(std::move(other.m_rows))
21788 , m_cols(std::move(other.m_cols))
21790 other.m_data = nullptr;
21795 DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
21798 swap(m_data, other.m_data);
21799 swap(m_rows, other.m_rows);
21800 swap(m_cols, other.m_cols);
21804 EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
21805 EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
21806 { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
21807 EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
21808 EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
// conservativeResize preserves existing coefficients via realloc-style copy.
21809 void conservativeResize(Index size, Index rows, Index cols)
21811 m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
// resize discards existing coefficients; it frees and reallocates only when
// the total element count actually changes.
21815 EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
21817 if(size != m_rows*m_cols)
21819 internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
21821 m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
21824 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
21829 EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
21830 EIGEN_DEVICE_FUNC T *data() { return m_data; }
// DenseStorage, heap-allocated with fixed _Rows and dynamic cols: like the
// fully dynamic case but only m_cols is stored (member declarations elided
// here); the element count is always _Rows*m_cols.
21832 template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
21837 EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
21838 explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
21839 EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
21841 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
21842 eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
21843 EIGEN_UNUSED_VARIABLE(rows);
21845 EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
21846 : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
21847 , m_cols(other.m_cols)
21849 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
21850 internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
// Copy assignment via copy-and-swap (swap statement elided here).
21852 EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
21854 if (this != &other)
21856 DenseStorage tmp(other);
21861 #if EIGEN_HAS_RVALUE_REFERENCES
21863 DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
21864 : m_data(std::move(other.m_data))
21865 , m_cols(std::move(other.m_cols))
21867 other.m_data = nullptr;
21871 DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
21874 swap(m_data, other.m_data);
21875 swap(m_cols, other.m_cols);
21879 EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
21880 EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
21881 EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
21882 EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
21883 EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
21885 m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
21888 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
21890 if(size != _Rows*m_cols)
21892 internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
21894 m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
21897 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
21901 EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
21902 EIGEN_DEVICE_FUNC T *data() { return m_data; }
// DenseStorage, heap-allocated with dynamic rows and fixed _Cols: mirror of
// the previous specialization; only m_rows is stored (member declarations
// elided here) and the element count is always m_rows*_Cols.
21904 template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
21909 EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
21910 explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
21911 EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
21913 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
21914 eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
21915 EIGEN_UNUSED_VARIABLE(cols);
21917 EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
21918 : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
21919 , m_rows(other.m_rows)
21921 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
21922 internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
// Copy assignment via copy-and-swap (swap statement elided here).
21924 EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
21926 if (this != &other)
21928 DenseStorage tmp(other);
21933 #if EIGEN_HAS_RVALUE_REFERENCES
21935 DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
21936 : m_data(std::move(other.m_data))
21937 , m_rows(std::move(other.m_rows))
21939 other.m_data = nullptr;
21943 DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
21946 swap(m_data, other.m_data);
21947 swap(m_rows, other.m_rows);
21951 EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
21952 EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
21953 EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
21954 EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
21955 void conservativeResize(Index size, Index rows, Index)
21957 m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
21960 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
21962 if(size != m_rows*_Cols)
21964 internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
21966 m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
21969 EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
21973 EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
21974 EIGEN_DEVICE_FUNC T *data() { return m_data; }
21978 // end #include "src/Core/DenseStorage.h"
21979 // #include "src/Core/NoAlias.h"
21980 #ifndef EIGEN_NOALIAS_H
21981 #define EIGEN_NOALIAS_H
// NoAlias: pseudo-expression returned by MatrixBase::noalias().  Its
// assignment operators (=, +=, -=) route through call_assignment_no_alias,
// i.e. they skip the temporary normally used to guard against aliasing --
// the caller promises the destination does not alias the source expression.
// (The class header line and access specifiers are elided in this view.)
21983 template<typename ExpressionType, template <typename> class StorageBase>
21987 typedef typename ExpressionType::Scalar Scalar;
21988 explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
21989 template<typename OtherDerived>
21991 EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
21993 call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
21994 return m_expression;
21996 template<typename OtherDerived>
21998 EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
22000 call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
22001 return m_expression;
22003 template<typename OtherDerived>
22005 EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
22007 call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
22008 return m_expression;
// Accessor for the wrapped expression (reference member, so the NoAlias
// object must not outlive the expression it wraps).
22011 ExpressionType& expression() const
22013 return m_expression;
22016 ExpressionType& m_expression;
// MatrixBase::noalias(): wraps *this in a NoAlias pseudo-expression so that a
// subsequent assignment skips the anti-aliasing temporary.
22018 template<typename Derived>
22019 NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
22021 return NoAlias<Derived, Eigen::MatrixBase >(derived());
22025 // end #include "src/Core/NoAlias.h"
22026 // #include "src/Core/PlainObjectBase.h"
22027 #ifndef EIGEN_DENSESTORAGEBASE_H
22028 #define EIGEN_DENSESTORAGEBASE_H
// Coefficient-initialization policy: when matrices are requested to be
// initialized by zero or by NaN, EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
// expands to a loop filling every coefficient; otherwise it expands to
// nothing and EIGEN_INITIALIZE_COEFFS stays undefined.
22029 #if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
22030 # define EIGEN_INITIALIZE_COEFFS
22031 # define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
22032 #elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
22033 # define EIGEN_INITIALIZE_COEFFS
22034 # define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
22036 # undef EIGEN_INITIALIZE_COEFFS
22037 # define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
22040 namespace internal {
// Guard against rows*cols overflowing Index.  When the max size is known at
// compile time the check is a no-op; the Dynamic specialization computes the
// largest representable signed Index value and calls throw_std_bad_alloc()
// if rows*cols would exceed it (zero-sized dimensions never overflow).
22041 template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
22042 template<typename Index>
22044 static EIGEN_ALWAYS_INLINE void run(Index, Index)
22048 template<> struct check_rows_cols_for_overflow<Dynamic> {
22049 template<typename Index>
22051 static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
22053 Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1;
22054 bool error = (rows == 0 || cols == 0) ? false
22055 : (rows > max_index / cols);
22057 throw_std_bad_alloc();
// Forward declarations of the resize/swap helper templates used by
// PlainObjectBase below; their definitions appear later in the file.
22060 template <typename Derived,
22061 typename OtherDerived = Derived,
22062 bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
22063 struct conservative_resize_like_impl;
22064 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
// Documentation-only scaffolding: when parsed by Doxygen, PlainObjectBase is
// presented as deriving from MatrixBase/ArrayBase through these dispatchers
// (instead of the real internal::dense_xpr_base machinery).
22066 #ifdef EIGEN_PARSED_BY_DOXYGEN
22067 namespace doxygen {
22068 template<typename Derived> struct dense_xpr_base_dispatcher;
22069 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
22070 struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
22071 : public MatrixBase {};
22072 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
22073 struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
22074 : public ArrayBase {};
// PlainObjectBase: common base of Matrix and Array that actually owns the
// coefficients (through the DenseStorage member m_storage below).  Under
// Doxygen it is shown deriving from the dispatcher above; the real base is
// internal::dense_xpr_base<Derived>::type.  NOTE: the class continues past
// the end of this excerpt, and several lines (access specifiers, #endif
// lines) are elided from this view.
22076 template<typename Derived>
22077 class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
22079 template<typename Derived>
22080 class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
22084 enum { Options = internal::traits<Derived>::Options };
22085 typedef typename internal::dense_xpr_base<Derived>::type Base;
22086 typedef typename internal::traits<Derived>::StorageKind StorageKind;
22087 typedef typename internal::traits<Derived>::Scalar Scalar;
22088 typedef typename internal::packet_traits<Scalar>::type PacketScalar;
22089 typedef typename NumTraits<Scalar>::Real RealScalar;
22090 typedef Derived DenseType;
22091 using Base::RowsAtCompileTime;
22092 using Base::ColsAtCompileTime;
22093 using Base::SizeAtCompileTime;
22094 using Base::MaxRowsAtCompileTime;
22095 using Base::MaxColsAtCompileTime;
22096 using Base::MaxSizeAtCompileTime;
22097 using Base::IsVectorAtCompileTime;
// Map typedefs: unaligned map types are always available; the aligned ones
// only when EIGEN_MAX_ALIGN_BYTES > 0.  Strided variants are parameterized
// by a StrideType.
22099 template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
22100 friend class Eigen::Map<Derived, Unaligned>;
22101 typedef Eigen::Map<Derived, Unaligned> MapType;
22102 friend class Eigen::Map<const Derived, Unaligned>;
22103 typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
22104 #if EIGEN_MAX_ALIGN_BYTES>0
22105 friend class Eigen::Map<Derived, AlignedMax>;
22106 friend class Eigen::Map<const Derived, AlignedMax>;
22108 typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
22109 typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
22110 template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
22111 template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
22112 template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
22113 template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
// The actual coefficient storage; one of the DenseStorage specializations
// above is selected from the compile-time sizes and options.
22115 DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
22117 enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
22118 EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
22120 Base& base() { return *static_cast<Base*>(this); }
22122 const Base& base() const { return *static_cast<const Base*>(this); }
22124 EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
22126 EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
// Coefficient accessors: 2-D access picks row-major or column-major linear
// indexing from the RowMajorBit flag; 1-D access indexes the flat buffer
// directly.  coeffRef returns a writable reference; the const coeffRef
// overloads mirror coeff.
22128 EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
22130 if(Flags & RowMajorBit)
22131 return m_storage.data()[colId + rowId * m_storage.cols()];
22133 return m_storage.data()[rowId + colId * m_storage.rows()];
22136 EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
22138 return m_storage.data()[index];
22141 EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
22143 if(Flags & RowMajorBit)
22144 return m_storage.data()[colId + rowId * m_storage.cols()];
22146 return m_storage.data()[rowId + colId * m_storage.rows()];
22149 EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
22151 return m_storage.data()[index];
22154 EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
22156 if(Flags & RowMajorBit)
22157 return m_storage.data()[colId + rowId * m_storage.cols()];
22159 return m_storage.data()[rowId + colId * m_storage.rows()];
22162 EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
22164 return m_storage.data()[index];
// SIMD packet access: loads/stores a whole PacketScalar at the linear offset
// computed from (row, col) (respecting storage order) or at a flat index.
// LoadMode/StoreMode select aligned vs unaligned memory operations.
22166 template<int LoadMode>
22167 EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
22169 return internal::ploadt<PacketScalar, LoadMode>
22170 (m_storage.data() + (Flags & RowMajorBit
22171 ? colId + rowId * m_storage.cols()
22172 : rowId + colId * m_storage.rows()));
22174 template<int LoadMode>
22175 EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
22177 return internal::ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
22179 template<int StoreMode>
22180 EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val)
22182 internal::pstoret<Scalar, PacketScalar, StoreMode>
22183 (m_storage.data() + (Flags & RowMajorBit
22184 ? colId + rowId * m_storage.cols()
22185 : rowId + colId * m_storage.rows()), val);
22187 template<int StoreMode>
22188 EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val)
22190 internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, val);
// Raw pointer access to the coefficient buffer.
22192 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const
22193 { return m_storage.data(); }
22194 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()
22195 { return m_storage.data(); }
// resize(rows, cols): validates the request against the compile-time
// (max) sizes, checks rows*cols for overflow, then delegates to the storage.
// When coefficient initialization is enabled, new coefficients are filled in
// after a size change.  Existing values are NOT preserved (see
// conservativeResize for that).
22197 EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
22199 eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
22200 && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
22201 && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
22202 && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
22203 && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
22204 internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
22205 #ifdef EIGEN_INITIALIZE_COEFFS
22206 Index size = rows*cols;
22207 bool size_changed = size != this->size();
22208 m_storage.resize(size, rows, cols);
22209 if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
22211 m_storage.resize(rows*cols, rows, cols);
// resize(size): vector-only overload; maps the single size onto (1, size)
// for compile-time row vectors or (size, 1) otherwise.
22215 inline void resize(Index size)
22217 EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
22218 eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
22219 #ifdef EIGEN_INITIALIZE_COEFFS
22220 bool size_changed = size != this->size();
22222 if(RowsAtCompileTime == 1)
22223 m_storage.resize(size, 1, size);
22225 m_storage.resize(size, size, 1);
22226 #ifdef EIGEN_INITIALIZE_COEFFS
22227 if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
// NoChange overloads: resize one dimension and keep the other as-is.
22231 inline void resize(NoChange_t, Index cols)
22233 resize(rows(), cols);
22236 inline void resize(Index rows, NoChange_t)
22238 resize(rows, cols());
// resizeLike: adopts another expression's dimensions, flattening them onto
// this object's compile-time vector orientation when applicable.
22240 template<typename OtherDerived>
22242 EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
22244 const OtherDerived& other = _other.derived();
22245 internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());
22246 const Index othersize = other.rows()*other.cols();
22247 if(RowsAtCompileTime == 1)
22249 eigen_assert(other.rows() == 1 || other.cols() == 1);
22250 resize(1, othersize);
22252 else if(ColsAtCompileTime == 1)
22254 eigen_assert(other.rows() == 1 || other.cols() == 1);
22255 resize(othersize, 1);
22257 else resize(other.rows(), other.cols());
// conservativeResize family: resizes while keeping existing coefficient
// values, delegating to internal::conservative_resize_like_impl.
22260 EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
22262 internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
22265 EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
22267 conservativeResize(rows, cols());
22270 EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
22272 conservativeResize(rows(), cols);
22275 EIGEN_STRONG_INLINE void conservativeResize(Index size)
22277 internal::conservative_resize_like_impl<Derived>::run(*this, size);
22279 template<typename OtherDerived>
22281 EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
22283 internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
// Copy assignment delegates to _set (defined later in the class, outside
// this excerpt).  lazyAssign resizes to match and then assigns without the
// usual evaluation of the right-hand side into a temporary.  Assignment from
// a ReturnByValue expression resizes first, then forwards to the base.
22286 EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
22288 return _set(other);
22290 template<typename OtherDerived>
22292 EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
22294 _resize_to_match(other);
22295 return Base::lazyAssign(other.derived());
22297 template<typename OtherDerived>
22299 EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
22301 resize(func.rows(), func.cols());
22302 return Base::operator=(func);
// Constructors (protected region; several lines, including access specifiers
// and constructor bodies, are elided from this view):
//  - default: default-constructs the storage;
//  - tag overload: skips the unaligned-array assertion;
//  - move ctor/assign (guarded by EIGEN_HAS_RVALUE_REFERENCES): move/swap
//    the storage;
//  - copy ctor: copies the storage;
//  - (size, rows, cols): allocates storage of the given geometry;
//  - conversion ctors from DenseBase/EigenBase/ReturnByValue: validate the
//    template parameters, then copy/evaluate the source into *this.
22306 EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
22309 #ifndef EIGEN_PARSED_BY_DOXYGEN
22311 explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
22312 : m_storage(internal::constructor_without_unaligned_array_assert())
22316 #if EIGEN_HAS_RVALUE_REFERENCES
22318 PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
22319 : m_storage( std::move(other.m_storage) )
22323 PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
22326 swap(m_storage, other.m_storage);
22331 EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
22332 : Base(), m_storage(other.m_storage) { }
22334 EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
22335 : m_storage(size, rows, cols)
22338 template<typename OtherDerived>
22340 EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
22343 _check_template_params();
22345 _set_noalias(other);
22347 template<typename OtherDerived>
22349 EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
22352 _check_template_params();
22354 *this = other.derived();
22356 template<typename OtherDerived>
22358 EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
22360 _check_template_params();
22361 resize(other.rows(), other.cols());
22362 other.evalTo(this->derived());
// Assignment from any EigenBase expression: resize to match, then delegate
// the coefficient copy to the base-class operator=.
22365 template<typename OtherDerived>
22367 EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
22369 _resize_to_match(other);
22370 Base::operator=(other.derived());
22371 return this->derived();
// Static Map/MapAligned factories: view existing memory as this plain-object
// type without copying.  Overloads cover const/non-const data, fixed size,
// explicit vector size, explicit rows/cols, and (below) custom strides.
// MapAligned variants require the pointer to satisfy AlignedMax alignment.
22373 static inline ConstMapType Map(const Scalar* data)
22374 { return ConstMapType(data); }
22375 static inline MapType Map(Scalar* data)
22376 { return MapType(data); }
22377 static inline ConstMapType Map(const Scalar* data, Index size)
22378 { return ConstMapType(data, size); }
22379 static inline MapType Map(Scalar* data, Index size)
22380 { return MapType(data, size); }
22381 static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
22382 { return ConstMapType(data, rows, cols); }
22383 static inline MapType Map(Scalar* data, Index rows, Index cols)
22384 { return MapType(data, rows, cols); }
22385 static inline ConstAlignedMapType MapAligned(const Scalar* data)
22386 { return ConstAlignedMapType(data); }
22387 static inline AlignedMapType MapAligned(Scalar* data)
22388 { return AlignedMapType(data); }
22389 static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
22390 { return ConstAlignedMapType(data, size); }
22391 static inline AlignedMapType MapAligned(Scalar* data, Index size)
22392 { return AlignedMapType(data, size); }
22393 static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
22394 { return ConstAlignedMapType(data, rows, cols); }
22395 static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
22396 { return AlignedMapType(data, rows, cols); }
// Strided variants: the caller supplies a Stride<Outer, Inner> describing
// the memory layout of the mapped data.
22397 template<int Outer, int Inner>
22398 static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
22399 { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
22400 template<int Outer, int Inner>
22401 static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
22402 { return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
22403 template<int Outer, int Inner>
22404 static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
22405 { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
22406 template<int Outer, int Inner>
22407 static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
22408 { return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
22409 template<int Outer, int Inner>
22410 static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
22411 { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
22412 template<int Outer, int Inner>
22413 static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
22414 { return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
22415 template<int Outer, int Inner>
22416 static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
22417 { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
22418 template<int Outer, int Inner>
22419 static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
22420 { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
22421 template<int Outer, int Inner>
22422 static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
22423 { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
22424 template<int Outer, int Inner>
22425 static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
22426 { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
22427 template<int Outer, int Inner>
22428 static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
22429 { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
22430 template<int Outer, int Inner>
22431 static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
22432 { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
// --- resizing setters ---------------------------------------------------------
// Each `using Base::setX;` pulls in the non-resizing overloads; the declarations
// below add overloads that first resize to `size` or `rows x cols`, then fill.
22433 using Base::setConstant;
22434 EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
22435 EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
22436 using Base::setZero;
22437 EIGEN_DEVICE_FUNC Derived& setZero(Index size);
22438 EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
22439 using Base::setOnes;
22440 EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
22441 EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
22442 using Base::setRandom;
// setRandom has no EIGEN_DEVICE_FUNC marker (unlike the setters above).
22443 Derived& setRandom(Index size);
22444 Derived& setRandom(Index rows, Index cols);
// User-extension hook: injects the file named by EIGEN_PLAINOBJECTBASE_PLUGIN.
22445 #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
22446 #include EIGEN_PLAINOBJECTBASE_PLUGIN
// _resize_to_match: resizes *this to the dimensions of `other` before an
// assignment. When EIGEN_NO_AUTOMATIC_RESIZING is defined, resizing is instead
// forbidden and a size mismatch triggers the eigen_assert below.
22449 template<typename OtherDerived>
22451 EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
22453 #ifdef EIGEN_NO_AUTOMATIC_RESIZING
22454 eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
22455 : (rows() == other.rows() && cols() == other.cols())))
22456 && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
22457 EIGEN_ONLY_USED_FOR_DEBUG(other);
// _set: general assignment that goes through call_assignment (which handles
// aliasing between source and destination).
22462 template<typename OtherDerived>
22464 EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
22466 internal::call_assignment(this->derived(), other.derived());
22467 return this->derived();
// _set_noalias: assignment that skips the aliasing check — only valid when the
// caller guarantees source and destination do not overlap (e.g. fresh objects).
22469 template<typename OtherDerived>
22471 EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
22473 internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
22474 return this->derived();
// --- _init2 / _init1 overload family -----------------------------------------
// These SFINAE-dispatched helpers disambiguate the two-argument and one-argument
// constructors of Matrix/Array: a pair of integers means "sizes" for dynamic
// objects, but means "coefficient values" for fixed size-2 vectors, etc.
// Two integer args on a non-size-2 object: treated as (rows, cols). The static
// assert rejects floating-point arguments passed where sizes were expected.
22476 template<typename T0, typename T1>
22478 EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
22480 EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
22481 bool(NumTraits<T1>::IsInteger),
22482 FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
// Two args on a fixed size-2 vector: treated as the two coefficient values.
22485 template<typename T0, typename T1>
22487 EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
22489 EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
22490 m_storage.data()[0] = Scalar(val0);
22491 m_storage.data()[1] = Scalar(val1);
// Two Index args on a size-2 vector whose Scalar is not Index: still values,
// converted to Scalar (resolves the Index-vs-Scalar ambiguity explicitly).
22493 template<typename T0, typename T1>
22495 EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
22496 typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
22497 && (internal::is_same<T0,Index>::value)
22498 && (internal::is_same<T1,Index>::value)
22499 && Base::SizeAtCompileTime==2,T1>::type* = 0)
22501 EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
22502 m_storage.data()[0] = Scalar(val0);
22503 m_storage.data()[1] = Scalar(val1);
// One arg interpreted as a size (non-size-1 object, or non-Array kind): must be
// an integer type.
22505 template<typename T>
22507 EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
22508 && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
22510 const bool is_integer = NumTraits<T>::IsInteger;
22511 EIGEN_UNUSED_VARIABLE(is_integer);
22512 EIGEN_STATIC_ASSERT(is_integer,
22513 FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
// One arg on a fixed size-1 object convertible to Scalar: the coefficient value.
22516 template<typename T>
22518 EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
22520 EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
22521 m_storage.data()[0] = val0;
// Index arg on a size-1 object whose Scalar is not Index: the value, converted.
22523 template<typename T>
22525 EIGEN_STRONG_INLINE void _init1(const Index& val0,
22526 typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
22527 && (internal::is_same<Index,T>::value)
22528 && Base::SizeAtCompileTime==1
22529 && internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
22531 EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
22532 m_storage.data()[0] = Scalar(val0);
// Raw-pointer arg: copy coefficients from the buffer via an unaliased Map.
22534 template<typename T>
22536 EIGEN_STRONG_INLINE void _init1(const Scalar* data){
22537 this->_set_noalias(ConstMapType(data));
// Dense-expression arg: copy without aliasing (we are freshly constructed).
22539 template<typename T, typename OtherDerived>
22541 EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
22542 this->_set_noalias(other);
// Same-type arg: copy without aliasing.
22544 template<typename T>
22546 EIGEN_STRONG_INLINE void _init1(const Derived& other){
22547 this->_set_noalias(other);
// Generic EigenBase arg: go through full assignment.
22549 template<typename T, typename OtherDerived>
22551 EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
22552 this->derived() = other;
// ReturnByValue arg: resize, then evaluate the expression directly into us.
22554 template<typename T, typename OtherDerived>
22556 EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
22558 resize(other.rows(), other.cols());
22559 other.evalTo(this->derived());
// RotationBase arg: delegate to the rotation assignment operator.
22561 template<typename T, typename OtherDerived, int ColsAtCompileTime>
22563 EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
22565 this->derived() = r;
// Scalar arg on a fixed-size (>1) Array: fill all coefficients with the value.
22567 template<typename T>
22569 EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
22570 typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic
22571 && Base::SizeAtCompileTime!=1
22572 && internal::is_convertible<T, Scalar>::value
22573 && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
22575 Base::setConstant(val0);
// Index arg on a fixed-size (>1) Array with non-Index Scalar: also a fill value.
22577 template<typename T>
22579 EIGEN_STRONG_INLINE void _init1(const Index& val0,
22580 typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
22581 && (internal::is_same<Index,T>::value)
22582 && Base::SizeAtCompileTime!=Dynamic
22583 && Base::SizeAtCompileTime!=1
22584 && internal::is_convertible<T, Scalar>::value
22585 && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
22587 Base::setConstant(val0);
// matrix_swap_impl needs access to m_storage for the pointer-swap fast path.
22589 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
22590 friend struct internal::matrix_swap_impl;
22592 #ifndef EIGEN_PARSED_BY_DOXYGEN
// swap (non-const overload): swaps storage pointers when both sides are the
// same dynamic-size type (cheap), otherwise falls back to element-wise swap.
22593 template<typename OtherDerived>
22595 void swap(DenseBase<OtherDerived> & other)
22597 enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
22598 internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived());
// swap (const-reference overload, used for swapping with temporary proxies):
// delegates to the base-class swap.
22600 template<typename OtherDerived>
22602 void swap(DenseBase<OtherDerived> const & other)
22603 { Base::swap(other.derived()); }
// _check_template_params: compile-time validation of the Rows/Cols/MaxRows/
// MaxCols/Options template arguments (vector storage order consistency,
// non-negative or Dynamic sizes, Max* compatible with fixed sizes, and no
// stray Options bits beyond DontAlign|RowMajor).
22605 static EIGEN_STRONG_INLINE void _check_template_params()
22607 EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
22608 && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
22609 && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
22610 && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
22611 && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
22612 && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0))
22613 && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic)
22614 && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic)
22615 && (Options & (DontAlign|RowMajor)) == Options),
22616 INVALID_MATRIX_TEMPLATE_PARAMETERS)
22618 enum { IsPlainObjectBase = 1 };
22621 namespace internal {
// conservative_resize_like_impl (general, non-vector case): resizes a dynamic
// matrix while preserving existing coefficients where they still fit.
22622 template <typename Derived, typename OtherDerived, bool IsVector>
22623 struct conservative_resize_like_impl
// run(rows, cols): fast path keeps the storage layout when only the "outer"
// dimension changes (cols fixed for row-major, rows fixed for col-major);
// otherwise copies the overlapping top-left block into a temporary and swaps.
22625 static void run(DenseBase<Derived>& _this, Index rows, Index cols)
22627 if (_this.rows() == rows && _this.cols() == cols) return;
22628 EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
22629 if ( ( Derived::IsRowMajor && _this.cols() == cols) ||
22630 (!Derived::IsRowMajor && _this.rows() == rows) )
22632 internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols);
22633 _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
22637 typename Derived::PlainObject tmp(rows,cols);
22638 const Index common_rows = numext::mini(rows, _this.rows());
22639 const Index common_cols = numext::mini(cols, _this.cols());
22640 tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
22641 _this.derived().swap(tmp);
// run(other): resize to other's dimensions and, when the layout allows an
// in-place grow, copy the newly added rows/cols over from `other`.
22644 static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
22646 if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
22647 EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
22648 EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
22649 if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) ||
22650 (!Derived::IsRowMajor && _this.rows() == other.rows()) )
22652 const Index new_rows = other.rows() - _this.rows();
22653 const Index new_cols = other.cols() - _this.cols();
22654 _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols());
22656 _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows);
22657 else if (new_cols>0)
22658 _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols);
22662 typename Derived::PlainObject tmp(other);
22663 const Index common_rows = numext::mini(tmp.rows(), _this.rows());
22664 const Index common_cols = numext::mini(tmp.cols(), _this.cols());
22665 tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
22666 _this.derived().swap(tmp);
// Vector specialization (IsVector==true): adds size-based overloads; a vector
// can always be grown in place via conservativeResize on the storage.
22670 template <typename Derived, typename OtherDerived>
22671 struct conservative_resize_like_impl<Derived,OtherDerived,true>
22672 : conservative_resize_like_impl<Derived,OtherDerived,false>
22674 using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
22675 static void run(DenseBase<Derived>& _this, Index size)
22677 const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
22678 const Index new_cols = Derived::RowsAtCompileTime==1 ? size : 1;
22679 _this.derived().m_storage.conservativeResize(size,new_rows,new_cols);
22681 static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
22683 if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
22684 const Index num_new_elements = other.size() - _this.size();
22685 const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows();
22686 const Index new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1;
22687 _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols);
22688 if (num_new_elements > 0)
22689 _this.tail(num_new_elements) = other.tail(num_new_elements);
// matrix_swap_impl: generic case swaps element-wise (body elided in this
// listing); the SwapPointers==true specialization swaps the underlying
// DenseStorage objects directly (O(1), no coefficient copies).
22692 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
22693 struct matrix_swap_impl
22696 static inline void run(MatrixTypeA& a, MatrixTypeB& b)
22701 template<typename MatrixTypeA, typename MatrixTypeB>
22702 struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
22705 static inline void run(MatrixTypeA& a, MatrixTypeB& b)
22707 static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
22713 // end #include "src/Core/PlainObjectBase.h"
22714 // #include "src/Core/Matrix.h"
22715 #ifndef EIGEN_MATRIX_H
22716 #define EIGEN_MATRIX_H
22718 namespace internal {
// Compile-time traits of the dense Matrix class: scalar type, storage kind,
// compile-time sizes, flags and alignment, all derived from the six template
// parameters.
22719 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
22720 struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
22723 enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
22724 typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
22726 row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
22727 is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
// max_size is Dynamic for heap-backed storage, else the fixed element count.
22728 max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
22729 default_alignment = compute_default_alignment<_Scalar,max_size>::value,
// DontAlign in _Options forces actual_alignment to 0 (no alignment promise).
22730 actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
22731 required_alignment = unpacket_traits<PacketScalar>::alignment,
// Packet (SIMD) access is advertised only when vectorizable and either
// unaligned vectorization is enabled or the storage alignment suffices.
22732 packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
22735 typedef _Scalar Scalar;
22736 typedef Dense StorageKind;
22737 typedef Eigen::Index StorageIndex;
22738 typedef MatrixXpr XprKind;
22740 RowsAtCompileTime = _Rows,
22741 ColsAtCompileTime = _Cols,
22742 MaxRowsAtCompileTime = _MaxRows,
22743 MaxColsAtCompileTime = _MaxCols,
22744 Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
22745 Options = _Options,
// Plain matrices are always inner-contiguous (inner stride 1); the outer
// stride is the inner dimension, chosen by storage order.
22746 InnerStrideAtCompileTime = 1,
22747 OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
22748 EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
22749 Alignment = actual_alignment
// The dense Matrix class: a plain object (owns its coefficients) with
// linear-algebra (MatrixXpr) semantics. All real logic lives in
// PlainObjectBase; this class mostly forwards and disambiguates constructors.
// NOTE(review): line-numbered listing; some brace/blank lines are elided.
22753 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
22755 : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
22758 typedef PlainObjectBase<Matrix> Base;
22759 enum { Options = _Options };
22760 EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
22761 typedef typename Base::PlainObject PlainObject;
22763 using Base::coeffRef;
// Copy assignment: routed through Base::_set, which handles resizing/aliasing.
22765 EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
22767 return Base::_set(other);
// Assignment from any dense expression.
22769 template<typename OtherDerived>
22771 EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
22773 return Base::_set(other);
// Assignment from a generic EigenBase / ReturnByValue expression.
22775 template<typename OtherDerived>
22777 EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
22779 return Base::operator=(other);
22781 template<typename OtherDerived>
22783 EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
22785 return Base::operator=(func);
// Default constructor: leaves coefficients uninitialized unless the
// initialize-coeffs debug option is enabled.
22788 EIGEN_STRONG_INLINE Matrix() : Base()
22790 Base::_check_template_params();
22791 EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
// Internal constructor that skips the unaligned-array assertion.
22794 explicit Matrix(internal::constructor_without_unaligned_array_assert)
22795 : Base(internal::constructor_without_unaligned_array_assert())
22796 { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
// Move construction/assignment (only when rvalue references are available).
// Fixed-size matrices cannot steal heap storage, so they copy coefficients.
22797 #if EIGEN_HAS_RVALUE_REFERENCES
22799 Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
22800 : Base(std::move(other))
22802 Base::_check_template_params();
22803 if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
22804 Base::_set_noalias(other);
22807 Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
// One- and two-argument constructors: dispatched through the _init1/_init2
// SFINAE helpers in PlainObjectBase (size vs. value disambiguation).
22813 #ifndef EIGEN_PARSED_BY_DOXYGEN
22814 template<typename T>
22816 EIGEN_STRONG_INLINE explicit Matrix(const T& x)
22818 Base::_check_template_params();
22819 Base::template _init1<T>(x);
22821 template<typename T0, typename T1>
22823 EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
22825 Base::_check_template_params();
22826 Base::template _init2<T0,T1>(x, y);
// Doxygen-visible declarations of the same constructors.
22830 explicit Matrix(const Scalar *data);
22831 EIGEN_STRONG_INLINE explicit Matrix(Index dim);
22832 Matrix(const Scalar& x);
22834 Matrix(Index rows, Index cols);
22835 Matrix(const Scalar& x, const Scalar& y);
// Three- and four-coefficient constructors for fixed size-3/size-4 vectors.
22838 EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
22840 Base::_check_template_params();
22841 EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
22842 m_storage.data()[0] = x;
22843 m_storage.data()[1] = y;
22844 m_storage.data()[2] = z;
22847 EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
22849 Base::_check_template_params();
22850 EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
22851 m_storage.data()[0] = x;
22852 m_storage.data()[1] = y;
22853 m_storage.data()[2] = z;
22854 m_storage.data()[3] = w;
// Copy constructor and conversion from any EigenBase expression.
22857 EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
22859 template<typename OtherDerived>
22861 EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
22862 : Base(other.derived())
// Strides of a plain matrix: inner stride is 1, outer stride is the inner size.
22864 EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
22865 EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
// Geometry-module interop: construction/assignment from a rotation
// representation (defined in the Geometry module).
22866 template<typename OtherDerived>
22868 explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
22869 template<typename OtherDerived>
22871 Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
// User-extension hook.
22872 #ifdef EIGEN_MATRIX_PLUGIN
22873 #include EIGEN_MATRIX_PLUGIN
// conservative_resize_like_impl needs access to m_storage.
22876 template <typename Derived, typename OtherDerived, bool IsVector>
22877 friend struct internal::conservative_resize_like_impl;
22878 using Base::m_storage;
// Convenience typedef generators: MatrixNt / VectorNt / RowVectorNt for
// N in {2,3,4,X(=Dynamic)} and t in {i,f,d,cf,cd}, plus the mixed
// fixed-by-dynamic MatrixNXt / MatrixXNt forms. The macros are #undef'd
// immediately after use.
22880 #define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
22882 typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix; \
22884 typedef Matrix<Type, Size, 1> Vector##SizeSuffix##TypeSuffix; \
22886 typedef Matrix<Type, 1, Size> RowVector##SizeSuffix##TypeSuffix;
22887 #define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
22889 typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix; \
22891 typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
22892 #define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
22893 EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
22894 EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
22895 EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
22896 EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
22897 EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
22898 EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
22899 EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
22900 EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i)
22901 EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f)
22902 EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d)
22903 EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
22904 EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
22905 #undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
22906 #undef EIGEN_MAKE_TYPEDEFS
22907 #undef EIGEN_MAKE_FIXED_TYPEDEFS
22910 // end #include "src/Core/Matrix.h"
22911 // #include "src/Core/Array.h"
22912 #ifndef EIGEN_ARRAY_H
22913 #define EIGEN_ARRAY_H
22915 namespace internal {
// Array reuses Matrix's traits wholesale, overriding only the expression kind
// (ArrayXpr => coefficient-wise semantics) and the expression base class.
22916 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
22917 struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
22919 typedef ArrayXpr XprKind;
22920 typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
// The dense Array class: same storage as Matrix but with coefficient-wise
// (ArrayXpr) semantics. Structure closely mirrors the Matrix class above.
// NOTE(review): line-numbered listing; some brace/blank lines are elided.
22923 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
22925 : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
22928 typedef PlainObjectBase<Array> Base;
22929 EIGEN_DENSE_PUBLIC_INTERFACE(Array)
22930 enum { Options = _Options };
22931 typedef typename Base::PlainObject PlainObject;
// conservative_resize_like_impl needs access to m_storage.
22933 template <typename Derived, typename OtherDerived, bool IsVector>
22934 friend struct internal::conservative_resize_like_impl;
22935 using Base::m_storage;
22939 using Base::coeffRef;
// Assignment from a generic EigenBase expression.
22940 template<typename OtherDerived>
22942 EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
22944 return Base::operator=(other);
// Scalar assignment: fills every coefficient with `value`
// (Array-specific; Matrix has no such operator).
22947 EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
22949 Base::setConstant(value);
// Assignment from any dense expression / same-type copy assignment,
// routed through Base::_set (handles resizing and aliasing).
22952 template<typename OtherDerived>
22954 EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
22956 return Base::_set(other);
22959 EIGEN_STRONG_INLINE Array& operator=(const Array& other)
22961 return Base::_set(other);
// Default constructor: coefficients left uninitialized unless the
// initialize-coeffs debug option is enabled.
22964 EIGEN_STRONG_INLINE Array() : Base()
22966 Base::_check_template_params();
22967 EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
22969 #ifndef EIGEN_PARSED_BY_DOXYGEN
// Internal constructor that skips the unaligned-array assertion.
22971 Array(internal::constructor_without_unaligned_array_assert)
22972 : Base(internal::constructor_without_unaligned_array_assert())
22974 Base::_check_template_params();
22975 EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
// Move construction/assignment; fixed-size arrays copy coefficients.
22978 #if EIGEN_HAS_RVALUE_REFERENCES
22980 Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
22981 : Base(std::move(other))
22983 Base::_check_template_params();
22984 if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
22985 Base::_set_noalias(other);
22988 Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
// One- and two-argument constructors via the _init1/_init2 dispatch helpers.
22994 #ifndef EIGEN_PARSED_BY_DOXYGEN
22995 template<typename T>
22997 EIGEN_STRONG_INLINE explicit Array(const T& x)
22999 Base::_check_template_params();
23000 Base::template _init1<T>(x);
23002 template<typename T0, typename T1>
23004 EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
23006 Base::_check_template_params();
23007 this->template _init2<T0,T1>(val0, val1);
// Doxygen-visible declarations of the same constructors.
23010 EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
23012 EIGEN_STRONG_INLINE explicit Array(Index dim);
23013 Array(const Scalar& value);
23014 Array(Index rows, Index cols);
23015 Array(const Scalar& val0, const Scalar& val1);
// Three- and four-coefficient constructors for fixed size-3/size-4 arrays.
23018 EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
23020 Base::_check_template_params();
23021 EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)
23022 m_storage.data()[0] = val0;
23023 m_storage.data()[1] = val1;
23024 m_storage.data()[2] = val2;
23027 EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
23029 Base::_check_template_params();
23030 EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)
23031 m_storage.data()[0] = val0;
23032 m_storage.data()[1] = val1;
23033 m_storage.data()[2] = val2;
23034 m_storage.data()[3] = val3;
23037 EIGEN_STRONG_INLINE Array(const Array& other)
23041 struct PrivateType {};
// Conversion from an EigenBase expression, enabled only when the source
// scalar is convertible (PrivateType blocks unintended overload matches).
23043 template<typename OtherDerived>
23045 EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
23046 typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
23047 PrivateType>::type = PrivateType())
23048 : Base(other.derived())
// Strides of a plain array: inner stride 1, outer stride = inner size.
23050 EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
23051 EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
// User-extension hook.
23052 #ifdef EIGEN_ARRAY_PLUGIN
23053 #include EIGEN_ARRAY_PLUGIN
// matrix_swap_impl needs access to m_storage for the pointer-swap fast path.
23056 template<typename MatrixType, typename OtherDerived, bool SwapPointers>
23057 friend struct internal::matrix_swap_impl;
// Convenience typedef generators for Array: ArrayNNt (square), ArrayNt (column
// vector), and the mixed fixed-by-dynamic ArrayNXt / ArrayXNt forms, for
// N in {2,3,4,X} and t in {i,f,d,cf,cd}. Macros are #undef'd after use.
// The EIGEN_USING_ARRAY_TYPEDEFS* macros let user code pull the Matrix/Vector/
// RowVector typedefs into its own namespace.
23059 #define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
23061 typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \
23063 typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix;
23064 #define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
23066 typedef Array<Type, Size, Dynamic> Array##Size##X##TypeSuffix; \
23068 typedef Array<Type, Dynamic, Size> Array##X##Size##TypeSuffix;
23069 #define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
23070 EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \
23071 EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \
23072 EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \
23073 EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
23074 EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
23075 EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
23076 EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
23077 EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i)
23078 EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f)
23079 EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d)
23080 EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
23081 EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
23082 #undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES
23083 #undef EIGEN_MAKE_ARRAY_TYPEDEFS
23084 #undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE
23085 #define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
23086 using Eigen::Matrix##SizeSuffix##TypeSuffix; \
23087 using Eigen::Vector##SizeSuffix##TypeSuffix; \
23088 using Eigen::RowVector##SizeSuffix##TypeSuffix;
23089 #define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \
23090 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
23091 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
23092 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
23093 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X)
23094 #define EIGEN_USING_ARRAY_TYPEDEFS \
23095 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \
23096 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \
23097 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \
23098 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \
23099 EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd)
23102 // end #include "src/Core/Array.h"
23103 // #include "src/Core/CwiseBinaryOp.h"
23104 #ifndef EIGEN_CWISE_BINARY_OP_H
23105 #define EIGEN_CWISE_BINARY_OP_H
23107 namespace internal {
// Traits of the coefficient-wise binary expression: sizes come from the LHS
// ("Ancestor"), the result scalar from applying BinaryOp to both scalars, and
// the storage kind/index are promoted from the two operands.
23108 template<typename BinaryOp, typename Lhs, typename Rhs>
23109 struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
23111 typedef typename remove_all<Lhs>::type Ancestor;
23112 typedef typename traits<Ancestor>::XprKind XprKind;
23114 RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
23115 ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
23116 MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
23117 MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
// Result scalar: the type BinaryOp yields when called on the two operand
// scalars (part of the result_of expression; some lines elided here).
23119 typedef typename result_of<
23121 const typename Lhs::Scalar&,
23122 const typename Rhs::Scalar&
23125 typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
23126 typename traits<Rhs>::StorageKind,
23127 BinaryOp>::ret StorageKind;
23128 typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,
23129 typename traits<Rhs>::StorageIndex>::type StorageIndex;
23130 typedef typename Lhs::Nested LhsNested;
23131 typedef typename Rhs::Nested RhsNested;
23132 typedef typename remove_reference<LhsNested>::type _LhsNested;
23133 typedef typename remove_reference<RhsNested>::type _RhsNested;
// Only the storage-order (RowMajorBit) part of the flags is computed here,
// promoted from the two operands' orders.
23135 Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
// CwiseBinaryOp: expression template representing BinaryOp applied
// coefficient-wise to two operands. It stores nested references to both
// operands plus a copy of the functor; evaluation happens elsewhere.
23139 template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
23140 class CwiseBinaryOpImpl;
23141 template<typename BinaryOp, typename LhsType, typename RhsType>
23142 class CwiseBinaryOp :
23143 public CwiseBinaryOpImpl<
23144 BinaryOp, LhsType, RhsType,
23145 typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
23146 typename internal::traits<RhsType>::StorageKind,
// no_assignment_operator: this expression type is not assignable-to.
23148 internal::no_assignment_operator
23151 typedef typename internal::remove_all<BinaryOp>::type Functor;
23152 typedef typename internal::remove_all<LhsType>::type Lhs;
23153 typedef typename internal::remove_all<RhsType>::type Rhs;
23154 typedef typename CwiseBinaryOpImpl<
23155 BinaryOp, LhsType, RhsType,
23156 typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
23157 typename internal::traits<Rhs>::StorageKind,
23158 BinaryOp>::ret>::Base Base;
23159 EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
23160 typedef typename internal::ref_selector<LhsType>::type LhsNested;
23161 typedef typename internal::ref_selector<RhsType>::type RhsNested;
23162 typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
23163 typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
// Constructor: checks scalar compatibility at compile time and dimension
// agreement at compile time (fixed sizes) and run time (eigen_assert).
23165 EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
23166 : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
23168 EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
23169 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
23170 eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
// rows()/cols(): report the RHS dimension when the LHS is dynamic (the two
// must agree anyway), so fixed-size info is preferred when available.
23173 EIGEN_STRONG_INLINE Index rows() const {
23174 if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
23175 return m_rhs.rows();
23177 return m_lhs.rows();
23180 EIGEN_STRONG_INLINE Index cols() const {
23181 if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
23182 return m_rhs.cols();
23184 return m_lhs.cols();
// Accessors for the nested operands and the functor.
23187 const _LhsNested& lhs() const { return m_lhs; }
23189 const _RhsNested& rhs() const { return m_rhs; }
23191 const BinaryOp& functor() const { return m_functor; }
23195 const BinaryOp m_functor;
// CwiseBinaryOpImpl (generic dense case): just re-exports the generic
// expression base class.
23197 template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
23198 class CwiseBinaryOpImpl
23199 : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
23202 typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
23204 template<typename Derived>
23205 template<typename OtherDerived>
23206 EIGEN_STRONG_INLINE Derived &
23207 MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
23209 call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
23212 template<typename Derived>
23213 template<typename OtherDerived>
23214 EIGEN_STRONG_INLINE Derived &
23215 MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
23217 call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
23222 // end #include "src/Core/CwiseBinaryOp.h"
23223 // #include "src/Core/CwiseUnaryOp.h"
23224 #ifndef EIGEN_CWISE_UNARY_OP_H
23225 #define EIGEN_CWISE_UNARY_OP_H
23227 namespace internal {
// Traits for CwiseUnaryOp: the result scalar is whatever the functor returns
// for the input scalar; only the RowMajorBit is propagated from the operand.
23228 template<typename UnaryOp, typename XprType>
23229 struct traits<CwiseUnaryOp<UnaryOp, XprType> >
23232 typedef typename result_of<
23233 UnaryOp(const typename XprType::Scalar&)
23235 typedef typename XprType::Nested XprTypeNested;
23236 typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
23238 Flags = _XprTypeNested::Flags & RowMajorBit
// Expression template applying a unary functor (e.g. abs, sqrt, negate)
// coefficient-wise to a nested expression. Sizes are forwarded unchanged.
23242 template<typename UnaryOp, typename XprType, typename StorageKind>
23243 class CwiseUnaryOpImpl;
23244 template<typename UnaryOp, typename XprType>
23245 class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
23248 typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
23249 EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
23250 typedef typename internal::ref_selector<XprType>::type XprTypeNested;
23251 typedef typename internal::remove_all<XprType>::type NestedExpression;
// Constructor is explicit: a unary expression should never be an implicit
// conversion from its operand.
23252 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
23253 explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
23254 : m_xpr(xpr), m_functor(func) {}
// Dimensions are those of the wrapped expression.
23255 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
23256 Index rows() const { return m_xpr.rows(); }
23257 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
23258 Index cols() const { return m_xpr.cols(); }
// Functor and nested-expression accessors used by the evaluators.
23259 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
23260 const UnaryOp& functor() const { return m_functor; }
23261 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
23262 const typename internal::remove_all<XprTypeNested>::type&
23263 nestedExpression() const { return m_xpr; }
23264 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
23265 typename internal::remove_all<XprTypeNested>::type&
23266 nestedExpression() { return m_xpr; }
23268 XprTypeNested m_xpr;
23269 const UnaryOp m_functor;
// Generic implementation base, selecting the expression base class via
// generic_xpr_base (dense vs other storage kinds specialized elsewhere).
23271 template<typename UnaryOp, typename XprType, typename StorageKind>
23272 class CwiseUnaryOpImpl
23273 : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
23276 typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
23280 // end #include "src/Core/CwiseUnaryOp.h"
23281 // #include "src/Core/CwiseNullaryOp.h"
23282 #ifndef EIGEN_CWISE_NULLARY_OP_H
23283 #define EIGEN_CWISE_NULLARY_OP_H
23285 namespace internal {
// Traits for CwiseNullaryOp: inherit everything from the target plain object
// type, keeping only the RowMajorBit of its flags.
23286 template<typename NullaryOp, typename PlainObjectType>
23287 struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
23290 Flags = traits<PlainObjectType>::Flags & RowMajorBit
// Expression with no operands: every coefficient is produced by the functor
// (used for Constant, Zero, Ones, Identity, LinSpaced, Random, ...).
23294 template<typename NullaryOp, typename PlainObjectType>
23295 class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
23298 typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
23299 EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
// Stores the requested run-time dimensions; asserts they are non-negative and
// consistent with any compile-time fixed sizes.
23301 CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
23302 : m_rows(rows), m_cols(cols), m_functor(func)
23304 eigen_assert(rows >= 0
23305 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
23307 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
// variable_if_dynamic stores the value only when the dimension is Dynamic;
// otherwise .value() returns the compile-time constant at zero storage cost.
23310 EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
23312 EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
23314 const NullaryOp& functor() const { return m_functor; }
23316 const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
23317 const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
23318 const NullaryOp m_functor;
// NullaryExpr(rows, cols, func): build a nullary expression of the given size
// whose coefficients come from a user-supplied functor.
23320 template<typename Derived>
23321 template<typename CustomNullaryOp>
23322 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
23323 DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
23325 return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
// Vector-only overload: maps `size` onto the vector's single dynamic dimension
// (1 x size for row vectors, size x 1 otherwise).
23327 template<typename Derived>
23328 template<typename CustomNullaryOp>
23329 EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
23330 DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
23332 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23333 if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
23334 else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
// Fixed-size overload: dimensions are taken from the compile-time sizes.
23336 template<typename Derived>
23337 template<typename CustomNullaryOp>
23338 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
23339 DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
23341 return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
// Constant(...): nullary expressions whose every coefficient equals `value`.
// Three overloads: (rows, cols), vector (size), and fully fixed-size.
23343 template<typename Derived>
23344 EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23345 DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
23347 return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
23349 template<typename Derived>
23350 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23351 DenseBase<Derived>::Constant(Index size, const Scalar& value)
23353 return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
23355 template<typename Derived>
23356 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23357 DenseBase<Derived>::Constant(const Scalar& value)
23359 EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
23360 return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
// LinSpaced(...): vector-only expressions of `size` values evenly spaced from
// `low` to `high`. The Sequential_t overloads are the legacy tagged API and
// forward to the same linspaced_op functor as the untagged ones.
23362 template<typename Derived>
23363 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
23364 DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
23366 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23367 return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
23369 template<typename Derived>
23370 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
23371 DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
23373 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23374 EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
23375 return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
23377 template<typename Derived>
23378 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
23379 DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
23381 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23382 return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
23384 template<typename Derived>
23385 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
23386 DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
23388 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23389 EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
23390 return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
// isApproxToConstant: true iff every coefficient is approximately `val`
// within precision `prec`. The expression is evaluated once into `self`
// so coefficients are not recomputed per access.
// NOTE(review): the listing drops some lines here (e.g. the early-return
// `return false;`/final `return true;` bodies); code left byte-identical.
23392 template<typename Derived>
23393 EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
23394 (const Scalar& val, const RealScalar& prec) const
23396 typename internal::nested_eval<Derived,1>::type self(derived());
23397 for(Index j = 0; j < cols(); ++j)
23398 for(Index i = 0; i < rows(); ++i)
23399 if(!internal::isApprox(self.coeff(i, j), val, prec))
// isConstant: synonym of isApproxToConstant.
23403 template<typename Derived>
23404 EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
23405 (const Scalar& val, const RealScalar& prec) const
23407 return isApproxToConstant(val, prec);
// fill: alias of setConstant (body elided by the extraction).
23409 template<typename Derived>
23410 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
// setConstant: assign a Constant expression of the current size to *this.
23414 template<typename Derived>
23415 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
23417 return derived() = Constant(rows(), cols(), val);
// PlainObjectBase::setConstant(size, val): resize a vector then fill it.
23419 template<typename Derived>
23420 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
23421 PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
23424 return setConstant(val)
23426 template<typename Derived>
23427 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
23428 PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
23430 resize(rows, cols);
23431 return setConstant(val);
// setLinSpaced: overwrite a vector in place with `newSize` evenly spaced
// values from low to high; the no-size overload keeps the current size().
23433 template<typename Derived>
23434 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
23436 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23437 return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
23439 template<typename Derived>
23440 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
23442 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23443 return setLinSpaced(size(), low, high);
// Zero(...): Constant expressions with value Scalar(0); three overloads for
// (rows, cols), vector size, and fully fixed-size matrices.
23445 template<typename Derived>
23446 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23447 DenseBase<Derived>::Zero(Index rows, Index cols)
23449 return Constant(rows, cols, Scalar(0));
23451 template<typename Derived>
23452 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23453 DenseBase<Derived>::Zero(Index size)
23455 return Constant(size, Scalar(0));
23457 template<typename Derived>
23458 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23459 DenseBase<Derived>::Zero()
23461 return Constant(Scalar(0));
// isZero: true iff every coefficient is much smaller than 1 at precision
// `prec` (i.e. approximately zero).
23463 template<typename Derived>
23464 EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
23466 typename internal::nested_eval<Derived,1>::type self(derived());
23467 for(Index j = 0; j < cols(); ++j)
23468 for(Index i = 0; i < rows(); ++i)
23469 if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
// setZero family: fill with zeros, with PlainObjectBase overloads that resize
// first (vector size / rows x cols).
23473 template<typename Derived>
23474 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
23476 return setConstant(Scalar(0));
23478 template<typename Derived>
23479 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
23480 PlainObjectBase<Derived>::setZero(Index newSize)
23483 return setConstant(Scalar(0));
23485 template<typename Derived>
23486 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
23487 PlainObjectBase<Derived>::setZero(Index rows, Index cols)
23489 resize(rows, cols);
23490 return setConstant(Scalar(0));
// Ones(...): Constant expressions with value Scalar(1); same three overloads
// as Zero.
23492 template<typename Derived>
23493 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23494 DenseBase<Derived>::Ones(Index rows, Index cols)
23496 return Constant(rows, cols, Scalar(1));
23498 template<typename Derived>
23499 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23500 DenseBase<Derived>::Ones(Index newSize)
23502 return Constant(newSize, Scalar(1));
23504 template<typename Derived>
23505 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
23506 DenseBase<Derived>::Ones()
23508 return Constant(Scalar(1));
// isOnes: true iff every coefficient is approximately 1.
23510 template<typename Derived>
23511 EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
23512 (const RealScalar& prec) const
23514 return isApproxToConstant(Scalar(1), prec);
// setOnes family: fill with ones, with resizing overloads on PlainObjectBase.
23516 template<typename Derived>
23517 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
23519 return setConstant(Scalar(1));
23521 template<typename Derived>
23522 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
23523 PlainObjectBase<Derived>::setOnes(Index newSize)
23526 return setConstant(Scalar(1));
23528 template<typename Derived>
23529 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
23530 PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
23532 resize(rows, cols);
23533 return setConstant(Scalar(1));
// Identity(rows, cols): nullary expression with 1 on the main diagonal and 0
// elsewhere (need not be square). Second overload is for fixed-size matrices.
23535 template<typename Derived>
23536 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
23537 MatrixBase<Derived>::Identity(Index rows, Index cols)
23539 return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
23541 template<typename Derived>
23542 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
23543 MatrixBase<Derived>::Identity()
23545 EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
23546 return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());
// isIdentity: checks diagonal coefficients are approximately 1 and
// off-diagonal ones approximately 0 at precision `prec`.
// NOTE(review): the i==j branch selection and the return statements were
// dropped by the extraction; code left byte-identical.
23548 template<typename Derived>
23549 bool MatrixBase<Derived>::isIdentity
23550 (const RealScalar& prec) const
23552 typename internal::nested_eval<Derived,1>::type self(derived());
23553 for(Index j = 0; j < cols(); ++j)
23555 for(Index i = 0; i < rows(); ++i)
23559 if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
23564 if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
23571 namespace internal {
// setIdentity_impl: small fixed-size matrices (Size < 16) are assigned a full
// Identity() expression; the `Big` specialization below zeroes the matrix and
// then writes 1s on the diagonal only, which is cheaper for large matrices.
23572 template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
23573 struct setIdentity_impl
23576 static EIGEN_STRONG_INLINE Derived& run(Derived& m)
23578 return m = Derived::Identity(m.rows(), m.cols());
// Specialization for big (or dynamic) matrices: diagonal-only writes.
23581 template<typename Derived>
23582 struct setIdentity_impl<Derived, true>
23585 static EIGEN_STRONG_INLINE Derived& run(Derived& m)
// Diagonal length is min(rows, cols) so non-square matrices are handled.
23588 const Index size = numext::mini(m.rows(), m.cols());
23589 for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
// Public entry points: in-place identity, optionally resizing first.
23594 template<typename Derived>
23595 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
23597 return internal::setIdentity_impl<Derived>::run(derived());
23599 template<typename Derived>
23600 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
23602 derived().resize(rows, cols);
23603 return setIdentity();
// Unit(newSize, i): the i-th canonical basis vector of dimension newSize,
// expressed as column i of an Identity expression (vector-only).
23605 template<typename Derived>
23606 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
23608 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23609 return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
// Unit(i): fixed-size variant using the compile-time dimension.
23611 template<typename Derived>
23612 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
23614 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23615 return BasisReturnType(SquareMatrixType::Identity(),i);
// UnitX/Y/Z/W: the first four basis vectors, i.e. Unit(0)..Unit(3).
23617 template<typename Derived>
23618 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
23619 { return Derived::Unit(0); }
23620 template<typename Derived>
23621 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
23622 { return Derived::Unit(1); }
23623 template<typename Derived>
23624 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
23625 { return Derived::Unit(2); }
23626 template<typename Derived>
23627 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
23628 { return Derived::Unit(3); }
23631 // end #include "src/Core/CwiseNullaryOp.h"
23632 // #include "src/Core/Stride.h"
23633 #ifndef EIGEN_STRIDE_H
23634 #define EIGEN_STRIDE_H
// Stride: holds inner/outer strides for Map-ing externally owned memory.
// A template argument of 0 means "use the mapped type's natural stride";
// Dynamic means the stride is supplied at run time.
// NOTE(review): the `class Stride` header line itself (orig. line 23637) was
// dropped by the extraction; code left byte-identical.
23636 template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
23640 typedef Eigen::Index Index;
23642 InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
23643 OuterStrideAtCompileTime = _OuterStrideAtCompileTime
// Default constructor only valid when both strides are compile-time fixed.
23647 : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
23649 eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
// Run-time constructor; strides must be non-negative.
23652 Stride(Index outerStride, Index innerStride)
23653 : m_outer(outerStride), m_inner(innerStride)
23655 eigen_assert(innerStride>=0 && outerStride>=0);
23658 Stride(const Stride& other)
23659 : m_outer(other.outer()), m_inner(other.inner())
23662 inline Index outer() const { return m_outer.value(); }
23664 inline Index inner() const { return m_inner.value(); }
// variable_if_dynamic: stores only when the compile-time value is Dynamic.
23666 internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
23667 internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
// Convenience: inner-stride-only (outer = natural) ...
23669 template<int Value>
23670 class InnerStride : public Stride<0, Value>
23672 typedef Stride<0, Value> Base;
23674 EIGEN_DEVICE_FUNC InnerStride() : Base() {}
23675 EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
// ... and outer-stride-only (inner = natural) helpers.
23677 template<int Value>
23678 class OuterStride : public Stride<Value, 0>
23680 typedef Stride<Value, 0> Base;
23682 EIGEN_DEVICE_FUNC OuterStride() : Base() {}
23683 EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
23687 // end #include "src/Core/Stride.h"
23688 // #include "src/Core/MapBase.h"
23689 #ifndef EIGEN_MAPBASE_H
23690 #define EIGEN_MAPBASE_H
// Guard macro: linear (single-index) coefficient access on a Map requires
// either the LinearAccessBit or a vector expression.
23691 #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
23692 EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
23693 YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
// Read-only base for Map-like expressions: wraps a raw pointer plus run-time
// (or compile-time) dimensions, addressing coefficients via row/col strides.
23695 template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
23696 : public internal::dense_xpr_base<Derived>::type
23699 typedef typename internal::dense_xpr_base<Derived>::type Base;
23701 RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
23702 ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
23703 SizeAtCompileTime = Base::SizeAtCompileTime
23705 typedef typename internal::traits<Derived>::StorageKind StorageKind;
23706 typedef typename internal::traits<Derived>::Scalar Scalar;
23707 typedef typename internal::packet_traits<Scalar>::type PacketScalar;
23708 typedef typename NumTraits<Scalar>::Real RealScalar;
// PointerType is `Scalar*` for lvalue (writable) maps, `const Scalar*` else.
23709 typedef typename internal::conditional<
23710 bool(internal::is_lvalue<Derived>::value),
23712 const Scalar *>::type
23714 using Base::derived;
23715 using Base::MaxRowsAtCompileTime;
23716 using Base::MaxColsAtCompileTime;
23717 using Base::MaxSizeAtCompileTime;
23718 using Base::IsVectorAtCompileTime;
23720 using Base::IsRowMajor;
23725 using Base::coeffRef;
23726 using Base::lazyAssign;
23728 using Base::innerStride;
23729 using Base::outerStride;
23730 using Base::rowStride;
23731 using Base::colStride;
23732 using Base::operator=;
23733 typedef typename Base::CoeffReturnType CoeffReturnType;
23734 EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
23735 EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
// Raw pointer to the first mapped coefficient.
23736 EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
// Coefficient access: address = base + col*colStride + row*rowStride.
23738 inline const Scalar& coeff(Index rowId, Index colId) const
23740 return m_data[colId * colStride() + rowId * rowStride()];
// Linear access uses the inner stride; guarded by the macro above.
23743 inline const Scalar& coeff(Index index) const
23745 EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
23746 return m_data[index * innerStride()];
23749 inline const Scalar& coeffRef(Index rowId, Index colId) const
23751 return this->m_data[colId * colStride() + rowId * rowStride()];
23754 inline const Scalar& coeffRef(Index index) const
23756 EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
23757 return this->m_data[index * innerStride()];
// Vectorized loads at the same addresses (LoadMode selects aligned/unaligned).
23759 template<int LoadMode>
23760 inline PacketScalar packet(Index rowId, Index colId) const
23762 return internal::ploadt<PacketScalar, LoadMode>
23763 (m_data + (colId * colStride() + rowId * rowStride()));
23765 template<int LoadMode>
23766 inline PacketScalar packet(Index index) const
23768 EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
23769 return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
// Constructors: fixed-size (pointer only), vector (pointer + size),
// and general (pointer + rows + cols). Each verifies alignment via
// checkSanity and that the sizes match any compile-time dimensions.
23772 explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
23774 EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
23775 checkSanity<Derived>();
23778 inline MapBase(PointerType dataPtr, Index vecSize)
23780 m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
23781 m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime))
23783 EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
23784 eigen_assert(vecSize >= 0);
23785 eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
23786 checkSanity<Derived>();
23789 inline MapBase(PointerType dataPtr, Index rows, Index cols)
23790 : m_data(dataPtr), m_rows(rows), m_cols(cols)
23792 eigen_assert( (dataPtr == 0)
23793 || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
23794 && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
23795 checkSanity<Derived>();
23797 #ifdef EIGEN_MAPBASE_PLUGIN
23798 #include EIGEN_MAPBASE_PLUGIN
// checkSanity: when the map type requires alignment, asserts the pointer is
// suitably aligned (or the mapped data is smaller than the alignment).
// The enable_if pair selects the asserting vs. no-op overload at compile time.
23801 template<typename T>
23803 void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
23805 #if EIGEN_MAX_ALIGN_BYTES>0
23806 eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
23807 || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
23810 template<typename T>
23812 void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
23814 PointerType m_data;
23815 const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
23816 const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
// Writable specialization of MapBase: layers non-const coefficient access and
// packet stores on top of the read-only base. ScalarWithConstIfNotLvalue
// degrades writes to const when the mapped expression is not an lvalue.
23818 template<typename Derived> class MapBase<Derived, WriteAccessors>
23819 : public MapBase<Derived, ReadOnlyAccessors>
23821 typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
23823 typedef MapBase<Derived, ReadOnlyAccessors> Base;
23824 typedef typename Base::Scalar Scalar;
23825 typedef typename Base::PacketScalar PacketScalar;
23826 typedef typename Base::StorageIndex StorageIndex;
23827 typedef typename Base::PointerType PointerType;
23828 using Base::derived;
23833 using Base::coeffRef;
23834 using Base::innerStride;
23835 using Base::outerStride;
23836 using Base::rowStride;
23837 using Base::colStride;
23838 typedef typename internal::conditional<
23839 internal::is_lvalue<Derived>::value,
23842 >::type ScalarWithConstIfNotLvalue;
23844 inline const Scalar* data() const { return this->m_data; }
23846 inline ScalarWithConstIfNotLvalue* data() { return this->m_data; }
// Writable coefficient access, same addressing as the read-only base.
23848 inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
23850 return this->m_data[col * colStride() + row * rowStride()];
23853 inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
23855 EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
23856 return this->m_data[index * innerStride()];
// Vectorized stores (StoreMode selects aligned/unaligned).
23858 template<int StoreMode>
23859 inline void writePacket(Index row, Index col, const PacketScalar& val)
23861 internal::pstoret<Scalar, PacketScalar, StoreMode>
23862 (this->m_data + (col * colStride() + row * rowStride()), val);
23864 template<int StoreMode>
23865 inline void writePacket(Index index, const PacketScalar& val)
23867 EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
23868 internal::pstoret<Scalar, PacketScalar, StoreMode>
23869 (this->m_data + index * innerStride(), val);
// Constructors simply forward to the read-only base.
23871 EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
23872 EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
23873 EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
// Assignment copies coefficients through the base's operator= (it does NOT
// re-seat the mapped pointer).
23875 Derived& operator=(const MapBase& other)
23877 ReadOnlyMapBase::Base::operator=(other);
23880 using ReadOnlyMapBase::Base::operator=;
23882 #undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
23885 // end #include "src/Core/MapBase.h"
23886 // #include "src/Core/Map.h"
23887 #ifndef EIGEN_MAP_H
23888 #define EIGEN_MAP_H
23890 namespace internal {
// Traits for Map: inherit from the mapped plain object, overriding strides
// (a StrideType value of 0 means "use the plain object's natural stride"),
// alignment (taken from MapOptions), and dropping LvalueBit for const maps.
23891 template<typename PlainObjectType, int MapOptions, typename StrideType>
23892 struct traits<Map<PlainObjectType, MapOptions, StrideType> >
23893 : public traits<PlainObjectType>
23895 typedef traits<PlainObjectType> TraitsBase;
23897 InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
23898 ? int(PlainObjectType::InnerStrideAtCompileTime)
23899 : int(StrideType::InnerStrideAtCompileTime),
23900 OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
23901 ? int(PlainObjectType::OuterStrideAtCompileTime)
23902 : int(StrideType::OuterStrideAtCompileTime),
23903 Alignment = int(MapOptions)&int(AlignedMask),
23904 Flags0 = TraitsBase::Flags & (~NestByRefBit),
23905 Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
// Map: views an existing, externally owned array of scalars as an Eigen
// matrix/vector without copying. Owns no memory; only the stride object.
23911 template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
23912 : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
23915 typedef MapBase<Map> Base;
23916 EIGEN_DENSE_PUBLIC_INTERFACE(Map)
23917 typedef typename Base::PointerType PointerType;
23918 typedef PointerType PointerArgType;
23920 inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
// innerStride: user-provided stride if its compile-time value is nonzero,
// otherwise the natural contiguous stride of 1.
23922 inline Index innerStride() const
23924 return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
// outerStride: user stride if set; else size() for vectors, else the inner
// dimension length (cols for row-major, rows otherwise — tail of the
// expression elided by the extraction).
23927 inline Index outerStride() const
23929 return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
23930 : IsVectorAtCompileTime ? this->size()
23931 : int(Flags)&RowMajorBit ? this->cols()
// Constructors mirror MapBase's: fixed-size, vector, and rows x cols; each
// also validates the template parameters of the mapped plain type.
23935 explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
23936 : Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
23938 PlainObjectType::Base::_check_template_params();
23941 inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
23942 : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
23944 PlainObjectType::Base::_check_template_params();
23947 inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
23948 : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
23950 PlainObjectType::Base::_check_template_params();
23952 EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
23954 StrideType m_stride;
23958 // end #include "src/Core/Map.h"
23959 // #include "src/Core/Block.h"
23960 #ifndef EIGEN_BLOCK_H
23961 #define EIGEN_BLOCK_H
23963 namespace internal {
// Traits for Block: derives compile-time sizes, storage order, strides and
// flags of a BlockRows x BlockCols sub-expression of XprType.
23964 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
23965 struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
23967 typedef typename traits<XprType>::Scalar Scalar;
23968 typedef typename traits<XprType>::StorageKind StorageKind;
23969 typedef typename traits<XprType>::XprKind XprKind;
23970 typedef typename ref_selector<XprType>::type XprTypeNested;
23971 typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
23973 MatrixRows = traits<XprType>::RowsAtCompileTime,
23974 MatrixCols = traits<XprType>::ColsAtCompileTime,
// A zero-sized parent forces a zero-sized block in that dimension.
23975 RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
23976 ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
23977 MaxRowsAtCompileTime = BlockRows==0 ? 0
23978 : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
23979 : int(traits<XprType>::MaxRowsAtCompileTime),
23980 MaxColsAtCompileTime = BlockCols==0 ? 0
23981 : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
23982 : int(traits<XprType>::MaxColsAtCompileTime),
// A single-row block is treated as row-major, a single-column block as
// column-major, otherwise the parent's storage order is kept.
23983 XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
23984 IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
23985 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
23986 : XprTypeIsRowMajor,
23987 HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
23988 InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
// If the block's storage order flips relative to the parent, its inner and
// outer strides swap roles.
23989 InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
23990 ? int(inner_stride_at_compile_time<XprType>::ret)
23991 : int(outer_stride_at_compile_time<XprType>::ret),
23992 OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
23993 ? int(outer_stride_at_compile_time<XprType>::ret)
23994 : int(inner_stride_at_compile_time<XprType>::ret),
23995 FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
23996 FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
23997 Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
24001 template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
24002 bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
24004 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
// Block: expression of a fixed- or dynamic-sized rectangular sub-part of
// XprType (rows/cols/segments are all built on it).
24005 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
24006 : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
24008 typedef BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> Impl;
24011 EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
24012 EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
24013 typedef typename internal::remove_all<XprType>::type NestedExpression;
// Single-index constructor: i is a row index for 1 x N blocks (whole row) or
// a column index for N x 1 blocks (whole column); asserted accordingly.
24015 inline Block(XprType& xpr, Index i) : Impl(xpr,i)
24017 eigen_assert( (i>=0) && (
24018 ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
24019 ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
// Fixed-size block at a run-time position.
24022 inline Block(XprType& xpr, Index startRow, Index startCol)
24023 : Impl(xpr, startRow, startCol)
24025 EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
24026 eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
24027 && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
// Dynamic-size block: position and size all given at run time; must stay
// inside the parent expression.
24030 inline Block(XprType& xpr,
24031 Index startRow, Index startCol,
24032 Index blockRows, Index blockCols)
24033 : Impl(xpr, startRow, startCol, blockRows, blockCols)
24035 eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
24036 && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
24037 eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows
24038 && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
// Dense-storage implementation selector: forwards all constructors to
// internal::BlockImpl_dense.
24041 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
24042 class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
24043 : public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
24045 typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
24046 typedef typename XprType::StorageIndex StorageIndex;
24049 EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
24050 EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
24051 EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
24053 inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
24054 : Impl(xpr, startRow, startCol, blockRows, blockCols) {}
24056 namespace internal {
// Generic (coefficient-access) implementation of dense Block for expressions
// WITHOUT direct access (HasDirectAccess==false in the primary template).
// Every coeff/packet access is translated by adding the stored start offsets.
24057 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class BlockImpl_dense
24058 : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
24060 typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
24061 typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
24063 typedef typename internal::dense_xpr_base<BlockType>::type Base;
24064 EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
24065 EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
// Column or row constructor: interprets i as a row index for a full-width
// row block, otherwise as a column index (mirrors Block's assertion).
24067 inline BlockImpl_dense(XprType& xpr, Index i)
24069 m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
24070 m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
24071 m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
24072 m_blockCols(BlockCols==1 ? 1 : xpr.cols())
// Fixed-size block constructor: sizes come from the template parameters.
24075 inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
24076 : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
24077 m_blockRows(BlockRows), m_blockCols(BlockCols)
// Dynamic-size block constructor.
24080 inline BlockImpl_dense(XprType& xpr,
24081 Index startRow, Index startCol,
24082 Index blockRows, Index blockCols)
24083 : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
24084 m_blockRows(blockRows), m_blockCols(blockCols)
24086 EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
24087 EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
// 2D coefficient access: offset by the block's start position.
24089 inline Scalar& coeffRef(Index rowId, Index colId)
24091 EIGEN_STATIC_ASSERT_LVALUE(XprType)
24092 return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
24095 inline const Scalar& coeffRef(Index rowId, Index colId) const
24097 return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
24100 EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
24102 return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
// Linear (1D) access, only meaningful for vector-like blocks: the single
// index is mapped onto the row or the column direction depending on whether
// the block is a row vector (RowsAtCompileTime==1).
24105 inline Scalar& coeffRef(Index index)
24107 EIGEN_STATIC_ASSERT_LVALUE(XprType)
24108 return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
24109 m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
24112 inline const Scalar& coeffRef(Index index) const
24114 return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
24115 m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
24118 inline const CoeffReturnType coeff(Index index) const
24120 return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
24121 m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
// Packet access: always forwarded as Unaligned because the block's start
// offset can break the nested expression's alignment guarantee.
24123 template<int LoadMode>
24124 inline PacketScalar packet(Index rowId, Index colId) const
24126 return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
24128 template<int LoadMode>
24129 inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
24131 m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
24133 template<int LoadMode>
24134 inline PacketScalar packet(Index index) const
24136 return m_xpr.template packet<Unaligned>
24137 (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
24138 m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
24140 template<int LoadMode>
24141 inline void writePacket(Index index, const PacketScalar& val)
24143 m_xpr.template writePacket<Unaligned>
24144 (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
24145 m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
// data()/innerStride()/outerStride() only exist for documentation purposes in
// this non-direct-access implementation (see the direct-access specialization
// below for the real ones).
24147 #ifdef EIGEN_PARSED_BY_DOXYGEN
24148 EIGEN_DEVICE_FUNC inline const Scalar* data() const;
24149 EIGEN_DEVICE_FUNC inline Index innerStride() const;
24150 EIGEN_DEVICE_FUNC inline Index outerStride() const;
24153 const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
24158 XprType& nestedExpression() { return m_xpr; }
24160 StorageIndex startRow() const
24162 return m_startRow.value();
24165 StorageIndex startCol() const
24167 return m_startCol.value();
// Start offsets are stored as compile-time zeros when the nested expression is
// itself a 1D vector in that direction (variable_if_dynamic collapses them).
24170 XprTypeNested m_xpr;
24171 const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
24172 const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
24173 const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
24174 const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
// Specialization of BlockImpl_dense for expressions WITH direct access
// (HasDirectAccess==true): the block is exposed as a MapBase view, i.e. a raw
// pointer plus strides, which enables vectorized and pointer-based paths.
24176 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
24177 class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
24178 : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
24180 typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
24181 typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
24183 XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
24186 typedef MapBase<BlockType> Base;
24187 EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
24188 EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
// Column or row constructor: the data pointer is advanced by i inner strides
// when the selected row/column is contiguous in memory, otherwise by i outer
// strides.
24190 inline BlockImpl_dense(XprType& xpr, Index i)
24191 : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
24192 || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
24193 BlockRows==1 ? 1 : xpr.rows(),
24194 BlockCols==1 ? 1 : xpr.cols()),
24196 m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
24197 m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
// Fixed-size block constructor: pointer computed from both strides, mapping
// (startRow,startCol) onto (inner,outer) according to the storage order.
24202 inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
24203 : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
24204 m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
// Dynamic-size block constructor.
24209 inline BlockImpl_dense(XprType& xpr,
24210 Index startRow, Index startCol,
24211 Index blockRows, Index blockCols)
24212 : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
24213 m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
24218 const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
24223 XprType& nestedExpression() { return m_xpr; }
// Inner stride equals the nested expression's inner stride when the block has
// the same storage order, otherwise its outer stride.
24225 inline Index innerStride() const
24227 return internal::traits<BlockType>::HasSameStorageOrderAsXprType
24228 ? m_xpr.innerStride()
24229 : m_xpr.outerStride();
// Outer stride is cached in m_outerStride (set up by the init code below).
24232 inline Index outerStride() const
24234 return m_outerStride;
24237 StorageIndex startRow() const
24239 return m_startRow.value();
24242 StorageIndex startCol() const
24244 return m_startCol.value();
// Sun CC workaround guard — presumably changes member visibility; the guarded
// structural lines are not visible here (dropped by extraction).
24246 #ifndef __SUNPRO_CC
24249 #ifndef EIGEN_PARSED_BY_DOXYGEN
// Internal constructor used by e.g. VectorBlock: takes a precomputed data
// pointer directly.
24251 inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
24252 : Base(data, blockRows, blockCols), m_xpr(xpr)
// Cached outer stride initialization (body of an init() helper whose
// surrounding lines were dropped by extraction).
24261 m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
24262 ? m_xpr.outerStride()
24263 : m_xpr.innerStride();
24265 XprTypeNested m_xpr;
24266 const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
24267 const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
24268 Index m_outerStride;
24273 // end #include "src/Core/Block.h"
24274 // #include "src/Core/Transpose.h"
24275 #ifndef EIGEN_TRANSPOSE_H
24276 #define EIGEN_TRANSPOSE_H
24278 namespace internal {
// Traits of Transpose<MatrixType>: rows/cols (and their maxima) are swapped
// relative to MatrixType, and the storage order is flipped by XOR-ing
// RowMajorBit. LvalueBit is kept only if the nested expression is an lvalue.
24279 template<typename MatrixType>
24280 struct traits<Transpose<MatrixType> > : public traits<MatrixType>
24282 typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
24283 typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
24285 RowsAtCompileTime = MatrixType::ColsAtCompileTime,
24286 ColsAtCompileTime = MatrixType::RowsAtCompileTime,
24287 MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
24288 MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
24289 FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
// Drop LvalueBit/NestByRefBit from the nested flags, re-add LvalueBit if
// appropriate, then toggle the storage order.
24290 Flags0 = traits<MatrixTypeNestedPlain>::Flags & ~(LvalueBit | NestByRefBit),
24291 Flags1 = Flags0 | FlagsLvalueBit,
24292 Flags = Flags1 ^ RowMajorBit,
24293 InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
24294 OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
// Expression of the transpose of a matrix: a lightweight wrapper that stores
// the nested expression and swaps the roles of rows and columns. The
// implementation is dispatched on the storage kind via TransposeImpl.
24298 template<typename MatrixType, typename StorageKind> class TransposeImpl;
24299 template<typename MatrixType> class Transpose
24300 : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
24303 typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
24304 typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
24305 EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
24306 typedef typename internal::remove_all<MatrixType>::type NestedExpression;
24308 explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
24309 EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
// rows() and cols() are swapped with respect to the nested expression.
24310 EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
24311 EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
24313 const typename internal::remove_all<MatrixTypeNested>::type&
24314 nestedExpression() const { return m_matrix; }
24316 typename internal::remove_reference<MatrixTypeNested>::type&
24317 nestedExpression() { return m_matrix; }
// Resizing the transpose resizes the nested expression with swapped sizes.
24318 void resize(Index nrows, Index ncols) {
24319 m_matrix.resize(ncols,nrows);
24322 typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
24324 namespace internal {
// Selects the dense base class of TransposeImpl. Both the direct-access and
// the non-direct-access branch currently resolve to the same dense_xpr_base
// type; the split is kept for the HasDirectAccess dispatch.
24325 template<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>
24326 struct TransposeImpl_base
24328 typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
24330 template<typename MatrixType>
24331 struct TransposeImpl_base<MatrixType, false>
24333 typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
// Generic (non-dense) TransposeImpl: just picks the generic expression base.
24336 template<typename XprType, typename StorageKind>
24337 class TransposeImpl
24338 : public internal::generic_xpr_base<Transpose<XprType> >::type
24341 typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
// Dense implementation of Transpose: forwards strides and data pointer from
// the nested expression and swaps (row,col) in coefficient access.
24343 template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
24344 : public internal::TransposeImpl_base<MatrixType>::type
24347 typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
24348 using Base::coeffRef;
24349 EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
24350 EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
// Because the storage order is flipped, the transpose's inner stride is the
// nested expression's inner stride (and likewise for the outer stride).
24351 EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
24352 EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
// data() is writable only when the nested expression is an lvalue.
24353 typedef typename internal::conditional<
24354 internal::is_lvalue<MatrixType>::value,
24357 >::type ScalarWithConstIfNotLvalue;
24358 EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
24359 EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
// (row,col) access maps to (col,row) on the nested expression; linear access
// needs no swap since the underlying storage is the same.
24361 inline const Scalar& coeffRef(Index rowId, Index colId) const
24363 return derived().nestedExpression().coeffRef(colId, rowId);
24366 inline const Scalar& coeffRef(Index index) const
24368 return derived().nestedExpression().coeffRef(index);
// DenseBase::transpose(): returns a (possibly writable) Transpose expression
// of *this; the const overload returns a read-only one.
24371 template<typename Derived>
24372 inline Transpose<Derived>
24373 DenseBase<Derived>::transpose()
24375 return TransposeReturnType(derived());
24377 template<typename Derived>
24378 inline typename DenseBase<Derived>::ConstTransposeReturnType
24379 DenseBase<Derived>::transpose() const
24381 return ConstTransposeReturnType(derived());
// MatrixBase::adjoint(): the conjugate transpose, built on top of transpose().
24383 template<typename Derived>
24384 inline const typename MatrixBase<Derived>::AdjointReturnType
24385 MatrixBase<Derived>::adjoint() const
24387 return AdjointReturnType(this->transpose());
24389 namespace internal {
// Dispatch helper for transposeInPlace(): selects an implementation based on
// whether the matrix is square at compile time and whether its fixed size
// matches the SIMD packet size (enabling a register-level transpose).
24390 template<typename MatrixType,
24391 bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
24392 bool MatchPacketSize =
24393 (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
24394 && (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
24395 struct inplace_transpose_selector;
// Square matrix, no packet match: swap the strict upper triangle with the
// strict lower triangle (via the transposed view); the diagonal stays put.
24396 template<typename MatrixType>
24397 struct inplace_transpose_selector<MatrixType,true,false> {
24398 static void run(MatrixType& m) {
24399 m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
// Square matrix whose size equals the packet size: load the whole matrix into
// a PacketBlock, transpose it in registers with ptranspose, and store back.
24402 template<typename MatrixType>
24403 struct inplace_transpose_selector<MatrixType,true,true> {
24404 static void run(MatrixType& m) {
24405 typedef typename MatrixType::Scalar Scalar;
24406 typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
24407 const Index PacketSize = internal::packet_traits<Scalar>::size;
24408 const Index Alignment = internal::evaluator<MatrixType>::Alignment;
24409 PacketBlock<Packet> A;
24410 for (Index i=0; i<PacketSize; ++i)
24411 A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
24412 internal::ptranspose(A);
24413 for (Index i=0; i<PacketSize; ++i)
24414 m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
// Non-square (or dynamic-size) matrix: triangular swap when it happens to be
// square at runtime, otherwise evaluate the transpose into a temporary and
// assign (which resizes m).
24417 template<typename MatrixType,bool MatchPacketSize>
24418 struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> {
24419 static void run(MatrixType& m) {
24420 if (m.rows()==m.cols())
24421 m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
24423 m = m.transpose().eval();
// In-place transposition: only valid for square matrices or fully resizable
// (both dimensions Dynamic) ones; dispatches to inplace_transpose_selector.
24427 template<typename Derived>
24428 inline void DenseBase<Derived>::transposeInPlace()
24430 eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
24431 && "transposeInPlace() called on a non-square non-resizable matrix");
24432 internal::inplace_transpose_selector<Derived>::run(derived());
// In-place adjoint: evaluates adjoint() into a temporary to avoid aliasing,
// then assigns it back.
24434 template<typename Derived>
24435 inline void MatrixBase<Derived>::adjointInPlace()
24437 derived() = adjoint().eval();
// Debug-only machinery that detects the classic "m = m.transpose()" aliasing
// bug at runtime: compile-time selectors decide whether aliasing is possible,
// and a runtime pointer comparison confirms it before asserting.
24439 #ifndef EIGEN_NO_DEBUG
24440 namespace internal {
// Aliasing is possible when source and destination disagree on transposed-ness.
24441 template<bool DestIsTransposed, typename OtherDerived>
24442 struct check_transpose_aliasing_compile_time_selector
24444 enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
// For a binary expression, either operand can alias the destination.
24446 template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
24447 struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
24449 enum { ret = bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
24450 || bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
// Runtime check: aliasing actually occurs only if the underlying data
// pointers compare equal (extract_data digs through the expression).
24453 template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
24454 struct check_transpose_aliasing_run_time_selector
24456 static bool run(const Scalar* dest, const OtherDerived& src)
24458 return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src));
24461 template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
24462 struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
24464 static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
24466 return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.lhs())))
24467 || ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.rhs())));
// Entry point: when the compile-time selector says aliasing might occur, run
// the runtime check and assert with a helpful message; otherwise compile to a
// no-op (the false specialization below).
24470 template<typename Derived, typename OtherDerived,
24471 bool MightHaveTransposeAliasing
24472 = check_transpose_aliasing_compile_time_selector
24473 <blas_traits<Derived>::IsTransposed,OtherDerived>::ret
24475 struct checkTransposeAliasing_impl
24477 static void run(const Derived& dst, const OtherDerived& other)
24479 eigen_assert((!check_transpose_aliasing_run_time_selector
24480 <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
24481 ::run(extract_data(dst), other))
24482 && "aliasing detected during transposition, use transposeInPlace() "
24483 "or evaluate the rhs into a temporary using .eval()");
24486 template<typename Derived, typename OtherDerived>
24487 struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
24489 static void run(const Derived&, const OtherDerived&)
24493 template<typename Dst, typename Src>
24494 void check_for_aliasing(const Dst &dst, const Src &src)
24496 internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
24502 // end #include "src/Core/Transpose.h"
24503 // #include "src/Core/Redux.h"
24504 #ifndef EIGEN_REDUX_H
24505 #define EIGEN_REDUX_H
24507 namespace internal {
// Compile-time policy for redux (reductions): decides the traversal strategy
// (default / linear-vectorized / slice-vectorized) and whether to fully
// unroll, based on the expression's flags, packet size and cost model.
24508 template<typename Func, typename Derived>
24509 struct redux_traits
24512 typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
24514 PacketSize = unpacket_traits<PacketType>::size,
24515 InnerMaxSize = int(Derived::IsRowMajor)
24516 ? Derived::MaxColsAtCompileTime
24517 : Derived::MaxRowsAtCompileTime
// Vectorization requires packet access on both the expression and the functor;
// slice-vectorization additionally needs the inner dimension to be worth it
// (at least 3 packets).
24520 MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
24521 && (functor_traits<Func>::PacketAccess),
24522 MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
24523 MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
24527 Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
24528 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
24529 : int(DefaultTraversal)
// Unroll completely when the estimated cost fits under the (packet-scaled)
// unrolling limit; dynamic-size expressions get HugeCost and never unroll.
24533 Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
24534 : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
24535 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
24539 Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
// Debug dump of the chosen policy (only compiled with EIGEN_DEBUG_ASSIGN).
24541 #ifdef EIGEN_DEBUG_ASSIGN
24542 static void debug()
24544 std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
24545 std::cerr.setf(std::ios::hex, std::ios::basefield);
24546 EIGEN_DEBUG_VAR(Derived::Flags)
24547 std::cerr.unsetf(std::ios::hex);
24548 EIGEN_DEBUG_VAR(InnerMaxSize)
24549 EIGEN_DEBUG_VAR(PacketSize)
24550 EIGEN_DEBUG_VAR(MightVectorize)
24551 EIGEN_DEBUG_VAR(MayLinearVectorize)
24552 EIGEN_DEBUG_VAR(MaySliceVectorize)
24553 EIGEN_DEBUG_VAR(Traversal)
24554 EIGEN_DEBUG_VAR(UnrollingLimit)
24555 EIGEN_DEBUG_VAR(Unrolling)
24556 std::cerr << std::endl;
// Fully-unrolled scalar reduction: divide-and-conquer template recursion that
// halves [Start, Start+Length) until the length-1 base case reads one
// coefficient (addressed by compile-time outer/inner indices).
24560 template<typename Func, typename Derived, int Start, int Length>
24561 struct redux_novec_unroller
24564 HalfLength = Length/2
24566 typedef typename Derived::Scalar Scalar;
24568 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
24570 return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
24571 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
// Base case Length==1: map the linear Start index to (outer, inner) at
// compile time and read that single coefficient.
24574 template<typename Func, typename Derived, int Start>
24575 struct redux_novec_unroller<Func, Derived, Start, 1>
24578 outer = Start / Derived::InnerSizeAtCompileTime,
24579 inner = Start % Derived::InnerSizeAtCompileTime
24581 typedef typename Derived::Scalar Scalar;
24583 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
24585 return mat.coeffByOuterInner(outer, inner);
// Base case Length==0: should never be reached in a meaningful reduction;
// returns a default-constructed Scalar.
24588 template<typename Func, typename Derived, int Start>
24589 struct redux_novec_unroller<Func, Derived, Start, 0>
24591 typedef typename Derived::Scalar Scalar;
24593 static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
// Fully-unrolled vectorized reduction: same divide-and-conquer shape as
// redux_novec_unroller, but each length-1 leaf loads a whole packet and the
// combining step uses the functor's packetOp.
24595 template<typename Func, typename Derived, int Start, int Length>
24596 struct redux_vec_unroller
24599 PacketSize = redux_traits<Func, Derived>::PacketSize,
24600 HalfLength = Length/2
24602 typedef typename Derived::Scalar Scalar;
24603 typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
24604 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
24606 return func.packetOp(
24607 redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
24608 redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
// Base case: Start counts packets, so the linear element index is
// Start*PacketSize, mapped to (outer, inner) at compile time.
24611 template<typename Func, typename Derived, int Start>
24612 struct redux_vec_unroller<Func, Derived, Start, 1>
24615 index = Start * redux_traits<Func, Derived>::PacketSize,
24616 outer = index / int(Derived::InnerSizeAtCompileTime),
24617 inner = index % int(Derived::InnerSizeAtCompileTime),
24618 alignment = Derived::Alignment
24620 typedef typename Derived::Scalar Scalar;
24621 typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
24622 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
24624 return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
// Primary redux_impl template, specialized on (Traversal, Unrolling) as
// computed by redux_traits.
24627 template<typename Func, typename Derived,
24628 int Traversal = redux_traits<Func, Derived>::Traversal,
24629 int Unrolling = redux_traits<Func, Derived>::Unrolling
// Default traversal, no unrolling: plain scalar double loop over (outer,
// inner), seeded with coefficient (0,0); the first inner row of the loop is
// peeled so the accumulator starts from a real coefficient, not an identity.
24632 template<typename Func, typename Derived>
24633 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
24635 typedef typename Derived::Scalar Scalar;
24637 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
24639 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
24641 res = mat.coeffByOuterInner(0, 0);
24642 for(Index i = 1; i < mat.innerSize(); ++i)
24643 res = func(res, mat.coeffByOuterInner(0, i));
24644 for(Index i = 1; i < mat.outerSize(); ++i)
24645 for(Index j = 0; j < mat.innerSize(); ++j)
24646 res = func(res, mat.coeffByOuterInner(i, j));
// Default traversal with complete unrolling: delegate entirely to the
// scalar divide-and-conquer unroller over the whole compile-time size.
24650 template<typename Func, typename Derived>
24651 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
24652 : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
// Linear vectorized traversal, no unrolling: process the expression as a 1D
// array of packets, with scalar prologue/epilogue around the aligned middle.
24654 template<typename Func, typename Derived>
24655 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
24657 typedef typename Derived::Scalar Scalar;
24658 typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
24659 static Scalar run(const Derived &mat, const Func& func)
24661 const Index size = mat.size();
24662 const Index packetSize = redux_traits<Func, Derived>::PacketSize;
24663 const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
// Aligned loads are possible only with direct access and scalar-alignable
// packets; otherwise fall back to Unaligned.
24665 alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
24666 alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
// [alignedStart, alignedEnd2) is processed two packets per iteration with two
// independent accumulators (breaks the dependency chain); [alignedEnd2,
// alignedEnd) holds at most one leftover packet.
24668 const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
24669 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
24670 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
24671 const Index alignedEnd2 = alignedStart + alignedSize2;
24672 const Index alignedEnd = alignedStart + alignedSize;
24676 PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
24677 if(alignedSize>packetSize)
24679 PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
24680 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
24682 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
24683 packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
24685 packet_res0 = func.packetOp(packet_res0,packet_res1);
24686 if(alignedEnd>alignedEnd2)
24687 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
// Horizontal reduction of the packet, then scalar handling of the unaligned
// head [0, alignedStart) and tail [alignedEnd, size).
24689 res = func.predux(packet_res0);
24690 for(Index index = 0; index < alignedStart; ++index)
24691 res = func(res,mat.coeff(index));
24692 for(Index index = alignedEnd; index < size; ++index)
24693 res = func(res,mat.coeff(index));
// Fallback for expressions too small to fill a single packet: pure scalar.
24697 res = mat.coeff(0);
24698 for(Index index = 1; index < size; ++index)
24699 res = func(res,mat.coeff(index));
// Slice vectorized traversal: vectorize along the inner dimension of each
// outer slice (used when linear access is unavailable). The remainder of each
// inner vector that does not fill a packet is reduced with scalar ops.
24704 template<typename Func, typename Derived, int Unrolling>
24705 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
24707 typedef typename Derived::Scalar Scalar;
24708 typedef typename redux_traits<Func, Derived>::PacketType PacketType;
24709 EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
24711 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
24712 const Index innerSize = mat.innerSize();
24713 const Index outerSize = mat.outerSize();
24715 packetSize = redux_traits<Func, Derived>::PacketSize
24717 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
24719 if(packetedInnerSize)
// Seed with the first packet (j==0 starts at i=packetSize so it is not
// accumulated twice), then sweep packets over every outer slice.
24721 PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
24722 for(Index j=0; j<outerSize; ++j)
24723 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
24724 packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
24725 res = func.predux(packet_res);
// Scalar tail of each inner vector.
24726 for(Index j=0; j<outerSize; ++j)
24727 for(Index i=packetedInnerSize; i<innerSize; ++i)
24728 res = func(res, mat.coeffByOuterInner(j,i));
// Inner dimension smaller than one packet: fall back to the default scalar
// implementation.
24732 res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
// Linear vectorized traversal with complete unrolling: the packeted part of
// the reduction is unrolled with redux_vec_unroller, and any leftover scalars
// (Size not a multiple of PacketSize) are folded in with redux_novec_unroller.
24737 template<typename Func, typename Derived>
24738 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
24740 typedef typename Derived::Scalar Scalar;
24741 typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
24743 PacketSize = redux_traits<Func, Derived>::PacketSize,
24744 Size = Derived::SizeAtCompileTime,
24745 VectorizedSize = (Size / PacketSize) * PacketSize
24747 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
24749 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
24750 if (VectorizedSize > 0) {
24751 Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
24752 if (VectorizedSize != Size)
24753 res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
// Size smaller than one packet: fully scalar unrolled reduction.
24757 return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
// Adapter that presents an evaluator<XprType> with the expression-like
// interface (rows/cols/size, coeff, packet, ...ByOuterInner) expected by the
// redux_impl machinery. DirectAccessBit is stripped so reductions always go
// through coefficient/packet access.
24761 template<typename _XprType>
24762 class redux_evaluator
24765 typedef _XprType XprType;
24766 EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
24767 typedef typename XprType::Scalar Scalar;
24768 typedef typename XprType::CoeffReturnType CoeffReturnType;
24769 typedef typename XprType::PacketScalar PacketScalar;
24770 typedef typename XprType::PacketReturnType PacketReturnType;
24772 MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
24773 MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
24774 Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
24775 IsRowMajor = XprType::IsRowMajor,
24776 SizeAtCompileTime = XprType::SizeAtCompileTime,
24777 InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
24778 CoeffReadCost = evaluator<XprType>::CoeffReadCost,
24779 Alignment = evaluator<XprType>::Alignment
// Size queries are forwarded to the expression itself (the evaluator does not
// know them).
24781 EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
24782 EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
24783 EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
24784 EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
24785 EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
24787 CoeffReturnType coeff(Index row, Index col) const
24788 { return m_evaluator.coeff(row, col); }
24790 CoeffReturnType coeff(Index index) const
24791 { return m_evaluator.coeff(index); }
24792 template<int LoadMode, typename PacketType>
24793 PacketType packet(Index row, Index col) const
24794 { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
24795 template<int LoadMode, typename PacketType>
24796 PacketType packet(Index index) const
24797 { return m_evaluator.template packet<LoadMode,PacketType>(index); }
// (outer, inner) access: maps to (row, col) according to the storage order.
24799 CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
24800 { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
24801 template<int LoadMode, typename PacketType>
24802 PacketType packetByOuterInner(Index outer, Index inner) const
24803 { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
24804 const XprType & nestedExpression() const { return m_xpr; }
24806 internal::evaluator<XprType> m_evaluator;
24807 const XprType &m_xpr;
// DenseBase::redux(func): the generic reduction entry point. Wraps the
// expression in a redux_evaluator and dispatches to the redux_impl selected
// by redux_traits. Asserts on empty matrices (a reduction needs a seed).
24810 template<typename Derived>
24811 template<typename Func>
24812 typename internal::traits<Derived>::Scalar
24813 DenseBase<Derived>::redux(const Func& func)
24815 eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
24816 typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
24817 ThisEvaluator thisEval(derived());
24818 return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
// minCoeff/maxCoeff: reductions with the min/max functors (empty matrices hit
// redux's assertion).
24820 template<typename Derived>
24821 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
24822 DenseBase<Derived>::minCoeff() const
24824 return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
24826 template<typename Derived>
24827 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
24828 DenseBase<Derived>::maxCoeff() const
24830 return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
// sum(): early return for empty matrices (the returned value — presumably
// Scalar(0) — is on a line dropped by extraction), otherwise a sum reduction.
24832 template<typename Derived>
24833 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
24834 DenseBase<Derived>::sum() const
24836 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
24838 return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
// mean(): sum divided by size; ICC warning 2259 (narrowing conversion) is
// silenced around the division.
24840 template<typename Derived>
24841 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
24842 DenseBase<Derived>::mean() const
24844 #ifdef __INTEL_COMPILER
24845 #pragma warning push
24846 #pragma warning ( disable : 2259 )
24848 return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
24849 #ifdef __INTEL_COMPILER
24850 #pragma warning pop
// prod(): early return for empty matrices (presumably Scalar(1); line dropped
// by extraction), otherwise a product reduction.
24853 template<typename Derived>
24854 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
24855 DenseBase<Derived>::prod() const
24857 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
24859 return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
// trace(): sum of the diagonal coefficients.
24861 template<typename Derived>
24862 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
24863 MatrixBase<Derived>::trace() const
24865 return derived().diagonal().sum();
24869 // end #include "src/Core/Redux.h"
24870 // #include "src/Core/GeneralProduct.h"
24871 #ifndef EIGEN_GENERAL_PRODUCT_H
24872 #define EIGEN_GENERAL_PRODUCT_H
24878 namespace internal {
24879 template<int Rows, int Cols, int Depth> struct product_type_selector;
24880 template<int Size, int MaxSize> struct product_size_category
24882 enum { is_large = MaxSize == Dynamic ||
24883 Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
24884 (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
24885 value = is_large ? Large
// Computes, at compile time, which product implementation kind (the
// selector's `ret`) fits a Lhs*Rhs product. Rows/Cols come from the
// operands' outer dimensions, Depth is the contraction dimension
// (min of lhs cols / rhs rows); each is bucketed by
// product_size_category and the triple indexes product_type_selector.
24890 template<typename Lhs, typename Rhs> struct product_type
24892 typedef typename remove_all<Lhs>::type _Lhs;
24893 typedef typename remove_all<Rhs>::type _Rhs;
24895 MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
24896 Rows = traits<_Lhs>::RowsAtCompileTime,
24897 MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
24898 Cols = traits<_Rhs>::ColsAtCompileTime,
// Depth bounds: prefer a fixed size over Dynamic when mixing the two.
24899 MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
24900 traits<_Rhs>::MaxRowsAtCompileTime),
24901 Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
24902 traits<_Rhs>::RowsAtCompileTime)
24906 rows_select = product_size_category<Rows,MaxRows>::value,
24907 cols_select = product_size_category<Cols,MaxCols>::value,
24908 depth_select = product_size_category<Depth,MaxDepth>::value
24910 typedef product_type_selector<rows_select, cols_select, depth_select> selector;
24913 value = selector::ret,
24914 ret = selector::ret
// Debug helper: dumps the classification when EIGEN_DEBUG_PRODUCT is set.
24916 #ifdef EIGEN_DEBUG_PRODUCT
24917 static void debug()
24919 EIGEN_DEBUG_VAR(Rows);
24920 EIGEN_DEBUG_VAR(Cols);
24921 EIGEN_DEBUG_VAR(Depth);
24922 EIGEN_DEBUG_VAR(rows_select);
24923 EIGEN_DEBUG_VAR(cols_select);
24924 EIGEN_DEBUG_VAR(depth_select);
24925 EIGEN_DEBUG_VAR(value);
// Dispatch table mapping the (rows, cols, depth) size-category triple to a
// product implementation kind: InnerProduct (1x1 result), OuterProduct
// (depth 1), coeff-based (lazy or not) for small problems, GemvProduct for
// large matrix*vector, GemmProduct for large matrix*matrix.
24929 template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
24930 template<int M> struct product_type_selector<M, 1, 1> { enum { ret = LazyCoeffBasedProductMode }; };
24931 template<int N> struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; };
24932 template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
24933 template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
24934 template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
24935 template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
24936 template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
24937 template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
24938 template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
24939 template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
24940 template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
24941 template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
24942 template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
24943 template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
24944 template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
24945 template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
24946 template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
24947 template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
24948 template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
24949 template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
24950 template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
24951 template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
24952 template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
24954 namespace internal {
24955 template<int Side, int StorageOrder, bool BlasCompatible>
24956 struct gemv_dense_selector;
24958 namespace internal {
// Conditionally provides a static (on-stack) buffer for gemv temporaries.
// Cond==false: no buffer is ever needed; data() must never be called.
// Cond==true with MaxSize==Dynamic: returns 0 so the caller falls back to
// dynamic allocation. Cond==true with bounded size: a plain_array member.
24959 template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
24960 template<typename Scalar,int Size,int MaxSize>
24961 struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
24963 EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
24965 template<typename Scalar,int Size>
24966 struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
24968 EIGEN_STRONG_INLINE Scalar* data() { return 0; }
24970 template<typename Scalar,int Size,int MaxSize>
24971 struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
24974 ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
24975 PacketSize = internal::packet_traits<Scalar>::size
// With static alignment available, rely on plain_array's alignment.
24977 #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
24978 internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
24979 EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
// Otherwise over-allocate by EIGEN_MAX_ALIGN_BYTES and round the pointer
// up manually when vectorization requires alignment.
24981 internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
24982 EIGEN_STRONG_INLINE Scalar* data() {
24983 return ForceAlignment
24984 ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
// Left-sided gemv (dest = alpha * rhs^T * lhs^T form): reduces to the
// right-sided case by transposing everything and flipping storage order,
// then forwarding to the OnTheRight selector.
24989 template<int StorageOrder, bool BlasCompatible>
24990 struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
24992 template<typename Lhs, typename Rhs, typename Dest>
24993 static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
24995 Transpose<Dest> destT(dest);
24996 enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
24997 gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
24998 ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
// BLAS-compatible column-major matrix * vector: dest += alpha * lhs * rhs,
// routed to general_matrix_vector_product. blas_traits strips
// transpose/conjugate/scale wrappers from the operands and folds their
// scalar factors into actualAlpha.
25001 template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
25003 template<typename Lhs, typename Rhs, typename Dest>
25004 static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
25006 typedef typename Lhs::Scalar LhsScalar;
25007 typedef typename Rhs::Scalar RhsScalar;
25008 typedef typename Dest::Scalar ResScalar;
25009 typedef typename Dest::RealScalar RealScalar;
25010 typedef internal::blas_traits<Lhs> LhsBlasTraits;
25011 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
25012 typedef internal::blas_traits<Rhs> RhsBlasTraits;
25013 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
25014 typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
25015 ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
25016 ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
25017 ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
25018 * RhsBlasTraits::extractScalarFactor(rhs);
25019 typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
// The kernel may be unable to write dest directly when dest's inner
// stride is not 1, or when mixing a complex lhs with a real rhs
// (the kernel would then compute with the wrong scalar kind).
25021 EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
25022 ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
25023 MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
25025 typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
25026 typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
25027 RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
// Fast path: the kernel writes straight into dest. The kernel call's
// trailing arguments (dest pointer/stride, alpha) are elided in this
// extraction.
25028 if(!MightCannotUseDest)
25030 general_matrix_vector_product
25031 <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
25032 actualLhs.rows(), actualLhs.cols(),
25033 LhsMapper(actualLhs.data(), actualLhs.outerStride()),
25034 RhsMapper(actualRhs.data(), actualRhs.innerStride()),
// Slow path: run the kernel on a (possibly stack-allocated) temporary,
// then fold the result back into dest.
25040 gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
25041 const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
25042 const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
25043 ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
25044 evalToDest ? dest.data() : static_dest.data());
25047 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
25048 Index size = dest.size();
25049 EIGEN_DENSE_STORAGE_CTOR_PLUGIN
// If alpha cannot be applied inside the kernel (truly complex alpha with
// a real rhs), compute into a zeroed temporary with alpha==1 and apply
// actualAlpha afterwards; otherwise seed the temporary with dest.
25051 if(!alphaIsCompatible)
25053 MappedDest(actualDestPtr, dest.size()).setZero();
25054 compatibleAlpha = RhsScalar(1);
25057 MappedDest(actualDestPtr, dest.size()) = dest;
25059 general_matrix_vector_product
25060 <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
25061 actualLhs.rows(), actualLhs.cols(),
25062 LhsMapper(actualLhs.data(), actualLhs.outerStride()),
25063 RhsMapper(actualRhs.data(), actualRhs.innerStride()),
25068 if(!alphaIsCompatible)
25069 dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
25071 dest = MappedDest(actualDestPtr, dest.size());
// BLAS-compatible row-major matrix * vector. The kernel needs the rhs with
// unit inner stride; when the actual rhs is strided it is first copied into
// a contiguous temporary (static buffer when the size bound permits,
// otherwise stack/heap via ei_declare_aligned_stack_constructed_variable).
25076 template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
25078 template<typename Lhs, typename Rhs, typename Dest>
25079 static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
25081 typedef typename Lhs::Scalar LhsScalar;
25082 typedef typename Rhs::Scalar RhsScalar;
25083 typedef typename Dest::Scalar ResScalar;
25084 typedef internal::blas_traits<Lhs> LhsBlasTraits;
25085 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
25086 typedef internal::blas_traits<Rhs> RhsBlasTraits;
25087 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
25088 typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
25089 typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
25090 typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
25091 ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
25092 * RhsBlasTraits::extractScalarFactor(rhs);
25094 DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
25096 gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
// const_cast is safe here: when DirectlyUseRhs the kernel only reads rhs.
25097 ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
25098 DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
25099 if(!DirectlyUseRhs)
25101 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
25102 Index size = actualRhs.size();
25103 EIGEN_DENSE_STORAGE_CTOR_PLUGIN
25105 Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
25107 typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
25108 typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
// Kernel call; the trailing alpha argument is elided in this extraction.
25109 general_matrix_vector_product
25110 <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
25111 actualLhs.rows(), actualLhs.cols(),
25112 LhsMapper(actualLhs.data(), actualLhs.outerStride()),
25113 RhsMapper(actualRhsPtr, 1),
25114 dest.data(), dest.col(0).innerStride(),
// Non-BLAS-compatible column-major fallback: accumulate one scaled lhs
// column per rhs coefficient, i.e. dest += (alpha*rhs(k)) * lhs.col(k).
// The static assert guarantees the lhs expression needs no evaluation.
25118 template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
25120 template<typename Lhs, typename Rhs, typename Dest>
25121 static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
25123 EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
25124 typename nested_eval<Rhs,1>::type actual_rhs(rhs);
25125 const Index size = rhs.rows();
25126 for(Index k=0; k<size; ++k)
25127 dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
// Non-BLAS-compatible row-major fallback: one coefficient-wise dot product
// per dest row, dest(i) += alpha * <lhs.row(i), rhs>. The rhs may be
// evaluated once up front (nested_eval keyed on the lhs row count).
25130 template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
25132 template<typename Lhs, typename Rhs, typename Dest>
25133 static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
25135 EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
25136 typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
25137 const Index rows = dest.rows();
25138 for(Index i=0; i<rows; ++i)
25139 dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
// Matrix product operator. Returns a lazily evaluated Product expression;
// the static asserts give friendly diagnostics when the user meant a dot
// or coefficient-wise product (vector*vector / same-sized operands with
// mismatched inner dimensions).
25144 template<typename Derived>
25145 template<typename OtherDerived>
25146 inline const Product<Derived, OtherDerived>
25147 MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
25150 ProductIsValid = Derived::ColsAtCompileTime==Dynamic
25151 || OtherDerived::RowsAtCompileTime==Dynamic
25152 || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
25153 AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
25154 SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
25156 EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
25157 INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
25158 EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
25159 INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
25160 EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
25161 #ifdef EIGEN_DEBUG_PRODUCT
25162 internal::product_type<Derived,OtherDerived>::debug();
25164 return Product<Derived, OtherDerived>(derived(), other.derived());
// Like operator* but forces the LazyProduct evaluation mode (coefficient-
// based, no intermediate temporary). Shares the same compile-time
// dimension diagnostics as operator*.
25167 template<typename Derived>
25168 template<typename OtherDerived>
25169 const Product<Derived,OtherDerived,LazyProduct>
25170 MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
25173 ProductIsValid = Derived::ColsAtCompileTime==Dynamic
25174 || OtherDerived::RowsAtCompileTime==Dynamic
25175 || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
25176 AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
25177 SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
25179 EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
25180 INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
25181 EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
25182 INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
25183 EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
25184 return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
25188 // end #include "src/Core/GeneralProduct.h"
25189 // #include "src/Core/products/GeneralBlockPanelKernel.h"
25190 #ifndef EIGEN_GENERAL_BLOCK_PANEL_H
25191 #define EIGEN_GENERAL_BLOCK_PANEL_H
25193 namespace internal {
25194 template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
// Returns a if it is a usable (positive) cache size, else the fallback b.
25196 inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
25198 return a<=0 ? b : a;
// Conservative per-architecture defaults used when runtime cache
// detection fails (x86/x86_64 vs everything else).
25200 #if EIGEN_ARCH_i386_OR_x86_64
25201 const std::ptrdiff_t defaultL1CacheSize = 32*1024;
25202 const std::ptrdiff_t defaultL2CacheSize = 256*1024;
25203 const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
25205 const std::ptrdiff_t defaultL1CacheSize = 16*1024;
25206 const std::ptrdiff_t defaultL2CacheSize = 512*1024;
25207 const std::ptrdiff_t defaultL3CacheSize = 512*1024;
// Holds the L1/L2/L3 sizes: queried from the CPU at construction, with
// each level falling back to the defaults above when detection yields <=0.
25209 struct CacheSizes {
25210 CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) {
25211 int l1CacheSize, l2CacheSize, l3CacheSize;
25212 queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
25213 m_l1 = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
25214 m_l2 = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
25215 m_l3 = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
25217 std::ptrdiff_t m_l1;
25218 std::ptrdiff_t m_l2;
25219 std::ptrdiff_t m_l3;
// Get/set accessor around a function-local static CacheSizes instance:
// SetAction overwrites the cached L1/L2/L3 values from *l1/*l2/*l3,
// GetAction reads them back. Any other action hits the internal assert.
// NOTE(review): the static's initialization is lazily performed on first
// call; thread-safety of concurrent Set/Get is not established here.
25221 inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
25223 static CacheSizes m_cacheSizes;
25224 if(action==SetAction)
25226 eigen_internal_assert(l1!=0 && l2!=0);
25227 m_cacheSizes.m_l1 = *l1;
25228 m_cacheSizes.m_l2 = *l2;
25229 m_cacheSizes.m_l3 = *l3;
25231 else if(action==GetAction)
25233 eigen_internal_assert(l1!=0 && l2!=0);
25234 *l1 = m_cacheSizes.m_l1;
25235 *l2 = m_cacheSizes.m_l2;
25236 *l3 = m_cacheSizes.m_l3;
25240 eigen_internal_assert(false);
// Heuristically shrinks the GEMM blocking sizes k (depth), m (rows) and
// n (cols) so the per-panel working sets fit the L1/L2/L3 caches, taking
// the number of threads into account. Values are rounded down to
// multiples of the kernel's register-blocking factors (Traits::mr/nr and
// the k peeling granularity). NOTE(review): several lines (enum openers,
// some assignments, #else/#endif) are elided in this extraction, so the
// comments below describe only what the visible lines establish.
25243 template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
25244 void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
25246 typedef gebp_traits<LhsScalar,RhsScalar> Traits;
25247 std::ptrdiff_t l1, l2, l3;
25248 manage_caching_sizes(GetAction, &l1, &l2, &l3);
// Multi-threaded path: size k so one lhs/rhs panel pair fits L1 (capped
// at 320), then bound n by what fits the L2 share and m by the per-thread
// L3 share.
25249 if (num_threads > 1) {
25250 typedef typename Traits::ResScalar ResScalar;
25252 kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
25253 ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
25258 const Index k_cache = (numext::mini<Index>)((l1-ksub)/kdiv, 320);
25260 k = k_cache - (k_cache % kr);
25261 eigen_internal_assert(k > 0);
25263 const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
25264 const Index n_per_thread = numext::div_ceil(n, num_threads);
25265 if (n_cache <= n_per_thread) {
25266 eigen_internal_assert(n_cache >= static_cast<Index>(nr));
25267 n = n_cache - (n_cache % nr);
25268 eigen_internal_assert(n > 0);
25270 n = (numext::mini<Index>)(n, (n_per_thread + nr - 1) - ((n_per_thread + nr - 1) % nr));
25273 const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
25274 const Index m_per_thread = numext::div_ceil(m, num_threads);
25275 if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
25276 m = m_cache - (m_cache % mr);
25277 eigen_internal_assert(m > 0);
25279 m = (numext::mini<Index>)(m, (m_per_thread + mr - 1) - ((m_per_thread + mr - 1) % mr));
// Single-threaded path. Small products (all dims < 48, unless the debug
// macro disables this shortcut) are presumably left unblocked.
25284 #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
25289 if((numext::maxi)(k,(numext::maxi)(m,n))<48)
25291 typedef typename Traits::ResScalar ResScalar;
25294 k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
25295 k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
// max_kc: largest L1-fitting depth, rounded down to the peeling multiple.
25297 const Index max_kc = numext::maxi<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
25298 const Index old_k = k;
// Split k into equal-count sweeps no larger than max_kc.
25301 k = (k%max_kc)==0 ? max_kc
25302 : max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));
25303 eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && "the number of sweeps has to remain the same");
// Effective L2 budget: real l3 when debugging small blocks, else a fixed
// 1.5MB assumption.
25305 #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
25306 const Index actual_l2 = l3;
25308 const Index actual_l2 = 1572864;
// Bound n by what remains of L1 after the lhs block, else by the L2 share.
25311 const Index lhs_bytes = m * k * sizeof(LhsScalar);
25312 const Index remaining_l1 = l1- k_sub - lhs_bytes;
25313 if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
25315 max_nc = remaining_l1 / (k*sizeof(RhsScalar));
25319 max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
25321 Index nc = numext::mini<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
25325 : (nc - Traits::nr * ((nc-(n%nc))/(Traits::nr*(n/nc+1))));
// Finally bound m by the L2/L3 budget, with tighter caps for small
// problems; assignments between the visible lines are elided.
25329 Index problem_size = k*n*sizeof(LhsScalar);
25330 Index actual_lm = actual_l2;
25332 if(problem_size<=1024)
25336 else if(l3!=0 && problem_size<=32768)
25339 max_mc = (numext::mini<Index>)(576,max_mc);
25341 Index mc = (numext::mini<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
25342 if (mc > Traits::mr) mc -= mc % Traits::mr;
25343 else if (mc==0) return;
25345 : (mc - Traits::mr * ((mc-(m%mc))/(Traits::mr*(m/mc+1))));
// Test hook: when EIGEN_TEST_SPECIFIC_BLOCKING_SIZES is defined and true,
// clamp k/m/n to the test-specified blocking sizes and (presumably, via an
// elided return) report that the heuristic should be skipped; otherwise
// the parameters are untouched.
25349 template <typename Index>
25350 inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
25352 #ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
25353 if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
25354 k = numext::mini<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
25355 m = numext::mini<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
25356 n = numext::mini<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
25360 EIGEN_UNUSED_VARIABLE(k)
25361 EIGEN_UNUSED_VARIABLE(m)
25362 EIGEN_UNUSED_VARIABLE(n)
// Public entry point for blocking-size computation: honor test-forced
// sizes if present, otherwise run the cache heuristic. The second
// overload defaults KcFactor to 1.
25366 template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
25367 void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
25369 if (!useSpecificBlockingSizes(k, m, n)) {
25370 evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);
25373 template<typename LhsScalar, typename RhsScalar, typename Index>
25374 inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
25376 computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
// CJMADD(CJ,A,B,C,T): conjugate-aware multiply-accumulate C += A*B.
// With a fused-multiply-add instruction it maps directly to cj.pmadd;
// otherwise gebp_madd_selector is used, whose all-same-type specialization
// routes through the spare register T (t = b; t = a*t; c += t) to help
// register allocation.
25378 #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
25379 #define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
25381 template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
25382 EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& )
25384 c = cj.pmadd(a,b,c);
25387 template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
25388 EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
25390 t = b; t = cj.pmul(a,t); c = padd(c,t);
25393 template<typename CJ, typename A, typename B, typename C, typename T>
25394 EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
25396 gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
25398 #define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
// Generic gebp (generalized block-panel kernel) traits: packet types,
// register-blocking factors (mr/nr) and the load/madd/acc micro-kernel
// primitives for a real LhsScalar * RhsScalar product. NOTE(review): the
// class head, `nr` definition and some #else branches are elided in this
// extraction.
25400 template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs>
25404 typedef _LhsScalar LhsScalar;
25405 typedef _RhsScalar RhsScalar;
25406 typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
25408 ConjLhs = _ConjLhs,
25409 ConjRhs = _ConjRhs,
// Vectorize only when both scalar kinds have vectorizable packets.
25410 Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
25411 LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
25412 RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
25413 ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
25414 NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
// Row-blocking factor: 3 packets when an FMA is available (except
// AltiVec/VSX), else register-count based.
25416 default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
25417 #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
25418 mr = Vectorizable ? 3*LhsPacketSize : default_mr,
25422 LhsProgress = LhsPacketSize,
25425 typedef typename packet_traits<LhsScalar>::type _LhsPacket;
25426 typedef typename packet_traits<RhsScalar>::type _RhsPacket;
25427 typedef typename packet_traits<ResScalar>::type _ResPacket;
// Fall back to plain scalars when not vectorizable.
25428 typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
25429 typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
25430 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
25431 typedef ResPacket AccPacket;
// Zero-initialize an accumulator packet.
25432 EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
25434 p = pset1<ResPacket>(ResScalar(0));
25436 EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
25438 pbroadcast4(b, b0, b1, b2, b3);
25440 template<typename RhsPacketType>
25441 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const
25443 dest = pset1<RhsPacketType>(*b);
25445 EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
25447 dest = ploadquad<RhsPacket>(b);
25449 template<typename LhsPacketType>
25450 EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacketType& dest) const
25452 dest = pload<LhsPacketType>(a);
25454 template<typename LhsPacketType>
25455 EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const
25457 dest = ploadu<LhsPacketType>(a);
// Conjugate-aware c += a*b; tmp is a scratch register for the no-FMA path.
25459 template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
25460 EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const
25462 conj_helper<LhsPacketType,RhsPacketType,ConjLhs,ConjRhs> cj;
25463 #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
25464 EIGEN_UNUSED_VARIABLE(tmp);
25465 c = cj.pmadd(a,b,c);
25467 tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp);
// r += alpha * c: fold an accumulator into the result with scaling.
25470 EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
25472 r = pmadd(c,alpha,r);
25474 template<typename ResPacketHalf>
25475 EIGEN_STRONG_INLINE void acc(const ResPacketHalf& c, const ResPacketHalf& alpha, ResPacketHalf& r) const
25477 r = pmadd(c,alpha,r);
// gebp traits specialization for complex lhs * real rhs. Accumulation is
// done on the packet's underlying real representation (c.v), with the
// lhs conjugation applied only at the final acc() via conj_helper.
25480 template<typename RealScalar, bool _ConjLhs>
25481 class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
25484 typedef std::complex<RealScalar> LhsScalar;
25485 typedef RealScalar RhsScalar;
25486 typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
25488 ConjLhs = _ConjLhs,
25490 Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
25491 LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
25492 RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
25493 ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
25494 NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
25496 #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
25497 mr = 3*LhsPacketSize,
25499 mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
25501 LhsProgress = LhsPacketSize,
25504 typedef typename packet_traits<LhsScalar>::type _LhsPacket;
25505 typedef typename packet_traits<RhsScalar>::type _RhsPacket;
25506 typedef typename packet_traits<ResScalar>::type _ResPacket;
25507 typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
25508 typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
25509 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
25510 typedef ResPacket AccPacket;
25511 EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
25513 p = pset1<ResPacket>(ResScalar(0));
25515 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
25517 dest = pset1<RhsPacket>(*b);
// Quad-load degenerates to a broadcast for this mixed-scalar case.
25519 EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
25521 dest = pset1<RhsPacket>(*b);
25523 EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
25525 dest = pload<LhsPacket>(a);
25527 EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
25529 dest = ploadu<LhsPacket>(a);
25531 EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
25533 pbroadcast4(b, b0, b1, b2, b3);
// Tag-dispatch on Vectorizable: packet madd on the real view (a.v/c.v)
// versus the scalar fallback (body elided in this extraction).
25535 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
25537 madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
25539 EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
25541 #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
25542 EIGEN_UNUSED_VARIABLE(tmp);
25543 c.v = pmadd(a.v,b,c.v);
25545 tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
25548 EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& , const false_type&) const
// Final accumulation applies the deferred lhs conjugation.
25552 EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
25554 r = cj.pmadd(c,alpha,r);
25557 conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
// DoublePacket keeps a complex accumulator as two real packets
// (members elided here; `first`/`second` per the usage below -- the
// real-broadcast and imag-broadcast halves). padd adds component-wise;
// predux_downto4 is the identity for this type, and unpacket_traits
// declares it to be its own half-packet.
25559 template<typename Packet>
25560 struct DoublePacket
25565 template<typename Packet>
25566 DoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)
25568 DoublePacket<Packet> res;
25569 res.first = padd(a.first, b.first);
25570 res.second = padd(a.second,b.second);
25573 template<typename Packet>
25574 const DoublePacket<Packet>& predux_downto4(const DoublePacket<Packet> &a)
25578 template<typename Packet> struct unpacket_traits<DoublePacket<Packet> > { typedef DoublePacket<Packet> half; };
// gebp traits specialization for complex * complex. The rhs is broadcast
// as a DoublePacket (real part in .first, imaginary part in .second), so
// the inner madd needs no shuffles; the complex recombination (with the
// requested conjugations) is deferred to acc().
25579 template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
25580 class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
25583 typedef std::complex<RealScalar> Scalar;
25584 typedef std::complex<RealScalar> LhsScalar;
25585 typedef std::complex<RealScalar> RhsScalar;
25586 typedef std::complex<RealScalar> ResScalar;
25588 ConjLhs = _ConjLhs,
25589 ConjRhs = _ConjRhs,
25590 Vectorizable = packet_traits<RealScalar>::Vectorizable
25591 && packet_traits<Scalar>::Vectorizable,
25592 RealPacketSize = Vectorizable ? packet_traits<RealScalar>::size : 1,
25593 ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
25594 LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
25595 RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
25597 mr = ResPacketSize,
25598 LhsProgress = ResPacketSize,
25601 typedef typename packet_traits<RealScalar>::type RealPacket;
25602 typedef typename packet_traits<Scalar>::type ScalarPacket;
25603 typedef DoublePacket<RealPacket> DoublePacketType;
25604 typedef typename conditional<Vectorizable,RealPacket, Scalar>::type LhsPacket;
25605 typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type RhsPacket;
25606 typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type ResPacket;
25607 typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type AccPacket;
// Zero-init for both the scalar and the split-packet accumulator forms.
25608 EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
25609 EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p)
25611 p.first = pset1<RealPacket>(RealScalar(0));
25612 p.second = pset1<RealPacket>(RealScalar(0));
25614 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ResPacket& dest) const
25616 dest = pset1<ResPacket>(*b);
// Split broadcast: real and imaginary parts into separate real packets.
25618 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacketType& dest) const
25620 dest.first = pset1<RealPacket>(real(*b));
25621 dest.second = pset1<RealPacket>(imag(*b));
25623 EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const
25627 EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacketType& dest) const
25629 eigen_internal_assert(unpacket_traits<ScalarPacket>::size<=4);
// broadcastRhs bodies are elided in this extraction.
25632 EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
25639 EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1)
25644 EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
// The lhs complex data is loaded reinterpreted as a real packet.
25649 EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
25651 dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
25653 EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
25655 dest = ploadu<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
// Split madd: accumulate a*real(b) and a*imag(b) independently.
25657 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacketType& c, RhsPacket& ) const
25659 c.first = padd(pmul(a,b.first), c.first);
25660 c.second = padd(pmul(a,b.second),c.second);
25662 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& ) const
25664 c = cj.pmadd(a,b,c);
25666 EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; }
// Recombine the two real accumulators into a complex packet, choosing the
// conjugate/flip/sign pattern implied by ConjLhs/ConjRhs, then r += alpha*tmp.
25667 EIGEN_STRONG_INLINE void acc(const DoublePacketType& c, const ResPacket& alpha, ResPacket& r) const
25670 if((!ConjLhs)&&(!ConjRhs))
25672 tmp = pcplxflip(pconj(ResPacket(c.second)));
25673 tmp = padd(ResPacket(c.first),tmp);
25675 else if((!ConjLhs)&&(ConjRhs))
25677 tmp = pconj(pcplxflip(ResPacket(c.second)));
25678 tmp = padd(ResPacket(c.first),tmp);
25680 else if((ConjLhs)&&(!ConjRhs))
25682 tmp = pcplxflip(ResPacket(c.second));
25683 tmp = padd(pconj(ResPacket(c.first)),tmp);
25685 else if((ConjLhs)&&(ConjRhs))
25687 tmp = pcplxflip(ResPacket(c.second));
25688 tmp = psub(pconj(ResPacket(c.first)),tmp);
25690 r = pmadd(tmp,alpha,r);
25693 conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
// gebp_traits specialization for real lhs x complex rhs (ConjLhs is
// necessarily false since the lhs is real). Extraction note: embedded line
// numbers are non-contiguous, so some body/brace lines are missing here.
25695 template<typename RealScalar, bool _ConjRhs>
25696 class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
25699 typedef std::complex<RealScalar> Scalar;
25700 typedef RealScalar LhsScalar;
25701 typedef Scalar RhsScalar;
25702 typedef Scalar ResScalar;
25705 ConjRhs = _ConjRhs,
// Vectorized only if both the real and the complex packet types are.
25706 Vectorizable = packet_traits<RealScalar>::Vectorizable
25707 && packet_traits<Scalar>::Vectorizable,
25708 LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
25709 RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
25710 ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
25711 NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
// Register blocking: rows per micro-panel derived from the register budget.
25713 mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize,
25714 LhsProgress = ResPacketSize,
// Fall back to scalar types when not vectorizable.
25717 typedef typename packet_traits<LhsScalar>::type _LhsPacket;
25718 typedef typename packet_traits<RhsScalar>::type _RhsPacket;
25719 typedef typename packet_traits<ResScalar>::type _ResPacket;
25720 typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
25721 typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
25722 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
25723 typedef ResPacket AccPacket;
// Zero-initialize an accumulator packet.
25724 EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
25726 p = pset1<ResPacket>(ResScalar(0));
// Broadcast one rhs coefficient across a packet.
25728 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
25730 dest = pset1<RhsPacket>(*b);
25732 void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
25734 pbroadcast4(b, b0, b1, b2, b3);
// Real lhs must be duplicated to interleave with complex (re,im) lanes.
25736 EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
25738 dest = ploaddup<LhsPacket>(a);
25740 EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
25742 eigen_internal_assert(unpacket_traits<RhsPacket>::size<=4);
25745 EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
25747 dest = ploaddup<LhsPacket>(a);
// Tag-dispatch on Vectorizable to pick the packet or scalar madd path.
25749 EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
25751 madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
25753 EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
// With a fused madd, tmp is unneeded; otherwise use it as mul scratch.
25755 #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
25756 EIGEN_UNUSED_VARIABLE(tmp);
25757 c.v = pmadd(a,b.v,c.v);
25759 tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
// Scalar (non-vectorized) madd path.
25762 EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& , const false_type&) const
// r += alpha * c with rhs-conjugation applied via cj.
25766 EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
25768 r = cj.pmadd(alpha,c,r);
25771 conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
// gebp_kernel: the general block-panel product micro-kernel. It multiplies a
// packed lhs block (blockA) by a packed rhs panel (blockB) and accumulates
// alpha * A * B into the result accessed through DataMapper. mr/nr are the
// register-blocking sizes; ConjugateLhs/ConjugateRhs select conjugation.
25773 template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
25776 typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
25777 typedef typename Traits::ResScalar ResScalar;
25778 typedef typename Traits::LhsPacket LhsPacket;
25779 typedef typename Traits::RhsPacket RhsPacket;
25780 typedef typename Traits::ResPacket ResPacket;
25781 typedef typename Traits::AccPacket AccPacket;
// Swapped traits (rhs/lhs exchanged) used for the remaining-rows tail, where
// iterating the product transposed vectorizes better.
25782 typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs> SwappedTraits;
25783 typedef typename SwappedTraits::ResScalar SResScalar;
25784 typedef typename SwappedTraits::LhsPacket SLhsPacket;
25785 typedef typename SwappedTraits::RhsPacket SRhsPacket;
25786 typedef typename SwappedTraits::ResPacket SResPacket;
25787 typedef typename SwappedTraits::AccPacket SAccPacket;
25788 typedef typename DataMapper::LinearMapper LinearMapper;
25790 Vectorizable = Traits::Vectorizable,
25791 LhsProgress = Traits::LhsProgress,
25792 RhsProgress = Traits::RhsProgress,
25793 ResPacketSize = Traits::ResPacketSize
// strideA/strideB default to -1 meaning "use depth"; offsets select the
// starting position inside panel-mode packed blocks.
25796 void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
25797 Index rows, Index depth, Index cols, ResScalar alpha,
25798 Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);
// Implementation of the GEBP micro-kernel. The rows are processed in
// decreasing register-blocking widths (3, 2, then 1 lhs packets at a time),
// columns in groups of nr=4 then one-by-one, and the depth loop is peeled by
// pk=8 steps. Leftover rows are handled through the swapped-traits path or a
// plain scalar loop. Extraction note: embedded original line numbers are
// non-contiguous — braces and a few declarations are missing from this view.
25800 template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
25802 void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,ConjugateRhs>
25803 ::operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
25804 Index rows, Index depth, Index cols, ResScalar alpha,
25805 Index strideA, Index strideB, Index offsetA, Index offsetB)
25808 SwappedTraits straits;
// -1 strides mean the packed panels are exactly `depth` deep.
25809 if(strideA==-1) strideA = depth;
25810 if(strideB==-1) strideB = depth;
25811 conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
// Column/row peeling boundaries for the 4-column and 3/2/1-packet row kernels.
25812 Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
25813 const Index peeled_mc3 = mr>=3*Traits::LhsProgress ? (rows/(3*LhsProgress))*(3*LhsProgress) : 0;
25814 const Index peeled_mc2 = mr>=2*Traits::LhsProgress ? peeled_mc3+((rows-peeled_mc3)/(2*LhsProgress))*(2*LhsProgress) : 0;
25815 const Index peeled_mc1 = mr>=1*Traits::LhsProgress ? (rows/(1*LhsProgress))*(1*LhsProgress) : 0;
// Depth loop peeled in chunks of pk (pk declared on a dropped line; the
// unrolled ONESTEP(0..7) calls below imply pk==8 — confirm in full source).
25817 const Index peeled_kc = depth & ~(pk-1);
25818 const Index prefetch_res_offset = 32/sizeof(ResScalar);
// ---------- 3-packet-wide row kernel (3pX4 and 3pX1) ----------
25819 if(mr>=3*Traits::LhsProgress)
// Panel height chosen so one lhs panel + rhs panel fits the L1 cache.
25821 const Index l1 = defaultL1CacheSize;
25822 const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
25823 for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
25825 const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
25826 for(Index j2=0; j2<packet_cols4; j2+=nr)
25828 for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
25830 const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
// 3x4 register tile: C0..C3 / C4..C7 / C8..C11 are the three packet rows.
25832 AccPacket C0, C1, C2, C3,
25835 traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
25836 traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
25837 traits.initAcc(C8); traits.initAcc(C9); traits.initAcc(C10); traits.initAcc(C11);
25838 LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
25839 LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
25840 LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
25841 LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
25846 const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
// Peeled depth loop: 8 fully unrolled 3x4 madd steps per iteration.
25849 for(Index k=0; k<peeled_kc; k+=pk)
25851 EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
// One step: load 3 lhs packets, then for each of 4 rhs values do 3 madds.
// B_0 is deliberately passed as the scratch arg of the last madd so the
// compiler does not keep it alive (see the bug-935 asm-comment workaround).
25854 #define EIGEN_GEBP_ONESTEP(K) \
25856 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
25857 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
25858 internal::prefetch(blA+(3*K+16)*LhsProgress); \
25859 if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } \
25860 traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
25861 traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
25862 traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
25863 traits.loadRhs(blB + (0+4*K)*Traits::RhsProgress, B_0); \
25864 traits.madd(A0, B_0, C0, T0); \
25865 traits.madd(A1, B_0, C4, T0); \
25866 traits.madd(A2, B_0, C8, B_0); \
25867 traits.loadRhs(blB + (1+4*K)*Traits::RhsProgress, B_0); \
25868 traits.madd(A0, B_0, C1, T0); \
25869 traits.madd(A1, B_0, C5, T0); \
25870 traits.madd(A2, B_0, C9, B_0); \
25871 traits.loadRhs(blB + (2+4*K)*Traits::RhsProgress, B_0); \
25872 traits.madd(A0, B_0, C2, T0); \
25873 traits.madd(A1, B_0, C6, T0); \
25874 traits.madd(A2, B_0, C10, B_0); \
25875 traits.loadRhs(blB + (3+4*K)*Traits::RhsProgress, B_0); \
25876 traits.madd(A0, B_0, C3 , T0); \
25877 traits.madd(A1, B_0, C7, T0); \
25878 traits.madd(A2, B_0, C11, B_0); \
25879 EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
25881 internal::prefetch(blB);
25882 EIGEN_GEBP_ONESTEP(0);
25883 EIGEN_GEBP_ONESTEP(1);
25884 EIGEN_GEBP_ONESTEP(2);
25885 EIGEN_GEBP_ONESTEP(3);
25886 EIGEN_GEBP_ONESTEP(4);
25887 EIGEN_GEBP_ONESTEP(5);
25888 EIGEN_GEBP_ONESTEP(6);
25889 EIGEN_GEBP_ONESTEP(7);
25890 blB += pk*4*RhsProgress;
25891 blA += pk*3*Traits::LhsProgress;
25892 EIGEN_ASM_COMMENT("end gebp micro kernel 3pX4");
// Remaining (non-peeled) depth iterations, one step at a time.
25894 for(Index k=peeled_kc; k<depth; k++)
25898 EIGEN_GEBP_ONESTEP(0);
25899 blB += 4*RhsProgress;
25900 blA += 3*Traits::LhsProgress;
25902 #undef EIGEN_GEBP_ONESTEP
// Write-back: load result, r += alpha*C, store — column by column.
25903 ResPacket R0, R1, R2;
25904 ResPacket alphav = pset1<ResPacket>(alpha);
25905 R0 = r0.loadPacket(0 * Traits::ResPacketSize);
25906 R1 = r0.loadPacket(1 * Traits::ResPacketSize);
25907 R2 = r0.loadPacket(2 * Traits::ResPacketSize);
25908 traits.acc(C0, alphav, R0);
25909 traits.acc(C4, alphav, R1);
25910 traits.acc(C8, alphav, R2);
25911 r0.storePacket(0 * Traits::ResPacketSize, R0);
25912 r0.storePacket(1 * Traits::ResPacketSize, R1);
25913 r0.storePacket(2 * Traits::ResPacketSize, R2);
25914 R0 = r1.loadPacket(0 * Traits::ResPacketSize);
25915 R1 = r1.loadPacket(1 * Traits::ResPacketSize);
25916 R2 = r1.loadPacket(2 * Traits::ResPacketSize);
25917 traits.acc(C1, alphav, R0);
25918 traits.acc(C5, alphav, R1);
25919 traits.acc(C9, alphav, R2);
25920 r1.storePacket(0 * Traits::ResPacketSize, R0);
25921 r1.storePacket(1 * Traits::ResPacketSize, R1);
25922 r1.storePacket(2 * Traits::ResPacketSize, R2);
25923 R0 = r2.loadPacket(0 * Traits::ResPacketSize);
25924 R1 = r2.loadPacket(1 * Traits::ResPacketSize);
25925 R2 = r2.loadPacket(2 * Traits::ResPacketSize);
25926 traits.acc(C2, alphav, R0);
25927 traits.acc(C6, alphav, R1);
25928 traits.acc(C10, alphav, R2);
25929 r2.storePacket(0 * Traits::ResPacketSize, R0);
25930 r2.storePacket(1 * Traits::ResPacketSize, R1);
25931 r2.storePacket(2 * Traits::ResPacketSize, R2);
25932 R0 = r3.loadPacket(0 * Traits::ResPacketSize);
25933 R1 = r3.loadPacket(1 * Traits::ResPacketSize);
25934 R2 = r3.loadPacket(2 * Traits::ResPacketSize);
25935 traits.acc(C3, alphav, R0);
25936 traits.acc(C7, alphav, R1);
25937 traits.acc(C11, alphav, R2);
25938 r3.storePacket(0 * Traits::ResPacketSize, R0);
25939 r3.storePacket(1 * Traits::ResPacketSize, R1);
25940 r3.storePacket(2 * Traits::ResPacketSize, R2);
// 3pX1: leftover single columns for the 3-packet-wide rows.
25943 for(Index j2=packet_cols4; j2<cols; j2++)
25945 for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
25947 const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
25949 AccPacket C0, C4, C8;
25950 traits.initAcc(C0);
25951 traits.initAcc(C4);
25952 traits.initAcc(C8);
25953 LinearMapper r0 = res.getLinearMapper(i, j2);
25955 const RhsScalar* blB = &blockB[j2*strideB+offsetB];
25956 LhsPacket A0, A1, A2;
25957 for(Index k=0; k<peeled_kc; k+=pk)
25959 EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX1");
25961 #define EIGEN_GEBGP_ONESTEP(K) \
25963 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \
25964 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
25965 traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
25966 traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
25967 traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
25968 traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
25969 traits.madd(A0, B_0, C0, B_0); \
25970 traits.madd(A1, B_0, C4, B_0); \
25971 traits.madd(A2, B_0, C8, B_0); \
25972 EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \
25974 EIGEN_GEBGP_ONESTEP(0);
25975 EIGEN_GEBGP_ONESTEP(1);
25976 EIGEN_GEBGP_ONESTEP(2);
25977 EIGEN_GEBGP_ONESTEP(3);
25978 EIGEN_GEBGP_ONESTEP(4);
25979 EIGEN_GEBGP_ONESTEP(5);
25980 EIGEN_GEBGP_ONESTEP(6);
25981 EIGEN_GEBGP_ONESTEP(7);
25982 blB += pk*RhsProgress;
25983 blA += pk*3*Traits::LhsProgress;
25984 EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1");
25986 for(Index k=peeled_kc; k<depth; k++)
25989 EIGEN_GEBGP_ONESTEP(0);
25990 blB += RhsProgress;
25991 blA += 3*Traits::LhsProgress;
25993 #undef EIGEN_GEBGP_ONESTEP
25994 ResPacket R0, R1, R2;
25995 ResPacket alphav = pset1<ResPacket>(alpha);
25996 R0 = r0.loadPacket(0 * Traits::ResPacketSize);
25997 R1 = r0.loadPacket(1 * Traits::ResPacketSize);
25998 R2 = r0.loadPacket(2 * Traits::ResPacketSize);
25999 traits.acc(C0, alphav, R0);
26000 traits.acc(C4, alphav, R1);
26001 traits.acc(C8, alphav, R2);
26002 r0.storePacket(0 * Traits::ResPacketSize, R0);
26003 r0.storePacket(1 * Traits::ResPacketSize, R1);
26004 r0.storePacket(2 * Traits::ResPacketSize, R2);
// ---------- 2-packet-wide row kernel (2pX4 and 2pX1) ----------
26009 if(mr>=2*Traits::LhsProgress)
26011 const Index l1 = defaultL1CacheSize;
26012 Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
26013 for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
26015 Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
26016 for(Index j2=0; j2<packet_cols4; j2+=nr)
26018 for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
26020 const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
// 2x4 register tile: C0..C3 (first packet row), C4..C7 (second).
26022 AccPacket C0, C1, C2, C3,
26024 traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
26025 traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
26026 LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
26027 LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
26028 LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
26029 LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
26030 r0.prefetch(prefetch_res_offset);
26031 r1.prefetch(prefetch_res_offset);
26032 r2.prefetch(prefetch_res_offset);
26033 r3.prefetch(prefetch_res_offset);
26034 const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
26037 for(Index k=0; k<peeled_kc; k+=pk)
26039 EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
26040 RhsPacket B_0, B1, B2, B3, T0;
// One step: 2 lhs loads, broadcast 4 rhs values, 8 madds.
26041 #define EIGEN_GEBGP_ONESTEP(K) \
26043 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
26044 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
26045 traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
26046 traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
26047 traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
26048 traits.madd(A0, B_0, C0, T0); \
26049 traits.madd(A1, B_0, C4, B_0); \
26050 traits.madd(A0, B1, C1, T0); \
26051 traits.madd(A1, B1, C5, B1); \
26052 traits.madd(A0, B2, C2, T0); \
26053 traits.madd(A1, B2, C6, B2); \
26054 traits.madd(A0, B3, C3, T0); \
26055 traits.madd(A1, B3, C7, B3); \
26056 EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
26058 internal::prefetch(blB+(48+0));
26059 EIGEN_GEBGP_ONESTEP(0);
26060 EIGEN_GEBGP_ONESTEP(1);
26061 EIGEN_GEBGP_ONESTEP(2);
26062 EIGEN_GEBGP_ONESTEP(3);
26063 internal::prefetch(blB+(48+16));
26064 EIGEN_GEBGP_ONESTEP(4);
26065 EIGEN_GEBGP_ONESTEP(5);
26066 EIGEN_GEBGP_ONESTEP(6);
26067 EIGEN_GEBGP_ONESTEP(7);
26068 blB += pk*4*RhsProgress;
26069 blA += pk*(2*Traits::LhsProgress);
26070 EIGEN_ASM_COMMENT("end gebp micro kernel 2pX4");
26072 for(Index k=peeled_kc; k<depth; k++)
26074 RhsPacket B_0, B1, B2, B3, T0;
26075 EIGEN_GEBGP_ONESTEP(0);
26076 blB += 4*RhsProgress;
26077 blA += 2*Traits::LhsProgress;
26079 #undef EIGEN_GEBGP_ONESTEP
// Write-back for the 2x4 tile.
26080 ResPacket R0, R1, R2, R3;
26081 ResPacket alphav = pset1<ResPacket>(alpha);
26082 R0 = r0.loadPacket(0 * Traits::ResPacketSize);
26083 R1 = r0.loadPacket(1 * Traits::ResPacketSize);
26084 R2 = r1.loadPacket(0 * Traits::ResPacketSize);
26085 R3 = r1.loadPacket(1 * Traits::ResPacketSize);
26086 traits.acc(C0, alphav, R0);
26087 traits.acc(C4, alphav, R1);
26088 traits.acc(C1, alphav, R2);
26089 traits.acc(C5, alphav, R3);
26090 r0.storePacket(0 * Traits::ResPacketSize, R0);
26091 r0.storePacket(1 * Traits::ResPacketSize, R1);
26092 r1.storePacket(0 * Traits::ResPacketSize, R2);
26093 r1.storePacket(1 * Traits::ResPacketSize, R3);
26094 R0 = r2.loadPacket(0 * Traits::ResPacketSize);
26095 R1 = r2.loadPacket(1 * Traits::ResPacketSize);
26096 R2 = r3.loadPacket(0 * Traits::ResPacketSize);
26097 R3 = r3.loadPacket(1 * Traits::ResPacketSize);
26098 traits.acc(C2, alphav, R0);
26099 traits.acc(C6, alphav, R1);
26100 traits.acc(C3, alphav, R2);
26101 traits.acc(C7, alphav, R3);
26102 r2.storePacket(0 * Traits::ResPacketSize, R0);
26103 r2.storePacket(1 * Traits::ResPacketSize, R1);
26104 r3.storePacket(0 * Traits::ResPacketSize, R2);
26105 r3.storePacket(1 * Traits::ResPacketSize, R3);
// 2pX1: leftover single columns for the 2-packet-wide rows.
26108 for(Index j2=packet_cols4; j2<cols; j2++)
26110 for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
26112 const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
26115 traits.initAcc(C0);
26116 traits.initAcc(C4);
26117 LinearMapper r0 = res.getLinearMapper(i, j2);
26118 r0.prefetch(prefetch_res_offset);
26119 const RhsScalar* blB = &blockB[j2*strideB+offsetB];
26121 for(Index k=0; k<peeled_kc; k+=pk)
26123 EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
26125 #define EIGEN_GEBGP_ONESTEP(K) \
26127 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
26128 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
26129 traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
26130 traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
26131 traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
26132 traits.madd(A0, B_0, C0, B1); \
26133 traits.madd(A1, B_0, C4, B_0); \
26134 EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
26136 EIGEN_GEBGP_ONESTEP(0);
26137 EIGEN_GEBGP_ONESTEP(1);
26138 EIGEN_GEBGP_ONESTEP(2);
26139 EIGEN_GEBGP_ONESTEP(3);
26140 EIGEN_GEBGP_ONESTEP(4);
26141 EIGEN_GEBGP_ONESTEP(5);
26142 EIGEN_GEBGP_ONESTEP(6);
26143 EIGEN_GEBGP_ONESTEP(7);
26144 blB += pk*RhsProgress;
26145 blA += pk*2*Traits::LhsProgress;
26146 EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
26148 for(Index k=peeled_kc; k<depth; k++)
26151 EIGEN_GEBGP_ONESTEP(0);
26152 blB += RhsProgress;
26153 blA += 2*Traits::LhsProgress;
26155 #undef EIGEN_GEBGP_ONESTEP
26157 ResPacket alphav = pset1<ResPacket>(alpha);
26158 R0 = r0.loadPacket(0 * Traits::ResPacketSize);
26159 R1 = r0.loadPacket(1 * Traits::ResPacketSize);
26160 traits.acc(C0, alphav, R0);
26161 traits.acc(C4, alphav, R1);
26162 r0.storePacket(0 * Traits::ResPacketSize, R0);
26163 r0.storePacket(1 * Traits::ResPacketSize, R1);
// ---------- 1-packet-wide row kernel (1pX4 and 1pX1) ----------
26168 if(mr>=1*Traits::LhsProgress)
26170 for(Index i=peeled_mc2; i<peeled_mc1; i+=1*LhsProgress)
26172 for(Index j2=0; j2<packet_cols4; j2+=nr)
26174 const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
26176 AccPacket C0, C1, C2, C3;
26177 traits.initAcc(C0);
26178 traits.initAcc(C1);
26179 traits.initAcc(C2);
26180 traits.initAcc(C3);
26181 LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
26182 LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
26183 LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
26184 LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
26185 r0.prefetch(prefetch_res_offset);
26186 r1.prefetch(prefetch_res_offset);
26187 r2.prefetch(prefetch_res_offset);
26188 r3.prefetch(prefetch_res_offset);
26189 const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
26192 for(Index k=0; k<peeled_kc; k+=pk)
26194 EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX4");
26195 RhsPacket B_0, B1, B2, B3;
26196 #define EIGEN_GEBGP_ONESTEP(K) \
26198 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX4"); \
26199 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
26200 traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
26201 traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
26202 traits.madd(A0, B_0, C0, B_0); \
26203 traits.madd(A0, B1, C1, B1); \
26204 traits.madd(A0, B2, C2, B2); \
26205 traits.madd(A0, B3, C3, B3); \
26206 EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX4"); \
26208 internal::prefetch(blB+(48+0));
26209 EIGEN_GEBGP_ONESTEP(0);
26210 EIGEN_GEBGP_ONESTEP(1);
26211 EIGEN_GEBGP_ONESTEP(2);
26212 EIGEN_GEBGP_ONESTEP(3);
26213 internal::prefetch(blB+(48+16));
26214 EIGEN_GEBGP_ONESTEP(4);
26215 EIGEN_GEBGP_ONESTEP(5);
26216 EIGEN_GEBGP_ONESTEP(6);
26217 EIGEN_GEBGP_ONESTEP(7);
26218 blB += pk*4*RhsProgress;
26219 blA += pk*1*LhsProgress;
26220 EIGEN_ASM_COMMENT("end gebp micro kernel 1pX4");
26222 for(Index k=peeled_kc; k<depth; k++)
26224 RhsPacket B_0, B1, B2, B3;
26225 EIGEN_GEBGP_ONESTEP(0);
26226 blB += 4*RhsProgress;
26227 blA += 1*LhsProgress;
26229 #undef EIGEN_GEBGP_ONESTEP
26231 ResPacket alphav = pset1<ResPacket>(alpha);
26232 R0 = r0.loadPacket(0 * Traits::ResPacketSize);
26233 R1 = r1.loadPacket(0 * Traits::ResPacketSize);
26234 traits.acc(C0, alphav, R0);
26235 traits.acc(C1, alphav, R1);
26236 r0.storePacket(0 * Traits::ResPacketSize, R0);
26237 r1.storePacket(0 * Traits::ResPacketSize, R1);
26238 R0 = r2.loadPacket(0 * Traits::ResPacketSize);
26239 R1 = r3.loadPacket(0 * Traits::ResPacketSize);
26240 traits.acc(C2, alphav, R0);
26241 traits.acc(C3, alphav, R1);
26242 r2.storePacket(0 * Traits::ResPacketSize, R0);
26243 r3.storePacket(0 * Traits::ResPacketSize, R1);
// 1pX1: single packet row, single column.
26245 for(Index j2=packet_cols4; j2<cols; j2++)
26247 const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
26250 traits.initAcc(C0);
26251 LinearMapper r0 = res.getLinearMapper(i, j2);
26252 const RhsScalar* blB = &blockB[j2*strideB+offsetB];
26254 for(Index k=0; k<peeled_kc; k+=pk)
26256 EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX1");
26258 #define EIGEN_GEBGP_ONESTEP(K) \
26260 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1"); \
26261 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
26262 traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
26263 traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
26264 traits.madd(A0, B_0, C0, B_0); \
26265 EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1"); \
26267 EIGEN_GEBGP_ONESTEP(0);
26268 EIGEN_GEBGP_ONESTEP(1);
26269 EIGEN_GEBGP_ONESTEP(2);
26270 EIGEN_GEBGP_ONESTEP(3);
26271 EIGEN_GEBGP_ONESTEP(4);
26272 EIGEN_GEBGP_ONESTEP(5);
26273 EIGEN_GEBGP_ONESTEP(6);
26274 EIGEN_GEBGP_ONESTEP(7);
26275 blB += pk*RhsProgress;
26276 blA += pk*1*Traits::LhsProgress;
26277 EIGEN_ASM_COMMENT("end gebp micro kernel 1pX1");
26279 for(Index k=peeled_kc; k<depth; k++)
26282 EIGEN_GEBGP_ONESTEP(0);
26283 blB += RhsProgress;
26284 blA += 1*Traits::LhsProgress;
26286 #undef EIGEN_GEBGP_ONESTEP
26288 ResPacket alphav = pset1<ResPacket>(alpha);
26289 R0 = r0.loadPacket(0 * Traits::ResPacketSize);
26290 traits.acc(C0, alphav, R0);
26291 r0.storePacket(0 * Traits::ResPacketSize, R0);
// ---------- remaining rows (< one packet) ----------
// When the swapped traits vectorize nicely (LhsProgress 4 or 8), iterate the
// product transposed and reduce; otherwise fall through to the scalar loop.
26295 if(peeled_mc1<rows)
26297 for(Index j2=0; j2<packet_cols4; j2+=nr)
26299 for(Index i=peeled_mc1; i<rows; i+=1)
26301 const LhsScalar* blA = &blockA[i*strideA+offsetA];
26303 const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
26304 typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
26305 if ((SwappedTraits::LhsProgress % 4) == 0 &&
26306 (SwappedTraits::LhsProgress <= 8) &&
26307 (SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
26309 SAccPacket C0, C1, C2, C3;
26310 straits.initAcc(C0);
26311 straits.initAcc(C1);
26312 straits.initAcc(C2);
26313 straits.initAcc(C3);
26314 const Index spk = (std::max)(1,SwappedTraits::LhsProgress/4);
26315 const Index endk = (depth/spk)*spk;
26316 const Index endk4 = (depth/(spk*4))*(spk*4);
// Depth loop unrolled by 4 into four independent accumulators.
26318 for(; k<endk4; k+=4*spk)
26321 SRhsPacket B_0,B_1;
26322 straits.loadLhsUnaligned(blB+0*SwappedTraits::LhsProgress, A0);
26323 straits.loadLhsUnaligned(blB+1*SwappedTraits::LhsProgress, A1);
26324 straits.loadRhsQuad(blA+0*spk, B_0);
26325 straits.loadRhsQuad(blA+1*spk, B_1);
26326 straits.madd(A0,B_0,C0,B_0);
26327 straits.madd(A1,B_1,C1,B_1);
26328 straits.loadLhsUnaligned(blB+2*SwappedTraits::LhsProgress, A0);
26329 straits.loadLhsUnaligned(blB+3*SwappedTraits::LhsProgress, A1);
26330 straits.loadRhsQuad(blA+2*spk, B_0);
26331 straits.loadRhsQuad(blA+3*spk, B_1);
26332 straits.madd(A0,B_0,C2,B_0);
26333 straits.madd(A1,B_1,C3,B_1);
26334 blB += 4*SwappedTraits::LhsProgress;
// Fold the four accumulators, then finish remaining depth steps.
26337 C0 = padd(padd(C0,C1),padd(C2,C3));
26338 for(; k<endk; k+=spk)
26342 straits.loadLhsUnaligned(blB, A0);
26343 straits.loadRhsQuad(blA, B_0);
26344 straits.madd(A0,B_0,C0,B_0);
26345 blB += SwappedTraits::LhsProgress;
// LhsProgress==8: reduce to half packets before the final accumulate so the
// result packet width matches nr.
26348 if(SwappedTraits::LhsProgress==8)
26350 typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
26351 typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
26352 typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
26353 typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
26354 SResPacketHalf R = res.template gatherPacket<SResPacketHalf>(i, j2);
26355 SResPacketHalf alphav = pset1<SResPacketHalf>(alpha);
26360 straits.loadLhsUnaligned(blB, a0);
26361 straits.loadRhs(blA, b0);
26362 SAccPacketHalf c0 = predux_downto4(C0);
26363 straits.madd(a0,b0,c0,b0);
26364 straits.acc(c0, alphav, R);
26368 straits.acc(predux_downto4(C0), alphav, R);
26370 res.scatterPacket(i, j2, R);
// LhsProgress==4: accumulate the full swapped packet directly.
26374 SResPacket R = res.template gatherPacket<SResPacket>(i, j2);
26375 SResPacket alphav = pset1<SResPacket>(alpha);
26376 straits.acc(C0, alphav, R);
26377 res.scatterPacket(i, j2, R);
// Scalar fallback for this row/column group (CJMADD applies conjugation).
26382 ResScalar C0(0), C1(0), C2(0), C3(0);
26383 for(Index k=0; k<depth; k++)
26386 RhsScalar B_0, B_1;
26390 CJMADD(cj,A0,B_0,C0, B_0);
26391 CJMADD(cj,A0,B_1,C1, B_1);
26394 CJMADD(cj,A0,B_0,C2, B_0);
26395 CJMADD(cj,A0,B_1,C3, B_1);
26398 res(i, j2 + 0) += alpha * C0;
26399 res(i, j2 + 1) += alpha * C1;
26400 res(i, j2 + 2) += alpha * C2;
26401 res(i, j2 + 3) += alpha * C3;
// Scalar fallback: remaining rows x remaining single columns.
26405 for(Index j2=packet_cols4; j2<cols; j2++)
26407 for(Index i=peeled_mc1; i<rows; i+=1)
26409 const LhsScalar* blA = &blockA[i*strideA+offsetA];
26412 const RhsScalar* blB = &blockB[j2*strideB+offsetB];
26413 for(Index k=0; k<depth; k++)
26415 LhsScalar A0 = blA[k];
26416 RhsScalar B_0 = blB[k];
26417 CJMADD(cj, A0, B_0, C0, B_0);
26419 res(i, j2) += alpha * C0;
// Packer for a column-major lhs block: copies (and optionally conjugates) a
// rows x depth block of `lhs` into the contiguous buffer blockA, laid out for
// the gebp kernel's 3/2/1-packet row sweeps. stride/offset are used only in
// panel mode.
26425 template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
26426 struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
26428 typedef typename DataMapper::LinearMapper LinearMapper;
26429 EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
// Column-major lhs packing: rows are copied in strips of 3, 2, then 1 packet
// widths, then Pack2-wide strips, then one row at a time — mirroring the
// kernel's peeling order. In panel mode, `count` skips offset/trailing space
// so strips from different panels line up. Extraction note: embedded line
// numbers are non-contiguous; some brace/declaration lines are missing.
26431 template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
26432 EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
26433 ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
26435 typedef typename packet_traits<Scalar>::type Packet;
26436 enum { PacketSize = packet_traits<Scalar>::size };
26437 EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
26438 EIGEN_UNUSED_VARIABLE(stride);
26439 EIGEN_UNUSED_VARIABLE(offset);
// stride/offset are only meaningful in panel mode.
26440 eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
26441 eigen_assert( ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) || (Pack1<=4) );
// cj conjugates on copy when Scalar is complex and Conjugate is set.
26442 conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
// Strip boundaries matching the kernel's 3/2/1-packet row peeling.
26444 const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
26445 const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
26446 const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
26447 const Index peeled_mc0 = Pack2>=1*PacketSize ? peeled_mc1
26448 : Pack2>1 ? (rows/Pack2)*Pack2 : 0;
// 3-packet-wide strips.
26450 if(Pack1>=3*PacketSize)
26452 for(; i<peeled_mc3; i+=3*PacketSize)
26454 if(PanelMode) count += (3*PacketSize) * offset;
26455 for(Index k=0; k<depth; k++)
26458 A = lhs.loadPacket(i+0*PacketSize, k);
26459 B = lhs.loadPacket(i+1*PacketSize, k);
26460 C = lhs.loadPacket(i+2*PacketSize, k);
26461 pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
26462 pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
26463 pstore(blockA+count, cj.pconj(C)); count+=PacketSize;
26465 if(PanelMode) count += (3*PacketSize) * (stride-offset-depth);
// 2-packet-wide strips.
26468 if(Pack1>=2*PacketSize)
26470 for(; i<peeled_mc2; i+=2*PacketSize)
26472 if(PanelMode) count += (2*PacketSize) * offset;
26473 for(Index k=0; k<depth; k++)
26476 A = lhs.loadPacket(i+0*PacketSize, k);
26477 B = lhs.loadPacket(i+1*PacketSize, k);
26478 pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
26479 pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
26481 if(PanelMode) count += (2*PacketSize) * (stride-offset-depth);
// 1-packet-wide strips.
26484 if(Pack1>=1*PacketSize)
26486 for(; i<peeled_mc1; i+=1*PacketSize)
26488 if(PanelMode) count += (1*PacketSize) * offset;
26489 for(Index k=0; k<depth; k++)
26492 A = lhs.loadPacket(i+0*PacketSize, k);
26493 pstore(blockA+count, cj.pconj(A));
26496 if(PanelMode) count += (1*PacketSize) * (stride-offset-depth);
// Sub-packet strips of width Pack2, copied scalar-by-scalar.
26499 if(Pack2<PacketSize && Pack2>1)
26501 for(; i<peeled_mc0; i+=Pack2)
26503 if(PanelMode) count += Pack2 * offset;
26504 for(Index k=0; k<depth; k++)
26505 for(Index w=0; w<Pack2; w++)
26506 blockA[count++] = cj(lhs(i+w, k));
26507 if(PanelMode) count += Pack2 * (stride-offset-depth);
// Final leftover rows, one at a time.
26512 if(PanelMode) count += offset;
26513 for(Index k=0; k<depth; k++)
26514 blockA[count++] = cj(lhs(i, k));
26515 if(PanelMode) count += (stride-offset-depth);
// Packer for a row-major lhs block: same output layout as the ColMajor
// specialization above, but reads the source row-major (uses a transpose of
// loaded packets in the implementation below).
26518 template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
26519 struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
26521 typedef typename DataMapper::LinearMapper LinearMapper;
26522 EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
// Row-major lhs packing: processes rows in strips of decreasing width `pack`
// (initialized on a dropped line; decremented by PacketSize at 26581). For
// packet-wide strips it loads PacketSize x PacketSize tiles, transposes them
// in registers (ptranspose), and stores columns; leftovers are copied
// scalar-wise 4-at-a-time then singly. Extraction note: embedded line numbers
// are non-contiguous; braces and some declarations are missing from this view.
26524 template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
26525 EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
26526 ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
26528 typedef typename packet_traits<Scalar>::type Packet;
26529 enum { PacketSize = packet_traits<Scalar>::size };
26530 EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
26531 EIGEN_UNUSED_VARIABLE(stride);
26532 EIGEN_UNUSED_VARIABLE(offset);
26533 eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
26534 conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
26540 Index remaining_rows = rows-i;
26541 Index peeled_mc = i+(remaining_rows/pack)*pack;
26542 for(; i<peeled_mc; i+=pack)
26544 if(PanelMode) count += pack * offset;
26545 const Index peeled_k = (depth/PacketSize)*PacketSize;
26547 if(pack>=PacketSize)
// Transpose PacketSize x PacketSize tiles so the row-major source lands in
// the column-interleaved packed layout.
26549 for(; k<peeled_k; k+=PacketSize)
26551 for (Index m = 0; m < pack; m += PacketSize)
26553 PacketBlock<Packet> kernel;
26554 for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.loadPacket(i+p+m, k);
26555 ptranspose(kernel);
26556 for (int p = 0; p < PacketSize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p]));
26558 count += PacketSize*pack;
// Scalar tail over depth: copy 4 rows at a time, then the remainder.
26561 for(; k<depth; k++)
26564 for(; w<pack-3; w+=4)
26566 Scalar a(cj(lhs(i+w+0, k))),
26567 b(cj(lhs(i+w+1, k))),
26568 c(cj(lhs(i+w+2, k))),
26569 d(cj(lhs(i+w+3, k)));
26570 blockA[count++] = a;
26571 blockA[count++] = b;
26572 blockA[count++] = c;
26573 blockA[count++] = d;
26577 blockA[count++] = cj(lhs(i+w, k));
26579 if(PanelMode) count += pack * (stride-offset-depth);
// Try the next narrower strip width.
26581 pack -= PacketSize;
26582 if(pack<Pack2 && (pack+PacketSize)!=Pack2)
// Remaining rows copied one by one.
26587 if(PanelMode) count += offset;
26588 for(Index k=0; k<depth; k++)
26589 blockA[count++] = cj(lhs(i, k));
26590 if(PanelMode) count += (stride-offset-depth);
// Packer for a column-major rhs panel: copies a depth x cols panel of `rhs`
// into blockB, grouping columns by nr (8/4) for the gebp kernel's column
// sweeps; stride/offset are used only in panel mode.
26593 template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
26594 struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
26596 typedef typename packet_traits<Scalar>::type Packet;
26597 typedef typename DataMapper::LinearMapper LinearMapper;
26598 enum { PacketSize = packet_traits<Scalar>::size };
26599 EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
// Packs a ColMajor RHS block into `blockB` for the GEBP kernel: columns are
// grouped by 4 (when nr>=4), interleaved depth-first, with a scalar fallback
// for leftover columns. Interior lines (braces, the count/k declarations) are
// elided in this dump.
26601 template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
26602 EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
26603 ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
26605 EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
26606 EIGEN_UNUSED_VARIABLE(stride);
26607 EIGEN_UNUSED_VARIABLE(offset);
26608 eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
26609 conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
// Column counts rounded down to multiples of 8 / 4, gated on the register
// blocking factor nr.
26610 Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
26611 Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
26613 const Index peeled_k = (depth/PacketSize)*PacketSize;
// Pack groups of 4 columns (the nr>=8 path, if any, is elided in this dump).
26616 for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
26618 if(PanelMode) count += 4 * offset;
26619 const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
26620 const LinearMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
26621 const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
26622 const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
// Vectorized path: transpose 4 column-packets at a time into the buffer.
26624 if((PacketSize%4)==0)
26626 for(; k<peeled_k; k+=PacketSize) {
// The %PacketSize indices keep the code well-formed when PacketSize<4 even
// though this branch is only taken when PacketSize is a multiple of 4.
26627 PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
26628 kernel.packet[0] = dm0.loadPacket(k);
26629 kernel.packet[1%PacketSize] = dm1.loadPacket(k);
26630 kernel.packet[2%PacketSize] = dm2.loadPacket(k);
26631 kernel.packet[3%PacketSize] = dm3.loadPacket(k);
26632 ptranspose(kernel);
26633 pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));
26634 pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
26635 pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));
26636 pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));
26637 count+=4*PacketSize;
// Scalar tail along the depth dimension.
26640 for(; k<depth; k++)
26642 blockB[count+0] = cj(dm0(k));
26643 blockB[count+1] = cj(dm1(k));
26644 blockB[count+2] = cj(dm2(k));
26645 blockB[count+3] = cj(dm3(k));
26648 if(PanelMode) count += 4 * (stride-offset-depth);
// Remaining columns, one at a time.
26651 for(Index j2=packet_cols4; j2<cols; ++j2)
26653 if(PanelMode) count += offset;
26654 const LinearMapper dm0 = rhs.getLinearMapper(0, j2);
26655 for(Index k=0; k<depth; k++)
26657 blockB[count] = cj(dm0(k));
26660 if(PanelMode) count += (stride-offset-depth);
// Declaration of the RHS packing functor (RowMajor storage); the out-of-line
// operator() definition follows below.
26663 template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
26664 struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
26666 typedef typename packet_traits<Scalar>::type Packet;
26667 typedef typename DataMapper::LinearMapper LinearMapper;
26668 enum { PacketSize = packet_traits<Scalar>::size };
26669 EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
// Packs a RowMajor RHS block into `blockB`: with row-major storage a group of
// 4 consecutive columns at a fixed depth k is already contiguous, so when
// PacketSize==4 a single packet load/store suffices per (k, column-group).
// Interior lines (braces, count/else lines) are elided in this dump.
26671 template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
26672 EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
26673 ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
26675 EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
26676 EIGEN_UNUSED_VARIABLE(stride);
26677 EIGEN_UNUSED_VARIABLE(offset);
26678 eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
26679 conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
26680 Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
26681 Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
// Pack groups of 4 columns.
26685 for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
26687 if(PanelMode) count += 4 * offset;
26688 for(Index k=0; k<depth; k++)
// Fast path: one packet covers the 4 contiguous columns.
26690 if (PacketSize==4) {
26691 Packet A = rhs.loadPacket(k, j2);
26692 pstoreu(blockB+count, cj.pconj(A));
26693 count += PacketSize;
// Scalar path (the `else` line is elided in this dump).
26695 const LinearMapper dm0 = rhs.getLinearMapper(k, j2);
26696 blockB[count+0] = cj(dm0(0));
26697 blockB[count+1] = cj(dm0(1));
26698 blockB[count+2] = cj(dm0(2));
26699 blockB[count+3] = cj(dm0(3));
26703 if(PanelMode) count += 4 * (stride-offset-depth);
// Remaining columns, one at a time.
26706 for(Index j2=packet_cols4; j2<cols; ++j2)
26708 if(PanelMode) count += offset;
26709 for(Index k=0; k<depth; k++)
26711 blockB[count] = cj(rhs(k, j2));
26714 if(PanelMode) count += stride-offset-depth;
// Public accessors for the CPU cache sizes used to pick GEMM blocking
// parameters. Each getter queries manage_caching_sizes with GetAction; the
// `return lN;` lines appear to be elided in this dump — TODO confirm against
// the full source.
26718 inline std::ptrdiff_t l1CacheSize()
26720 std::ptrdiff_t l1, l2, l3;
26721 internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
26724 inline std::ptrdiff_t l2CacheSize()
26726 std::ptrdiff_t l1, l2, l3;
26727 internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
26730 inline std::ptrdiff_t l3CacheSize()
26732 std::ptrdiff_t l1, l2, l3;
26733 internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
// Setter: overrides the detected cache sizes for subsequent products.
26736 inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2, std::ptrdiff_t l3)
26738 internal::manage_caching_sizes(SetAction, &l1, &l2, &l3);
26742 // end #include "src/Core/products/GeneralBlockPanelKernel.h"
26743 // #include "src/Core/products/Parallelizer.h"
26744 #ifndef EIGEN_PARALLELIZER_H
26745 #define EIGEN_PARALLELIZER_H
26748 namespace internal {
// Get/set the user-configured maximum thread count, stored in a function-local
// static. With SetAction it records *v (the store line is elided in this
// dump); with GetAction it reports the stored value, falling back to OpenMP's
// omp_get_max_threads() — presumably when no explicit value was set (the
// guarding condition is elided; verify against the full source).
26749 inline void manage_multi_threading(Action action, int* v)
26751 static EIGEN_UNUSED int m_maxThreads = -1;
26752 if(action==SetAction)
26754 eigen_internal_assert(v!=0);
26757 else if(action==GetAction)
26759 eigen_internal_assert(v!=0);
26760 #ifdef EIGEN_HAS_OPENMP
26764 *v = omp_get_max_threads();
// Any other action value is a programming error.
26771 eigen_internal_assert(false);
// Must be called from the main thread before using Eigen from multiple
// threads: it forces the lazy initialization of the thread-count and
// cache-size statics so later concurrent reads are safe.
26775 inline void initParallel()
26778 internal::manage_multi_threading(GetAction, &nbt);
26779 std::ptrdiff_t l1, l2, l3;
26780 internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
// Returns the max number of threads Eigen may use (the local declaration and
// return line are elided in this dump).
26782 inline int nbThreads()
26785 internal::manage_multi_threading(GetAction, &ret);
// Sets the max number of threads Eigen may use.
26788 inline void setNbThreads(int v)
26790 internal::manage_multi_threading(SetAction, &v);
26792 namespace internal {
// Per-thread bookkeeping for the parallel GEMM: `sync`/`users` are atomics
// used to coordinate consumers of a thread's packed LHS panel; lhs_start /
// lhs_length (declarations elided in this dump) record which row range this
// thread packed.
26793 template<typename Index> struct GemmParallelInfo
26795 GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
26796 std::atomic<Index> sync;
26797 std::atomic<int> users;
// Dispatches a GEMM functor either sequentially or across OpenMP threads.
// The thread count is capped by problem size (columns per thread must cover
// at least one register block of Functor::Traits::nr) and by total work
// (~50k flops minimum per task). Interior lines (braces, #else/#endif) are
// elided in this dump.
26802 template<bool Condition, typename Functor, typename Index>
26803 void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
// Without OpenMP (or when deferring to a BLAS backend) just run sequentially.
26804 #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
26805 EIGEN_UNUSED_VARIABLE(depth);
26806 EIGEN_UNUSED_VARIABLE(transpose);
26807 func(0,rows, 0,cols);
26809 Index size = transpose ? rows : cols;
26810 Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
26811 double work = static_cast<double>(rows) * static_cast<double>(cols) *
26812 static_cast<double>(depth);
26813 double kMinTaskSize = 50000;
26814 pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
26815 Index threads = std::min<Index>(nbThreads(), pb_max_threads);
// Fall back to sequential if disabled, pointless, or already inside a
// parallel region (no nested parallelism).
26816 if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
26817 return func(0,rows, 0,cols);
26818 Eigen::initParallel();
26819 func.initParallelSession(threads);
// For transposed products, swap so the partitioning logic below is uniform.
26821 std::swap(rows,cols);
26822 ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
26823 #pragma omp parallel num_threads(threads)
26825 Index i = omp_get_thread_num();
// Use the actual team size: OpenMP may provide fewer threads than requested.
26826 Index actual_threads = omp_get_num_threads();
// Column blocks rounded down to a multiple of 4; row blocks to a multiple of
// the register-blocking height mr. The last thread absorbs the remainder.
26827 Index blockCols = (cols / actual_threads) & ~Index(0x3);
26828 Index blockRows = (rows / actual_threads);
26829 blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
26830 Index r0 = i*blockRows;
26831 Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
26832 Index c0 = i*blockCols;
26833 Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
26834 info[i].lhs_start = r0;
26835 info[i].lhs_length = actualBlockRows;
26836 if(transpose) func(c0, actualBlockCols, 0, rows, info);
26837 else func(0, rows, c0, actualBlockCols, info);
26844 // end #include "src/Core/products/Parallelizer.h"
26845 // #include "src/Core/ProductEvaluators.h"
26846 #ifndef EIGEN_PRODUCTEVALUATORS_H
26847 #define EIGEN_PRODUCTEVALUATORS_H
26849 namespace internal {
// The evaluator of a Product expression simply forwards to product_evaluator,
// which is specialized per product kind below.
26850 template<typename Lhs, typename Rhs, int Options>
26851 struct evaluator<Product<Lhs, Rhs, Options> >
26852 : public product_evaluator<Product<Lhs, Rhs, Options> >
26854 typedef Product<Lhs, Rhs, Options> XprType;
26855 typedef product_evaluator<XprType> Base;
26856 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
// A scalar * (A*B) expression must still be assumed to alias its destination,
// exactly like a plain product.
26858 template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
26859 struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
26860 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
26861 const Product<Lhs, Rhs, DefaultProduct> > >
26863 static const bool value = true;
// Catch "scalar * (A*B)" and evaluate it as "(scalar*A) * B" so the scaling is
// folded into the product kernel instead of a separate pass.
26865 template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
26866 struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
26867 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
26868 const Product<Lhs, Rhs, DefaultProduct> > >
26869 : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
26871 typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
26872 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
26873 const Product<Lhs, Rhs, DefaultProduct> > XprType;
26874 typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
// m_other is the constant scalar factor stored in the nullary functor.
26875 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
26876 : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
// Taking a diagonal of a product: re-route through the lazy product so only
// the needed diagonal coefficients are computed, not the full matrix.
26879 template<typename Lhs, typename Rhs, int DiagIndex>
26880 struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
26881 : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
26883 typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
26884 typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
// (the closing argument of this constructor call is elided in this dump)
26885 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
26886 : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
26887 Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
// Primary declaration of generic_product_impl: dispatches on the operand
// shapes and on the statically selected product kind (inner/outer/gemv/gemm).
26891 template< typename Lhs, typename Rhs,
26892 typename LhsShape = typename evaluator_traits<Lhs>::Shape,
26893 typename RhsShape = typename evaluator_traits<Rhs>::Shape,
26894 int ProductType = internal::product_type<Lhs,Rhs>::value>
26895 struct generic_product_impl;
// A DefaultProduct must be assumed to alias its destination (C = A*B with C
// appearing in A or B needs a temporary).
26896 template<typename Lhs, typename Rhs>
26897 struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
26898 static const bool value = true;
// Generic product_evaluator: eagerly evaluates the product into a plain
// temporary (m_result) and then behaves as an evaluator of that temporary.
26900 template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
26901 struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
26902 : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
26904 typedef Product<Lhs, Rhs, Options> XprType;
26905 typedef typename XprType::PlainObject PlainObject;
26906 typedef evaluator<PlainObject> Base;
// EvalBeforeNestingBit forces nesting-by-value of this temporary.
26908 Flags = Base::Flags | EvalBeforeNestingBit
26910 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
26911 explicit product_evaluator(const XprType& xpr)
26912 : m_result(xpr.rows(), xpr.cols())
// Rebuild the base evaluator in-place so it points at m_result, which was not
// yet constructed when the base subobject was initialized.
26914 ::new (static_cast<Base*>(this)) Base(m_result);
26915 generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
26918 PlainObject m_result;
// Assignment specializations routing dense "dst = A*B", "dst += A*B" and
// "dst -= A*B" directly to generic_product_impl::evalTo/addTo/subTo, skipping
// the generic coefficient-wise assignment path.
26920 template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
26921 struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
26922 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
26924 typedef Product<Lhs,Rhs,Options> SrcXprType;
26925 static EIGEN_STRONG_INLINE
26926 void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
// Plain assignment may resize the destination to the product's dimensions.
26928 Index dstRows = src.rows();
26929 Index dstCols = src.cols();
26930 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
26931 dst.resize(dstRows, dstCols);
26932 generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
// dst += A*B : sizes must already match.
26935 template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
26936 struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
26937 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
26939 typedef Product<Lhs,Rhs,Options> SrcXprType;
26940 static EIGEN_STRONG_INLINE
26941 void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
26943 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
26944 generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
// dst -= A*B : sizes must already match.
26947 template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
26948 struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
26949 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
26951 typedef Product<Lhs,Rhs,Options> SrcXprType;
26952 static EIGEN_STRONG_INLINE
26953 void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
26955 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
26956 generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
// Catch "dst OP= scalar * (A*B)" and rewrite it as "dst OP= (scalar*A)*B" so
// the scale factor is absorbed into the product implementation.
26959 template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
26960 struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
26961 const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
26963 typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
26964 const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
26965 const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
26966 static EIGEN_STRONG_INLINE
26967 void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
26969 call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
// Expressions of the form "other + A*B" and "other - A*B" must also be
// assumed to alias the destination, since the embedded product does.
26972 template<typename OtherXpr, typename Lhs, typename Rhs>
26973 struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
26974 const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
26975 static const bool value = true;
26977 template<typename OtherXpr, typename Lhs, typename Rhs>
26978 struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
26979 const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
26980 static const bool value = true;
// Split "dst OP= other BINOP (A*B)" into two assignments: first apply Func1 to
// `other`, then fold the product in with Func2 (e.g. dst = x + A*B becomes
// dst = x; dst += A*B), avoiding a temporary for the product.
26982 template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
26983 struct assignment_from_xpr_op_product
26985 template<typename SrcXprType, typename InitialFunc>
26986 static EIGEN_STRONG_INLINE
26987 void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& )
26989 call_assignment_no_alias(dst, src.lhs(), Func1());
26990 call_assignment_no_alias(dst, src.rhs(), Func2());
// Macro instantiating the above for each (outer assign-op, binop) pair; the
// third argument is the assign-op used for the product term.
26993 #define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
26994 template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
26995 struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
26996 const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
26997 : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
// Note how '-=' flips the op applied to the product term (sub of a sum, add
// of a difference).
26999 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op);
27000 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
27001 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
27002 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op);
27003 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
27004 EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
// Inner product (row-vector * column-vector): the 1x1 result is a plain dot
// product computed as sum(lhs^T .* rhs).
27005 template<typename Lhs, typename Rhs>
27006 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
27008 template<typename Dst>
27009 static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27011 dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
27013 template<typename Dst>
27014 static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27016 dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
27018 template<typename Dst>
27019 static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27020 { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
// Outer product kernels. The bool tag selects the traversal order matching
// the destination's storage: column-by-column for column-major destinations
// (false_type), row-by-row for row-major ones (true_type). `func` applies
// set/add/sub/scaled-add per column or row.
27022 template<typename Dst, typename Lhs, typename Rhs, typename Func>
27023 void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
27025 evaluator<Rhs> rhsEval(rhs);
// Nest lhs with one evaluation per destination column in mind.
27026 typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
27027 const Index cols = dst.cols();
27028 for (Index j=0; j<cols; ++j)
27029 func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
27031 template<typename Dst, typename Lhs, typename Rhs, typename Func>
27032 void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
27034 evaluator<Lhs> lhsEval(lhs);
27035 typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
27036 const Index rows = dst.rows();
27037 for (Index i=0; i<rows; ++i)
27038 func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
// Outer product (column-vector * row-vector): dispatches to
// outer_product_selector_run with a small functor encoding the assignment
// flavor, traversing in the destination's storage order.
27040 template<typename Lhs, typename Rhs>
27041 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
27043 template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
27044 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
// Assignment functors; dst arrives const-qualified through the generic
// machinery, hence const_cast_derived().
27045 struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
27046 struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
27047 struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
// Scaled accumulate: dst += alpha * src (struct header elided in this dump).
27050 explicit adds(const Scalar& s) : m_scale(s) {}
27051 template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
27052 dst.const_cast_derived() += m_scale * src;
27055 template<typename Dst>
27056 static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27058 internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
27060 template<typename Dst>
27061 static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27063 internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
27065 template<typename Dst>
27066 static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27068 internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
27070 template<typename Dst>
27071 static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
27073 internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
// CRTP helper: derived classes only implement scaleAndAddTo(dst,lhs,rhs,alpha)
// and get evalTo/addTo/subTo for free via alpha = 1 / 1 / -1.
27076 template<typename Lhs, typename Rhs, typename Derived>
27077 struct generic_product_impl_base
27079 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
27080 template<typename Dst>
27081 static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27082 { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
27083 template<typename Dst>
27084 static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27085 { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
27086 template<typename Dst>
27087 static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27088 { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
27089 template<typename Dst>
27090 static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
27091 { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
// Matrix * vector (GEMV): forwards to gemv_dense_selector, dispatching on
// which side is the matrix, its storage order, and whether it has directly
// addressable storage usable by the BLAS-like kernel.
27093 template<typename Lhs, typename Rhs>
27094 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
27095 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
27097 typedef typename nested_eval<Lhs,1>::type LhsNested;
27098 typedef typename nested_eval<Rhs,1>::type RhsNested;
27099 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
// If the lhs is the vector, the matrix acts from the right.
27100 enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
27101 typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
27102 template<typename Dest>
27103 static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
27105 LhsNested actual_lhs(lhs);
27106 RhsNested actual_rhs(rhs);
27107 internal::gemv_dense_selector<Side,
27108 (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
27109 bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
27110 >::run(actual_lhs, actual_rhs, dst, alpha);
// Small products evaluated coefficient-wise: route through lazyProduct() so
// the coefficient-based evaluator below does the work, with the assignment
// functor selecting =, += or -=.
27113 template<typename Lhs, typename Rhs>
27114 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
27116 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
27117 template<typename Dst>
27118 static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27120 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
27122 template<typename Dst>
27123 static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27125 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
27127 template<typename Dst>
27128 static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
27130 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
// The lazy mode shares the same implementation.
27133 template<typename Lhs, typename Rhs>
27134 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
27135 : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
// Forward declarations of the coefficient/packet product helpers, followed by
// the lazy-product evaluator: it keeps evaluators of both operands and
// computes each requested coefficient/packet as a dot product on demand.
// NOTE(review): interior lines (member-initializer list entries, enum braces,
// #ifdef EIGEN_INTERNAL_DEBUGGING around the std::cerr dump, return
// statements) are elided in this dump.
27136 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
27137 struct etor_product_coeff_impl;
27138 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
27139 struct etor_product_packet_impl;
27140 template<typename Lhs, typename Rhs, int ProductTag>
27141 struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
27142 : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
27144 typedef Product<Lhs, Rhs, LazyProduct> XprType;
27145 typedef typename XprType::Scalar Scalar;
27146 typedef typename XprType::CoeffReturnType CoeffReturnType;
27147 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
27148 explicit product_evaluator(const XprType& xpr)
27149 : m_lhs(xpr.lhs()),
27153 m_innerDim(xpr.lhs().cols())
27155 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
27156 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
27157 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
// Debug dump of the compile-time decisions (presumably guarded by a debug
// macro whose #if lines are elided in this dump).
27159 std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
27160 std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
27161 std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
27162 std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
27163 std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
27164 std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
27165 std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
27166 std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
27167 std::cerr << "Alignment= " << Alignment << "\n";
27168 std::cerr << "Flags= " << Flags << "\n";
27171 typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
27172 typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
27173 typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
27174 typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
27175 typedef evaluator<LhsNestedCleaned> LhsEtorType;
27176 typedef evaluator<RhsNestedCleaned> RhsEtorType;
27178 RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
27179 ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
27180 InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
27181 MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
27182 MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
27184 typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
27185 typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
// Cost model: a coefficient is an InnerSize-long dot product; Dynamic inner
// size is priced as HugeCost to inhibit unrolling.
27187 LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
27188 RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
27189 CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
27190 : InnerSize == Dynamic ? HugeCost
27191 : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
27192 + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
27193 Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
27194 LhsFlags = LhsEtorType::Flags,
27195 RhsFlags = RhsEtorType::Flags,
27196 LhsRowMajor = LhsFlags & RowMajorBit,
27197 RhsRowMajor = RhsFlags & RowMajorBit,
27198 LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
27199 RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
27200 LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
27201 RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
27202 SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
// Vectorization over the outer dimensions needs column-major lhs packets or
// row-major rhs packets.
27203 CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
27204 CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),
27205 EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
27206 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
27207 : (bool(RhsRowMajor) && !CanVectorizeLhs),
27208 Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
27209 | (EvalToRowMajor ? RowMajorBit : 0)
27210 | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
27211 | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
27212 LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
27213 RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
// Alignment survives only if the outer stride (in bytes) is a multiple of
// the packet alignment.
27214 Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
27215 : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
27217 CanVectorizeInner = SameType
27220 && (LhsFlags & RhsFlags & ActualPacketAccessBit)
27221 && (InnerSize % packet_traits<Scalar>::size == 0)
// Each coefficient is a dot product of a lhs row with a rhs column.
27223 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
27225 return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
// Linear indexing: only valid when the product is a compile-time vector.
27227 EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
27229 const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
27230 const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
27231 return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
// Packet access delegates to etor_product_packet_impl, unrolled when the
// cost model allows (the `Packet res;`/return lines are elided in this dump).
27233 template<int LoadMode, typename PacketType>
27234 const PacketType packet(Index row, Index col) const
27237 typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
27238 Unroll ? int(InnerSize) : Dynamic,
27239 LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
27240 PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
27243 template<int LoadMode, typename PacketType>
27244 const PacketType packet(Index index) const
27246 const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
27247 const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
27248 return packet<LoadMode,PacketType>(row,col);
// Nested copies/references of the operands plus their evaluators.
27251 typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
27252 typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
27253 LhsEtorType m_lhsImpl;
27254 RhsEtorType m_rhsImpl;
// A DefaultProduct selected for lazy coefficient-based evaluation reuses the
// LazyProduct evaluator, but forces nesting-by-value via EvalBeforeNestingBit.
27257 template<typename Lhs, typename Rhs>
27258 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
27259 : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
27261 typedef Product<Lhs, Rhs, DefaultProduct> XprType;
27262 typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
27263 typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
27265 Flags = Base::Flags | EvalBeforeNestingBit
27267 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
27268 : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
27271 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
27272 struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
27274 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
27276 etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
27277 res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
27280 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
27281 struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
27283 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
27285 etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
27286 res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
27289 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
27290 struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
27292 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index , Packet &res)
27294 res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
// Recursion base case (inner dimension == 1), column-major: packet from lhs,
// broadcast scalar from rhs.
27297 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
27298 struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
27300 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index , Packet &res)
27302 res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
// Degenerate case: empty inner dimension, row-major — the product is a zero packet.
27305 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
27306 struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
27308 static EIGEN_STRONG_INLINE void run(Index , Index , const Lhs& , const Rhs& , Index , Packet &res)
27310 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
// Degenerate case: empty inner dimension, column-major — the product is a zero packet.
27313 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
27314 struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
27316 static EIGEN_STRONG_INLINE void run(Index , Index , const Lhs& , const Rhs& , Index , Packet &res)
27318 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
// Runtime-sized inner dimension, row-major: start from a zero packet and
// accumulate over all inner indices with a plain loop instead of unrolling.
27321 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
27322 struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
27324 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
27326 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
27327 for(Index i = 0; i < innerDim; ++i)
27328 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
// Runtime-sized inner dimension, column-major: zero-initialize and accumulate
// lhs packets scaled by broadcast rhs coefficients.
27331 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
27332 struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
27334 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
27336 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
27337 for(Index i = 0; i < innerDim; ++i)
27338 res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
// Forward declaration: the actual triangular-times-dense kernels are defined
// elsewhere; the generic_product_impl specializations below dispatch to it.
27341 template<int Mode, bool LhsIsTriangular,
27342 typename Lhs, bool LhsIsVector,
27343 typename Rhs, bool RhsIsVector>
27344 struct triangular_product_impl;
// Product dispatch: triangular lhs times dense rhs. scaleAndAddTo() unwraps the
// triangular view (nestedExpression) and forwards to triangular_product_impl,
// flagging whether rhs is a compile-time column vector.
27345 template<typename Lhs, typename Rhs, int ProductTag>
27346 struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
27347 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
27349 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
27350 template<typename Dest>
27351 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
// true => lhs is the triangular operand; false => lhs is not a vector.
27353 triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
27354 ::run(dst, lhs.nestedExpression(), rhs, alpha);
// Product dispatch: dense lhs times triangular rhs — mirror of the previous
// specialization, with the triangular operand on the right.
27357 template<typename Lhs, typename Rhs, int ProductTag>
27358 struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
27359 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
27361 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
27362 template<typename Dest>
27363 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
27365 triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
// Forward declaration: self-adjoint product kernels are defined elsewhere.
27368 template <typename Lhs, int LhsMode, bool LhsIsVector,
27369 typename Rhs, int RhsMode, bool RhsIsVector>
27370 struct selfadjoint_product_impl;
// Product dispatch: self-adjoint lhs times dense rhs — unwrap the view and
// forward to selfadjoint_product_impl (rhs mode 0 = plain dense).
27371 template<typename Lhs, typename Rhs, int ProductTag>
27372 struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
27373 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
27375 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
27376 template<typename Dest>
27377 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
27379 selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
// Product dispatch: dense lhs times self-adjoint rhs — mirror of the previous
// specialization.
27382 template<typename Lhs, typename Rhs, int ProductTag>
27383 struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
27384 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
27386 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
27387 template<typename Dest>
27388 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
27390 selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
// Common evaluator base for (diagonal * matrix) and (matrix * diagonal)
// products: stores evaluators for both operands and multiplies coefficients
// (or packets) of the matrix by the matching diagonal entry on the fly.
27393 template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
27394 struct diagonal_product_evaluator_base
27395 : evaluator_base<Derived>
// Result scalar follows the binary-op promotion rules of the two operand scalars.
27397 typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
27400 CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
27401 MatrixFlags = evaluator<MatrixType>::Flags,
27402 DiagFlags = evaluator<DiagonalType>::Flags,
27403 _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
// True when each packet of the matrix is scaled by a single (broadcast) diagonal
// coefficient; otherwise a packet of the diagonal itself must be loaded.
27404 _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
27405 ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
27406 _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
// Vectorize only if the matrix supports packets, scalars match, and the diagonal
// is either broadcast per packet or itself packet-accessible.
27407 _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
27408 _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
27409 Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
27410 Alignment = evaluator<MatrixType>::Alignment
27412 diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
27413 : m_diagImpl(diag), m_matImpl(mat)
27415 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
27416 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
// Linear (vector) access: element-wise product of the two operands.
27418 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
27420 return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
// Packet path when the diagonal entry is constant across the packet (true_type):
// broadcast one diagonal coefficient.
27423 template<int LoadMode,typename PacketType>
27424 EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
27426 return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
27427 internal::pset1<PacketType>(m_diagImpl.coeff(id)))
// Evaluator for (diagonal lhs) * (dense rhs): each coefficient of the result is
// diag(row) * rhs(row, col). Built on diagonal_product_evaluator_base with the
// diagonal applied on the left.
27442 template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
27443 struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
27444 : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
27446 typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
27447 using Base::m_diagImpl;
27448 using Base::m_matImpl;
27450 typedef typename Base::Scalar Scalar;
27451 typedef Product<Lhs, Rhs, ProductKind> XprType;
27452 typedef typename XprType::PlainObject PlainObject;
// Storage order of the dense (rhs) operand drives the packet strategy.
27454 StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
27456 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
27457 : Base(xpr.rhs(), xpr.lhs().diagonal())
// Scale row `row` of the matrix by the row-th diagonal entry.
27460 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
27462 return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
// Row-major packets span a row, so the diagonal entry is constant (true_type);
// column-major packets span a column and need a diagonal packet (false_type).
27465 template<int LoadMode,typename PacketType>
27466 EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
27468 return this->template packet_impl<LoadMode,PacketType>(row,col, row,
27469 typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
// Linear packet access: valid only for vector-shaped results.
27471 template<int LoadMode,typename PacketType>
27472 EIGEN_STRONG_INLINE PacketType packet(Index idx) const
27474 return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
// Evaluator for (dense lhs) * (diagonal rhs): each coefficient of the result is
// lhs(row, col) * diag(col). Mirror of the diagonal-on-the-left evaluator.
27478 template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
27479 struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
27480 : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
27482 typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
27483 using Base::m_diagImpl;
27484 using Base::m_matImpl;
27486 typedef typename Base::Scalar Scalar;
27487 typedef Product<Lhs, Rhs, ProductKind> XprType;
27488 typedef typename XprType::PlainObject PlainObject;
// Storage order of the dense (lhs) operand drives the packet strategy.
27489 enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
27490 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
27491 : Base(xpr.lhs(), xpr.rhs().diagonal())
// Scale column `col` of the matrix by the col-th diagonal entry.
27494 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
27496 return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
// Column-major packets span a column, so the diagonal entry is constant
// (true_type); row-major packets need a packet of the diagonal (false_type).
27499 template<int LoadMode,typename PacketType>
27500 EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
27502 return this->template packet_impl<LoadMode,PacketType>(row,col, col,
27503 typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
// Linear packet access: valid only for vector-shaped results.
27505 template<int LoadMode,typename PacketType>
27506 EIGEN_STRONG_INLINE PacketType packet(Index idx) const
27508 return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
// Applies a permutation matrix to a dense expression, on the left (permute rows)
// or the right (permute columns), optionally transposed (i.e. the inverse
// permutation). Two paths: an in-place cycle-following swap when dst aliases
// the source, and a straight copy of permuted rows/columns otherwise.
27512 template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
27513 struct permutation_matrix_product;
27514 template<typename ExpressionType, int Side, bool Transposed>
27515 struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
27517 typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
27518 typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
27519 template<typename Dest, typename PermutationType>
27520 static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
27522 MatrixType mat(xpr);
27523 const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
// In-place path: dst and mat share storage, so permute by walking each cycle
// of the permutation and swapping rows/columns, using `mask` to mark visited
// indices. NOTE(review): stripped lines hide parts of the cycle bookkeeping —
// verify against the unmangled source before editing.
27524 if(is_same_dense(dst, mat))
27526 Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
27529 while(r < perm.size())
// Skip indices whose cycle has already been processed.
27531 while(r<perm.size() && mask[r]) r++;
27536 mask.coeffRef(k0) = true;
27537 for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
27539 Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
27540 .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
27541 (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
27542 mask.coeffRef(k) = true;
// Out-of-place path: copy each row/column of mat to its permuted position
// (or from it, when Transposed flips the direction).
27549 for(Index i = 0; i < n; ++i)
27551 Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
27552 (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
27554 Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
27555 (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
// Permutation * matrix: permute the rows of rhs.
27560 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
27561 struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
27563 template<typename Dest>
27564 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
27566 permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
// Matrix * permutation: permute the columns of lhs.
27569 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
27570 struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
27572 template<typename Dest>
27573 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
27575 permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
// Inverse(permutation) * matrix: apply the transposed/inverse permutation to
// the rows of rhs (Transposed == true).
27578 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
27579 struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
27581 template<typename Dest>
27582 static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
27584 permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
// Matrix * Inverse(permutation): apply the transposed/inverse permutation to
// the columns of lhs (Transposed == true).
27587 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
27588 struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
27590 template<typename Dest>
27591 static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
27593 permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
// Applies a sequence of transpositions (pairwise row/column swaps) to a dense
// expression, left or right side; Transposed reverses the order in which the
// transpositions are applied.
27596 template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
27597 struct transposition_matrix_product
27599 typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
27600 typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
27601 template<typename Dest, typename TranspositionType>
27602 static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
27604 MatrixType mat(xpr);
27605 typedef typename TranspositionType::StorageIndex StorageIndex;
27606 const Index size = tr.size();
27607 StorageIndex j = 0;
// When dst does not alias mat, first copy mat into dst (the stripped line
// presumably performs that copy), then swap in place below.
27608 if(!is_same_dense(dst,mat))
// Walk the transpositions forward, or backward when Transposed; each entry
// tr(k)==j, j!=k requests a swap of rows (or columns) k and j of dst.
27610 for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
27611 if(Index(j=tr.coeff(k))!=k)
27613 if(Side==OnTheLeft) dst.row(k).swap(dst.row(j));
27614 else if(Side==OnTheRight) dst.col(k).swap(dst.col(j));
// Transpositions * matrix: swap rows of rhs in order.
27618 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
27619 struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
27621 template<typename Dest>
27622 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
27624 transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
// Matrix * transpositions: swap columns of lhs in order.
27627 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
27628 struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
27630 template<typename Dest>
27631 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
27633 transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
// Transpose(transpositions) * matrix: apply the swaps in reverse order
// (Transposed == true).
27636 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
27637 struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
27639 template<typename Dest>
27640 static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
27642 transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
// Matrix * Transpose(transpositions): apply the column swaps in reverse order
// (Transposed == true).
27645 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
27646 struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
27648 template<typename Dest>
27649 static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
27651 transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
27657 // end #include "src/Core/ProductEvaluators.h"
27658 // #include "src/Core/products/GeneralMatrixVector.h"
27659 #ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
27660 #define EIGEN_GENERAL_MATRIX_VECTOR_H
27662 namespace internal {
// GEMV kernel declaration for a column-major lhs: computes res += alpha * lhs * rhs.
// Packet types fall back to plain scalars when the operand scalars cannot be
// vectorized together (Vectorizable requires equal packet sizes).
27663 template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
27664 struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
27666 typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
27668 Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
27669 && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
27670 LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
27671 RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
27672 ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
27674 typedef typename packet_traits<LhsScalar>::type _LhsPacket;
27675 typedef typename packet_traits<RhsScalar>::type _RhsPacket;
27676 typedef typename packet_traits<ResScalar>::type _ResPacket;
// Degrade packet types to scalars when vectorization is disabled.
27677 typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
27678 typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
27679 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
27680 EIGEN_DONT_INLINE static void run(
27681 Index rows, Index cols,
27682 const LhsMapper& lhs,
27683 const RhsMapper& rhs,
27684 ResScalar* res, Index resIncr,
// Column-major GEMV: res += alpha * lhs * rhs. Processes 4 columns at a time
// (columnsAtOnce = 4), classifies the lhs columns' alignment relative to res
// (AllAligned / EvenAligned / FirstAligned / NoneAligned), and in the
// FirstAligned case peels 2 result packets per iteration (peels = 2) using
// palign to realign the misaligned columns.
27687 template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
27688 EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
27689 Index rows, Index cols,
27690 const LhsMapper& lhs,
27691 const RhsMapper& rhs,
27692 ResScalar* res, Index resIncr,
// This kernel only supports a unit result increment.
27695 EIGEN_UNUSED_VARIABLE(resIncr);
27696 eigen_internal_assert(resIncr==1);
27697 #ifdef _EIGEN_ACCUMULATE_PACKETS
27698 #error _EIGEN_ACCUMULATE_PACKETS has already been defined
// Accumulate one result packet at row j from the 4 current columns, with the
// per-column alignment passed as template arguments.
27700 #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \
27702 padd(pload<ResPacket>(&res[j]), \
27704 padd(pcj.pmul(lhs0.template load<LhsPacket, Alignment0>(j), ptmp0), \
27705 pcj.pmul(lhs1.template load<LhsPacket, Alignment13>(j), ptmp1)), \
27706 padd(pcj.pmul(lhs2.template load<LhsPacket, Alignment2>(j), ptmp2), \
27707 pcj.pmul(lhs3.template load<LhsPacket, Alignment13>(j), ptmp3)) )))
27708 typedef typename LhsMapper::VectorMapper LhsScalars;
// Scalar and packet conjugation helpers for complex support.
27709 conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
27710 conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
27712 alpha = numext::conj(alpha);
27713 enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
27714 const Index columnsAtOnce = 4;
27715 const Index peels = 2;
27716 const Index LhsPacketAlignedMask = LhsPacketSize-1;
27717 const Index ResPacketAlignedMask = ResPacketSize-1;
27718 const Index size = rows;
27719 const Index lhsStride = lhs.stride();
// [alignedStart, alignedSize) is the vectorizable span of res; the head and
// tail are handled with scalar loops.
27720 Index alignedStart = internal::first_default_aligned(res,size);
27721 Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
27722 const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
27723 const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
27724 Index alignmentPattern = alignmentStep==0 ? AllAligned
27725 : alignmentStep==(LhsPacketSize/2) ? EvenAligned
27727 const Index lhsAlignmentOffset = lhs.firstAligned(size);
// Possibly skip some leading columns so the remaining ones reach a usable
// alignment pattern; fall back to NoneAligned when impossible.
27728 Index skipColumns = 0;
27729 if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (UIntPtr(res)%sizeof(ResScalar)) )
27733 alignmentPattern = NoneAligned;
27735 else if(LhsPacketSize > 4)
27737 alignmentPattern = NoneAligned;
27739 else if (LhsPacketSize>1)
27741 while (skipColumns<LhsPacketSize &&
27742 alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
27744 if (skipColumns==LhsPacketSize)
27746 alignmentPattern = NoneAligned;
27751 skipColumns = (std::min)(skipColumns,cols);
27754 else if(Vectorizable)
27757 alignedSize = size;
27758 alignmentPattern = AllAligned;
// In the FirstAligned+alignmentStep==1 case columns are visited in the order
// 0,3,2,1 so that lhs1/lhs3 share the Alignment13 load mode.
27760 const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
27761 const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
// Main loop: 4 columns per iteration.
27762 Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
27763 for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
// Broadcast alpha*rhs(k) once per column.
27765 RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(i, 0)),
27766 ptmp1 = pset1<RhsPacket>(alpha*rhs(i+offset1, 0)),
27767 ptmp2 = pset1<RhsPacket>(alpha*rhs(i+2, 0)),
27768 ptmp3 = pset1<RhsPacket>(alpha*rhs(i+offset3, 0));
27769 const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0), lhs1 = lhs.getVectorMapper(0, i+offset1),
27770 lhs2 = lhs.getVectorMapper(0, i+2), lhs3 = lhs.getVectorMapper(0, i+offset3);
// Scalar prologue up to the aligned start of res.
27773 for (Index j=0; j<alignedStart; ++j)
27775 res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
27776 res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
27777 res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
27778 res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
27780 if (alignedSize>alignedStart)
// Vectorized middle section, dispatched on the alignment pattern.
27782 switch(alignmentPattern)
27785 for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
27786 _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
27789 for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
27790 _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
// FirstAligned: peel 2 packets per iteration, realigning the three
// misaligned columns with palign<1..3>.
27794 Index j = alignedStart;
27797 LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
27799 A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
27800 A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
27801 A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
27802 for (; j<peeledSize; j+=peels*ResPacketSize)
27804 A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11);
27805 A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12);
27806 A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13);
27807 A00 = lhs0.template load<LhsPacket, Aligned>(j);
27808 A10 = lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize);
27809 T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
27810 T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
27811 T0 = pcj.pmadd(A01, ptmp1, T0);
27812 A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01);
27813 T0 = pcj.pmadd(A02, ptmp2, T0);
27814 A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02);
27815 T0 = pcj.pmadd(A03, ptmp3, T0);
27816 pstore(&res[j],T0);
27817 A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03);
27818 T1 = pcj.pmadd(A11, ptmp1, T1);
27819 T1 = pcj.pmadd(A12, ptmp2, T1);
27820 T1 = pcj.pmadd(A13, ptmp3, T1);
27821 pstore(&res[j+ResPacketSize],T1);
// Non-peeled remainder of the FirstAligned case.
27824 for (; j<alignedSize; j+=ResPacketSize)
27825 _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
27829 for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
27830 _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
// Scalar epilogue for the tail rows.
27835 for (Index j=alignedSize; j<size; ++j)
27837 res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
27838 res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
27839 res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
27840 res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
// Process the leftover columns (skipped head and tail) one at a time.
27844 Index start = columnBound;
27847 for (Index k=start; k<end; ++k)
27849 RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(k, 0));
27850 const LhsScalars lhs0 = lhs.getVectorMapper(0, k);
27853 for (Index j=0; j<alignedStart; ++j)
27854 res[j] += cj.pmul(lhs0(j), pfirst(ptmp0));
// Choose aligned vs unaligned loads for this single column.
27855 if (lhs0.template aligned<LhsPacket>(alignedStart))
27856 for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
27857 pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(i), ptmp0, pload<ResPacket>(&res[i])));
27859 for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
27860 pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(i), ptmp0, pload<ResPacket>(&res[i])));
27862 for (Index i=alignedSize; i<size; ++i)
27863 res[i] += cj.pmul(lhs0(i), pfirst(ptmp0));
27873 } while(Vectorizable);
27874 #undef _EIGEN_ACCUMULATE_PACKETS
// GEMV kernel declaration for a row-major lhs (dot-product form): res += alpha * lhs * rhs.
// Same packet-type setup as the column-major variant.
27876 template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
27877 struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
27879 typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
27881 Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
27882 && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
27883 LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
27884 RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
27885 ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
27887 typedef typename packet_traits<LhsScalar>::type _LhsPacket;
27888 typedef typename packet_traits<RhsScalar>::type _RhsPacket;
27889 typedef typename packet_traits<ResScalar>::type _ResPacket;
// Degrade packet types to scalars when vectorization is disabled.
27890 typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
27891 typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
27892 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
27893 EIGEN_DONT_INLINE static void run(
27894 Index rows, Index cols,
27895 const LhsMapper& lhs,
27896 const RhsMapper& rhs,
27897 ResScalar* res, Index resIncr,
// Row-major GEMV: each result entry is a dot product of an lhs row with rhs.
// Processes 4 rows at a time (rowsAtOnce = 4), accumulating into packet
// registers that are horizontally reduced (predux) at the end of each row
// group. Same alignment classification and 2-packet peeling strategy as the
// column-major kernel.
27900 template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
27901 EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
27902 Index rows, Index cols,
27903 const LhsMapper& lhs,
27904 const RhsMapper& rhs,
27905 ResScalar* res, Index resIncr,
// rhs must be a contiguous vector here.
27908 eigen_internal_assert(rhs.stride()==1);
27909 #ifdef _EIGEN_ACCUMULATE_PACKETS
27910 #error _EIGEN_ACCUMULATE_PACKETS has already been defined
// Accumulate one rhs packet into the 4 per-row accumulators, with per-row
// alignment passed as template arguments.
27912 #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\
27913 RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); \
27914 ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Alignment0>(j), b, ptmp0); \
27915 ptmp1 = pcj.pmadd(lhs1.template load<LhsPacket, Alignment13>(j), b, ptmp1); \
27916 ptmp2 = pcj.pmadd(lhs2.template load<LhsPacket, Alignment2>(j), b, ptmp2); \
27917 ptmp3 = pcj.pmadd(lhs3.template load<LhsPacket, Alignment13>(j), b, ptmp3); }
// Scalar and packet conjugation helpers for complex support.
27918 conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
27919 conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
27920 typedef typename LhsMapper::VectorMapper LhsScalars;
27921 enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
27922 const Index rowsAtOnce = 4;
27923 const Index peels = 2;
27924 const Index RhsPacketAlignedMask = RhsPacketSize-1;
27925 const Index LhsPacketAlignedMask = LhsPacketSize-1;
27926 const Index depth = cols;
27927 const Index lhsStride = lhs.stride();
// [alignedStart, alignedSize) is the vectorizable span of the depth (dot
// product) dimension; head and tail use scalar accumulation.
27928 Index alignedStart = rhs.firstAligned(depth);
27929 Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
27930 const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
27931 const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
27932 Index alignmentPattern = alignmentStep==0 ? AllAligned
27933 : alignmentStep==(LhsPacketSize/2) ? EvenAligned
27935 const Index lhsAlignmentOffset = lhs.firstAligned(depth);
27936 const Index rhsAlignmentOffset = rhs.firstAligned(rows);
// Possibly skip some leading rows so the remaining ones reach a usable
// alignment pattern; fall back to NoneAligned when impossible.
27937 Index skipRows = 0;
27938 if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) ||
27939 (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) ||
27940 (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) )
27944 alignmentPattern = NoneAligned;
27946 else if(LhsPacketSize > 4)
27948 alignmentPattern = NoneAligned;
27950 else if (LhsPacketSize>1)
27952 while (skipRows<LhsPacketSize &&
27953 alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
27955 if (skipRows==LhsPacketSize)
27957 alignmentPattern = NoneAligned;
27962 skipRows = (std::min)(skipRows,Index(rows));
27965 else if(Vectorizable)
27968 alignedSize = depth;
27969 alignmentPattern = AllAligned;
// In the FirstAligned+alignmentStep==1 case rows are visited in the order
// 0,3,2,1 so that lhs1/lhs3 share the Alignment13 load mode.
27971 const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
27972 const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
// Main loop: 4 rows per iteration, each producing one dot product.
27973 Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
27974 for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
27976 EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
27977 ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
27978 const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0), lhs1 = lhs.getVectorMapper(i+offset1, 0),
27979 lhs2 = lhs.getVectorMapper(i+2, 0), lhs3 = lhs.getVectorMapper(i+offset3, 0);
27982 ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
27983 ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
// Scalar prologue up to the aligned start of rhs.
27984 for (Index j=0; j<alignedStart; ++j)
27986 RhsScalar b = rhs(j, 0);
27987 tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
27988 tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
27990 if (alignedSize>alignedStart)
// Vectorized middle section, dispatched on the alignment pattern.
27992 switch(alignmentPattern)
27995 for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
27996 _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
27999 for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
28000 _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
// FirstAligned: peel 2 rhs packets per iteration, realigning the three
// misaligned rows with palign<1..3>.
28004 Index j = alignedStart;
28007 LhsPacket A01, A02, A03, A11, A12, A13;
28008 A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
28009 A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
28010 A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
28011 for (; j<peeledSize; j+=peels*RhsPacketSize)
28013 RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0);
28014 A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11);
28015 A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12);
28016 A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13);
28017 ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), b, ptmp0);
28018 ptmp1 = pcj.pmadd(A01, b, ptmp1);
28019 A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01);
28020 ptmp2 = pcj.pmadd(A02, b, ptmp2);
28021 A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02);
28022 ptmp3 = pcj.pmadd(A03, b, ptmp3);
28023 A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03);
28024 b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load<RhsPacket, Aligned>(0);
28025 ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize), b, ptmp0);
28026 ptmp1 = pcj.pmadd(A11, b, ptmp1);
28027 ptmp2 = pcj.pmadd(A12, b, ptmp2);
28028 ptmp3 = pcj.pmadd(A13, b, ptmp3);
// Non-peeled remainder of the FirstAligned case.
28031 for (; j<alignedSize; j+=RhsPacketSize)
28032 _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
28036 for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
28037 _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
// Horizontal reduction of the packet accumulators into the scalar sums.
28040 tmp0 += predux(ptmp0);
28041 tmp1 += predux(ptmp1);
28042 tmp2 += predux(ptmp2);
28043 tmp3 += predux(ptmp3);
// Scalar epilogue over the tail of the depth dimension.
28046 for (Index j=alignedSize; j<depth; ++j)
28048 RhsScalar b = rhs(j, 0);
28049 tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
28050 tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
// Commit the 4 dot products, scaled by alpha, honoring resIncr.
28052 res[i*resIncr] += alpha*tmp0;
28053 res[(i+offset1)*resIncr] += alpha*tmp1;
28054 res[(i+2)*resIncr] += alpha*tmp2;
28055 res[(i+offset3)*resIncr] += alpha*tmp3;
// Process leftover rows (skipped head and tail) one at a time.
28058 Index start = rowBound;
28061 for (Index i=start; i<end; ++i)
28063 EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
28064 ResPacket ptmp0 = pset1<ResPacket>(tmp0);
28065 const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
28066 for (Index j=0; j<alignedStart; ++j)
28067 tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
28068 if (alignedSize>alignedStart)
// Choose aligned vs unaligned lhs loads for this single row.
28070 if (lhs0.template aligned<LhsPacket>(alignedStart))
28071 for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
28072 ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
28074 for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
28075 ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
28076 tmp0 += predux(ptmp0);
28078 for (Index j=alignedSize; j<depth; ++j)
28079 tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
28080 res[i*resIncr] += alpha*tmp0;
28090 } while(Vectorizable);
28091 #undef _EIGEN_ACCUMULATE_PACKETS
28096 // end #include "src/Core/products/GeneralMatrixVector.h"
28097 // #include "src/Core/products/GeneralMatrixMatrix.h"
28098 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
28099 #define EIGEN_GENERAL_MATRIX_MATRIX_H
28101 namespace internal {
28102 template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
// Specialization of the GEMM dispatcher for a row-major destination.
// It does no work itself: C = A*B with row-major C is computed as the
// transposed product C^T = B^T * A^T with a column-major destination,
// so lhs/rhs (and their storage orders and conjugation flags) are swapped
// and the ColMajor specialization below is invoked.
28105 typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
28106 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
28107 struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
// NOTE: lhs/rhs roles are swapped in the traits as well.
28109 typedef gebp_traits<RhsScalar,LhsScalar> Traits;
28110 typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
28111 static EIGEN_STRONG_INLINE void run(
28112 Index rows, Index cols, Index depth,
28113 const LhsScalar* lhs, Index lhsStride,
28114 const RhsScalar* rhs, Index rhsStride,
28115 ResScalar* res, Index resStride,
28117 level3_blocking<RhsScalar,LhsScalar>& blocking,
28118 GemmParallelInfo<Index>* info = 0)
// Forward to the transposed, column-major product: operands swapped,
// each storage order flipped, rows/cols exchanged.
28120 general_matrix_matrix_product<Index,
28121 RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
28122 LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
28124 ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
// The real GEMM implementation (column-major destination):
// res += alpha * lhs * rhs, computed with cache-level blocking (kc x mc x nc)
// using packed A/B panels and the gebp micro-kernel.
28129 typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
28130 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
28131 struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
28133 typedef gebp_traits<LhsScalar,RhsScalar> Traits;
28134 typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
// @param rows,cols,depth   product dimensions (depth = inner/contraction dim)
// @param blocking          provides kc/mc/nc block sizes and optional preallocated buffers
// @param info              per-thread synchronization records; non-null only on the
//                          OpenMP multi-thread path
28135 static void run(Index rows, Index cols, Index depth,
28136 const LhsScalar* _lhs, Index lhsStride,
28137 const RhsScalar* _rhs, Index rhsStride,
28138 ResScalar* _res, Index resStride,
28140 level3_blocking<LhsScalar,RhsScalar>& blocking,
28141 GemmParallelInfo<Index>* info = 0)
28143 typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
28144 typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
28145 typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
28146 LhsMapper lhs(_lhs,lhsStride);
28147 RhsMapper rhs(_rhs,rhsStride);
28148 ResMapper res(_res, resStride);
// Block sizes along the depth (kc), row (mc) and column (nc) dimensions,
// clamped to the actual problem size.
28149 Index kc = blocking.kc();
28150 Index mc = (std::min)(rows,blocking.mc());
28151 Index nc = (std::min)(cols,blocking.nc());
28152 gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
28153 gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
28154 gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
28155 #ifdef EIGEN_HAS_OPENMP
// --- Parallel path: each OpenMP thread packs its horizontal slice of A into a
// shared blockA buffer and its own blockB; threads then consume each other's
// A slices, synchronizing through the info[] records.
28158 int tid = omp_get_thread_num();
28159 int threads = omp_get_num_threads();
// blockA is shared between threads and must have been allocated up front.
28160 LhsScalar* blockA = blocking.blockA();
28161 eigen_internal_assert(blockA!=0);
28162 std::size_t sizeB = kc*nc;
// Per-thread B panel, allocated on the stack when small enough.
28163 ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
28164 for(Index k=0; k<depth; k+=kc)
28166 const Index actual_kc = (std::min)(k+kc,depth)-k;
// Pack the first nc columns of B for this depth slice.
28167 pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);
// Spin-wait until no other thread is still reading our previous A slice,
// then announce `threads` users of the new one.
28168 while(info[tid].users!=0) {}
28169 info[tid].users += threads;
// Pack this thread's slice of A and publish its depth index.
28170 pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
28171 info[tid].sync = k;
// Consume every thread's A slice, starting with our own to avoid waiting.
28172 for(int shift=0; shift<threads; ++shift)
28174 int i = (tid+shift)%threads;
// Spin until thread i has packed its slice for this k.
28176 while(info[i].sync!=k) {
28179 gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
// Remaining column blocks only need our own B panel (A is fully packed).
28181 for(Index j=nc; j<cols; j+=nc)
28183 const Index actual_nc = (std::min)(j+nc,cols)-j;
28184 pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);
28185 gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
// Release every thread's A slice for this depth step.
28187 for(Index i=0; i<threads; ++i)
28188 info[i].users -= 1;
// --- Sequential path: classic three-level blocked GEMM.
28194 EIGEN_UNUSED_VARIABLE(info);
28195 std::size_t sizeA = kc*mc;
28196 std::size_t sizeB = kc*nc;
28197 ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
28198 ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
// If B fits entirely in one panel (kc==depth, nc==cols) while A is blocked
// over rows, B only needs to be packed once (on the first row block).
28199 const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
28200 for(Index i2=0; i2<rows; i2+=mc)
28202 const Index actual_mc = (std::min)(i2+mc,rows)-i2;
28203 for(Index k2=0; k2<depth; k2+=kc)
28205 const Index actual_kc = (std::min)(k2+kc,depth)-k2;
28206 pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
28207 for(Index j2=0; j2<cols; j2+=nc)
28209 const Index actual_nc = (std::min)(j2+nc,cols)-j2;
28210 if((!pack_rhs_once) || i2==0)
28211 pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
28212 gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
// Callable wrapper binding a GEMM kernel to concrete operands; invoked by
// parallelize_gemm with a row range (and optional column range) so that the
// same functor can be run sequentially or from several threads.
28219 template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
28220 struct gemm_functor
28222 gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
28223 : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
// Prepare blocking for a multi-threaded run and preallocate the shared A buffer.
28225 void initParallelSession(Index num_threads) const
28227 m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
28228 m_blocking.allocateA();
// Run the product on the sub-block [row, row+rows) x [col, col+cols).
// cols==-1 means "all remaining columns".
28230 void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
28233 cols = m_rhs.cols();
28234 Gemm::run(rows, cols, m_lhs.cols(),
28235 &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
28236 &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
28237 (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
28238 m_actualAlpha, m_blocking, info);
28240 typedef typename Gemm::Traits Traits;
28245 Scalar m_actualAlpha;
28246 BlockingType& m_blocking;
28248 template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
28249 bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
// Type-erased base holding the level-3 blocking parameters (mc/nc/kc block
// sizes) and the optional packed-panel buffers shared by the GEMM kernels.
// Ownership of m_blockA/m_blockB is managed by the derived blocking classes.
28250 template<typename _LhsScalar, typename _RhsScalar>
28251 class level3_blocking
28253 typedef _LhsScalar LhsScalar;
28254 typedef _RhsScalar RhsScalar;
// Packed panels for A and B; null until allocated by a derived class.
28256 LhsScalar* m_blockA;
28257 RhsScalar* m_blockB;
28263 : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
// Accessors for the block sizes along rows (mc), columns (nc) and depth (kc).
28265 inline Index mc() const { return m_mc; }
28266 inline Index nc() const { return m_nc; }
28267 inline Index kc() const { return m_kc; }
28268 inline LhsScalar* blockA() { return m_blockA; }
28269 inline RhsScalar* blockB() { return m_blockB; }
// Blocking policy for products whose dimensions are all fixed at compile time:
// the packed panels live in statically sized member buffers, so all allocate*()
// members are no-ops. A RowMajor destination swaps the lhs/rhs roles.
28271 template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
28272 class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true >
28273 : public level3_blocking<
28274 typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
28275 typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
28278 Transpose = StorageOrder==RowMajor,
28279 ActualRows = Transpose ? MaxCols : MaxRows,
28280 ActualCols = Transpose ? MaxRows : MaxCols
28282 typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
28283 typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
28284 typedef gebp_traits<LhsScalar,RhsScalar> Traits;
28286 SizeA = ActualRows * MaxDepth,
28287 SizeB = ActualCols * MaxDepth
// With sufficient static alignment the buffers are typed arrays; otherwise
// raw char arrays are over-allocated and aligned manually in the constructor.
28289 #if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
28290 EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
28291 EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
28293 EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
28294 EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
// Runtime dimensions are ignored: block sizes are the compile-time maxima.
28297 gemm_blocking_space(Index , Index , Index , Index , bool )
28299 this->m_mc = ActualRows;
28300 this->m_nc = ActualCols;
28301 this->m_kc = MaxDepth;
28302 #if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
28303 this->m_blockA = m_staticA;
28304 this->m_blockB = m_staticB;
// Round the char buffers up to the default alignment boundary.
28306 this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
28307 this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
// Nothing to do: storage is static.
28310 void initParallel(Index, Index, Index, Index)
28312 inline void allocateA() {}
28313 inline void allocateB() {}
28314 inline void allocateAll() {}
// Blocking policy for dynamically sized products: block sizes are computed at
// runtime via computeProductBlockingSizes and the packed panels are heap
// allocated lazily (allocateA/allocateB) and freed in the destructor.
28316 template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
28317 class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
28318 : public level3_blocking<
28319 typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
28320 typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
28323 Transpose = StorageOrder==RowMajor
28325 typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
28326 typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
28327 typedef gebp_traits<LhsScalar,RhsScalar> Traits;
// @param l3_blocking  (visible in callers) selects whether nc may also be
//                     reduced; here the two branches differ in whether m_nc
//                     is passed through or shielded behind a local copy.
28331 gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
28333 this->m_mc = Transpose ? cols : rows;
28334 this->m_nc = Transpose ? rows : cols;
28335 this->m_kc = depth;
28338 computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
// Without L3 blocking, keep m_nc untouched by passing a scratch copy.
28342 Index n = this->m_nc;
28343 computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
28345 m_sizeA = this->m_mc * this->m_kc;
28346 m_sizeB = this->m_kc * this->m_nc;
// Parallel variant: mc is shielded instead (each thread handles a row slice).
28348 void initParallel(Index rows, Index cols, Index depth, Index num_threads)
28350 this->m_mc = Transpose ? cols : rows;
28351 this->m_nc = Transpose ? rows : cols;
28352 this->m_kc = depth;
// Must be called before any buffer was allocated.
28353 eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
28354 Index m = this->m_mc;
28355 computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
28356 m_sizeA = this->m_mc * this->m_kc;
28357 m_sizeB = this->m_kc * this->m_nc;
// Lazy allocation: buffers are created at most once.
28361 if(this->m_blockA==0)
28362 this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
28366 if(this->m_blockB==0)
28367 this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
28374 ~gemm_blocking_space()
28376 aligned_delete(this->m_blockA, m_sizeA);
28377 aligned_delete(this->m_blockB, m_sizeB);
28381 namespace internal {
// Evaluator glue for dense*dense products selected as GemmProduct: routes
// small products (heuristic: rhs.rows()+dst.rows()+dst.cols() < 20) to the
// lazy coefficient-based evaluator, and everything else to the blocked,
// possibly parallel GEMM kernel via scaleAndAddTo.
28382 template<typename Lhs, typename Rhs>
28383 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
28384 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
28386 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
28387 typedef typename Lhs::Scalar LhsScalar;
28388 typedef typename Rhs::Scalar RhsScalar;
// blas_traits strips transpose/conjugate/scalar-multiple wrappers so that the
// kernel receives plain storage plus conjugation flags and a scalar factor.
28389 typedef internal::blas_traits<Lhs> LhsBlasTraits;
28390 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
28391 typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
28392 typedef internal::blas_traits<Rhs> RhsBlasTraits;
28393 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
28394 typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
28396 MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
28398 typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
// dst = lhs*rhs (small products go through the lazy evaluator).
28399 template<typename Dst>
28400 static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
28402 if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
28403 lazyproduct::evalTo(dst, lhs, rhs);
28407 scaleAndAddTo(dst, lhs, rhs, Scalar(1));
// dst += lhs*rhs
28410 template<typename Dst>
28411 static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
28413 if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
28414 lazyproduct::addTo(dst, lhs, rhs);
28416 scaleAndAddTo(dst,lhs, rhs, Scalar(1));
// dst -= lhs*rhs
28418 template<typename Dst>
28419 static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
28421 if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
28422 lazyproduct::subTo(dst, lhs, rhs);
28424 scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
// dst += alpha * lhs * rhs — the workhorse: extracts bare operands and scalar
// factors, builds a blocking object, and hands off to parallelize_gemm.
28426 template<typename Dest>
28427 static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
28429 eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
// Empty products leave dst unchanged.
28430 if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
28432 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
28433 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
// Fold operand scalar factors (e.g. from s*M expressions) into alpha.
28434 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
28435 * RhsBlasTraits::extractScalarFactor(a_rhs);
28436 typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
28437 Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
28438 typedef internal::gemm_functor<
28440 internal::general_matrix_matrix_product<
28442 LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
28443 RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
28444 (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
28445 ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
28446 BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
// Parallelization is only considered for destinations that can be large.
28447 internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
28448 (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
28454 // end #include "src/Core/products/GeneralMatrixMatrix.h"
28455 // #include "src/Core/VectorwiseOp.h"
28456 #ifndef EIGEN_PARTIAL_REDUX_H
28457 #define EIGEN_PARTIAL_REDUX_H
28459 template< typename MatrixType, typename MemberOp, int Direction>
28460 class PartialReduxExpr;
28461 namespace internal {
// Traits for PartialReduxExpr: a vertical (column-wise) reduction yields a
// single row, a horizontal (row-wise) reduction a single column; the scalar
// type is the functor's result_type.
28462 template<typename MatrixType, typename MemberOp, int Direction>
28463 struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
28464 : traits<MatrixType>
28466 typedef typename MemberOp::result_type Scalar;
28467 typedef typename traits<MatrixType>::StorageKind StorageKind;
28468 typedef typename traits<MatrixType>::XprKind XprKind;
28469 typedef typename MatrixType::Scalar InputScalar;
28471 RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
28472 ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
28473 MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
28474 MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
// A one-row result is flagged row-major so linear traversal matches storage.
28475 Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
// Length of each reduced sub-vector (rows for Vertical, cols for Horizontal).
28476 TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
// Expression of a partial reduction (one functor application per column or
// row of the nested matrix). Produced by VectorwiseOp members such as
// colwise().sum(); evaluation is handled by its evaluator elsewhere.
28480 template< typename MatrixType, typename MemberOp, int Direction>
28481 class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type,
28482 internal::no_assignment_operator
28485 typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
28486 EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
28488 explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
28489 : m_matrix(mat), m_functor(func) {}
// The reduced dimension collapses to 1.
28491 Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
28493 Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
28495 typename MatrixType::Nested nestedExpression() const { return m_matrix; }
28497 const MemberOp& functor() const { return m_functor; }
28499 typename MatrixType::Nested m_matrix;
28500 const MemberOp m_functor;
// Generates a functor struct member_<MEMBER> whose operator() forwards to
// mat.MEMBER() and whose Cost<Scalar,Size> member reports COST (an expression
// in terms of Scalar and Size) for the expression-cost model.
28502 #define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
28503 template <typename ResultType> \
28504 struct member_##MEMBER { \
28505 EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
28506 typedef ResultType result_type; \
28507 template<typename Scalar, int Size> struct Cost \
28508 { enum { value = COST }; }; \
28509 template<typename XprType> \
28510 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
28511 ResultType operator()(const XprType& mat) const \
28512 { return mat.MEMBER(); } \
28514 namespace internal {
// One functor per reducible member function, each with an estimated cost in
// terms of the sub-vector length Size and the scalar's Add/Mul costs.
28515 EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
28516 EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
28517 EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
28518 EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
28519 EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );
28520 EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
28521 EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
28522 EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
28523 EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
28524 EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
28525 EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
28526 EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
28527 EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
// Functor for the Lp-norm reduction; written by hand (not via the macro)
// because it carries the compile-time exponent p as a template parameter.
28528 template <int p, typename ResultType>
28529 struct member_lpnorm {
28530 typedef ResultType result_type;
28531 template<typename Scalar, int Size> struct Cost
28532 { enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
28533 EIGEN_DEVICE_FUNC member_lpnorm() {}
28534 template<typename XprType>
28535 EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
28536 { return mat.template lpNorm<p>(); }
// Functor wrapping an arbitrary user-supplied binary reduction (used by
// VectorwiseOp::redux); result_type is whatever BinaryOp returns for two
// Scalars, and the cost model assumes Size-1 functor applications.
28538 template <typename BinaryOp, typename Scalar>
28539 struct member_redux {
28540 typedef typename result_of<
28541 BinaryOp(const Scalar&,const Scalar&)
28542 >::type result_type;
28543 template<typename _Scalar, int Size> struct Cost
28544 { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
28545 EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
28546 template<typename Derived>
28547 EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const
28548 { return mat.redux(m_functor); }
28549 const BinaryOp m_functor;
// Pseudo-expression providing column-wise (Direction==Vertical) or row-wise
// (Direction==Horizontal) operations on ExpressionType. Returned by
// DenseBase::colwise()/rowwise(); each member builds a PartialReduxExpr,
// Replicate or CwiseBinaryOp expression — nothing is evaluated here.
28552 template<typename ExpressionType, int Direction> class VectorwiseOp
28555 typedef typename ExpressionType::Scalar Scalar;
28556 typedef typename ExpressionType::RealScalar RealScalar;
28557 typedef Eigen::Index Index;
28558 typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
28559 typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
// Maps a member functor template to the corresponding PartialReduxExpr type.
28560 template<template<typename _Scalar> class Functor,
28561 typename Scalar_=Scalar> struct ReturnType
28563 typedef PartialReduxExpr<ExpressionType,
28568 template<typename BinaryOp> struct ReduxReturnType
28570 typedef PartialReduxExpr<ExpressionType,
28571 internal::member_redux<BinaryOp,Scalar>,
28576 isVertical = (Direction==Vertical) ? 1 : 0,
28577 isHorizontal = (Direction==Horizontal) ? 1 : 0
// SubVector is a single column (vertical mode) or row (horizontal mode).
28580 typedef typename internal::conditional<isVertical,
28581 typename ExpressionType::ColXpr,
28582 typename ExpressionType::RowXpr>::type SubVector;
28584 SubVector subVector(Index i)
28586 return SubVector(m_matrix.derived(),i);
// Number of sub-vectors being iterated over.
28589 Index subVectors() const
28590 { return isVertical?m_matrix.cols():m_matrix.rows(); }
// ExtendedType replicates a vector along the reduced direction so it matches
// the full matrix shape (used by operator= / +=, etc.).
28591 template<typename OtherDerived> struct ExtendedType {
28592 typedef Replicate<OtherDerived,
28593 isVertical ? 1 : ExpressionType::RowsAtCompileTime,
28594 isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
28596 template<typename OtherDerived>
28598 typename ExtendedType<OtherDerived>::Type
28599 extendedTo(const DenseBase<OtherDerived>& other) const
// Compile-time orientation checks: a column-wise op expects a column vector,
// a row-wise op expects a row vector.
28601 EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
28602 YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
28603 EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
28604 YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
28605 return typename ExtendedType<OtherDerived>::Type
28607 isVertical ? 1 : m_matrix.rows(),
28608 isHorizontal ? 1 : m_matrix.cols());
// OppositeExtendedType replicates along the non-reduced direction instead
// (used by normalized(), which divides each sub-vector by its norm).
28610 template<typename OtherDerived> struct OppositeExtendedType {
28611 typedef Replicate<OtherDerived,
28612 isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
28613 isVertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
28615 template<typename OtherDerived>
28617 typename OppositeExtendedType<OtherDerived>::Type
28618 extendedToOpposite(const DenseBase<OtherDerived>& other) const
28620 EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
28621 YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
28622 EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
28623 YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
28624 return typename OppositeExtendedType<OtherDerived>::Type
28626 isHorizontal ? 1 : m_matrix.rows(),
28627 isVertical ? 1 : m_matrix.cols());
28631 explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
28633 inline const ExpressionType& _expression() const { return m_matrix; }
// Generic reduction with a user-supplied binary functor.
28634 template<typename BinaryOp>
28636 const typename ReduxReturnType<BinaryOp>::Type
28637 redux(const BinaryOp& func = BinaryOp()) const
28638 { return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
// Return-type aliases for each built-in partial reduction.
28639 typedef typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType;
28640 typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType;
28641 typedef typename ReturnType<internal::member_squaredNorm,RealScalar>::Type SquaredNormReturnType;
28642 typedef typename ReturnType<internal::member_norm,RealScalar>::Type NormReturnType;
28643 typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType;
28644 typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType;
28645 typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType;
28646 typedef typename ReturnType<internal::member_sum>::Type SumReturnType;
28647 typedef typename ReturnType<internal::member_mean>::Type MeanReturnType;
28648 typedef typename ReturnType<internal::member_all>::Type AllReturnType;
28649 typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
28650 typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
28651 typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
28652 typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;
28653 typedef Reverse<ExpressionType, Direction> ReverseReturnType;
28654 template<int p> struct LpNormReturnType {
28655 typedef PartialReduxExpr<ExpressionType, internal::member_lpnorm<p,RealScalar>,Direction> Type;
// Each reduction below returns a lazy expression over the nested matrix.
28658 const MinCoeffReturnType minCoeff() const
28659 { return MinCoeffReturnType(_expression()); }
28661 const MaxCoeffReturnType maxCoeff() const
28662 { return MaxCoeffReturnType(_expression()); }
28664 const SquaredNormReturnType squaredNorm() const
28665 { return SquaredNormReturnType(_expression()); }
28667 const NormReturnType norm() const
28668 { return NormReturnType(_expression()); }
28671 const typename LpNormReturnType<p>::Type lpNorm() const
28672 { return typename LpNormReturnType<p>::Type(_expression()); }
28674 const BlueNormReturnType blueNorm() const
28675 { return BlueNormReturnType(_expression()); }
28677 const StableNormReturnType stableNorm() const
28678 { return StableNormReturnType(_expression()); }
28680 const HypotNormReturnType hypotNorm() const
28681 { return HypotNormReturnType(_expression()); }
28683 const SumReturnType sum() const
28684 { return SumReturnType(_expression()); }
28686 const MeanReturnType mean() const
28687 { return MeanReturnType(_expression()); }
28689 const AllReturnType all() const
28690 { return AllReturnType(_expression()); }
28692 const AnyReturnType any() const
28693 { return AnyReturnType(_expression()); }
28695 const CountReturnType count() const
28696 { return CountReturnType(_expression()); }
28698 const ProdReturnType prod() const
28699 { return ProdReturnType(_expression()); }
// Reverse each sub-vector (const and mutable variants).
28701 const ConstReverseReturnType reverse() const
28702 { return ConstReverseReturnType( _expression() ); }
28704 ReverseReturnType reverse()
28705 { return ReverseReturnType( _expression() ); }
// Replication along the iterated direction: runtime factor overload
// (declared here, defined out of line) and compile-time Factor overload.
28706 typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
28708 const ReplicateReturnType replicate(Index factor) const;
28709 template<int Factor> const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>
28711 replicate(Index factor = Factor) const
28713 return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>
28714 (_expression(),isVertical?factor:1,isHorizontal?factor:1);
// Assignment operators: broadcast `other` across every sub-vector via
// extendedTo, then apply the operation to the whole nested matrix.
28716 template<typename OtherDerived>
28718 ExpressionType& operator=(const DenseBase<OtherDerived>& other)
28720 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28721 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28722 return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
28724 template<typename OtherDerived>
28726 ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
28728 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28729 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28730 return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
28732 template<typename OtherDerived>
28734 ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
28736 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28737 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28738 return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
// *= and /= are restricted to array expressions (coefficient-wise semantics).
28740 template<typename OtherDerived>
28742 ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
28744 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28745 EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
28746 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28747 m_matrix *= extendedTo(other.derived());
28748 return const_cast<ExpressionType&>(m_matrix);
28750 template<typename OtherDerived>
28752 ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
28754 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28755 EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
28756 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28757 m_matrix /= extendedTo(other.derived());
28758 return const_cast<ExpressionType&>(m_matrix);
// Non-mutating broadcast arithmetic: returns a lazy CwiseBinaryOp.
28760 template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
28761 CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
28762 const ExpressionTypeNestedCleaned,
28763 const typename ExtendedType<OtherDerived>::Type>
28764 operator+(const DenseBase<OtherDerived>& other) const
28766 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28767 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28768 return m_matrix + extendedTo(other.derived());
28770 template<typename OtherDerived>
28772 CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
28773 const ExpressionTypeNestedCleaned,
28774 const typename ExtendedType<OtherDerived>::Type>
28775 operator-(const DenseBase<OtherDerived>& other) const
28777 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28778 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28779 return m_matrix - extendedTo(other.derived());
28781 template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
28782 CwiseBinaryOp<internal::scalar_product_op<Scalar>,
28783 const ExpressionTypeNestedCleaned,
28784 const typename ExtendedType<OtherDerived>::Type>
28786 operator*(const DenseBase<OtherDerived>& other) const
28788 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28789 EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
28790 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28791 return m_matrix * extendedTo(other.derived());
28793 template<typename OtherDerived>
28795 CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
28796 const ExpressionTypeNestedCleaned,
28797 const typename ExtendedType<OtherDerived>::Type>
28798 operator/(const DenseBase<OtherDerived>& other) const
28800 EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
28801 EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
28802 EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
28803 return m_matrix / extendedTo(other.derived());
// Divide every sub-vector by its own norm (lazy) / in place.
28806 CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
28807 const ExpressionTypeNestedCleaned,
28808 const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
28809 normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
28810 EIGEN_DEVICE_FUNC void normalize() {
28811 m_matrix = this->normalized();
28813 EIGEN_DEVICE_FUNC inline void reverseInPlace();
// Geometry-module helpers, defined out of line elsewhere.
28814 typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
28816 HomogeneousReturnType homogeneous() const;
28817 typedef typename ExpressionType::PlainObject CrossReturnType;
28818 template<typename OtherDerived>
28820 const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
// Machinery for hnormalized(): split each sub-vector into its leading part
// (HNormalized_Block) and its last coefficient (HNormalized_Factors), then
// divide the former by the replicated latter.
28822 HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime
28823 : internal::traits<ExpressionType>::ColsAtCompileTime,
28824 HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
28826 typedef Block<const ExpressionType,
28827 Direction==Vertical ? int(HNormalized_SizeMinusOne)
28828 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
28829 Direction==Horizontal ? int(HNormalized_SizeMinusOne)
28830 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
28832 typedef Block<const ExpressionType,
28833 Direction==Vertical ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
28834 Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
28835 HNormalized_Factors;
28836 typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,
28837 const HNormalized_Block,
28838 const Replicate<HNormalized_Factors,
28839 Direction==Vertical ? HNormalized_SizeMinusOne : 1,
28840 Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
28841 HNormalizedReturnType;
28843 const HNormalizedReturnType hnormalized() const;
28845 ExpressionTypeNested m_matrix;
// Returns a VectorwiseOp proxy treating *this as a collection of columns,
// enabling per-column reductions and broadcasting operations.
28847 template<typename Derived>
28848 inline typename DenseBase<Derived>::ColwiseReturnType
28849 DenseBase<Derived>::colwise()
28851 return ColwiseReturnType(derived())
// Returns a VectorwiseOp proxy treating *this as a collection of rows --
// the row-direction counterpart of colwise().
28853 template<typename Derived>
28854 inline typename DenseBase<Derived>::RowwiseReturnType
28855 DenseBase<Derived>::rowwise()
28857 return RowwiseReturnType(derived())
28861 // end #include "src/Core/VectorwiseOp.h"
28862 // #include "src/Core/Replicate.h"
28863 #ifndef EIGEN_REPLICATE_H
28864 #define EIGEN_REPLICATE_H
28866 namespace internal {
// Expression traits for Replicate<MatrixType,RowFactor,ColFactor>: the
// replicated expression has RowFactor*rows x ColFactor*cols coefficients;
// a Dynamic factor or a Dynamic input size yields a Dynamic result size.
28867 template<typename MatrixType,int RowFactor,int ColFactor>
28868 struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
28869 : traits<MatrixType>
28871 typedef typename MatrixType::Scalar Scalar;
28872 typedef typename traits<MatrixType>::StorageKind StorageKind;
28873 typedef typename traits<MatrixType>::XprKind XprKind;
// ref_selector decides whether the nested expression is stored by value
// or by reference inside the Replicate expression.
28874 typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
28875 typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
28877 RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
28879 : RowFactor * MatrixType::RowsAtCompileTime,
28880 ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic
28882 : ColFactor * MatrixType::ColsAtCompileTime,
28883 MaxRowsAtCompileTime = RowsAtCompileTime,
28884 MaxColsAtCompileTime = ColsAtCompileTime,
// Storage-order choice: a compile-time row vector is row-major, a column
// vector is column-major; otherwise inherit the input's majorness.
28885 IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
28886 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
28887 : (MatrixType::Flags & RowMajorBit) ? 1 : 0,
28888 Flags = IsRowMajor ? RowMajorBit : 0
// Expression of a matrix/vector replicated RowFactor times vertically and
// ColFactor times horizontally. Purely lazy: coefficients are computed on
// access by the corresponding evaluator.
// NOTE(review): this lossy listing drops several lines (access
// specifiers, braces, EIGEN_DEVICE_FUNC markers, and the body of
// nestedExpression(), originals 28894..28927 partially missing).
28892 template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
28893 : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
28895 typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
28896 typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
28898 typedef typename internal::dense_xpr_base<Replicate>::type Base;
28899 EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
28900 typedef typename internal::remove_all<MatrixType>::type NestedExpression;
// Ctor for fixed factors: both RowFactor and ColFactor must be
// compile-time constants (asserted below).
28901 template<typename OriginalMatrixType>
28903 inline explicit Replicate(const OriginalMatrixType& matrix)
28904 : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
// Guard against silently replicating a converted temporary: the argument
// type must be exactly the declared MatrixType (modulo constness).
28906 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
28907 THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
28908 eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
// Ctor for runtime factors (used with Dynamic template factors).
28910 template<typename OriginalMatrixType>
28912 inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
28913 : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
28915 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
28916 THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
// Result dimensions: input size times the (possibly runtime) factor.
28919 inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
28921 inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
28923 const _MatrixTypeNested& nestedExpression() const
// Factors are stored in variable_if_dynamic: zero storage cost when the
// factor is a compile-time constant.
28928 MatrixTypeNested m_matrix;
28929 const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
28930 const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
// DenseBase::replicate<RowFactor,ColFactor>(): returns a lazy expression
// of *this tiled RowFactor x ColFactor times.
28932 template<typename Derived>
28933 template<int RowFactor, int ColFactor>
28934 const Replicate<Derived,RowFactor,ColFactor>
28935 DenseBase<Derived>::replicate() const
28937 return Replicate<Derived,RowFactor,ColFactor>(derived())
// VectorwiseOp::replicate(factor): replicates only along the reduction
// direction -- `factor` copies vertically for colwise(), horizontally for
// rowwise(); the other factor stays 1.
28939 template<typename ExpressionType, int Direction>
28940 const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
28941 VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const
28943 return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
28944 (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1)
28948 // end #include "src/Core/Replicate.h"
28949 // #include "src/Core/ArrayWrapper.h"
28950 #ifndef EIGEN_ARRAYWRAPPER_H
28951 #define EIGEN_ARRAYWRAPPER_H
28953 namespace internal {
// Traits for ArrayWrapper: inherit everything from the wrapped
// expression, but switch the expression kind to ArrayXpr so array
// (coefficient-wise) semantics apply.
28954 template<typename ExpressionType>
28955 struct traits<ArrayWrapper<ExpressionType> >
28956 : public traits<typename remove_all<typename ExpressionType::Nested>::type >
28958 typedef ArrayXpr XprKind;
// Recompute flags: drop NestByRefBit, and expose LvalueBit only if the
// wrapped expression is itself writable.
28960 Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
28961 LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
28962 Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
// ArrayWrapper: presents a matrix expression with an array (coefficient-
// wise) API. All accessors simply forward to the wrapped expression.
// NOTE(review): this lossy listing drops access specifiers, braces and
// some EIGEN_DEVICE_FUNC / typedef lines (originals 28968..29021
// partially missing), including the two branches of the conditional at
// 28976-28977 (presumably Scalar vs const Scalar).
28966 template<typename ExpressionType>
28967 class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
28970 typedef ArrayBase<ArrayWrapper> Base;
28971 EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
28972 EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
28973 typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
// data() returns a mutable Scalar* only when the wrapped expression is an
// lvalue; otherwise the pointee is const.
28974 typedef typename internal::conditional<
28975 internal::is_lvalue<ExpressionType>::value,
28978 >::type ScalarWithConstIfNotLvalue;
28979 typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
28980 using Base::coeffRef;
// explicit: prevents accidental implicit matrix -> array-view conversion.
28982 explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
// Size and stride queries forward straight to the wrapped expression.
28984 inline Index rows() const { return m_expression.rows(); }
28986 inline Index cols() const { return m_expression.cols(); }
28988 inline Index outerStride() const { return m_expression.outerStride(); }
28990 inline Index innerStride() const { return m_expression.innerStride(); }
28992 inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
28994 inline const Scalar* data() const { return m_expression.data(); }
// Coefficient access by (row,col) and by linear index, forwarded.
28996 inline const Scalar& coeffRef(Index rowId, Index colId) const
28998 return m_expression.coeffRef(rowId, colId);
29001 inline const Scalar& coeffRef(Index index) const
29003 return m_expression.coeffRef(index);
// Evaluation into a destination is plain assignment from the wrapped
// expression.
29005 template<typename Dest>
29007 inline void evalTo(Dest& dst) const { dst = m_expression; }
29008 const typename internal::remove_all<NestedExpressionType>::type&
29010 nestedExpression() const
29012 return m_expression;
// resize() forwards to the wrapped expression -- only meaningful when it
// is resizable.
29015 void resize(Index newSize) { m_expression.resize(newSize); }
29017 void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
29019 NestedExpressionType m_expression;
29023 // end #include "src/Core/ArrayWrapper.h"
29024 // #include "src/Core/SelfCwiseBinaryOp.h"
// In-place division of every coefficient by the scalar `other`,
// implemented as an assignment from a same-sized constant expression
// using div_assign_op (i.e. *this = *this / Constant(other), fused).
// NOTE(review): the `return derived();` statement and closing brace fall
// in a numbering gap (originals 29030-29032) of this lossy listing.
29026 template<typename Derived>
29027 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
29029 internal::call_assignment(this->derived(), PlainObject::Constant(rows(), cols(),other), internal::div_assign_op<Scalar, Scalar>());
29033 // end #include "src/Core/SelfCwiseBinaryOp.h"