libstdc++: Always-inline most of non-cmath fixed_size implementation
authorMatthias Kretz <m.kretz@gsi.de>
Mon, 20 Feb 2023 16:49:37 +0000 (17:49 +0100)
committerMatthias Kretz <m.kretz@gsi.de>
Fri, 24 Feb 2023 18:34:28 +0000 (19:34 +0100)
For simd, the inlining behavior should be similar to builtin types. (No
operator on buitin types is ever translated into a function call.)
Therefore, always_inline is the right choice (i.e. inline on -O0 as
well).

Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
libstdc++-v3/ChangeLog:

PR libstdc++/108030
* include/experimental/bits/simd_fixed_size.h
(_SimdImplFixedSize::_S_broadcast): Replace inline with
_GLIBCXX_SIMD_INTRINSIC.
(_SimdImplFixedSize::_S_generate): Likewise.
(_SimdImplFixedSize::_S_load): Likewise.
(_SimdImplFixedSize::_S_masked_load): Likewise.
(_SimdImplFixedSize::_S_store): Likewise.
(_SimdImplFixedSize::_S_masked_store): Likewise.
(_SimdImplFixedSize::_S_min): Likewise.
(_SimdImplFixedSize::_S_max): Likewise.
(_SimdImplFixedSize::_S_complement): Likewise.
(_SimdImplFixedSize::_S_unary_minus): Likewise.
(_SimdImplFixedSize::_S_plus): Likewise.
(_SimdImplFixedSize::_S_minus): Likewise.
(_SimdImplFixedSize::_S_multiplies): Likewise.
(_SimdImplFixedSize::_S_divides): Likewise.
(_SimdImplFixedSize::_S_modulus): Likewise.
(_SimdImplFixedSize::_S_bit_and): Likewise.
(_SimdImplFixedSize::_S_bit_or): Likewise.
(_SimdImplFixedSize::_S_bit_xor): Likewise.
(_SimdImplFixedSize::_S_bit_shift_left): Likewise.
(_SimdImplFixedSize::_S_bit_shift_right): Likewise.
(_SimdImplFixedSize::_S_remquo): Add inline keyword (to be
explicit about not always-inline, yet).
(_SimdImplFixedSize::_S_isinf): Likewise.
(_SimdImplFixedSize::_S_isfinite): Likewise.
(_SimdImplFixedSize::_S_isnan): Likewise.
(_SimdImplFixedSize::_S_isnormal): Likewise.
(_SimdImplFixedSize::_S_signbit): Likewise.

libstdc++-v3/include/experimental/bits/simd_fixed_size.h

index 3ac6eaa..88a9b27 100644 (file)
@@ -1284,7 +1284,8 @@ template <int _Np, typename>
 
     // broadcast {{{2
     template <typename _Tp>
-      static constexpr inline _SimdMember<_Tp> _S_broadcast(_Tp __x) noexcept
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
+      _S_broadcast(_Tp __x) noexcept
       {
        return _SimdMember<_Tp>::_S_generate(
                 [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -1294,8 +1295,8 @@ template <int _Np, typename>
 
     // _S_generator {{{2
     template <typename _Fp, typename _Tp>
-      static constexpr inline _SimdMember<_Tp> _S_generator(_Fp&& __gen,
-                                                           _TypeTag<_Tp>)
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
+      _S_generator(_Fp&& __gen, _TypeTag<_Tp>)
       {
        return _SimdMember<_Tp>::_S_generate(
                 [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -1310,8 +1311,8 @@ template <int _Np, typename>
 
     // _S_load {{{2
     template <typename _Tp, typename _Up>
-      static inline _SimdMember<_Tp> _S_load(const _Up* __mem,
-                                            _TypeTag<_Tp>) noexcept
+      _GLIBCXX_SIMD_INTRINSIC static _SimdMember<_Tp>
+      _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
       {
        return _SimdMember<_Tp>::_S_generate(
                 [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -1321,7 +1322,7 @@ template <int _Np, typename>
 
     // _S_masked_load {{{2
     template <typename _Tp, typename... _As, typename _Up>
-      static inline _SimdTuple<_Tp, _As...>
+      _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...>
       _S_masked_load(const _SimdTuple<_Tp, _As...>& __old,
                     const _MaskMember __bits, const _Up* __mem) noexcept
       {
@@ -1344,8 +1345,8 @@ template <int _Np, typename>
 
     // _S_store {{{2
     template <typename _Tp, typename _Up>
-      static inline void _S_store(const _SimdMember<_Tp>& __v, _Up* __mem,
-                                 _TypeTag<_Tp>) noexcept
+      _GLIBCXX_SIMD_INTRINSIC static void
+      _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept
       {
        __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>());
@@ -1354,9 +1355,9 @@ template <int _Np, typename>
 
     // _S_masked_store {{{2
     template <typename _Tp, typename... _As, typename _Up>
-      static inline void _S_masked_store(const _SimdTuple<_Tp, _As...>& __v,
-                                        _Up* __mem,
-                                        const _MaskMember __bits) noexcept
+      _GLIBCXX_SIMD_INTRINSIC static void
+      _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem,
+                     const _MaskMember __bits) noexcept
       {
        __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          if (__meta._S_submask(__bits).any())
@@ -1464,7 +1465,7 @@ template <int _Np, typename>
 
     // _S_min, _S_max {{{2
     template <typename _Tp, typename... _As>
-      static inline constexpr _SimdTuple<_Tp, _As...>
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
       _S_min(const _SimdTuple<_Tp, _As...>& __a,
             const _SimdTuple<_Tp, _As...>& __b)
       {
@@ -1476,7 +1477,7 @@ template <int _Np, typename>
       }
 
     template <typename _Tp, typename... _As>
-      static inline constexpr _SimdTuple<_Tp, _As...>
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
       _S_max(const _SimdTuple<_Tp, _As...>& __a,
             const _SimdTuple<_Tp, _As...>& __b)
       {
@@ -1489,7 +1490,7 @@ template <int _Np, typename>
 
     // _S_complement {{{2
     template <typename _Tp, typename... _As>
-      static inline constexpr _SimdTuple<_Tp, _As...>
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
       _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept
       {
        return __x._M_apply_per_chunk(
@@ -1500,7 +1501,7 @@ template <int _Np, typename>
 
     // _S_unary_minus {{{2
     template <typename _Tp, typename... _As>
-      static inline constexpr _SimdTuple<_Tp, _As...>
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
       _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept
       {
        return __x._M_apply_per_chunk(
@@ -1513,7 +1514,7 @@ template <int _Np, typename>
 
 #define _GLIBCXX_SIMD_FIXED_OP(name_, op_)                                                     \
     template <typename _Tp, typename... _As>                                                   \
-      static inline constexpr _SimdTuple<_Tp, _As...> name_(                                   \
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> name_(                  \
        const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y)                \
       {                                                                                        \
        return __x._M_apply_per_chunk(                                                         \
@@ -1536,7 +1537,7 @@ template <int _Np, typename>
 #undef _GLIBCXX_SIMD_FIXED_OP
 
     template <typename _Tp, typename... _As>
-      static inline constexpr _SimdTuple<_Tp, _As...>
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
       _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y)
       {
        return __x._M_apply_per_chunk(
@@ -1546,7 +1547,7 @@ template <int _Np, typename>
       }
 
     template <typename _Tp, typename... _As>
-      static inline constexpr _SimdTuple<_Tp, _As...>
+      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
       _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y)
       {
        return __x._M_apply_per_chunk(
@@ -1665,10 +1666,9 @@ template <int _Np, typename>
 #undef _GLIBCXX_SIMD_APPLY_ON_TUPLE
 
     template <typename _Tp, typename... _Abis>
-      static _SimdTuple<_Tp, _Abis...> _S_remquo(
-       const _SimdTuple<_Tp, _Abis...>& __x,
-       const _SimdTuple<_Tp, _Abis...>& __y,
-       __fixed_size_storage_t<int, _SimdTuple<_Tp, _Abis...>::_S_size()>* __z)
+      static inline _SimdTuple<_Tp, _Abis...>
+      _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x, const _SimdTuple<_Tp, _Abis...>& __y,
+               __fixed_size_storage_t<int, _SimdTuple<_Tp, _Abis...>::_S_size()>* __z)
       {
        return __x._M_apply_per_chunk(
                 [](auto __impl, const auto __xx, const auto __yy, auto& __zz)
@@ -1689,14 +1689,14 @@ template <int _Np, typename>
                 }, __exp);
       }
 
-#define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_)                                    \
-    template <typename _Tp, typename... _As>                                   \
-      static inline _MaskMember                                                \
-       _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept                \
-      {                                                                        \
-       return _M_test([](auto __impl,                                         \
-                         auto __xx) { return __impl._S_##name_(__xx); },      \
-                      __x);                                                   \
+#define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_)                                              \
+    template <typename _Tp, typename... _As>                                             \
+      static inline _MaskMember                                                          \
+       _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept                          \
+      {                                                                                  \
+       return _M_test([] (auto __impl, auto __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA  { \
+                return __impl._S_##name_(__xx);                                         \
+              }, __x);                                                                  \
       }
 
     _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf)