libstdc++: Annotate most lambdas with always_inline
author Matthias Kretz <m.kretz@gsi.de>
Sat, 14 Jan 2023 16:07:59 +0000 (17:07 +0100)
committer Matthias Kretz <m.kretz@gsi.de>
Thu, 16 Feb 2023 14:58:33 +0000 (15:58 +0100)
All of the annotated lambdas are simply a necessary means for
implementing these functions and should never result in an actual
function call. Many of these lambdas would go away if C++ had better
language support for packs.

Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
libstdc++-v3/ChangeLog:

PR libstdc++/108030
* include/experimental/bits/simd_detail.h: Define
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA.
* include/experimental/bits/simd.h: Annotate lambdas with
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA.
* include/experimental/bits/simd_builtin.h: Ditto.
* include/experimental/bits/simd_converter.h: Ditto.
* include/experimental/bits/simd_fixed_size.h: Ditto.
* include/experimental/bits/simd_math.h: Ditto.
* include/experimental/bits/simd_neon.h: Ditto.
* include/experimental/bits/simd_x86.h: Ditto.

libstdc++-v3/include/experimental/bits/simd.h
libstdc++-v3/include/experimental/bits/simd_builtin.h
libstdc++-v3/include/experimental/bits/simd_converter.h
libstdc++-v3/include/experimental/bits/simd_detail.h
libstdc++-v3/include/experimental/bits/simd_fixed_size.h
libstdc++-v3/include/experimental/bits/simd_math.h
libstdc++-v3/include/experimental/bits/simd_neon.h
libstdc++-v3/include/experimental/bits/simd_x86.h

index 3de966bbf2295b624061ddaa9cfbfa54beedf202..ffe72fa6ccf9e13a5affaa026cae7f4be06a4771 100644 (file)
@@ -609,28 +609,34 @@ template <size_t _Bytes>
          operator&(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
-             [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
+             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+               return __rhs._M_data[__i] & _M_data[__i];
+             });
          }
 
          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator|(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
-             [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
+             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+               return __rhs._M_data[__i] | _M_data[__i];
+             });
          }
 
          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator^(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
-             [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
+             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+               return __rhs._M_data[__i] ^ _M_data[__i];
+             });
          }
 
          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator~() const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
-             [&](auto __i) { return ~_M_data[__i]; });
+             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
          }
        };
        return _Ip{};
@@ -1391,7 +1397,7 @@ template <size_t _Np, bool _Sanitized>
     operator^=(const _BitMask& __b) & noexcept
     {
       __execute_n_times<_S_array_size>(
-       [&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
+       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
       return *this;
     }
 
@@ -1399,7 +1405,7 @@ template <size_t _Np, bool _Sanitized>
     operator|=(const _BitMask& __b) & noexcept
     {
       __execute_n_times<_S_array_size>(
-       [&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
+       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
       return *this;
     }
 
@@ -1407,7 +1413,7 @@ template <size_t _Np, bool _Sanitized>
     operator&=(const _BitMask& __b) & noexcept
     {
       __execute_n_times<_S_array_size>(
-       [&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
+       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
       return *this;
     }
 
@@ -1797,8 +1803,9 @@ template <size_t _Np, typename _Tp>
   __vector_broadcast(_Tp __x)
   {
     return __call_with_n_evaluations<_Np>(
-      [](auto... __xx) { return __vector_type_t<_Tp, _Np>{__xx...}; },
-      [&__x](int) { return __x; });
+      [](auto... __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+       return __vector_type_t<_Tp, _Np>{__xx...};
+      }, [&__x](int) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x; });
   }
 
 // }}}
@@ -2205,7 +2212,7 @@ template <int _Offset,
 #endif
        constexpr int _O = _Offset * __return_width;
        return __call_with_subscripts<__return_width, _O>(
-         __x, [](auto... __entries) {
+         __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return reinterpret_cast<_R>(_Up{__entries...});
          });
       }
@@ -2607,7 +2614,7 @@ template <typename _Tp, size_t _Width>
 
     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init)
       : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
-       [&](auto __i) { return __init.begin()[__i.value]; })) {}
+       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __init.begin()[__i.value]; })) {}
 
     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(const _SimdWrapper&)
@@ -2632,10 +2639,9 @@ template <typename _Tp, size_t _Width>
       _GLIBCXX_SIMD_INTRINSIC constexpr
       operator _SimdTuple<_Tp, _As...>() const
       {
-       const auto& dd = _M_data; // workaround for GCC7 ICE
-       return __generate_from_n_evaluations<sizeof...(_As),
-                                            _SimdTuple<_Tp, _As...>>([&](
-         auto __i) constexpr { return dd[int(__i)]; });
+       return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
+                [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+                { return _M_data[int(__i)]; });
       }
 
     _GLIBCXX_SIMD_INTRINSIC constexpr operator const _BuiltinType&() const
@@ -3192,21 +3198,19 @@ template <typename _Tp, int _Np>
   { return __x; }
 
 template <typename _Tp, typename _Ap>
-  _GLIBCXX_SIMD_INTRINSIC auto
+  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
   to_fixed_size(const simd<_Tp, _Ap>& __x)
   {
-    return simd<_Tp, simd_abi::fixed_size<simd_size_v<_Tp, _Ap>>>([&__x](
-      auto __i) constexpr { return __x[__i]; });
+    using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
+    return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
   }
 
 template <typename _Tp, typename _Ap>
-  _GLIBCXX_SIMD_INTRINSIC auto
+  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
   to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
   {
-    constexpr int _Np = simd_mask<_Tp, _Ap>::size();
-    fixed_size_simd_mask<_Tp, _Np> __r;
-    __execute_n_times<_Np>([&](auto __i) constexpr { __r[__i] = __x[__i]; });
-    return __r;
+    return {__private_init,
+           [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
   }
 
 // to_native {{{2
@@ -3225,7 +3229,9 @@ template <typename _Tp, size_t _Np>
   enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
   to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
   {
-    return native_simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; });
+    return native_simd_mask<_Tp>(
+            __private_init,
+            [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
   }
 
 // to_compatible {{{2
@@ -3242,7 +3248,10 @@ template <typename _Tp, size_t _Np>
   _GLIBCXX_SIMD_INTRINSIC
   enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
   to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
-  { return simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; }); }
+  {
+    return simd_mask<_Tp>(
+            [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
+  }
 
 // masked assignment [simd_mask.where] {{{1
 
@@ -3400,9 +3409,9 @@ template <typename _M, typename _Tp>
       _Impl::template _S_masked_cassign(                                       \
        __data(_M_k), __data(_M_value),                                        \
        __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)),          \
-       [](auto __impl, auto __lhs, auto __rhs) constexpr {                    \
-       return __impl.__name(__lhs, __rhs);                                    \
-       });                                                                    \
+       [](auto __impl, auto __lhs, auto __rhs)                                \
+         constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA                         \
+       { return __impl.__name(__lhs, __rhs); });                              \
     }                                                                          \
   static_assert(true)
     _GLIBCXX_SIMD_OP_(+, _S_plus);
@@ -3899,12 +3908,11 @@ template <typename _V, typename _Ap,
       }
     else if (__x._M_is_constprop())
       {
-       return __generate_from_n_evaluations<Parts, array<_V, Parts>>([&](
-         auto __i) constexpr {
-         return _V([&](auto __j) constexpr {
-           return __x[__i * _V::size() + __j];
-         });
-       });
+       return __generate_from_n_evaluations<Parts, array<_V, Parts>>(
+                [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+                            { return __x[__i * _V::size() + __j]; });
+                });
       }
     else if constexpr (
       __is_fixed_size_abi_v<_Ap>
@@ -3917,41 +3925,40 @@ template <typename _V, typename _Ap,
 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
       const __may_alias<_Tp>* const __element_ptr
        = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
-      return __generate_from_n_evaluations<Parts, array<_V, Parts>>([&](
-       auto __i) constexpr {
-       return _V(__element_ptr + __i * _V::size(), vector_aligned);
-      });
+      return __generate_from_n_evaluations<Parts, array<_V, Parts>>(
+              [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+              { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
 #else
       const auto& __xx = __data(__x);
-      return __generate_from_n_evaluations<Parts, array<_V, Parts>>([&](
-       auto __i) constexpr {
-       [[maybe_unused]] constexpr size_t __offset
-         = decltype(__i)::value * _V::size();
-       return _V([&](auto __j) constexpr {
-         constexpr _SizeConstant<__j + __offset> __k;
-         return __xx[__k];
-       });
-      });
+      return __generate_from_n_evaluations<Parts, array<_V, Parts>>(
+              [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                [[maybe_unused]] constexpr size_t __offset
+                  = decltype(__i)::value * _V::size();
+                return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                         constexpr _SizeConstant<__j + __offset> __k;
+                         return __xx[__k];
+                       });
+              });
 #endif
     }
   else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
     {
       // normally memcpy should work here as well
-      return __generate_from_n_evaluations<Parts, array<_V, Parts>>([&](
-       auto __i) constexpr { return __x[__i]; });
+      return __generate_from_n_evaluations<Parts, array<_V, Parts>>(
+              [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
     }
   else
     {
-      return __generate_from_n_evaluations<Parts, array<_V, Parts>>([&](
-       auto __i) constexpr {
-       if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
-         return _V([&](auto __j) constexpr {
-           return __x[__i * _V::size() + __j];
-         });
-       else
-         return _V(__private_init,
-                   __extract_part<decltype(__i)::value, Parts>(__data(__x)));
-      });
+      return __generate_from_n_evaluations<Parts, array<_V, Parts>>(
+              [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
+                  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                           return __x[__i * _V::size() + __j];
+                         });
+                else
+                  return _V(__private_init,
+                            __extract_part<decltype(__i)::value, Parts>(__data(__x)));
+              });
     }
   }
 
@@ -3975,22 +3982,22 @@ template <typename _V, typename _Ap,
     else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
       {
        const bitset __bits = __x.__to_bitset();
-       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
-         auto __i) constexpr {
-         constexpr size_t __offset = __i * _V::size();
-         return _V(__bitset_init, (__bits >> __offset).to_ullong());
-       });
+       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
+                [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  constexpr size_t __offset = __i * _V::size();
+                  return _V(__bitset_init, (__bits >> __offset).to_ullong());
+                });
       }
     else
       {
-       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
-         auto __i) constexpr {
-         constexpr size_t __offset = __i * _V::size();
-         return _V(
-           __private_init, [&](auto __j) constexpr {
-             return __x[__j + __offset];
-           });
-       });
+       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
+                [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  constexpr size_t __offset = __i * _V::size();
+                  return _V(__private_init,
+                            [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                              return __x[__j + __offset];
+                            });
+                });
       }
   }
 
@@ -4008,12 +4015,14 @@ template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
     using _V = __deduced_simd<_Tp, _N0>;
 
     if (__x._M_is_constprop())
-      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
-       auto __i) constexpr {
-       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
-       constexpr size_t __offset = _SL::_S_before(__i);
-       return _Vi([&](auto __j) constexpr { return __x[__offset + __j]; });
-      });
+      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
+              [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
+                constexpr size_t __offset = _SL::_S_before(__i);
+                return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                         return __x[__offset + __j];
+                       });
+              });
     else if constexpr (_Np == _N0)
       {
        static_assert(sizeof...(_Sizes) == 1);
@@ -4080,28 +4089,28 @@ template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
     const __may_alias<_Tp>* const __element_ptr
       = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
-    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
-      auto __i) constexpr {
-      using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
-      constexpr size_t __offset = _SL::_S_before(__i);
-      constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
-      constexpr size_t __a
-       = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
-      constexpr size_t __b = ((__a - 1) & __a) ^ __a;
-      constexpr size_t __alignment = __b == 0 ? __a : __b;
-      return _Vi(__element_ptr + __offset, overaligned<__alignment>);
-    });
+    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
+            [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+              using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
+              constexpr size_t __offset = _SL::_S_before(__i);
+              constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
+              constexpr size_t __a
+                = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
+              constexpr size_t __b = ((__a - 1) & __a) ^ __a;
+              constexpr size_t __alignment = __b == 0 ? __a : __b;
+              return _Vi(__element_ptr + __offset, overaligned<__alignment>);
+            });
 #else
-    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
-      auto __i) constexpr {
-      using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
-      const auto& __xx = __data(__x);
-      using _Offset = decltype(_SL::_S_before(__i));
-      return _Vi([&](auto __j) constexpr {
-       constexpr _SizeConstant<_Offset::value + __j> __k;
-       return __xx[__k];
-      });
-    });
+    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
+            [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+              using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
+              const auto& __xx = __data(__x);
+              using _Offset = decltype(_SL::_S_before(__i));
+              return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                       constexpr _SizeConstant<_Offset::value + __j> __k;
+                       return __xx[__k];
+                     });
+            });
 #endif
   }
 
@@ -4143,8 +4152,9 @@ template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
       return simd_cast<_Rp>(__xs...);
     else if ((... && __xs._M_is_constprop()))
       return simd<_Tp,
-                 simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>([&](
-       auto __i) constexpr { return __subscript_in_pack<__i>(__xs...); });
+                 simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
+              [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+              { return __subscript_in_pack<__i>(__xs...); });
     else
       {
        _Rp __r{};
@@ -4160,9 +4170,10 @@ template <typename _Tp, typename _Abi, size_t _Np>
   _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
   concat(const array<simd<_Tp, _Abi>, _Np>& __x)
   {
-    return __call_with_subscripts<_Np>(__x, [](const auto&... __xs) {
-      return concat(__xs...);
-    });
+    return __call_with_subscripts<_Np>(
+            __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+              return concat(__xs...);
+            });
   }
 
 // }}}
@@ -4695,7 +4706,7 @@ template <typename _Tp, typename _Abi>
       simd_mask(_PrivateInit, _Fp&& __gen)
       : _M_data()
       {
-       __execute_n_times<size()>([&](auto __i) constexpr {
+       __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          _Impl::_S_set(_M_data, __i, __gen(__i));
        });
       }
@@ -4881,7 +4892,9 @@ template <typename _Tp, typename _Abi>
     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
       {
        const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
-         __k, [](auto... __elements) { return ((__elements != 0) + ...); });
+                         __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                           return ((__elements != 0) + ...);
+                         });
        if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
          return __r;
       }
@@ -4896,8 +4909,11 @@ template <typename _Tp, typename _Abi>
       {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        const size_t _Idx = __call_with_n_evaluations<_Np>(
-         [](auto... __indexes) { return std::min({__indexes...}); },
-         [&](auto __i) { return __k[__i] ? +__i : _Np; });
+                             [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                               return std::min({__indexes...});
+                             }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                               return __k[__i] ? +__i : _Np;
+                             });
        if (_Idx >= _Np)
          __invoke_ub("find_first_set(empty mask) is UB");
        if (__builtin_constant_p(_Idx))
@@ -4914,8 +4930,11 @@ template <typename _Tp, typename _Abi>
       {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        const int _Idx = __call_with_n_evaluations<_Np>(
-         [](auto... __indexes) { return std::max({__indexes...}); },
-         [&](auto __i) { return __k[__i] ? int(__i) : -1; });
+                          [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                            return std::max({__indexes...});
+                          }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                            return __k[__i] ? int(__i) : -1;
+                          });
        if (_Idx < 0)
          __invoke_ub("find_first_set(empty mask) is UB");
        if (__builtin_constant_p(_Idx))
index 8851da69800f3bbdb65f725d0387b866918ae727..792439a81bf20db8032e24dadb18ceb06becc494 100644 (file)
@@ -194,8 +194,11 @@ template <unsigned __shift, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
        using _Up = decltype(__w);
        return __intrin_bitcast<_Tp>(
          __call_with_n_evaluations<(sizeof(_Tp) - __shift) / __chunksize>(
-           [](auto... __chunks) { return _Up{__chunks...}; },
-           [&](auto __i) { return __w[__shift / __chunksize + __i]; }));
+           [](auto... __chunks) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+             return _Up{__chunks...};
+           }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+             return __w[__shift / __chunksize + __i];
+           }));
       }
   }
 
@@ -225,7 +228,9 @@ template <int _Index, int _Total, int _Combine, typename _Tp, size_t _Np>
        // by _Total");
        if (__x._M_is_constprop())
          return __generate_from_n_evaluations<__return_size, _R>(
-           [&](auto __i) { return __x[__values_to_skip + __i]; });
+           [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+             return __x[__values_to_skip + __i];
+           });
        if constexpr (_Index == 0 && _Total == 1)
          return __x;
        else if constexpr (_Index == 0)
@@ -570,7 +575,9 @@ template <typename _To,
        constexpr auto _Np
          = _NParts == 0 ? _FromVT::_S_partial_width - _Offset : _NParts;
        return __generate_from_n_evaluations<_Np, array<_To, _Np>>(
-         [&](auto __i) { return static_cast<_To>(__v[__i + _Offset]); });
+                [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return static_cast<_To>(__v[__i + _Offset]);
+                });
       }
     else
       {
@@ -611,13 +618,14 @@ template <typename _To,
              return __vector_bitcast<_FromT, decltype(__n)::value>(__vv);
            };
            [[maybe_unused]] const auto __vi = __to_intrin(__v);
-           auto&& __make_array = [](auto __x0, [[maybe_unused]] auto __x1) {
-             if constexpr (_Np == 1)
-               return _R{__intrin_bitcast<_To>(__x0)};
-             else
-               return _R{__intrin_bitcast<_To>(__x0),
-                         __intrin_bitcast<_To>(__x1)};
-           };
+           auto&& __make_array
+               = [](auto __x0, [[maybe_unused]] auto __x1) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                 if constexpr (_Np == 1)
+                   return _R{__intrin_bitcast<_To>(__x0)};
+                 else
+                   return _R{__intrin_bitcast<_To>(__x0),
+                             __intrin_bitcast<_To>(__x1)};
+               };
 
            if constexpr (_Np == 0)
              return _R{};
@@ -642,7 +650,7 @@ template <typename _To,
                      = __convert_all<__vector_type16_t<int>, _Np>(
                        __adjust(_SizeConstant<_Np * 4>(), __v));
                    return __generate_from_n_evaluations<_Np, _R>(
-                     [&](auto __i) {
+                     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                        return __vector_convert<_To>(__as_wrapper(__ints[__i]));
                      });
                  }
@@ -687,36 +695,40 @@ template <typename _To,
                  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[1], __vv[1])),
                  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[1], __vv[1]))};
                if constexpr (sizeof(_ToT) == 4)
-                 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
-                   return __vector_convert<_To>(
-                     _SimdWrapper<int, 4>(__vvvv[__i] >> 24));
-                 });
+                 return __generate_from_n_evaluations<_Np, _R>(
+                          [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                            return __vector_convert<_To>(
+                                     _SimdWrapper<int, 4>(__vvvv[__i] >> 24));
+                          });
                else if constexpr (is_integral_v<_ToT>)
-                 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
-                   const auto __signbits = __to_intrin(__vvvv[__i / 2] >> 31);
-                   const auto __sx32 = __to_intrin(__vvvv[__i / 2] >> 24);
-                   return __vector_bitcast<_ToT>(
-                     __i % 2 == 0 ? _mm_unpacklo_epi32(__sx32, __signbits)
-                                  : _mm_unpackhi_epi32(__sx32, __signbits));
-                 });
+                 return __generate_from_n_evaluations<_Np, _R>(
+                          [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                            const auto __signbits = __to_intrin(__vvvv[__i / 2] >> 31);
+                            const auto __sx32 = __to_intrin(__vvvv[__i / 2] >> 24);
+                            return __vector_bitcast<_ToT>(
+                                     __i % 2 == 0 ? _mm_unpacklo_epi32(__sx32, __signbits)
+                                                  : _mm_unpackhi_epi32(__sx32, __signbits));
+                          });
                else
-                 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
-                   const _SimdWrapper<int, 4> __int4 = __vvvv[__i / 2] >> 24;
-                   return __vector_convert<_To>(
-                     __i % 2 == 0 ? __int4
-                                  : _SimdWrapper<int, 4>(
-                                    _mm_unpackhi_epi64(__to_intrin(__int4),
-                                                       __to_intrin(__int4))));
-                 });
+                 return __generate_from_n_evaluations<_Np, _R>(
+                          [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                            const _SimdWrapper<int, 4> __int4 = __vvvv[__i / 2] >> 24;
+                            return __vector_convert<_To>(
+                                     __i % 2 == 0 ? __int4
+                                                  : _SimdWrapper<int, 4>(
+                                                      _mm_unpackhi_epi64(__to_intrin(__int4),
+                                                                         __to_intrin(__int4))));
+                          });
              }
            else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 4)
              {
                const auto __shorts = __convert_all<__vector_type16_t<
                  conditional_t<is_signed_v<_FromT>, short, unsigned short>>>(
                  __adjust(_SizeConstant<(_Np + 1) / 2 * 8>(), __v));
-               return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
-                 return __convert_all<_To>(__shorts[__i / 2])[__i % 2];
-               });
+               return __generate_from_n_evaluations<_Np, _R>(
+                        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                          return __convert_all<_To>(__shorts[__i / 2])[__i % 2];
+                        });
              }
            else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 8
                               && is_signed_v<_FromT> && is_integral_v<_ToT>)
@@ -736,9 +748,10 @@ template <typename _To,
                     __vector_bitcast<int>(
                       _mm_unpackhi_epi32(_mm_srai_epi32(__vv[1], 16),
                                          _mm_srai_epi32(__vv[1], 31)))};
-               return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
-                 return __vector_bitcast<_ToT>(__vvvv[__i]);
-               });
+               return __generate_from_n_evaluations<_Np, _R>(
+                        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                          return __vector_bitcast<_ToT>(__vvvv[__i]);
+                        });
              }
            else if constexpr (sizeof(_FromT) <= 2 && sizeof(_ToT) == 8)
              {
@@ -747,9 +760,10 @@ template <typename _To,
                    is_signed_v<_FromT> || is_floating_point_v<_ToT>, int,
                    unsigned int>>>(
                    __adjust(_SizeConstant<(_Np + 1) / 2 * 4>(), __v));
-               return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
-                 return __convert_all<_To>(__ints[__i / 2])[__i % 2];
-               });
+               return __generate_from_n_evaluations<_Np, _R>(
+                        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                          return __convert_all<_To>(__ints[__i / 2])[__i % 2];
+                        });
              }
            else
              __assert_unreachable<_To>();
@@ -779,14 +793,14 @@ template <typename _To,
                __extract_part<_Offset, _FromVT::_S_partial_width,
                               _ToVT::_S_full_size>(__v))};
            else
-             return __generate_from_n_evaluations<_Np, _R>([&](
-               auto __i) constexpr {
-               auto __part
-                 = __extract_part<__i * _ToVT::_S_full_size + _Offset,
-                                  _FromVT::_S_partial_width,
-                                  _ToVT::_S_full_size>(__v);
-               return __vector_convert<_To>(__part);
-             });
+             return __generate_from_n_evaluations<_Np, _R>(
+                      [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                        auto __part
+                          = __extract_part<__i * _ToVT::_S_full_size + _Offset,
+                                           _FromVT::_S_partial_width,
+                                           _ToVT::_S_full_size>(__v);
+                        return __vector_convert<_To>(__part);
+                      });
          }
        else if constexpr (_Offset == 0)
          return array<_To, 1>{__vector_convert<_To>(__v)};
@@ -1017,8 +1031,9 @@ template <int _UsedBytes>
        else
          {
            constexpr auto __size = _S_size<_Tp>;
-           _GLIBCXX_SIMD_USE_CONSTEXPR auto __r = __generate_vector<_UV>(
-             [](auto __i) constexpr { return __i < __size ? -1 : 0; });
+           _GLIBCXX_SIMD_USE_CONSTEXPR auto __r
+             = __generate_vector<_UV>([](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+                                      { return __i < __size ? -1 : 0; });
            return __r;
          }
       }
@@ -1208,7 +1223,7 @@ template <int _UsedBytes>
            if constexpr (is_integral_v<typename _TVT::value_type>)
              return __x
                     | __generate_vector<_Tp, _S_full_size<_Tp>>(
-                      [](auto __i) -> _Tp {
+                      [](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA -> _Tp {
                         if (__i < _Np)
                           return 0;
                         else
@@ -1348,26 +1363,27 @@ struct _CommonImplBuiltin
        }
       else
        {
-         __execute_n_times<__div_roundup(_Np, 4)>([&](auto __i) {
-           constexpr int __offset = __i * 4;
-           constexpr int __remaining = _Np - __offset;
-           if constexpr (__remaining > 4 && __remaining <= 7)
-             {
-               const _ULLong __bool7
-                 = (__x.template _M_extract<__offset>()._M_to_bits()
-                    * 0x40810204081ULL)
-                   & 0x0101010101010101ULL;
-               _S_store<__remaining>(__bool7, __mem + __offset);
-             }
-           else if constexpr (__remaining >= 4)
-             {
-               int __bits = __x.template _M_extract<__offset>()._M_to_bits();
-               if constexpr (__remaining > 7)
-                 __bits &= 0xf;
-               const int __bool4 = (__bits * 0x204081) & 0x01010101;
-               _S_store<4>(__bool4, __mem + __offset);
-             }
-         });
+         __execute_n_times<__div_roundup(_Np, 4)>(
+           [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+             constexpr int __offset = __i * 4;
+             constexpr int __remaining = _Np - __offset;
+             if constexpr (__remaining > 4 && __remaining <= 7)
+               {
+                 const _ULLong __bool7
+                   = (__x.template _M_extract<__offset>()._M_to_bits()
+                        * 0x40810204081ULL)
+                       & 0x0101010101010101ULL;
+                 _S_store<__remaining>(__bool7, __mem + __offset);
+               }
+             else if constexpr (__remaining >= 4)
+               {
+                 int __bits = __x.template _M_extract<__offset>()._M_to_bits();
+                 if constexpr (__remaining > 7)
+                   __bits &= 0xf;
+                 const int __bool4 = (__bits * 0x204081) & 0x01010101;
+                 _S_store<4>(__bool4, __mem + __offset);
+               }
+           });
        }
     }
 
@@ -1434,13 +1450,13 @@ template <typename _Abi, typename>
       inline static constexpr _SimdMember<_Tp> _S_generator(_Fp&& __gen,
                                                            _TypeTag<_Tp>)
       {
-       return __generate_vector<_Tp, _S_full_size<_Tp>>([&](
-         auto __i) constexpr {
-         if constexpr (__i < _S_size<_Tp>)
-           return __gen(__i);
-         else
-           return 0;
-       });
+       return __generate_vector<_Tp, _S_full_size<_Tp>>(
+                [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  if constexpr (__i < _S_size<_Tp>)
+                    return __gen(__i);
+                  else
+                    return 0;
+                });
       }
 
     // _S_load {{{2
@@ -1455,10 +1471,10 @@ template <typename _Abi, typename>
                                                                      : 16;
        constexpr size_t __bytes_to_load = sizeof(_Up) * _Np;
        if constexpr (sizeof(_Up) > 8)
-         return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>([&](
-           auto __i) constexpr {
-           return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
-         });
+         return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>(
+                  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                    return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
+                  });
        else if constexpr (is_same_v<_Up, _Tp>)
          return _CommonImpl::template _S_load<_Tp, _S_full_size<_Tp>,
                                               _Np * sizeof(_Tp)>(__mem);
@@ -1470,13 +1486,12 @@ template <typename _Abi, typename>
            constexpr size_t __n_loads = __bytes_to_load / __max_load_size;
            constexpr size_t __elements_per_load = _Np / __n_loads;
            return __call_with_n_evaluations<__n_loads>(
-             [](auto... __uncvted) {
-               return __convert<_SimdMember<_Tp>>(__uncvted...);
-             },
-             [&](auto __i) {
-               return _CommonImpl::template _S_load<_Up, __elements_per_load>(
-                 __mem + __i * __elements_per_load);
-             });
+                    [](auto... __uncvted) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                      return __convert<_SimdMember<_Tp>>(__uncvted...);
+                    }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                      return _CommonImpl::template _S_load<_Up, __elements_per_load>(
+                                                     __mem + __i * __elements_per_load);
+                    });
          }
        else if constexpr (__bytes_to_load % (__max_load_size / 2) == 0
                           && __max_load_size > 16)
@@ -1485,20 +1500,19 @@ template <typename _Abi, typename>
              = __bytes_to_load / (__max_load_size / 2);
            constexpr size_t __elements_per_load = _Np / __n_loads;
            return __call_with_n_evaluations<__n_loads>(
-             [](auto... __uncvted) {
-               return __convert<_SimdMember<_Tp>>(__uncvted...);
-             },
-             [&](auto __i) {
-               return _CommonImpl::template _S_load<_Up, __elements_per_load>(
-                 __mem + __i * __elements_per_load);
-             });
+                    [](auto... __uncvted) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                      return __convert<_SimdMember<_Tp>>(__uncvted...);
+                    }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                      return _CommonImpl::template _S_load<_Up, __elements_per_load>(
+                                                     __mem + __i * __elements_per_load);
+                    });
          }
        else // e.g. int[] -> <char, 9>
          return __call_with_subscripts(
-           __mem, make_index_sequence<_Np>(), [](auto... __args) {
-             return __vector_type_t<_Tp, _S_full_size<_Tp>>{
-               static_cast<_Tp>(__args)...};
-           });
+           __mem, make_index_sequence<_Np>(),
+                  [](auto... __args) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                    return __vector_type_t<_Tp, _S_full_size<_Tp>>{static_cast<_Tp>(__args)...};
+                  });
       }
 
     // _S_masked_load {{{2
@@ -1507,9 +1521,10 @@ template <typename _Abi, typename>
       _S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k,
                     const _Up* __mem) noexcept
       {
-       _BitOps::_S_bit_iteration(_MaskImpl::_S_to_bits(__k), [&](auto __i) {
-         __merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
-       });
+       _BitOps::_S_bit_iteration(_MaskImpl::_S_to_bits(__k),
+                                 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                                   __merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
+                                 });
        return __merge;
       }
 
@@ -1523,7 +1538,7 @@ template <typename _Abi, typename>
        constexpr size_t __max_store_size
          = _SuperImpl::template _S_max_store_size<_Up>;
        if constexpr (sizeof(_Up) > 8)
-         __execute_n_times<_Np>([&](auto __i) constexpr {
+         __execute_n_times<_Np>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __mem[__i] = __v[__i];
          });
        else if constexpr (is_same_v<_Up, _Tp>)
@@ -1540,9 +1555,10 @@ template <typename _Abi, typename>
            using _V = __vector_type_t<_Up, __vsize>;
            const array<_V, __stores> __converted
              = __convert_all<_V, __stores>(__v);
-           __execute_n_times<__full_stores>([&](auto __i) constexpr {
-             _CommonImpl::_S_store(__converted[__i], __mem + __i * __vsize);
-           });
+           __execute_n_times<__full_stores>(
+             [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+               _CommonImpl::_S_store(__converted[__i], __mem + __i * __vsize);
+             });
            if constexpr (__full_stores < __stores)
              _CommonImpl::template _S_store<(_Np - __full_stores * __vsize)
                                             * sizeof(_Up)>(
@@ -1557,7 +1573,8 @@ template <typename _Abi, typename>
                            _MaskMember<_Tp> __k)
       {
        _BitOps::_S_bit_iteration(
-         _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
+         _MaskImpl::_S_to_bits(__k),
+         [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __mem[__i] = __v[__i];
          });
       }
@@ -1579,7 +1596,7 @@ template <typename _Abi, typename>
            _Up> || (is_integral_v<_Tp> && is_integral_v<_Up> && sizeof(_Tp) == sizeof(_Up)))
          {
            // bitwise or no conversion, reinterpret:
-           const _MaskMember<_Up> __kk = [&]() {
+           const _MaskMember<_Up> __kk = [&]() _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
              if constexpr (__is_bitmask_v<decltype(__k)>)
                return _MaskMember<_Up>(__k._M_data);
              else
@@ -1618,7 +1635,7 @@ template <typename _Abi, typename>
                constexpr size_t _NParts = _S_full_size<_Tp> / _UW_size;
                const array<_UV, _NAllStores> __converted
                  = __convert_all<_UV, _NAllStores>(__v);
-               __execute_n_times<_NFullStores>([&](auto __i) {
+               __execute_n_times<_NFullStores>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  _SuperImpl::_S_masked_store_nocvt(
                    _UW(__converted[__i]), __mem + __i * _UW_size,
                    _UAbi::_MaskImpl::template _S_convert<
@@ -1637,10 +1654,10 @@ template <typename _Abi, typename>
              }
          }
        else
-         _BitOps::_S_bit_iteration(
-           _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
-             __mem[__i] = static_cast<_Up>(__v[__i]);
-           });
+         _BitOps::_S_bit_iteration(_MaskImpl::_S_to_bits(__k),
+                                   [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                                     __mem[__i] = static_cast<_Up>(__v[__i]);
+                                   });
       }
 
     // _S_complement {{{2
@@ -1932,7 +1949,9 @@ template <typename _Abi, typename>
       static _Tp _S_##__name(const _Tp& __x, const _More&... __more)           \
       {                                                                        \
        return __generate_vector<_Tp>(                                         \
-         [&](auto __i) { return __name(__x[__i], __more[__i]...); });         \
+                [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {            \
+                  return __name(__x[__i], __more[__i]...);                    \
+                });                                                           \
       }
 
 #define _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET(__name)                            \
@@ -1941,23 +1960,25 @@ template <typename _Abi, typename>
                                                 const _More&... __more)       \
       {                                                                        \
        return __generate_vector<_Tp>(                                         \
-         [&](auto __i) { return __name(__x[__i], __more[__i]...); });         \
-      }
-
-#define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name)                   \
-    template <typename _Tp, typename... _More>                                 \
-      static auto _S_##__name(const _Tp& __x, const _More&... __more)          \
-      {                                                                        \
-       return __fixed_size_storage_t<_RetTp,                                  \
-                                     _VectorTraits<_Tp>::_S_partial_width>::  \
-         _S_generate([&](auto __meta) constexpr {                             \
-           return __meta._S_generator(                                        \
-             [&](auto __i) {                                                  \
-               return __name(__x[__meta._S_offset + __i],                     \
-                             __more[__meta._S_offset + __i]...);              \
-             },                                                               \
-             static_cast<_RetTp*>(nullptr));                                  \
-         });                                                                  \
+                [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {            \
+                  return __name(__x[__i], __more[__i]...);                    \
+                });                                                           \
+      }
+
 // _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name) defines a static member
 // template _S_<__name>(__x, __more...). It returns the result of
 // __fixed_size_storage_t<_RetTp, _VectorTraits<_Tp>::_S_partial_width>::
 // _S_generate, passing a callback that receives a __meta object and calls
 // __meta._S_generator with (a) a per-index lambda evaluating
 // __name(__x[__meta._S_offset + __i], __more[__meta._S_offset + __i]...) and
 // (b) a null _RetTp* (presumably only a type tag -- TODO confirm).
 // Both lambdas carry _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA.
+#define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name)                          \
+    template <typename _Tp, typename... _More>                                        \
+      static auto _S_##__name(const _Tp& __x, const _More&... __more)                 \
+      {                                                                               \
+       return __fixed_size_storage_t<_RetTp,                                         \
+                                     _VectorTraits<_Tp>::_S_partial_width>::         \
+         _S_generate([&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \
+           return __meta._S_generator(                                               \
+             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {                      \
+               return __name(__x[__meta._S_offset + __i],                            \
+                             __more[__meta._S_offset + __i]...);                     \
+             },                                                                      \
+             static_cast<_RetTp*>(nullptr));                                         \
+         });                                                                         \
       }
 
     _GLIBCXX_SIMD_MATH_FALLBACK(acos)
@@ -2010,7 +2031,7 @@ template <typename _Abi, typename>
       _S_remquo(const _Tp __x, const _Tp __y,
                __fixed_size_storage_t<int, _TVT::_S_partial_width>* __z)
       {
-       return __generate_vector<_Tp>([&](auto __i) {
+       return __generate_vector<_Tp>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          int __tmp;
          auto __r = remquo(__x[__i], __y[__i], &__tmp);
          __z->_M_set(__i, __tmp);
@@ -2423,7 +2444,7 @@ template <typename _Abi, typename>
   #endif // _GLIBCXX_SIMD_X86INTRIN
       else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
        return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
-                                           [](auto... __l) {
+                                           [](auto... __l) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                                              return __make_wrapper<int>(__l...);
                                            })};
       else
@@ -2554,13 +2575,13 @@ struct _MaskImplBuiltinMixin
     _S_to_maskvector(_BitMask<_Np, _Sanitized> __x)
     {
       static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
-      return __generate_vector<__vector_type_t<_Up, _ToN>>([&](
-       auto __i) constexpr {
-       if constexpr (__i < _Np)
-         return __x[__i] ? ~_Up() : _Up();
-       else
-         return _Up();
-      });
+      return __generate_vector<__vector_type_t<_Up, _ToN>>(
+              [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                if constexpr (__i < _Np)
+                  return __x[__i] ? ~_Up() : _Up();
+                else
+                  return _Up();
+              });
     }
 
   template <typename _Up, size_t _UpN = 0, typename _Tp, size_t _Np,
@@ -2601,13 +2622,13 @@ struct _MaskImplBuiltinMixin
          -1, -1, -1, -1, -1>(__y); else
          */
          {
-           return __generate_vector<__vector_type_t<_Up, _ToN>>([&](
-             auto __i) constexpr {
-             if constexpr (__i < _Np)
-               return _Up(__x[__i.value]);
-             else
-               return _Up();
-           });
+           return __generate_vector<__vector_type_t<_Up, _ToN>>(
+                    [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                      if constexpr (__i < _Np)
+                        return _Up(__x[__i.value]);
+                      else
+                        return _Up();
+                    });
          }
        }
     }
@@ -2625,7 +2646,9 @@ struct _MaskImplBuiltinMixin
        = __vector_bitcast<_Up>(__x) >> (sizeof(_Up) * __CHAR_BIT__ - 1);
       _ULLong __r = 0;
       __execute_n_times<_Np>(
-       [&](auto __i) { __r |= _ULLong(__bools[__i.value]) << __i; });
+       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+         __r |= _ULLong(__bools[__i.value]) << __i;
+       });
       return __r;
     }
 
@@ -2677,9 +2700,10 @@ template <typename _Abi, typename>
            return __bools > 0;
          }
        else
-         return __generate_vector<_I, _S_size<_Tp>>([&](auto __i) constexpr {
-           return __mem[__i] ? ~_I() : _I();
-         });
+         return __generate_vector<_I, _S_size<_Tp>>(
+                  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                    return __mem[__i] ? ~_I() : _I();
+                  });
       }
 
     // }}}
@@ -2752,7 +2776,7 @@ template <typename _Abi, typename>
        // AVX(2) has 32/64 bit maskload, but nothing at 8 bit granularity
        auto __tmp = __wrapper_bitcast<__int_for_sizeof_t<_Tp>>(__merge);
        _BitOps::_S_bit_iteration(_SuperImpl::_S_to_bits(__mask),
-                                 [&](auto __i) {
+                                 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                                    __tmp._M_set(__i, -__mem[__i]);
                                  });
        __merge = __wrapper_bitcast<_Tp>(__tmp);
@@ -2764,7 +2788,7 @@ template <typename _Abi, typename>
       _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __v,
                                                   bool* __mem) noexcept
       {
-       __execute_n_times<_Np>([&](auto __i) constexpr {
+       __execute_n_times<_Np>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          __mem[__i] = __v[__i];
        });
       }
@@ -2775,10 +2799,10 @@ template <typename _Abi, typename>
       _S_masked_store(const _SimdWrapper<_Tp, _Np> __v, bool* __mem,
                      const _SimdWrapper<_Tp, _Np> __k) noexcept
       {
-       _BitOps::_S_bit_iteration(
-         _SuperImpl::_S_to_bits(__k), [&](auto __i) constexpr {
-           __mem[__i] = __v[__i];
-         });
+       _BitOps::_S_bit_iteration(_SuperImpl::_S_to_bits(__k),
+                                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                                   __mem[__i] = __v[__i];
+                                 });
       }
 
     // _S_from_bitmask{{{2
@@ -2845,7 +2869,7 @@ template <typename _Abi, typename>
              {
                __k = __generate_from_n_evaluations<_Np,
                                                    __vector_type_t<_Tp, _Np>>(
-                 [&](auto __j) {
+                 [&](auto __j) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                    if (__i == __j)
                      return _Tp(-__x);
                    else
@@ -2890,7 +2914,8 @@ template <typename _Abi, typename>
       {
        return __call_with_subscripts(
          __data(__k), make_index_sequence<_S_size<_Tp>>(),
-         [](const auto... __ent) constexpr { return (... && !(__ent == 0)); });
+         [](const auto... __ent) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+         { return (... && !(__ent == 0)); });
       }
 
     // }}}
@@ -2901,7 +2926,8 @@ template <typename _Abi, typename>
       {
        return __call_with_subscripts(
          __data(__k), make_index_sequence<_S_size<_Tp>>(),
-         [](const auto... __ent) constexpr { return (... || !(__ent == 0)); });
+         [](const auto... __ent) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+         { return (... || !(__ent == 0)); });
       }
 
     // }}}
@@ -2912,7 +2938,8 @@ template <typename _Abi, typename>
       {
        return __call_with_subscripts(
          __data(__k), make_index_sequence<_S_size<_Tp>>(),
-         [](const auto... __ent) constexpr { return (... && (__ent == 0)); });
+         [](const auto... __ent) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+         { return (... && (__ent == 0)); });
       }
 
     // }}}
index 00b91c099ee70244e7f81298c5aa9e2aa686e1e6..3160e25163209df5a614c295c9d023b03a69ac40 100644 (file)
@@ -121,7 +121,7 @@ template <typename _From, typename _To, int _Np>
        {
          return __call_with_subscripts(
            __x, make_index_sequence<_Np>(),
-           [](auto... __values) constexpr->_Ret {
+           [](auto... __values) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA -> _Ret {
              return __make_simd_tuple<_To, decltype((void) __values,
                                                     simd_abi::scalar())...>(
                static_cast<_To>(__values)...);
@@ -233,7 +233,9 @@ template <typename _From, typename _To, int _Np>
          static_assert(_Ret::_FirstAbi::template _S_is_partial<_To>);
          return _Ret{__generate_from_n_evaluations<
            _Np, typename _VectorTraits<typename _Ret::_FirstType>::type>(
-           [&](auto __i) { return static_cast<_To>(__x[__i]); })};
+           [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+             return static_cast<_To>(__x[__i]);
+           })};
        }
       else
        {
@@ -241,7 +243,7 @@ template <typename _From, typename _To, int _Np>
          constexpr auto __n
            = __div_roundup(_Ret::_S_first_size, _Arg::_S_first_size);
          return __call_with_n_evaluations<__n>(
-           [&__x](auto... __uncvted) {
+           [&__x](auto... __uncvted) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
              // assuming _Arg Abi tags for all __i are _Arg::_FirstAbi
              _SimdConverter<_From, typename _Arg::_FirstAbi, _To,
                             typename _Ret::_FirstAbi>
@@ -255,8 +257,9 @@ template <typename _From, typename _To, int _Np>
                    _From, simd_abi::fixed_size<_Np - _Ret::_S_first_size>, _To,
                    simd_abi::fixed_size<_Np - _Ret::_S_first_size>>()(
                    __simd_tuple_pop_front<_Ret::_S_first_size>(__x))};
-           },
-           [&__x](auto __i) { return __get_tuple_at<__i>(__x); });
+           }, [&__x](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+             return __get_tuple_at<__i>(__x);
+           });
        }
     }
   };
@@ -322,13 +325,14 @@ template <typename _From, int _Np, typename _To, typename _Ap>
        return __vector_convert<__vector_type_t<_To, _Np>>(__x.first);
       else if constexpr (_Arg::_S_is_homogeneous)
        return __call_with_n_evaluations<_Arg::_S_tuple_size>(
-         [](auto... __members) {
+         [](auto... __members) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            if constexpr ((is_convertible_v<decltype(__members), _To> && ...))
              return __vector_type_t<_To, _Np>{static_cast<_To>(__members)...};
            else
              return __vector_convert<__vector_type_t<_To, _Np>>(__members...);
-         },
-         [&](auto __i) { return __get_tuple_at<__i>(__x); });
+         }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+           return __get_tuple_at<__i>(__x);
+         });
       else if constexpr (__fixed_size_storage_t<_To, _Np>::_S_tuple_size == 1)
        {
          _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To,
@@ -340,7 +344,7 @@ template <typename _From, int _Np, typename _To, typename _Ap>
        {
          const _SimdWrapper<_From, _Np> __xv
            = __generate_from_n_evaluations<_Np, __vector_type_t<_From, _Np>>(
-             [&](auto __i) { return __x[__i]; });
+               [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
          return __vector_convert<__vector_type_t<_To, _Np>>(__xv);
        }
     }
index 8cabc504863f499264377874dcc6c5ae375f0a0f..a0ad10efe0f6adb0604fd3db367d7867601bc7e4 100644 (file)
 // Function decoration macros.
 // _GLIBCXX_SIMD_INTRINSIC: `inline` plus the GNU always_inline and artificial
 // attributes.
 #define _GLIBCXX_SIMD_INTRINSIC                                                \
   [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
 // _GLIBCXX_SIMD_ALWAYS_INLINE: `inline` plus the GNU always_inline attribute.
 #define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
 // _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA: always_inline in GNU __attribute__
 // spelling (no `inline` keyword). It is written inside a lambda declarator,
 // after the parameter list and any `constexpr`, and before a trailing return
 // type or the body.
+#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
 // Branch hints: wrap __builtin_expect with an expected value of 0 / 1.
 #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
 #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
 
 // Opt-out: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, forced inlining is
 // dropped. The two function macros become plain `inline` and the lambda macro
 // expands to nothing.
 #ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
 #undef _GLIBCXX_SIMD_ALWAYS_INLINE
 #define _GLIBCXX_SIMD_ALWAYS_INLINE inline
+#undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
 #undef _GLIBCXX_SIMD_INTRINSIC
 #define _GLIBCXX_SIMD_INTRINSIC inline
 #endif
index 9ecc8e521caedcb588841227f2138269927212aa..3ac6eaa3f6bf5edfbf8301b689aa1ed9522ec9a1 100644 (file)
@@ -434,14 +434,15 @@ template <typename _Tp, typename _Abi0, typename... _Abis>
        if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>)
          return __tup.first;
        else if (__builtin_is_constant_evaluated())
-         return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate([&](
-           auto __meta) constexpr {
-           return __meta._S_generator(
-             [&](auto __i) constexpr { return __tup[__i]; },
-             static_cast<_TupT*>(nullptr));
+         return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate(
+                  [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                    return __meta._S_generator(
+                             [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                               return __tup[__i];
+                             }, static_cast<_TupT*>(nullptr));
          });
        else
-         return [&]() {
+         return [&]() { // not always_inline; allow the compiler to decide
            __fixed_size_storage_t<_TupT, _S_first_size> __r;
            __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(),
                             sizeof(__r));
@@ -515,12 +516,11 @@ template <typename _Tp, typename _Abi0, typename... _Abis>
                         negation<is_const<remove_reference_t<_More>>>>) )
          {
            // need to write back at least one of __more after calling __fun
-           auto&& __first = [&](auto... __args) constexpr
-           {
+           auto&& __first = [&](auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
              auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
                               __args...);
              [[maybe_unused]] auto&& __ignore_me = {(
-               [](auto&& __dst, const auto& __src) {
+               [](auto&& __dst, const auto& __src) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  if constexpr (is_assignable_v<decltype(__dst),
                                                decltype(__dst)>)
                    {
@@ -530,8 +530,7 @@ template <typename _Tp, typename _Abi0, typename... _Abis>
                }(static_cast<_More&&>(__more), __args),
                0)...};
              return __r;
-           }
-           (_M_extract_argument(__more)...);
+           }(_M_extract_argument(__more)...);
            if constexpr (_S_tuple_size == 1)
              return {__first};
            else
@@ -776,18 +775,18 @@ template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX>
          sizeof...(_VX) == 0,
          "An array of scalars must be the last argument to __to_simd_tuple");
        return __call_with_subscripts(
-         __from,
-         make_index_sequence<_NV>(), [&](const auto... __args) constexpr {
-           return __simd_tuple_concat(
-             _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>());
-         });
+                __from, make_index_sequence<_NV>(),
+                [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __simd_tuple_concat(
+                           _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>());
+                });
       }
     else
       return __call_with_subscripts(
-       __from,
-       make_index_sequence<_NV>(), [&](const auto... __args) constexpr {
-         return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...);
-       });
+              __from, make_index_sequence<_NV>(),
+              [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...);
+              });
   }
 
 template <size_t, typename _Tp>
@@ -841,7 +840,7 @@ template <typename _Tp, typename _A0, typename _A1, typename... _Abis,
                       || _A0::template _S_is_partial<_Tp>)
       return {__generate_from_n_evaluations<_R::_S_first_size,
                                            typename _R::_FirstType>(
-               [&](auto __i) { return __x[__i]; }),
+               [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }),
              __optimize_simd_tuple(
                __simd_tuple_pop_front<_R::_S_first_size>(__x))};
     else if constexpr (is_same_v<_A0, _A1>
@@ -994,10 +993,11 @@ template <int _Index, int _Total, int _Combine, typename _Tp, typename _A0,
        return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned));
 #else
        [[maybe_unused]] constexpr size_t __offset = __values_to_skip;
-       return __as_vector(simd<_Tp, _RetAbi>([&](auto __i) constexpr {
-         constexpr _SizeConstant<__i + __offset> __k;
-         return __x[__k];
-       }));
+       return __as_vector(simd<_Tp, _RetAbi>(
+                            [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                              constexpr _SizeConstant<__i + __offset> __k;
+                              return __x[__k];
+                            }));
 #endif
       }
 
@@ -1286,9 +1286,10 @@ template <int _Np, typename>
     // Produce a _SimdMember<_Tp> from the single value __x.
     // _SimdMember<_Tp>::_S_generate is handed a callback that captures __x by
     // reference and, for the __meta object it is invoked with, returns
     // __meta._S_broadcast(__x). The new version of the lambda additionally
     // carries _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA; its body is unchanged.
     template <typename _Tp>
       static constexpr inline _SimdMember<_Tp> _S_broadcast(_Tp __x) noexcept
       {
-       return _SimdMember<_Tp>::_S_generate([&](auto __meta) constexpr {
-         return __meta._S_broadcast(__x);
-       });
+       return _SimdMember<_Tp>::_S_generate(
+                [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __meta._S_broadcast(__x);
+                });
       }
 
     // _S_generator {{{2
@@ -1296,14 +1297,15 @@ template <int _Np, typename>
       static constexpr inline _SimdMember<_Tp> _S_generator(_Fp&& __gen,
                                                            _TypeTag<_Tp>)
       {
-       return _SimdMember<_Tp>::_S_generate([&__gen](auto __meta) constexpr {
-         return __meta._S_generator(
-           [&](auto __i) constexpr {
-             return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>())
-                              : 0;
-           },
-           _TypeTag<_Tp>());
-       });
+       return _SimdMember<_Tp>::_S_generate(
+                [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __meta._S_generator(
+                           [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                             return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>())
+                                              : 0;
+                           },
+                           _TypeTag<_Tp>());
+                });
       }
 
     // _S_load {{{2
@@ -1311,9 +1313,10 @@ template <int _Np, typename>
       static inline _SimdMember<_Tp> _S_load(const _Up* __mem,
                                             _TypeTag<_Tp>) noexcept
       {
-       return _SimdMember<_Tp>::_S_generate([&](auto __meta) {
-         return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>());
-       });
+       return _SimdMember<_Tp>::_S_generate(
+                [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>());
+                });
       }
 
     // _S_masked_load {{{2
@@ -1323,7 +1326,7 @@ template <int _Np, typename>
                     const _MaskMember __bits, const _Up* __mem) noexcept
       {
        auto __merge = __old;
-       __for_each(__merge, [&](auto __meta, auto& __native) {
+       __for_each(__merge, [&](auto __meta, auto& __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          if (__meta._S_submask(__bits).any())
 #pragma GCC diagnostic push
          // __mem + __mem._S_offset could be UB ([expr.add]/4.3, but it punts
@@ -1344,7 +1347,7 @@ template <int _Np, typename>
       static inline void _S_store(const _SimdMember<_Tp>& __v, _Up* __mem,
                                  _TypeTag<_Tp>) noexcept
       {
-       __for_each(__v, [&](auto __meta, auto __native) {
+       __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>());
        });
       }
@@ -1355,7 +1358,7 @@ template <int _Np, typename>
                                         _Up* __mem,
                                         const _MaskMember __bits) noexcept
       {
-       __for_each(__v, [&](auto __meta, auto __native) {
+       __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          if (__meta._S_submask(__bits).any())
 #pragma GCC diagnostic push
          // __mem + __mem._S_offset could be UB ([expr.add]/4.3, but it punts
@@ -1376,7 +1379,7 @@ template <int _Np, typename>
       {
        _MaskMember __bits = 0;
        __for_each(
-         __x, [&__bits](auto __meta, auto __native) constexpr {
+         __x, [&__bits](auto __meta, auto __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __bits
              |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native));
          });
@@ -1414,7 +1417,7 @@ template <int _Np, typename>
          {
            const auto& __x2 = __call_with_n_evaluations<
              __div_roundup(_Tup::_S_tuple_size, 2)>(
-             [](auto __first_simd, auto... __remaining) {
+             [](auto __first_simd, auto... __remaining) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                if constexpr (sizeof...(__remaining) == 0)
                  return __first_simd;
                else
@@ -1428,7 +1431,7 @@ template <int _Np, typename>
                      __make_simd_tuple(__first_simd, __remaining...));
                  }
              },
-             [&](auto __i) {
+             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                auto __left = __tup.template _M_simd_at<2 * __i>();
                if constexpr (2 * __i + 1 == _Tup::_S_tuple_size)
                  return __left;
@@ -1444,7 +1447,9 @@ template <int _Np, typename>
                        _GLIBCXX_SIMD_USE_CONSTEXPR_API
                        typename _LT::mask_type __k(
                          __private_init,
-                         [](auto __j) constexpr { return __j < _RT::size(); });
+                         [](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                           return __j < _RT::size();
+                         });
                        _LT __ext_right = __left;
                        where(__k, __ext_right)
                          = __proposed::resizing_simd_cast<_LT>(__right);
@@ -1464,7 +1469,7 @@ template <int _Np, typename>
             const _SimdTuple<_Tp, _As...>& __b)
       {
        return __a._M_apply_per_chunk(
-         [](auto __impl, auto __aa, auto __bb) constexpr {
+         [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return __impl._S_min(__aa, __bb);
          },
          __b);
@@ -1476,7 +1481,7 @@ template <int _Np, typename>
             const _SimdTuple<_Tp, _As...>& __b)
       {
        return __a._M_apply_per_chunk(
-         [](auto __impl, auto __aa, auto __bb) constexpr {
+         [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return __impl._S_max(__aa, __bb);
          },
          __b);
@@ -1487,9 +1492,10 @@ template <int _Np, typename>
       static inline constexpr _SimdTuple<_Tp, _As...>
       _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept
       {
-       return __x._M_apply_per_chunk([](auto __impl, auto __xx) constexpr {
-         return __impl._S_complement(__xx);
-       });
+       return __x._M_apply_per_chunk(
+                [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __impl._S_complement(__xx);
+                });
       }
 
     // _S_unary_minus {{{2
@@ -1497,23 +1503,24 @@ template <int _Np, typename>
       static inline constexpr _SimdTuple<_Tp, _As...>
       _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept
       {
-       return __x._M_apply_per_chunk([](auto __impl, auto __xx) constexpr {
-         return __impl._S_unary_minus(__xx);
-       });
+       return __x._M_apply_per_chunk(
+                [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __impl._S_unary_minus(__xx);
+                });
       }
 
     // arithmetic operators {{{2
 
-#define _GLIBCXX_SIMD_FIXED_OP(name_, op_)                                     \
-    template <typename _Tp, typename... _As>                                   \
-      static inline constexpr _SimdTuple<_Tp, _As...> name_(                   \
-       const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y)\
-      {                                                                        \
-       return __x._M_apply_per_chunk(                                         \
-         [](auto __impl, auto __xx, auto __yy) constexpr {                    \
-           return __impl.name_(__xx, __yy);                                   \
-         },                                                                   \
-         __y);                                                                \
+#define _GLIBCXX_SIMD_FIXED_OP(name_, op_)                                                     \
+    template <typename _Tp, typename... _As>                                                   \
+      static inline constexpr _SimdTuple<_Tp, _As...> name_(                                   \
+       const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y)                \
+      {                                                                                        \
+       return __x._M_apply_per_chunk(                                                         \
+         [](auto __impl, auto __xx, auto __yy) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \
+           return __impl.name_(__xx, __yy);                                                   \
+         },                                                                                   \
+         __y);                                                                                \
       }
 
     _GLIBCXX_SIMD_FIXED_OP(_S_plus, +)
@@ -1532,18 +1539,20 @@ template <int _Np, typename>
       static inline constexpr _SimdTuple<_Tp, _As...>
       _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y)
       {
-       return __x._M_apply_per_chunk([__y](auto __impl, auto __xx) constexpr {
-         return __impl._S_bit_shift_left(__xx, __y);
-       });
+       return __x._M_apply_per_chunk(
+                [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __impl._S_bit_shift_left(__xx, __y);
+                });
       }
 
     template <typename _Tp, typename... _As>
       static inline constexpr _SimdTuple<_Tp, _As...>
       _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y)
       {
-       return __x._M_apply_per_chunk([__y](auto __impl, auto __xx) constexpr {
-         return __impl._S_bit_shift_right(__xx, __y);
-       });
+       return __x._M_apply_per_chunk(
+                [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __impl._S_bit_shift_right(__xx, __y);
+                });
       }
 
   // math {{{2
@@ -1557,35 +1566,40 @@ template <int _Np, typename>
          {                                                                    \
            if constexpr (is_same_v<_Tp, _RetTp>)                              \
              return __x._M_apply_per_chunk(                                   \
-               [](auto __impl, auto __xx) constexpr {                         \
-                 using _V = typename decltype(__impl)::simd_type;             \
-                 return __data(__name(_V(__private_init, __xx)));             \
-               });                                                            \
+                      [](auto __impl, auto __xx)                              \
+                        constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA          \
+                      {                                                       \
+                        using _V = typename decltype(__impl)::simd_type;      \
+                        return __data(__name(_V(__private_init, __xx)));      \
+                      });                                                     \
            else                                                               \
              return __optimize_simd_tuple(                                    \
-               __x.template _M_apply_r<_RetTp>([](auto __impl, auto __xx) {   \
-                 return __impl._S_##__name(__xx);                             \
-               }));                                                           \
+                      __x.template _M_apply_r<_RetTp>(                        \
+                        [](auto __impl, auto __xx)                            \
+                          _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA                  \
+                        { return __impl._S_##__name(__xx); }));               \
          }                                                                    \
        else if constexpr (                                                    \
          is_same_v<                                                           \
            _Tp,                                                               \
            _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>) )    \
          return __x._M_apply_per_chunk(                                       \
-           [](auto __impl, auto __xx, auto... __pack) constexpr {             \
-             using _V = typename decltype(__impl)::simd_type;                 \
-             return __data(__name(_V(__private_init, __xx),                   \
-                                  _V(__private_init, __pack)...));            \
-           },                                                                 \
-           __more...);                                                        \
+                  [](auto __impl, auto __xx, auto... __pack)                  \
+                    constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA              \
+                  {                                                           \
+                    using _V = typename decltype(__impl)::simd_type;          \
+                    return __data(__name(_V(__private_init, __xx),            \
+                                         _V(__private_init, __pack)...));     \
+                  }, __more...);                                              \
        else if constexpr (is_same_v<_Tp, _RetTp>)                             \
          return __x._M_apply_per_chunk(                                       \
-           [](auto __impl, auto __xx, auto... __pack) constexpr {             \
-             using _V = typename decltype(__impl)::simd_type;                 \
-             return __data(__name(_V(__private_init, __xx),                   \
-                                  __autocvt_to_simd(__pack)...));             \
-           },                                                                 \
-           __more...);                                                        \
+                  [](auto __impl, auto __xx, auto... __pack)                  \
+                    constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA              \
+                  {                                                           \
+                    using _V = typename decltype(__impl)::simd_type;          \
+                    return __data(__name(_V(__private_init, __xx),            \
+                                         __autocvt_to_simd(__pack)...));      \
+                  }, __more...);                                              \
        else                                                                   \
          __assert_unreachable<_Tp>();                                         \
       }
@@ -1657,10 +1671,10 @@ template <int _Np, typename>
        __fixed_size_storage_t<int, _SimdTuple<_Tp, _Abis...>::_S_size()>* __z)
       {
        return __x._M_apply_per_chunk(
-         [](auto __impl, const auto __xx, const auto __yy, auto& __zz) {
-           return __impl._S_remquo(__xx, __yy, &__zz);
-         },
-         __y, *__z);
+                [](auto __impl, const auto __xx, const auto __yy, auto& __zz)
+                  _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+                { return __impl._S_remquo(__xx, __yy, &__zz); },
+                __y, *__z);
       }
 
     template <typename _Tp, typename... _As>
@@ -1669,12 +1683,10 @@ template <int _Np, typename>
               __fixed_size_storage_t<int, _Np>& __exp) noexcept
       {
        return __x._M_apply_per_chunk(
-         [](auto __impl, const auto& __a, auto& __b) {
-           return __data(
-             frexp(typename decltype(__impl)::simd_type(__private_init, __a),
-                   __autocvt_to_simd(__b)));
-         },
-         __exp);
+                [](auto __impl, const auto& __a, auto& __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __data(frexp(typename decltype(__impl)::simd_type(__private_init, __a),
+                                      __autocvt_to_simd(__b)));
+                }, __exp);
       }
 
 #define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_)                                    \
@@ -1700,7 +1712,7 @@ template <int _Np, typename>
       _S_increment(_SimdTuple<_Ts...>& __x)
       {
        __for_each(
-         __x, [](auto __meta, auto& native) constexpr {
+         __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __meta._S_increment(native);
          });
       }
@@ -1710,7 +1722,7 @@ template <int _Np, typename>
       _S_decrement(_SimdTuple<_Ts...>& __x)
       {
        __for_each(
-         __x, [](auto __meta, auto& native) constexpr {
+         __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __meta._S_decrement(native);
          });
       }
@@ -1722,11 +1734,10 @@ template <int _Np, typename>
       __cmp(const _SimdTuple<_Tp, _As...>& __x,                                \
            const _SimdTuple<_Tp, _As...>& __y)                                \
       {                                                                        \
-       return _M_test(                                                        \
-         [](auto __impl, auto __xx, auto __yy) constexpr {                    \
-           return __impl.__cmp(__xx, __yy);                                   \
-         },                                                                   \
-         __x, __y);                                                           \
+       return _M_test([](auto __impl, auto __xx, auto __yy)                   \
+                        constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA          \
+                      { return __impl.__cmp(__xx, __yy); },                   \
+                      __x, __y);                                              \
       }
 
     _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to)
@@ -1753,12 +1764,13 @@ template <int _Np, typename>
       _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
                       const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs)
       {
-       __for_each(
-         __lhs, __rhs,
-         [&](auto __meta, auto& __native_lhs, auto __native_rhs) constexpr {
-           __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
-                                   __native_rhs);
-         });
+       __for_each(__lhs, __rhs,
+                  [&](auto __meta, auto& __native_lhs, auto __native_rhs)
+                    constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+                  {
+                    __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
+                                            __native_rhs);
+                  });
       }
 
     // Optimization for the case where the RHS is a scalar. No need to broadcast
@@ -1769,7 +1781,7 @@ template <int _Np, typename>
                       const __type_identity_t<_Tp> __rhs)
       {
        __for_each(
-         __lhs, [&](auto __meta, auto& __native_lhs) constexpr {
+         __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
                                    __rhs);
          });
@@ -1782,12 +1794,13 @@ template <int _Np, typename>
                                           const _SimdTuple<_Tp, _As...>& __rhs,
                                           _Op __op)
       {
-       __for_each(
-         __lhs, __rhs,
-         [&](auto __meta, auto& __native_lhs, auto __native_rhs) constexpr {
-           __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
-                                             __native_lhs, __native_rhs, __op);
-         });
+       __for_each(__lhs, __rhs,
+                  [&](auto __meta, auto& __native_lhs, auto __native_rhs)
+                    constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+                  {
+                    __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
+                                                      __native_lhs, __native_rhs, __op);
+                  });
       }
 
     // Optimization for the case where the RHS is a scalar. No need to broadcast
@@ -1798,7 +1811,7 @@ template <int _Np, typename>
                                           const _Tp& __rhs, _Op __op)
       {
        __for_each(
-         __lhs, [&](auto __meta, auto& __native_lhs) constexpr {
+         __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
                                              __native_lhs, __rhs, __op);
          });
@@ -1899,7 +1912,7 @@ template <int _Np, typename>
       // _Np _UShort, _UInt, _ULLong, float, and double can be more efficient.
       _ULLong __r = 0;
       using _Vs = __fixed_size_storage_t<_UChar, _Np>;
-      __for_each(_Vs{}, [&](auto __meta, auto) {
+      __for_each(_Vs{}, [&](auto __meta, auto) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
        __r |= __meta._S_mask_to_shifted_ullong(
          __meta._S_mask_impl._S_load(&__mem[__meta._S_offset],
                                      _SizeConstant<__meta._S_size()>()));
@@ -1912,9 +1925,10 @@ template <int _Np, typename>
                                             _MaskMember __mask,
                                             const bool* __mem) noexcept
     {
-      _BitOps::_S_bit_iteration(__mask.to_ullong(), [&](auto __i) {
-       __merge.set(__i, __mem[__i]);
-      });
+      _BitOps::_S_bit_iteration(__mask.to_ullong(),
+                               [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                                 __merge.set(__i, __mem[__i]);
+                               });
       return __merge;
     }
 
@@ -1932,7 +1946,8 @@ template <int _Np, typename>
     static inline void _S_masked_store(const _MaskMember __v, bool* __mem,
                                       const _MaskMember __k) noexcept
     {
-      _BitOps::_S_bit_iteration(__k, [&](auto __i) { __mem[__i] = __v[__i]; });
+      _BitOps::_S_bit_iteration(
+       __k, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; });
     }
 
     // logical and bitwise operators {{{2
index 2aff8ff5fa4a5dbf38c04e4068d762acebae2b5a..c20315e4e3047a818874d49211c1c576f562ae99 100644 (file)
@@ -788,7 +788,7 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
 
        // __exponent(__x) returns the exponent value (bias removed) as
        // simd<_Up> with integral _Up
-       auto&& __exponent = [](const _V& __v) {
+       auto&& __exponent = [](const _V& __v) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          using namespace std::experimental::__proposed;
          using _IV = rebind_simd_t<
            conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
@@ -931,7 +931,7 @@ template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
   {
     return {__private_init,
            __data(__arg0)._M_apply_per_chunk(
-             [&](auto __impl, const auto&... __inner) {
+             [&](auto __impl, const auto&... __inner) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                using _V = typename decltype(__impl)::simd_type;
                return __data(__apply(_V(__private_init, __inner)...));
              },
@@ -1092,8 +1092,9 @@ _GLIBCXX_SIMD_CVTING2(hypot)
     if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
       {
        return __fixed_size_apply<simd<_Tp, _Abi>>(
-         [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
-         __x, __y, __z);
+                [](auto __a, auto __b, auto __c) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return hypot(__a, __b, __c);
+                }, __x, __y, __z);
       }
     else
       {
@@ -1380,9 +1381,9 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
   {
-    return simd<_Tp, _Abi>([&](auto __i) {
-      return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
-    });
+    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+            return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
+          });
   }
 
 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
@@ -1391,9 +1392,9 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
   {
-    return simd<_Tp, _Abi>([&](auto __i) {
-      return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
-    });
+    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+            return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
+          });
   }
 
 _GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
@@ -1414,8 +1415,9 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
   hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
          const simd<_Tp, _Abi>& __x)
   {
-    return simd<_Tp, _Abi>(
-      [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
+    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+            return std::hermite(__n[__i], __x[__i]);
+          });
   }
 
 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
@@ -1423,8 +1425,9 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
   laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
   {
-    return simd<_Tp, _Abi>(
-      [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
+    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+            return std::laguerre(__n[__i], __x[__i]);
+          });
   }
 
 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
@@ -1432,8 +1435,9 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
   legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
   {
-    return simd<_Tp, _Abi>(
-      [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
+    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+            return std::legendre(__n[__i], __x[__i]);
+          });
   }
 
 _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
@@ -1443,8 +1447,9 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
   sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
             const simd<_Tp, _Abi>& __x)
   {
-    return simd<_Tp, _Abi>(
-      [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
+    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+            return std::sph_bessel(__n[__i], __x[__i]);
+          });
   }
 
 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
@@ -1453,9 +1458,9 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
               const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
               const simd<_Tp, _Abi>& theta)
   {
-    return simd<_Tp, _Abi>([&](auto __i) {
-      return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
-    });
+    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+            return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
+          });
   }
 
 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
@@ -1463,8 +1468,9 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
   sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
              const simd<_Tp, _Abi>& __x)
   {
-    return simd<_Tp, _Abi>(
-      [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
+    return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+            return std::sph_neumann(__n[__i], __x[__i]);
+          });
   }
 // }}}
 
index 8429c252196547a3d1a9b37132631477e7e4f61c..7e4cb17b205d882c890e7292370fccdeb607f32b 100644 (file)
@@ -61,7 +61,7 @@ template <typename _Abi, typename>
       _S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k,
                     const _Up* __mem) noexcept
       {
-       __execute_n_times<_Np>([&](auto __i) {
+       __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          if (__k[__i] != 0)
            __merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
        });
@@ -75,7 +75,7 @@ template <typename _Abi, typename>
       _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
                            _MaskMember<_Tp> __k)
       {
-       __execute_n_times<_Np>([&](auto __i) {
+       __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          if (__k[__i] != 0)
            __mem[__i] = __v[__i];
        });
@@ -286,7 +286,7 @@ struct _MaskImplNeonMixin
            {
              constexpr auto __bitsel
                = __generate_from_n_evaluations<16, __vector_type_t<_I, 16>>(
-                 [&](auto __i) {
+                 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                    return static_cast<_I>(
                      __i < _Np ? (__i < 8 ? 1 << __i : 1 << (__i - 8)) : 0);
                  });
@@ -306,7 +306,7 @@ struct _MaskImplNeonMixin
            {
              constexpr auto __bitsel
                = __generate_from_n_evaluations<8, __vector_type_t<_I, 8>>(
-                 [&](auto __i) {
+                 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                    return static_cast<_I>(__i < _Np ? 1 << __i : 0);
                  });
              __asint &= __bitsel;
@@ -322,7 +322,7 @@ struct _MaskImplNeonMixin
            {
              constexpr auto __bitsel
                = __generate_from_n_evaluations<4, __vector_type_t<_I, 4>>(
-                 [&](auto __i) {
+                 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                    return static_cast<_I>(__i < _Np ? 1 << __i : 0);
                  });
              __asint &= __bitsel;
@@ -346,7 +346,7 @@ struct _MaskImplNeonMixin
            {
              constexpr auto __bitsel
                = __generate_from_n_evaluations<8, __vector_type_t<_I, 8>>(
-                 [&](auto __i) {
+                 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                    return static_cast<_I>(__i < _Np ? 1 << __i : 0);
                  });
              __asint &= __bitsel;
@@ -361,7 +361,7 @@ struct _MaskImplNeonMixin
            {
              constexpr auto __bitsel
                = __generate_from_n_evaluations<4, __vector_type_t<_I, 4>>(
-                 [&](auto __i) {
+                 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                    return static_cast<_I>(__i < _Np ? 1 << __i : 0);
                  });
              __asint &= __bitsel;
index 0f4aa95e1a46f580977fc3d76f210cf75e48f802..60e80d394ba2cd36ae3cf44f08498a1b65d3bd7d 100644 (file)
@@ -537,16 +537,17 @@ struct _CommonImplX86 : _CommonImplBuiltin
     _S_store_bool_array(const _BitMask<_Np, _Sanitized> __x, bool* __mem)
     {
       if constexpr (__have_avx512bw_vl) // don't care for BW w/o VL
-       _S_store<_Np>(1 & __vector_bitcast<_UChar, _Np>([=]() constexpr {
-                       if constexpr (_Np <= 16)
-                         return _mm_movm_epi8(__x._M_to_bits());
-                       else if constexpr (_Np <= 32)
-                         return _mm256_movm_epi8(__x._M_to_bits());
-                       else if constexpr (_Np <= 64)
-                         return _mm512_movm_epi8(__x._M_to_bits());
-                       else
-                         __assert_unreachable<_SizeConstant<_Np>>();
-                     }()),
+       _S_store<_Np>(1 & __vector_bitcast<_UChar, _Np>(
+                           [=]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                             if constexpr (_Np <= 16)
+                               return _mm_movm_epi8(__x._M_to_bits());
+                             else if constexpr (_Np <= 32)
+                               return _mm256_movm_epi8(__x._M_to_bits());
+                             else if constexpr (_Np <= 64)
+                               return _mm512_movm_epi8(__x._M_to_bits());
+                             else
+                               __assert_unreachable<_SizeConstant<_Np>>();
+                           }()),
                      __mem);
       else if constexpr (__have_bmi2)
        {
@@ -554,7 +555,7 @@ struct _CommonImplX86 : _CommonImplBuiltin
            _S_store<_Np>(_pdep_u32(__x._M_to_bits(), 0x01010101U), __mem);
          else
            __execute_n_times<__div_roundup(_Np, sizeof(size_t))>(
-             [&](auto __i) {
+             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                constexpr size_t __offset = __i * sizeof(size_t);
                constexpr int __todo = std::min(sizeof(size_t), _Np - __offset);
                if constexpr (__todo == 1)
@@ -575,7 +576,7 @@ struct _CommonImplX86 : _CommonImplBuiltin
              });
        }
       else if constexpr (__have_sse2 && _Np > 7)
-       __execute_n_times<__div_roundup(_Np, 16)>([&](auto __i) {
+       __execute_n_times<__div_roundup(_Np, 16)>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          constexpr int __offset = __i * 16;
          constexpr int __todo = std::min(16, int(_Np) - __offset);
          const int __bits = __x.template _M_extract<__offset>()._M_to_bits();
@@ -765,9 +766,10 @@ struct _CommonImplX86 : _CommonImplBuiltin
       static_assert(is_same_v<_Tp, _Tp> && __have_avx512f);
       if (__k._M_is_constprop() && __at0._M_is_constprop()
          && __at1._M_is_constprop())
-       return __generate_from_n_evaluations<_Np,
-                                            __vector_type_t<_Tp, _Np>>([&](
-         auto __i) constexpr { return __k[__i] ? __at1[__i] : __at0[__i]; });
+       return __generate_from_n_evaluations<_Np, __vector_type_t<_Tp, _Np>>(
+                [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                  return __k[__i] ? __at1[__i] : __at0[__i];
+                });
       else if constexpr (sizeof(__at0) == 64
                         || (__have_avx512vl && sizeof(__at0) >= 16))
        return _S_blend_avx512(__k._M_data, __at0._M_data, __at1._M_data);
@@ -994,9 +996,8 @@ template <typename _Abi, typename>
              }
            else
              _BitOps::_S_bit_iteration(_MaskImpl::_S_to_bits(__k),
-                                       [&](auto __i) {
-                                         __merge._M_set(__i, static_cast<_Tp>(
-                                                               __mem[__i]));
+                                       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                                         __merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
                                        });
          }
        /* Very uncertain, that the following improves anything. Needs
@@ -1417,11 +1418,12 @@ template <typename _Abi, typename>
              const auto __yf = __convert_all<_FloatV, __n_floatv>(
                _Abi::__make_padding_nonzero(__as_vector(__y)));
              return __call_with_n_evaluations<__n_floatv>(
-               [](auto... __quotients) {
+               [](auto... __quotients) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  return __vector_convert<_R>(__quotients...);
                },
-               [&__xf,
-                &__yf](auto __i) -> _SimdWrapper<_Float, __n_intermediate> {
+               [&__xf, &__yf](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+                 -> _SimdWrapper<_Float, __n_intermediate>
+               {
 #if !defined __clang__ && __GCC_IEC_559 == 0
                  // If -freciprocal-math is active, using the `/` operator is
                  // incorrect because it may be translated to an imprecise
@@ -1980,7 +1982,7 @@ template <typename _Abi, typename>
              {
                auto __mask = __vector_bitcast<_UChar>(
                  __vector_bitcast<_UShort>(__iy) << 5);
-               auto __maskl = [&]() {
+               auto __maskl = [&]() _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  return __to_intrin(__vector_bitcast<_UShort>(__mask) << 8);
                };
                auto __xh = __vector_bitcast<short>(__ix);
@@ -2067,19 +2069,20 @@ template <typename _Abi, typename>
          }                                                      //}}}
        else if constexpr (sizeof(_Up) == 2 && sizeof(__x) >= 4) //{{{
          {
-           [[maybe_unused]] auto __blend_0xaa = [](auto __a, auto __b) {
-             if constexpr (sizeof(__a) == 16)
-               return _mm_blend_epi16(__to_intrin(__a), __to_intrin(__b),
-                                      0xaa);
-             else if constexpr (sizeof(__a) == 32)
-               return _mm256_blend_epi16(__to_intrin(__a), __to_intrin(__b),
-                                         0xaa);
-             else if constexpr (sizeof(__a) == 64)
-               return _mm512_mask_blend_epi16(0xaaaa'aaaaU, __to_intrin(__a),
-                                              __to_intrin(__b));
-             else
-               __assert_unreachable<decltype(__a)>();
-           };
+           [[maybe_unused]] auto __blend_0xaa
+             = [](auto __a, auto __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+               if constexpr (sizeof(__a) == 16)
+                 return _mm_blend_epi16(__to_intrin(__a), __to_intrin(__b),
+                                        0xaa);
+               else if constexpr (sizeof(__a) == 32)
+                 return _mm256_blend_epi16(__to_intrin(__a), __to_intrin(__b),
+                                           0xaa);
+               else if constexpr (sizeof(__a) == 64)
+                 return _mm512_mask_blend_epi16(0xaaaa'aaaaU, __to_intrin(__a),
+                                                __to_intrin(__b));
+               else
+                 __assert_unreachable<decltype(__a)>();
+             };
            if constexpr (__have_avx512bw_vl && sizeof(_Tp) <= 16)
              return __intrin_bitcast<_V>(is_signed_v<_Up>
                                            ? _mm_srav_epi16(__ix, __iy)
@@ -2136,9 +2139,10 @@ template <typename _Abi, typename>
              {
                auto __k = __vector_bitcast<_UShort>(__iy) << 11;
                auto __x128 = __vector_bitcast<_Up>(__ix);
-               auto __mask = [](__vector_type16_t<_UShort> __kk) {
-                 return __vector_bitcast<short>(__kk) < 0;
-               };
+               auto __mask
+                 = [](__vector_type16_t<_UShort> __kk) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                   return __vector_bitcast<short>(__kk) < 0;
+                 };
                // do __x128 = 0 where __y[4] is set
                __x128 = __mask(__k) ? decltype(__x128)() : __x128;
                // do __x128 =>> 8 where __y[3] is set
@@ -2178,7 +2182,7 @@ template <typename _Abi, typename>
              }
            else
              {
-               auto __shift = [](auto __a, auto __b) {
+               auto __shift = [](auto __a, auto __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  if constexpr (is_signed_v<_Up>)
                    return _mm_sra_epi32(__a, __b);
                  else
@@ -3492,7 +3496,7 @@ struct _MaskImplX86Mixin
        return _S_to_maskvector<_Up, _ToN>(__k);
       else if (__x._M_is_constprop() || __builtin_is_constant_evaluated())
        return __generate_from_n_evaluations<std::min(_ToN, _Np), _UV>(
-         [&](auto __i) -> _Up { return -__x[__i.value]; });
+         [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA -> _Up { return -__x[__i.value]; });
       else if constexpr (sizeof(_Up) == 1)
        {
          if constexpr (sizeof(_UI) == 16)
@@ -3737,9 +3741,9 @@ struct _MaskImplX86Mixin
       else if constexpr (__bits_per_element >= _ToN)
        {
          constexpr auto __bitmask
-           = __generate_vector<_V>([](auto __i) constexpr->_UpUInt {
-               return __i < _ToN ? 1ull << __i : 0;
-             });
+           = __generate_vector<_V>([](auto __i)
+                                   constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA -> _UpUInt
+                                   { return __i < _ToN ? 1ull << __i : 0; });
          const auto __bits
            = __vector_broadcast<_ToN, _UpUInt>(__k) & __bitmask;
          if constexpr (__bits_per_element > _ToN)
@@ -3750,11 +3754,11 @@ struct _MaskImplX86Mixin
       else
        {
          const _V __tmp
-           = __generate_vector<_V>([&](auto __i) constexpr {
+           = __generate_vector<_V>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                return static_cast<_UpUInt>(
                  __k >> (__bits_per_element * (__i / __bits_per_element)));
              })
-             & __generate_vector<_V>([](auto __i) constexpr {
+             & __generate_vector<_V>([](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  return static_cast<_UpUInt>(1ull
                                              << (__i % __bits_per_element));
                }); // mask bit index
@@ -3790,7 +3794,7 @@ struct _MaskImplX86Mixin
              const auto __y = __vector_bitcast<__int_for_sizeof_t<_Tp>>(__x);
              return __generate_from_n_evaluations<std::min(_ToN, _Np),
                                                   __vector_type_t<_Up, _ToN>>(
-               [&](auto __i) -> _Up { return __y[__i.value]; });
+               [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA -> _Up { return __y[__i.value]; });
            }
          using _To = __vector_type_t<_Up, _ToN>;
          [[maybe_unused]] constexpr size_t _FromN = _Np;
@@ -4125,8 +4129,11 @@ struct _MaskImplX86Mixin
            {
              const auto __bools = -__x._M_data;
              const _ULLong __k = __call_with_n_evaluations<_Np>(
-               [](auto... __bits) { return (__bits | ...); },
-               [&](auto __i) { return _ULLong(__bools[+__i]) << __i; });
+               [](auto... __bits) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                 return (__bits | ...);
+               }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                 return _ULLong(__bools[+__i]) << __i;
+               });
              if (__builtin_is_constant_evaluated()
                  || __builtin_constant_p(__k))
                return __k;
@@ -4282,13 +4289,14 @@ template <typename _Abi, typename>
        static_assert(is_same_v<_Tp, __int_for_sizeof_t<_Tp>>);
        if constexpr (__have_avx512bw)
          {
-           const auto __to_vec_or_bits = [](auto __bits) -> decltype(auto) {
-             if constexpr (__is_avx512_abi<_Abi>())
-               return __bits;
-             else
-               return _S_to_maskvector<_Tp>(
-                 _BitMask<_S_size<_Tp>>(__bits)._M_sanitized());
-           };
+           const auto __to_vec_or_bits
+             = [](auto __bits) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA -> decltype(auto) {
+               if constexpr (__is_avx512_abi<_Abi>())
+                 return __bits;
+               else
+                 return _S_to_maskvector<_Tp>(
+                          _BitMask<_S_size<_Tp>>(__bits)._M_sanitized());
+             };
 
            if constexpr (_S_size<_Tp> <= 16 && __have_avx512vl)
              {
@@ -4475,7 +4483,7 @@ template <typename _Abi, typename>
              }
            else
              {
-               _BitOps::_S_bit_iteration(__mask, [&](auto __i) {
+               _BitOps::_S_bit_iteration(__mask, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  __merge._M_set(__i, __mem[__i]);
                });
                return __merge;
@@ -4554,7 +4562,7 @@ template <typename _Abi, typename>
          {
            if constexpr (__have_avx512bw_vl)
              _CommonImplX86::_S_store<_Np>(
-               __vector_bitcast<char>([](auto __data) {
+               __vector_bitcast<char>([](auto __data) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  if constexpr (_Np <= 16)
                    return _mm_maskz_set1_epi8(__data, 1);
                  else if constexpr (_Np <= 32)