libstdc++: Improve simd fixed_size codegen
author Matthias Kretz <m.kretz@gsi.de>
Thu, 24 Jun 2021 13:20:14 +0000 (14:20 +0100)
committer Jonathan Wakely <jwakely@redhat.com>
Thu, 24 Jun 2021 13:20:14 +0000 (14:20 +0100)
Sometimes fixed_size objects will get unnecessarily copied on the stack.
The simd implementation should never pass _SimdTuple by value to avoid
requiring the optimizer to see through these copies.

Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_converter.h
(_SimdConverter::operator()): Pass _SimdTuple by const-ref.
* include/experimental/bits/simd_fixed_size.h
(_GLIBCXX_SIMD_FIXED_OP): Pass binary operator _SimdTuple
arguments by const-ref.
(_S_masked_unary): Pass _SimdTuple by const-ref.

libstdc++-v3/include/experimental/bits/simd_converter.h
libstdc++-v3/include/experimental/bits/simd_fixed_size.h

index 9c8bf38..11999df 100644 (file)
@@ -316,7 +316,7 @@ template <typename _From, int _Np, typename _To, typename _Ap>
 
     _GLIBCXX_SIMD_INTRINSIC constexpr
       typename _SimdTraits<_To, _Ap>::_SimdMember
-      operator()(_Arg __x) const noexcept
+      operator()(const _Arg& __x) const noexcept
     {
       if constexpr (_Arg::_S_tuple_size == 1)
        return __vector_convert<__vector_type_t<_To, _Np>>(__x.first);
index b6fb47c..dc2fb90 100644 (file)
@@ -1480,7 +1480,7 @@ template <int _Np>
 #define _GLIBCXX_SIMD_FIXED_OP(name_, op_)                                     \
     template <typename _Tp, typename... _As>                                   \
       static inline constexpr _SimdTuple<_Tp, _As...> name_(                   \
-       const _SimdTuple<_Tp, _As...> __x, const _SimdTuple<_Tp, _As...> __y)  \
+       const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y)\
       {                                                                        \
        return __x._M_apply_per_chunk(                                         \
          [](auto __impl, auto __xx, auto __yy) constexpr {                    \
@@ -1780,8 +1780,7 @@ template <int _Np>
     // _S_masked_unary {{{2
     template <template <typename> class _Op, typename _Tp, typename... _As>
       static inline _SimdTuple<_Tp, _As...>
-      _S_masked_unary(const _MaskMember __bits,
-                     const _SimdTuple<_Tp, _As...> __v) // TODO: const-ref __v?
+      _S_masked_unary(const _MaskMember __bits, const _SimdTuple<_Tp, _As...>& __v)
       {
        return __v._M_apply_wrapped([&__bits](auto __meta,
                                              auto __native) constexpr {