1, args[0]);
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
break;
+ BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE)
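+ /* Fold the immediate-count form (vshl_n) to a GIMPLE left shift
+ when the count is a constant strictly less than the element
+ width; larger counts have no GIMPLE semantics and keep the
+ builtin.  */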
+ if (TREE_CODE (args[1]) == INTEGER_CST
+ && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ LSHIFT_EXPR, args[0], args[1]);
+ break;
+ BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE)
+ BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE)
+ {
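+ /* sshl/ushl (vshl) take the count in a register and shift
+ right when it is negative, so both directions are handled
+ here; only a uniform constant count can be folded.  */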
+ tree cst = args[1];
+ tree ctype = TREE_TYPE (cst);
+ /* Left shifts can be either scalar or vector, e.g. uint64x1_t is
+ treated as a scalar type, not a vector one.  */
+ if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
+ {
+ wide_int wcst = wi::to_wide (cst);
+ tree unit_ty = TREE_TYPE (cst);
+
+ wide_int abs_cst = wi::abs (wcst);
+ if (wi::geu_p (abs_cst, element_precision (args[0])))
+ break;
+
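+ /* A negative count is a right shift: arithmetic for the
+ signed sshl, logical for the unsigned ushl, which is what
+ RSHIFT_EXPR on the argument's type produces.  */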
+ if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
+ {
+ tree final_cst = wide_int_to_tree (unit_ty, abs_cst);
+ /* uniform_integer_cst_p reduced CST to its element, so
+ rebuild a uniform vector constant when the original
+ count was a vector.  */
+ if (VECTOR_TYPE_P (ctype))
+ final_cst = build_uniform_cst (ctype, final_cst);
+
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ RSHIFT_EXPR, args[0],
+ final_cst);
+ }
+ else
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ LSHIFT_EXPR, args[0], args[1]);
+ }
+ }
+ break;
+ BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
+ VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
+ BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
+ VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
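+ /* vshr_n folds to RSHIFT_EXPR under the same in-range rule;
+ the instruction also accepts a count equal to the element
+ width, which GIMPLE cannot express, so that case is left
+ to the expander.  */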
+ if (TREE_CODE (args[1]) == INTEGER_CST
+ && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ RSHIFT_EXPR, args[0], args[1]);
+ break;
BUILTIN_GPF (BINOP, fmulx, 0, ALL)
{
gcc_assert (nargs == 2);
BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
- BUILTIN_VDQ_I (SHIFTIMM, lshr, 3, NONE)
+ BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
/* Implemented by aarch64_<sur>shr_n<mode>. */
BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0, NONE)
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
- return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
+ return __builtin_aarch64_lshrv8qi_uus (__a, __b);
}
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
- return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
+ return __builtin_aarch64_lshrv4hi_uus (__a, __b);
}
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
- return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
+ return __builtin_aarch64_lshrv2si_uus (__a, __b);
}
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
- return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
+ return __builtin_aarch64_lshrv16qi_uus (__a, __b);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
- return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
+ return __builtin_aarch64_lshrv8hi_uus (__a, __b);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
- return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
+ return __builtin_aarch64_lshrv4si_uus (__a, __b);
}
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
- return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
+ return __builtin_aarch64_lshrv2di_uus (__a, __b);
}
__extension__ extern __inline int64_t
--- /dev/null
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
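+/* An in-range unsigned shift folds to GIMPLE but must still emit ushr by 2.  */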
+uint8x8_t foo (uint8x8_t a)
+{
+ return vshr_n_u8 (a, 2);
+}
+
+/* { dg-final { scan-assembler-times {\tushr\t.+, 2} 1 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
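+/* An in-range signed shift folds to GIMPLE but must still emit sshr by 8.  */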
+int16x8_t foo (int16x8_t a)
+{
+ return vshrq_n_s16 (a, 8);
+}
+
+/* { dg-final { scan-assembler-times {\tsshr\t.+, 8} 1 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
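+/* A count equal to the element width cannot be expressed in GIMPLE, so the
+   builtin must survive folding and still emit sshr by 16.  */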
+int16x8_t foo (int16x8_t a)
+{
+ return vshrq_n_s16 (a, 16);
+}
+
+/* { dg-final { scan-assembler-times {\tsshr\t.+, 16} 1 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
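+/* A count of 80 is out of range for 64-bit elements, so the register-form
+   sshl must remain.  */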
+int64x1_t foo (int64x1_t a)
+{
+ return vshl_s64 (a, vdup_n_s64 (80));
+}
+
+/* { dg-final { scan-assembler-times {\tsshl\t.+, d[0-9]+} 1 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-skip-if "no optimizations" { *-*-* } { "-O0" } { "" } } */
+
+#include <arm_neon.h>
+
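+/* A uniform negative count folds to an arithmetic right shift by 6.  */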
+int64x1_t foo (int64x1_t a)
+{
+ return vshl_s64 (a, vdup_n_s64 (-6));
+}
+
+/* { dg-final { scan-assembler-times {\tsshr\t.+, 6} 1 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
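+/* 256 is out of range for 32-bit elements, so the fold must not happen and
+   sshl must remain.  */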
+int32x4_t foo (int32x4_t x)
+{
+ return vshlq_s32 (x, vdupq_n_s32 (256));
+}
+
+/* { dg-final { scan-assembler-times {\tsshl\t.+, v[0-9]+\.4s} 1 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-skip-if "no optimizations" { *-*-* } { "-O0" } { "" } } */
+
+#include <arm_neon.h>
+
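+/* With both operands constant the shift folds away completely; only the movi
+   materialising the result should remain.  */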
+int32x4_t foo (int32x4_t x)
+{
+ return vshlq_s32 (vdupq_n_s32 (1), vdupq_n_s32 (10));
+}
+
+/* { dg-final { scan-assembler-not {\tsshl\t} } } */
+/* { dg-final { scan-assembler-times {\tmovi\t} 1 } } */
--- /dev/null
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
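+/* A count of -64 is out of range for 32-bit elements, so no fold to a right
+   shift; sshl must remain.  */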
+int32x4_t foo (int32x4_t x)
+{
+ return vshlq_s32 (x, vdupq_n_s32 (-64));
+}
+
+/* { dg-final { scan-assembler-times {\tsshl\t.+, v[0-9]+\.4s} 1 } } */
--- /dev/null
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps" } */
+
+#include <arm_neon.h>
+
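+/* Once vshr_n is lowered to GIMPLE, (-x) >> (width - 1) is recognised as a
+   sign-bit test and matched to cmgt.  */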
+int32x2_t foo1 (int32x2_t a)
+{
+ return vshr_n_s32 (vneg_s32 (a), 31);
+}
+
+int32x4_t foo2 (int32x4_t a)
+{
+ return vshrq_n_s32 (vnegq_s32 (a), 31);
+}
+
+int16x8_t foo3 (int16x8_t a)
+{
+ return vshrq_n_s16 (vnegq_s16 (a), 15);
+}
+
+int16x4_t foo4 (int16x4_t a)
+{
+ return vshr_n_s16 (vneg_s16 (a), 15);
+}
+
+int8x16_t foo5 (int8x16_t a)
+{
+ return vshrq_n_s8 (vnegq_s8 (a), 7);
+}
+
+int8x8_t foo6 (int8x8_t a)
+{
+ return vshr_n_s8 (vneg_s8 (a), 7);
+}
+
+/* { dg-final { scan-assembler-times {\tcmgt\t} 6 } } */