DONE;
})
-;; Using mode VDQ_BHSI as there is no V2DImode neg!
-;; Negating individual lanes most certainly offsets the
-;; gain from vectorization.
(define_expand "vashr<mode>3"
- [(match_operand:VDQ_BHSI 0 "register_operand")
- (match_operand:VDQ_BHSI 1 "register_operand")
- (match_operand:VDQ_BHSI 2 "register_operand")]
+ [(match_operand:VDQ_I 0 "register_operand")
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand")]
"TARGET_SIMD"
{
rtx neg = gen_reg_rtx (<MODE>mode);
)
(define_expand "vlshr<mode>3"
- [(match_operand:VDQ_BHSI 0 "register_operand")
- (match_operand:VDQ_BHSI 1 "register_operand")
- (match_operand:VDQ_BHSI 2 "register_operand")]
+ [(match_operand:VDQ_I 0 "register_operand")
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand")]
"TARGET_SIMD"
{
rtx neg = gen_reg_rtx (<MODE>mode);
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8.2-a" } */
+
+#include "vect-shr-reg.c"
+/* Run-time check of 64-bit element shifts by a per-element amount.  Fills
+   16-element signed and unsigned inputs, calls f and g (not defined in this
+   file; presumably supplied by the included vect-shr-reg.c) and aborts on
+   the first element that differs from its reference value.  Returns 0 when
+   every element matches.  */
+int
+main(void)
+{
+ int64_t a[16];
+ int64_t b[16];
+ int64_t c[17];
+
+ uint64_t ua[16];
+ uint64_t ub[16];
+ uint64_t uc[17];
+
+ int64_t res_a[16];
+ uint64_t res_ua[16];
+
+ int i;
+
+ /* Set up inputs: every signed element is -2 with a shift amount of 34,
+    every unsigned element is all-ones with a shift amount of 52.
+    NOTE(review): c and uc are declared with 17 elements but only indices
+    0-15 are written here; confirm the extra element is intentional.  */
+ for (i = 0; i < 16; i++)
+ {
+ b[i] = -2;
+ c[i] = 34;
+ ub[i] = 0xffffffffffffffff;
+ uc[i] = 52;
+ }
+
+ /* Set up reference values.  -1 is what -2 shifted right by 34 yields when
+    the sign bit is propagated; 0x0fff is all-ones shifted right by 52 with
+    zero fill (the 12 low bits remain set).  */
+ for (i = 0; i < 16; i++)
+ {
+ res_a[i] = -1LL;
+ res_ua[i] = 0x0fffLL;
+ }
+
+ /* Do the shifts: f receives the unsigned arrays, g the signed ones.  The
+    first argument of each call is the output array.  */
+ f (ua, ub, uc);
+ g (a, b, c);
+
+ /* Compare outputs against reference values; any mismatch in either the
+    signed or the unsigned result aborts the test.  */
+ for (i = 0; i < 16; i++)
+ {
+ if (a[i] != res_a[i])
+ __builtin_abort ();
+
+ if (ua[i] != res_ua[i])
+ __builtin_abort ();
+ }
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#pragma GCC target "+nosve"
+/* Unsigned 64-bit right shift by a per-element amount: stores
+   b[i] >> c[i] into a[i] for i = 0 .. 15.  The three pointers are
+   restrict-qualified, and the function is marked noinline.
+   NOTE(review): the return type is int but there is no return statement,
+   so callers must not use the returned value.  */
+int __attribute__((noinline))
+f(uint64_t *__restrict a, uint64_t *__restrict b, uint64_t *__restrict c)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ a[i] = b[i] >> c[i];
+}
+
+/* Signed 64-bit right shift by a per-element amount: stores
+   b[i] >> c[i] into a[i] for i = 0 .. 15.  The three pointers are
+   restrict-qualified, and the function is marked noinline.
+   NOTE(review): ISO C leaves >> on a negative signed value
+   implementation-defined; this presumably relies on the compiler
+   propagating the sign bit -- confirm.
+   NOTE(review): the return type is int but there is no return statement,
+   so callers must not use the returned value.  */
+int __attribute__((noinline))
+g(int64_t *__restrict a, int64_t *__restrict b, int64_t *__restrict c)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ a[i] = b[i] >> c[i];
+}
+
+/* { dg-final { scan-assembler "neg\\tv" } } */
+/* { dg-final { scan-assembler "ushl\\tv" } } */
+/* { dg-final { scan-assembler "sshl\\tv" } } */