With -mfpu=neon DI mode shifts are expanded after reload.
author Wilco Dijkstra <wdijkstr@arm.com>
Tue, 25 Oct 2016 10:25:28 +0000 (10:25 +0000)
committer Wilco Dijkstra <wilco@gcc.gnu.org>
Tue, 25 Oct 2016 10:25:28 +0000 (10:25 +0000)
With -mfpu=neon DI mode shifts are expanded after reload.  DI mode registers can
either fully or partially overlap on both ARM and Thumb-2.  However the shift
expansion code can only deal with the full overlap case, and generates incorrect
code for partial overlaps.  The fix is to add new variants that support either
full overlap or no overlap.

    gcc/
PR target/78041
* config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
Remove partial overlap check for shift by 1.
(<shift>di3_neon): Likewise.
    testsuite/
* gcc.target/arm/pr78041.c: New test.

From-SVN: r241508

gcc/ChangeLog
gcc/config/arm/neon.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/pr78041.c [new file with mode: 0644]

index c1d8f94..aaf07f4 100644 (file)
@@ -1,3 +1,10 @@
+2016-10-25  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       PR target/78041
+       * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
+       Remove partial overlap check for shift by 1.
+       (<shift>di3_neon): Likewise.
+
 2016-10-25  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
        * config/arm/constraints.md (Q constraint): Document its use for
index 0532333..59316de 100644 (file)
 )
 
 (define_insn_and_split "ashldi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand"           "= w, w,?&r,?r, ?w,w")
-       (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
-                  (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,rUm,i")))
-   (clobber (match_scratch:SI 3                                    "= X, X,?&r, X,  X,X"))
-   (clobber (match_scratch:SI 4                                    "= X, X,?&r, X,  X,X"))
-   (clobber (match_scratch:DI 5                                    "=&w, X,  X, X, &w,X"))
+  [(set (match_operand:DI 0 "s_register_operand"           "= w, w,?&r,?r,?&r, ?w,w")
+       (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w,w")
+                  (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm,i")))
+   (clobber (match_scratch:SI 3                                    "= X, X,?&r, X,  X,  X,X"))
+   (clobber (match_scratch:SI 4                                    "= X, X,?&r, X,  X,  X,X"))
+   (clobber (match_scratch:DI 5                                    "=&w, X,  X, X,  X, &w,X"))
    (clobber (reg:CC_C CC_REGNUM))]
   "TARGET_NEON"
   "#"
       }
     else
       {
-       if (operands[2] == CONST1_RTX (SImode)
-           && (!reg_overlap_mentioned_p (operands[0], operands[1])
-               || REGNO (operands[0]) == REGNO (operands[1])))
+       /* The shift expanders support either full overlap or no overlap.  */
+       gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
+                   || REGNO (operands[0]) == REGNO (operands[1]));
+
+       if (operands[2] == CONST1_RTX (SImode))
          /* This clobbers CC.  */
          emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
        else
       }
     DONE;
   }"
-  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
-   (set_attr "opt" "*,*,speed,speed,*,*")
+  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
+   (set_attr "opt" "*,*,speed,speed,speed,*,*")
    (set_attr "type" "multiple")]
 )
 
 ;; ashrdi3_neon
 ;; lshrdi3_neon
 (define_insn_and_split "<shift>di3_neon"
-  [(set (match_operand:DI 0 "s_register_operand"            "= w, w,?&r,?r,?w,?w")
-       (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
-                   (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, i")))
-   (clobber (match_scratch:SI 3                                     "=2r, X, &r, X,2r, X"))
-   (clobber (match_scratch:SI 4                                     "= X, X, &r, X, X, X"))
-   (clobber (match_scratch:DI 5                                     "=&w, X,  X, X,&w, X"))
+  [(set (match_operand:DI 0 "s_register_operand"            "= w, w,?&r,?r,?&r,?w,?w")
+       (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
+                   (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
+   (clobber (match_scratch:SI 3                                     "=2r, X, &r, X,  X,2r, X"))
+   (clobber (match_scratch:SI 4                                     "= X, X, &r, X,  X, X, X"))
+   (clobber (match_scratch:DI 5                                     "=&w, X,  X, X, X,&w, X"))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
   "#"
       }
     else
       {
-       if (operands[2] == CONST1_RTX (SImode)
-           && (!reg_overlap_mentioned_p (operands[0], operands[1])
-               || REGNO (operands[0]) == REGNO (operands[1])))
+       /* The shift expanders support either full overlap or no overlap.  */
+       gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
+                   || REGNO (operands[0]) == REGNO (operands[1]));
+
+       if (operands[2] == CONST1_RTX (SImode))
          /* This clobbers CC.  */
          emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
        else
 
     DONE;
   }"
-  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
-   (set_attr "opt" "*,*,speed,speed,*,*")
+  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
+   (set_attr "opt" "*,*,speed,speed,speed,*,*")
    (set_attr "type" "multiple")]
 )
 
index 1dc0732..02d8ac6 100644 (file)
@@ -1,3 +1,8 @@
+2016-10-25  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       PR target/78041
+       * gcc.target/arm/pr78041.c: New test.
+
 2016-10-25  Jakub Jelinek  <jakub@redhat.com>
 
        * g++.dg/cpp1z/launder1.C: New test.
diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c
new file mode 100644 (file)
index 0000000..340ab5c
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */
+
+extern void abort (void);
+
+register long long x asm ("r1");
+
+long long f (void)
+{
+  return x << 5;
+}
+
+int main ()
+{
+  x = 0x0100000001;
+  if (f () != 0x2000000020)
+    abort ();
+  return 0;
+}