[AArch64] Optimise aarch64_add_offset for SVE VL constants

author    Richard Sandiford <richard.sandiford@arm.com>
          Thu, 15 Aug 2019 08:50:00 +0000 (08:50 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
          Thu, 15 Aug 2019 08:50:00 +0000 (08:50 +0000)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index c3aa113..5fbf319 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,12 @@
  2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>
  
+       * config/aarch64/aarch64.c (aarch64_add_offset): In the fallback
+       multiplication case, try to compute VG * (lowest set bit) directly
+       rather than always basing the multiplication on VG.  Use
+       expand_mult for the multiplication if we can.
+
+2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>
+
         * config/aarch64/aarch64-protos.h
         (aarch64_sve_scalar_inc_dec_immediate_p): Declare.
         (aarch64_sve_inc_dec_immediate_p): Rename to...
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index b8c947e..3b0dcea 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -73,6 +73,7 @@
  #include "selftest-rtl.h"
  #include "rtx-vector-builder.h"
  #include "intl.h"
+#include "expmed.h"
  
  /* This file should be included last.  */
  #include "target-def.h"
@@ -3465,20 +3466,36 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
         }
        else
         {
-         /* Use CNTD, then multiply it by FACTOR.  */
-         val = gen_int_mode (poly_int64 (2, 2), mode);
+         /* Base the factor on LOW_BIT if we can calculate LOW_BIT
+            directly, since that should increase the chances of being
+            able to use a shift and add sequence.  If LOW_BIT itself
+            is out of range, just use CNTD.  */
+         if (low_bit <= 16 * 8)
+           factor /= low_bit;
+         else
+           low_bit = 1;
+
+         val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode);
           val = aarch64_force_temporary (mode, temp1, val);
  
-         /* Go back to using a negative multiplication factor if we have
-            no register from which to subtract.  */
-         if (code == MINUS && src == const0_rtx)
+         if (can_create_pseudo_p ())
             {
-             factor = -factor;
-             code = PLUS;
+             rtx coeff1 = gen_int_mode (factor, mode);
+             val = expand_mult (mode, val, coeff1, NULL_RTX, false, true);
+           }
+         else
+           {
+             /* Go back to using a negative multiplication factor if we have
+                no register from which to subtract.  */
+             if (code == MINUS && src == const0_rtx)
+               {
+                 factor = -factor;
+                 code = PLUS;
+               }
+             rtx coeff1 = gen_int_mode (factor, mode);
+             coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
+             val = gen_rtx_MULT (mode, val, coeff1);
             }
-         rtx coeff1 = gen_int_mode (factor, mode);
-         coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
-         val = gen_rtx_MULT (mode, val, coeff1);
         }
  
        if (shift > 0)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 9f667f2..0008ff9 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
  2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>
  
+       * gcc.target/aarch64/sve/loop_add_4.c: Expect 10 INCWs and
+       INCDs rather than 8.
+
+2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>
+
         * gcc.target/aarch64/sve/revb_1.c: Restrict to little-endian targets.
         Avoid including stdint.h.
         * gcc.target/aarch64/sve/revh_1.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c

index 7f02497..9ead9c2 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
@@ -68,7 +68,8 @@ TEST_ALL (LOOP)
  /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */
  /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
  /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
-/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */
+/* 2 for the calculations of -17 and 17.  */
+/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */
  
  /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */
  /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */
@@ -85,7 +86,8 @@ TEST_ALL (LOOP)
  /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */
  /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
  /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
-/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */
+/* 2 for the calculations of -17 and 17.  */
+/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */
  
  /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */
  /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */
author	Richard Sandiford <richard.sandiford@arm.com>
	Thu, 15 Aug 2019 08:50:00 +0000 (08:50 +0000)
committer	Richard Sandiford <rsandifo@gcc.gnu.org>
	Thu, 15 Aug 2019 08:50:00 +0000 (08:50 +0000)
gcc/ChangeLog		patch | blob | history
gcc/config/aarch64/aarch64.c		patch | blob | history
gcc/testsuite/ChangeLog		patch | blob | history
gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c		patch | blob | history