[AArch64][2/5] Implement adrp+add fusion 25/45825/1
author ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 24 Nov 2014 10:50:45 +0000 (10:50 +0000)
committer Nikolai Bozhenov <n.bozhenov@samsung.com>
Tue, 11 Aug 2015 14:12:39 +0000 (17:12 +0300)
* config/aarch64/aarch64.c: Include tm-constrs.h
(AARCH64_FUSE_ADRP_ADD): Define.
(cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
(cortexa53_tunings): Likewise.
(aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.

Change-Id: I7eb8804fa52bdfbb4c4fdc3ad72371efecdcd13c
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@218010 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/aarch64/aarch64.c

index aa43a9e..c96484c 100644 (file)
@@ -1,5 +1,13 @@
 2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
+       * config/aarch64/aarch64.c: Include tm-constrs.h
+       (AARCH64_FUSE_ADRP_ADD): Define.
+       (cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
+       (cortexa53_tunings): Likewise.
+       (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.
+
+2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
        * config/aarch64/aarch64-protos.h (struct tune_params): Add
        fuseable_ops field.
        * config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
index 2448aa8..3e18c0a 100644 (file)
@@ -64,6 +64,7 @@
 #include "tree-vectorizer.h"
 #include "config/arm/aarch-cost-tables.h"
 #include "dumpfile.h"
+#include "tm-constrs.h"
 
 /* Defined for convenience.  */
 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -266,6 +267,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
 
 #define AARCH64_FUSE_NOTHING   (0)
 #define AARCH64_FUSE_MOV_MOVK  (1 << 0)
+#define AARCH64_FUSE_ADRP_ADD  (1 << 1)
 
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -289,7 +291,7 @@ static const struct tune_params cortexa53_tunings =
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 2),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -300,7 +302,7 @@ static const struct tune_params cortexa57_tunings =
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 3),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
 };
 
 /* A processor implementing AArch64.  */
@@ -9391,6 +9393,32 @@ aarch_macro_fusion_pair_p (rtx prev, rtx curr)
         }
     }
 
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
+    {
+
+      /*  We're trying to match:
+          prev (adrp) == (set (reg r1)
+                              (high (symbol_ref ("SYM"))))
+          curr (add) == (set (reg r0)
+                             (lo_sum (reg r1)
+                                     (symbol_ref ("SYM"))))
+          Note that r0 need not necessarily be the same as r1, especially
+          during pre-regalloc scheduling.  */
+
+      if (satisfies_constraint_Ush (SET_SRC (prev_set))
+          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
+        {
+          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+              && REG_P (XEXP (SET_SRC (curr_set), 0))
+              && REGNO (XEXP (SET_SRC (curr_set), 0))
+                 == REGNO (SET_DEST (prev_set))
+              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
+                              XEXP (SET_SRC (curr_set), 1)))
+            return true;
+        }
+    }
+
   return false;
 }