2013-11-14 Julian Brown <julian@codesourcery.com>

author jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4>

Thu, 14 Nov 2013 08:38:54 +0000 (08:38 +0000)

committer jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4>

Thu, 14 Nov 2013 08:38:54 +0000 (08:38 +0000)
author jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 14 Nov 2013 08:38:54 +0000 (08:38 +0000)
committer jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 14 Nov 2013 08:38:54 +0000 (08:38 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 1a591cb..636cdba 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2013-11-14  Julian Brown  <julian@codesourcery.com>
+       Joey Ye  <joey.ye@arm.com>
+
+       * config/arm/arm.c (arm_cortex_m_branch_cost): New.
+       (arm_v7m_tune): New.
+       (arm_slowmul_tune, arm_fastmul_tune,
+       arm_strongarm_tune, arm_9e_tune, arm_v6t2_tune,
+       arm_cortex_tune, arm_cortex_a15_tune,
+       arm_cortex_a5_tune, arm_v6m_tune): Add comments
+       for Sched adj cost.
+       * config/arm/arm-cores.def (cortex-m4, cortex-m3):
+       Use arm_v7m_tune.
+
  2013-11-14  Kirill Yukhin  <kirill.yukhin@intel.com>
  
         PR target/57491
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def

index 79e2e87..d664e73 100644 (file)
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -134,8 +134,8 @@ ARM_CORE("cortex-r4",         cortexr4,     7R,                              FL_LDSCHED, cortex)
  ARM_CORE("cortex-r4f",   cortexr4f,    7R,                              FL_LDSCHED, cortex)
  ARM_CORE("cortex-r5",    cortexr5,     7R,                              FL_LDSCHED | FL_ARM_DIV, cortex)
  ARM_CORE("cortex-r7",    cortexr7,     7R,                              FL_LDSCHED | FL_ARM_DIV, cortex)
-ARM_CORE("cortex-m4",    cortexm4,     7EM,                             FL_LDSCHED, cortex)
-ARM_CORE("cortex-m3",    cortexm3,     7M,                              FL_LDSCHED, cortex)
+ARM_CORE("cortex-m4",    cortexm4,     7EM,                             FL_LDSCHED, v7m)
+ARM_CORE("cortex-m3",    cortexm3,     7M,                              FL_LDSCHED, v7m)
  ARM_CORE("cortex-m1",    cortexm1,     6M,                              FL_LDSCHED, v6m)
  ARM_CORE("cortex-m0",    cortexm0,     6M,                              FL_LDSCHED, v6m)
  ARM_CORE("cortex-m0plus", cortexm0plus,        6M,                              FL_LDSCHED, v6m)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

index d3734c3..64c6b49 100644 (file)
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -266,6 +266,7 @@ static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
  static unsigned int arm_autovectorize_vector_sizes (void);
  static int arm_default_branch_cost (bool, bool);
  static int arm_cortex_a5_branch_cost (bool, bool);
+static int arm_cortex_m_branch_cost (bool, bool);
  
  static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                              const unsigned char *sel);
@@ -1260,7 +1261,7 @@ const struct tune_params arm_slowmul_tune =
  {
    arm_slowmul_rtx_costs,
    NULL,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    3,                                           /* Constant limit.  */
    5,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1276,7 +1277,7 @@ const struct tune_params arm_fastmul_tune =
  {
    arm_fastmul_rtx_costs,
    NULL,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    1,                                           /* Constant limit.  */
    5,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1295,7 +1296,7 @@ const struct tune_params arm_strongarm_tune =
  {
    arm_fastmul_rtx_costs,
    NULL,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    1,                                           /* Constant limit.  */
    3,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1327,7 +1328,7 @@ const struct tune_params arm_9e_tune =
  {
    arm_9e_rtx_costs,
    NULL,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    1,                                           /* Constant limit.  */
    5,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1343,7 +1344,7 @@ const struct tune_params arm_v6t2_tune =
  {
    arm_9e_rtx_costs,
    NULL,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    1,                                           /* Constant limit.  */
    5,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1360,7 +1361,7 @@ const struct tune_params arm_cortex_tune =
  {
    arm_9e_rtx_costs,
    &generic_extra_costs,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    1,                                           /* Constant limit.  */
    5,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1392,7 +1393,7 @@ const struct tune_params arm_cortex_a15_tune =
  {
    arm_9e_rtx_costs,
    &cortexa15_extra_costs,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    1,                                           /* Constant limit.  */
    2,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1411,7 +1412,7 @@ const struct tune_params arm_cortex_a5_tune =
  {
    arm_9e_rtx_costs,
    NULL,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    1,                                           /* Constant limit.  */
    1,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1439,13 +1440,36 @@ const struct tune_params arm_cortex_a9_tune =
    false                                         /* Prefer Neon for 64-bits bitops.  */
  };
  
+/* armv7m tuning.  On Cortex-M4 cores for example, MOVW and MOVT each take a
+   single cycle to execute, i.e. two cycles for the pair.  An LDR from the
+   constant pool also takes two cycles to execute, but mildly increases
+   pipelining opportunity (consecutive loads/stores can be pipelined together,
+   saving one cycle), and may also improve icache utilisation.  Hence we
+   prefer the constant pool for such processors.  */
+
+const struct tune_params arm_v7m_tune =
+{
+  arm_9e_rtx_costs,
+  &generic_extra_costs,
+  NULL,                                                /* Sched adj cost.  */
+  1,                                           /* Constant limit.  */
+  5,                                           /* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,                                                /* Prefer constant pool.  */
+  arm_cortex_m_branch_cost,
+  false,                                       /* Prefer LDRD/STRD.  */
+  {false, false},                              /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bits bitops.  */
+};
+
  /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
     arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
  const struct tune_params arm_v6m_tune =
  {
    arm_9e_rtx_costs,
    NULL,
-  NULL,
+  NULL,                                                /* Sched adj cost.  */
    1,                                           /* Constant limit.  */
    5,                                           /* Max cond insns.  */
    ARM_PREFETCH_NOT_BENEFICIAL,
@@ -11241,6 +11265,20 @@ arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
    return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
  }
  
+/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
+   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
+   sequences of non-executed instructions in IT blocks probably take the same
+   amount of time as executed instructions (and the IT instruction itself takes
+   space in icache).  This function was experimentally determined to give good
+   results on a popular embedded benchmark.  */
+
+static int
+arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
+{
+  return (TARGET_32BIT && speed_p) ? 1
+         : arm_default_branch_cost (speed_p, predictable_p);
+}
+
  static bool fp_consts_inited = false;
  
  static REAL_VALUE_TYPE value_fp0;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index ec8d248..6d8e43a 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2013-11-14  Joey Ye  <joey.ye@arm.com>
+
+       * gcc.dg/tree-ssa/forwprop-28.c: Disable for cortex_m.
+       * gcc.dg/tree-ssa/vrp47.c: Likewise.
+       * gcc.dg/tree-ssa/vrp87.c: Likewise.
+       * gcc.dg/tree-ssa/ssa-dom-thread-4.c: Ignore for cortex_m.
+       * gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Likewise.
+
  2013-11-14  Adam Butcher  <adam@jessamine.co.uk>
  
         PR c++/58533
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c

index 06b406f..1a4bf4a 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c
@@ -1,5 +1,9 @@
  /* { dg-do compile { target { ! "m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* v850*-*-* picochip*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* powerpc*-*-* xtensa*-*-* arc*-*-*"} } } */
  /* { dg-options "-O2 -fdump-tree-forwprop1" } */
+/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
+   leading to two conditional jumps when evaluating an && condition.  Forwprop1
+   is not able to optimize this.  */
+/* { dg-skip-if "" { arm_cortex_m } } */
  
  extern char *frob (void);
  extern _Bool testit (void);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c

index fec3075..0e4797c 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c
@@ -59,9 +59,9 @@ bitmap_ior_and_compl (bitmap dst, const_bitmap a, const_bitmap b,
     code we missed the edge when the first conditional is false
     (b_elt is zero, which means the second conditional is always
     zero.  */
-/* ARM Cortex-M0 defined LOGICAL_OP_NON_SHORT_CIRCUIT to false,
+/* ARM Cortex-M defines LOGICAL_OP_NON_SHORT_CIRCUIT to false,
     so skip below test.  */
-/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m && arm_thumb1 } } } } } } */
+/* { dg-final { scan-tree-dump-times "Threaded" 3 "dom1" { target { ! { { mips*-*-* avr-*-* arc*-*-* } || { arm_cortex_m } } } } } } */
  /* MIPS defines LOGICAL_OP_NON_SHORT_CIRCUIT to 0, so we split both
     "a_elt || b_elt" and "b_elt && kill_elt" into two conditions each,
     rather than using "(var1 != 0) op (var2 != 0)".  Also, as on other targets,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c

index 9d9473e..b498d8b 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c
@@ -26,6 +26,8 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
    oof ();
  }
  
-/* { dg-final { scan-tree-dump-times "Threaded" 1 "vrp1" }  } */
+/* ARM Cortex-M defines LOGICAL_OP_NON_SHORT_CIRCUIT to false,
+   so skip the test below.  */
+/* { dg-final { scan-tree-dump-times "Threaded" 1 "vrp1" { target { ! arm_cortex_m } } } } */
  /* { dg-final { cleanup-tree-dump "vrp1" } } */
  
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c

index 74b520b..5a09fa0 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c
@@ -6,10 +6,10 @@
  /* { dg-do compile { target { ! "mips*-*-* arc*-*-* s390*-*-*  avr-*-* mn10300-*-*" } } } */
  /* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-dom1 -fdump-tree-vrp2" } */
  /* { dg-additional-options "-march=i586" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
-/* Skip on ARM Cortex-M0, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
+/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
     leading to two conditional jumps when evaluating an && condition.  VRP is
     not able to optimize this.  */
-/* { dg-skip-if "" { arm_cortex_m && arm_thumb1} } */
+/* { dg-skip-if "" { arm_cortex_m } } */
  
  int h(int x, int y)
  {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp87.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp87.c

index aa85191..9aff0a6 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp87.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp87.c
@@ -2,6 +2,10 @@
  
  /* { dg-options "-O2 -fdump-tree-vrp2-details -fdump-tree-cddce2-details" } */
  /* { dg-additional-options "-mbranch-cost=2" { target avr-*-* } } */
+/* Skip on ARM Cortex-M, where LOGICAL_OP_NON_SHORT_CIRCUIT is set to false,
+   leading to two conditional jumps when evaluating an && condition.  VRP is
+   not able to optimize this.  */
+/* { dg-skip-if "" { arm_cortex_m } } */
  
  struct bitmap_head_def;
  typedef struct bitmap_head_def *bitmap;
author	jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4>
	Thu, 14 Nov 2013 08:38:54 +0000 (08:38 +0000)
committer	jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4>
	Thu, 14 Nov 2013 08:38:54 +0000 (08:38 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/arm/arm-cores.def		patch \| blob \| history
gcc/config/arm/arm.c		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.dg/tree-ssa/forwprop-28.c		patch \| blob \| history
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c		patch \| blob \| history
gcc/testsuite/gcc.dg/tree-ssa/ssa-vrp-thread-1.c		patch \| blob \| history
gcc/testsuite/gcc.dg/tree-ssa/vrp47.c		patch \| blob \| history
gcc/testsuite/gcc.dg/tree-ssa/vrp87.c		patch \| blob \| history