2015-04-07 Michael Collison <michael.collison@linaro.org>
authorcollison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 7 Apr 2015 21:05:46 +0000 (21:05 +0000)
committercollison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 7 Apr 2015 21:05:46 +0000 (21:05 +0000)
Backport from trunk r219679.
2015-01-15  Richard Earnshaw  <rearnsha@arm.com>

* config/arm/arm.c (arm_xgene1_tune): Add default initializer for
instruction fusion.

2015-04-07  Michael Collison  <michael.collison@linaro.org>

Backport from trunk r219661.
2015-01-15  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>

* config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list.
Include xgene1.md.
* config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1.
* config/arm/arm-cores.def (xgene1): New entry.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm-tune.md: Regenerate.
* config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1.

2015-04-07  Michael Collison  <michael.collison@linaro.org>

Backport from trunk r219657.
2015-01-15  Philipp Tomsich  <ptomsich@theobroma-systems.com>

* config/aarch64/aarch64.md: Include xgene1.md.
* config/aarch64/xgene1.md: New file.

2015-04-07  Michael Collison  <michael.collison@linaro.org>

Backport from trunk r219656.
2015-01-15  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>

* config/aarch64/aarch64-cores.def (xgene1): Update/add the
xgene1 (APM XGene-1) core definition.
* config/aarch64/aarch64.c: Add cost tables for APM XGene-1.
* config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1.
* doc/invoke.texi: Document -mcpu=xgene1.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@221911 138bc75d-0d04-0410-961f-82ee72b054a4

15 files changed:
gcc/ChangeLog.linaro
gcc/config/aarch64/aarch64-cores.def
gcc/config/aarch64/aarch64-tune.md
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/aarch64.md
gcc/config/arm/aarch-cost-tables.h
gcc/config/arm/arm-cores.def
gcc/config/arm/arm-tables.opt
gcc/config/arm/arm-tune.md
gcc/config/arm/arm.c
gcc/config/arm/arm.md
gcc/config/arm/bpabi.h
gcc/config/arm/t-arm
gcc/config/arm/xgene1.md [new file with mode: 0644]
gcc/doc/invoke.texi

index 499068a..a05a965 100644 (file)
@@ -1,3 +1,43 @@
+2015-04-07  Michael Collison  <michael.collison@linaro.org>
+
+       Backport from trunk r219679.
+       2015-01-15  Richard Earnshaw  <rearnsha@arm.com>
+
+       * config/arm/arm.c (arm_xgene1_tune): Add default initializer for
+       instruction fusion.
+
+2015-04-07  Michael Collison  <michael.collison@linaro.org>
+
+       Backport from trunk r219661.
+       2015-01-15  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+       * config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list.
+       Include xgene1.md.
+       * config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1.
+       * config/arm/arm-cores.def (xgene1): New entry.
+       * config/arm/arm-tables.opt: Regenerate.
+       * config/arm/arm-tune.md: Regenerate.
+       * config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1.
+
+2015-04-07  Michael Collison  <michael.collison@linaro.org>
+
+       Backport from trunk r219657.
+       2015-01-15  Philipp Tomsich  <ptomsich@theobroma-systems.com>
+
+       * config/aarch64/aarch64.md: Include xgene1.md.
+       * config/aarch64/xgene1.md: New file.
+
+2015-04-07  Michael Collison  <michael.collison@linaro.org>
+
+       Backport from trunk r219656.
+       2015-01-15  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+       * config/aarch64/aarch64-cores.def (xgene1): Update/add the
+       xgene1 (APM XGene-1) core definition.
+       * config/aarch64/aarch64.c: Add cost tables for APM XGene-1.
+       * config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1.
+       * doc/invoke.texi: Document -mcpu=xgene1.
+
 2015-04-07  Yvan Roux  <yvan.roux@linaro.org>
 
        Backport from trunk r217062, r217646, r218658.
index 110b41f..2b694e6 100644 (file)
@@ -37,6 +37,7 @@
 AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53)
 AARCH64_CORE("cortex-a57",  cortexa15, cortexa15, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
 AARCH64_CORE("thunderx",    thunderx,  thunderx, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx)
+AARCH64_CORE("xgene1",      xgene1,    xgene1,    8,  AARCH64_FL_FOR_ARCH8, xgene1)
 
 /* V8 big.LITTLE implementations.  */
 
index c717ea8..6409082 100644 (file)
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-       "cortexa53,cortexa15,thunderx,cortexa57cortexa53"
+       "cortexa53,cortexa15,thunderx,xgene1,cortexa57cortexa53"
        (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
index ae2f1aa..ce0ac18 100644 (file)
@@ -215,6 +215,27 @@ static const struct cpu_addrcost_table cortexa57_addrcost_table =
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
+static const struct cpu_addrcost_table xgene1_addrcost_table =
+{
+#if HAVE_DESIGNATED_INITIALIZERS
+  .addr_scale_costs =
+#endif
+    {
+      NAMED_PARAM (hi, 1),
+      NAMED_PARAM (si, 0),
+      NAMED_PARAM (di, 0),
+      NAMED_PARAM (ti, 1),
+    },
+  NAMED_PARAM (pre_modify, 1),
+  NAMED_PARAM (post_modify, 0),
+  NAMED_PARAM (register_offset, 0),
+  NAMED_PARAM (register_extend, 1),
+  NAMED_PARAM (imm_offset, 0),
+};
+
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
 static const struct cpu_regmove_cost generic_regmove_cost =
 {
   NAMED_PARAM (GP2GP, 1),
@@ -253,6 +274,16 @@ static const struct cpu_regmove_cost thunderx_regmove_cost =
   NAMED_PARAM (FP2FP, 4)
 };
 
+static const struct cpu_regmove_cost xgene1_regmove_cost =
+{
+  NAMED_PARAM (GP2GP, 1),
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  NAMED_PARAM (GP2FP, 8),
+  NAMED_PARAM (FP2GP, 8),
+  NAMED_PARAM (FP2FP, 2)
+};
+
 /* Generic costs for vector insn classes.  */
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -293,6 +324,26 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 };
 
+/* XGene-1 costs for vector insn classes.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost xgene1_vector_cost =
+{
+  NAMED_PARAM (scalar_stmt_cost, 1),
+  NAMED_PARAM (scalar_load_cost, 5),
+  NAMED_PARAM (scalar_store_cost, 1),
+  NAMED_PARAM (vec_stmt_cost, 2),
+  NAMED_PARAM (vec_to_scalar_cost, 4),
+  NAMED_PARAM (scalar_to_vec_cost, 4),
+  NAMED_PARAM (vec_align_load_cost, 10),
+  NAMED_PARAM (vec_unalign_load_cost, 10),
+  NAMED_PARAM (vec_unalign_store_cost, 2),
+  NAMED_PARAM (vec_store_cost, 2),
+  NAMED_PARAM (cond_taken_branch_cost, 2),
+  NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
 #define AARCH64_FUSE_NOTHING   (0)
 #define AARCH64_FUSE_MOV_MOVK  (1 << 0)
 #define AARCH64_FUSE_ADRP_ADD  (1 << 1)
@@ -360,6 +411,17 @@ static const struct tune_params thunderx_tunings =
   1    /* vec_reassoc_width.  */
 };
 
+static const struct tune_params xgene1_tunings =
+{
+  &xgene1_extra_costs,
+  &xgene1_addrcost_table,
+  &xgene1_regmove_cost,
+  &xgene1_vector_cost,
+  NAMED_PARAM (memmov_cost, 6),
+  NAMED_PARAM (issue_rate, 4),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING) /* NOTE(review): thunderx_tunings above ends with a vec_reassoc_width entry; none is given here -- confirm.  */
+};
+
 /* A processor implementing AArch64.  */
 struct processor
 {
index 9788016..0ab9c38 100644 (file)
 (include "../arm/cortex-a53.md")
 (include "../arm/cortex-a15.md")
 (include "thunderx.md")
+(include "../arm/xgene1.md")
 
 ;; -------------------------------------------------------------------
 ;; Jumps and other miscellaneous insns
index adf8708..7f74006 100644 (file)
@@ -325,4 +325,105 @@ const struct cpu_cost_table cortexa57_extra_costs =
   }
 };
 
+const struct cpu_cost_table xgene1_extra_costs =
+{
+  /* ALU */
+  {
+    0,                 /* arith.  */
+    0,                 /* logical.  */
+    0,                 /* shift.  */
+    COSTS_N_INSNS (1), /* shift_reg.  */
+    COSTS_N_INSNS (1), /* arith_shift.  */
+    COSTS_N_INSNS (1), /* arith_shift_reg.  */
+    COSTS_N_INSNS (1), /* log_shift.  */
+    COSTS_N_INSNS (1), /* log_shift_reg.  */
+    COSTS_N_INSNS (1), /* extend.  */
+    0,                 /* extend_arithm.  */
+    COSTS_N_INSNS (1), /* bfi.  */
+    COSTS_N_INSNS (1), /* bfx.  */
+    0,                 /* clz.  */
+    COSTS_N_INSNS (1), /* rev.  */
+    0,                 /* non_exec.  */
+    true               /* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (4),       /* simple.  */
+      COSTS_N_INSNS (4),       /* flag_setting.  */
+      COSTS_N_INSNS (4),       /* extend.  */
+      COSTS_N_INSNS (4),       /* add.  */
+      COSTS_N_INSNS (4),       /* extend_add.  */
+      COSTS_N_INSNS (20)       /* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (5),       /* simple.  */
+      0,                       /* flag_setting (N/A).  */
+      COSTS_N_INSNS (5),       /* extend.  */
+      COSTS_N_INSNS (5),       /* add.  */
+      COSTS_N_INSNS (5),       /* extend_add.  */
+      COSTS_N_INSNS (21)       /* idiv.  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (5),         /* load.  */
+    COSTS_N_INSNS (6),         /* load_sign_extend.  */
+    COSTS_N_INSNS (5),         /* ldrd.  */
+    COSTS_N_INSNS (5),         /* ldm_1st.  */
+    1,                         /* ldm_regs_per_insn_1st.  */
+    1,                         /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (10),        /* loadf.  */
+    COSTS_N_INSNS (10),        /* loadd.  */
+    COSTS_N_INSNS (5),         /* load_unaligned.  */
+    0,                         /* store.  */
+    0,                         /* strd.  */
+    0,                         /* stm_1st.  */
+    1,                         /* stm_regs_per_insn_1st.  */
+    1,                         /* stm_regs_per_insn_subsequent.  */
+    0,                         /* storef.  */
+    0,                         /* stored.  */
+    0,                         /* store_unaligned.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (23),      /* div.  */
+      COSTS_N_INSNS (5),       /* mult.  */
+      COSTS_N_INSNS (5),       /* mult_addsub. */
+      COSTS_N_INSNS (5),       /* fma.  */
+      COSTS_N_INSNS (5),       /* addsub.  */
+      COSTS_N_INSNS (2),       /* fpconst. */
+      COSTS_N_INSNS (3),       /* neg.  */
+      COSTS_N_INSNS (2),       /* compare.  */
+      COSTS_N_INSNS (6),       /* widen.  */
+      COSTS_N_INSNS (6),       /* narrow.  */
+      COSTS_N_INSNS (4),       /* toint.  */
+      COSTS_N_INSNS (4),       /* fromint.  */
+      COSTS_N_INSNS (4)        /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (29),      /* div.  */
+      COSTS_N_INSNS (5),       /* mult.  */
+      COSTS_N_INSNS (5),       /* mult_addsub.  */
+      COSTS_N_INSNS (5),       /* fma.  */
+      COSTS_N_INSNS (5),       /* addsub.  */
+      COSTS_N_INSNS (3),       /* fpconst.  */
+      COSTS_N_INSNS (3),       /* neg.  */
+      COSTS_N_INSNS (2),       /* compare.  */
+      COSTS_N_INSNS (6),       /* widen.  */
+      COSTS_N_INSNS (6),       /* narrow.  */
+      COSTS_N_INSNS (4),       /* toint.  */
+      COSTS_N_INSNS (4),       /* fromint.  */
+      COSTS_N_INSNS (4)        /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (2)  /* alu.  */
+  }
+};
+
 #endif /* GCC_AARCH_COST_TABLES_H */
index d5067b0..910431a 100644 (file)
@@ -165,6 +165,7 @@ ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7,       7A,  FL_LDSCHED |
 /* V8 Architecture Processors */
 ARM_CORE("cortex-a53", cortexa53, cortexa53,   8A, FL_LDSCHED | FL_CRC32, cortex_a53)
 ARM_CORE("cortex-a57", cortexa57, cortexa15,   8A, FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("xgene1",      xgene1,    xgene1,      8A, FL_LDSCHED,            xgene1)
 
 /* V8 big.LITTLE implementations */
 ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A,  FL_LDSCHED | FL_CRC32, cortex_a57)
index 9c7e944..7efd2b6 100644 (file)
@@ -304,6 +304,9 @@ EnumValue
 Enum(processor_type) String(cortex-a57) Value(cortexa57)
 
 EnumValue
+Enum(processor_type) String(xgene1) Value(xgene1)
+
+EnumValue
 Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53)
 
 Enum
index 84355d6..97cca44 100644 (file)
@@ -32,5 +32,5 @@
        cortexr5,cortexr7,cortexm7,
        cortexm4,cortexm3,marvell_pj4,
        cortexa15cortexa7,cortexa53,cortexa57,
-       cortexa57cortexa53"
+       xgene1,cortexa57cortexa53"
        (const (symbol_ref "((enum attr_tune) arm_tune)")))
index 1c741ed..b02f56d 100644 (file)
@@ -1944,6 +1944,25 @@ const struct tune_params arm_cortex_a57_tune =
   ARM_FUSE_MOVW_MOVT                           /* Fuseable pairs of instructions.  */
 };
 
+const struct tune_params arm_xgene1_tune =
+{
+  arm_9e_rtx_costs,
+  &xgene1_extra_costs,
+  NULL,                                        /* Scheduler cost adjustment.  */
+  1,                                           /* Constant limit.  */
+  2,                                           /* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,                                       /* Prefer constant pool.  */
+  arm_default_branch_cost,
+  true,                                        /* Prefer LDRD/STRD.  */
+  {true, true},                                /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                       /* Vectorizer costs.  */
+  false,                                       /* Prefer Neon for 64-bits bitops.  */
+  true, true,                                  /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,                      /* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING                             /* Fuseable pairs of instructions.  */
+};
+
 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
    less appealing.  Set max_insns_skipped to a low value.  */
 
@@ -29929,6 +29948,9 @@ arm_issue_rate (void)
 {
   switch (arm_tune)
     {
+    case xgene1:
+      return 4;
+
     case cortexa15:
     case cortexa57:
       return 3;
index d931ed4..829421c 100644 (file)
 ;; given instruction does not shift one of its input operands.
 (define_attr "shift" "" (const_int 0))
 
+;; [For compatibility with AArch64 in pipeline models]
+;; Attribute that specifies whether or not the instruction touches fp
+;; registers.
+(define_attr "fp" "no,yes" (const_string "no"))
+
 ; Floating Point Unit.  If we only have floating point emulation, then there
 ; is no point in scheduling the floating point insns.  (Well, for best
 ; performance we should try and group them together).
                                 arm926ejs,arm1020e,arm1026ejs,arm1136js,\
                                 arm1136jfs,cortexa5,cortexa7,cortexa8,\
                                 cortexa9,cortexa12,cortexa15,cortexa53,\
-                                cortexm4,cortexm7,marvell_pj4")
+                                cortexm4,cortexm7,marvell_pj4,
+                               xgene1")
               (eq_attr "tune_cortexr4" "yes"))
           (const_string "no")
           (const_string "yes"))))
          (and (eq_attr "fpu" "vfp")
               (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,\
                                 cortexa8,cortexa9,cortexa53,cortexm4,\
-                                cortexm7,marvell_pj4")
+                                cortexm7,marvell_pj4,xgene1")
               (eq_attr "tune_cortexr4" "no"))
          (const_string "yes")
          (const_string "no"))))
 (include "cortex-m4-fpu.md")
 (include "vfp11.md")
 (include "marvell-pj4.md")
+(include "xgene1.md")
 
 \f
 ;;---------------------------------------------------------------------------
index f99e1af..574500e 100644 (file)
@@ -70,6 +70,7 @@
    |mcpu=cortex-a53                                    \
    |mcpu=cortex-a57                                    \
    |mcpu=cortex-a57.cortex-a53                         \
+   |mcpu=xgene1                                         \
    |mcpu=cortex-m1.small-multiply                       \
    |mcpu=cortex-m0.small-multiply                       \
    |mcpu=cortex-m0plus.small-multiply                  \
@@ -90,6 +91,7 @@
    |mcpu=cortex-a53                                    \
    |mcpu=cortex-a57                                    \
    |mcpu=cortex-a57.cortex-a53                         \
+   |mcpu=xgene1                                         \
    |mcpu=cortex-m1.small-multiply                       \
    |mcpu=cortex-m0.small-multiply                       \
    |mcpu=cortex-m0plus.small-multiply                   \
index 2ad7bf3..52d0c82 100644 (file)
@@ -40,6 +40,7 @@ MD_INCLUDES=  $(srcdir)/config/arm/arm1020e.md \
                $(srcdir)/config/arm/cortex-a9.md \
                $(srcdir)/config/arm/cortex-a9-neon.md \
                $(srcdir)/config/arm/cortex-a53.md \
+               $(srcdir)/config/arm/xgene1.md \
                $(srcdir)/config/arm/cortex-m4-fpu.md \
                $(srcdir)/config/arm/cortex-m4.md \
                $(srcdir)/config/arm/cortex-r4f.md \
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
new file mode 100644 (file)
index 0000000..1fba538
--- /dev/null
@@ -0,0 +1,531 @@
+;; Machine description for AppliedMicro xgene1 core.
+;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
+;; Contributed by Theobroma Systems Design und Consulting GmbH.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Pipeline description for the xgene1 micro-architecture
+
+(define_automaton "xgene1")
+
+(define_cpu_unit "xgene1_decode_out0" "xgene1")
+(define_cpu_unit "xgene1_decode_out1" "xgene1")
+(define_cpu_unit "xgene1_decode_out2" "xgene1")
+(define_cpu_unit "xgene1_decode_out3" "xgene1")
+
+(define_cpu_unit "xgene1_divide" "xgene1")
+(define_cpu_unit "xgene1_fp_divide" "xgene1")
+(define_cpu_unit "xgene1_fsu" "xgene1")
+(define_cpu_unit "xgene1_fcmp" "xgene1")
+
+(define_reservation "xgene1_decode1op"
+        "( xgene1_decode_out0 )
+        |( xgene1_decode_out1 )
+        |( xgene1_decode_out2 )
+        |( xgene1_decode_out3 )"
+)
+(define_reservation "xgene1_decode2op"
+        "( xgene1_decode_out0 + xgene1_decode_out1 )
+        |( xgene1_decode_out0 + xgene1_decode_out2 )
+        |( xgene1_decode_out0 + xgene1_decode_out3 )
+        |( xgene1_decode_out1 + xgene1_decode_out2 )
+        |( xgene1_decode_out1 + xgene1_decode_out3 )
+        |( xgene1_decode_out2 + xgene1_decode_out3 )"
+)
+(define_reservation "xgene1_decodeIsolated"
+        "( xgene1_decode_out0 + xgene1_decode_out1 + xgene1_decode_out2 + xgene1_decode_out3 )"
+)
+
+(define_insn_reservation "xgene1_branch" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "branch"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_nop" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "no_insn"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_call" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "call"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_f_load" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_loadd,f_loads"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_f_store" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_stored,f_stores"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_fmov" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fmov,fconsts,fconstd"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_f_mcr" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_mcr"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_f_mrc" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_mrc"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_load_pair" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load2"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_store_pair" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "store2"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_fp_load1" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load1")
+       (eq_attr "fp" "yes"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_load1" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load1"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_store1" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "store1"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_move" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "mov_reg,mov_imm,mrs"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_alu" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "alu_imm,alu_reg,alu_shift_imm,\
+                        alu_ext,adc_reg,csel,logic_imm,\
+                        logic_reg,logic_shift_imm,clz,\
+                        rbit,shift_reg,adr,mov_reg,\
+                        mov_imm,extend"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_simd" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "rev"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_alus" 1  ; ALU/logic insn types carrying the "s" suffix; alu_reg itself is listed under xgene1_alu.
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "alus_imm,alus_reg,alus_shift_imm,\
+                        alus_ext,logics_imm,logics_reg,\
+                        logics_shift_imm"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_mul" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "mul,mla,smull,umull,smlal,umlal"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_div" 34
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "sdiv,udiv"))
+  "xgene1_decode1op,xgene1_divide*7")
+
+(define_insn_reservation "xgene1_fcmp" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fcmpd,fcmps"))
+  "xgene1_decode1op,xgene1_fsu+xgene1_fcmp*3")
+
+(define_insn_reservation "xgene1_fcsel" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fcsel"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_bfm" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "bfm"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_rint" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_rintd,f_rints"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvt" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvt"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvtf2i" 11
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvtf2i"))
+  "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvti2f" 14
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvti2f"))
+  "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_add" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "faddd,fadds,fmuld,fmuls"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_divs" 22
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fdivs,fsqrts"))
+  "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*8,xgene1_fp_divide*14")
+
+(define_insn_reservation "xgene1_f_divd" 28
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fdivd"))
+  "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*11,xgene1_fp_divide*17")
+
+(define_insn_reservation "xgene1_f_sqrtd" 28
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fsqrtd"))
+  "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*17,xgene1_fp_divide*11")
+
+(define_insn_reservation "xgene1_f_arith" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "ffarithd,ffariths"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_select" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_minmaxd,f_minmaxs"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_dup" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_dup,neon_dup_q"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_load1" 11
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q"))
+  "xgene1_decode2op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_store1" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q"))
+  "xgene1_decode2op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_logic" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_logic,\
+                        neon_logic_q,\
+                        neon_bsl,\
+                        neon_bsl_q,\
+                        neon_move,\
+                        neon_move_q,\
+                       "))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_umov" 7
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_to_gp, neon_to_gp_q"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_neon_ins" 14
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_from_gp,\
+                        neon_from_gp_q,\
+                        neon_ins,\
+                        neon_ins_q,\
+                       "))
+  "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_shift" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_shift_imm,\
+                        neon_shift_imm_q,\
+                        neon_shift_reg,\
+                        neon_shift_reg_q,\
+                        neon_shift_imm_long,\
+                        neon_sat_shift_imm,\
+                        neon_sat_shift_imm_q,\
+                        neon_sat_shift_imm_narrow_q,\
+                        neon_sat_shift_reg,\
+                        neon_sat_shift_reg_q,\
+                        neon_shift_imm_narrow_q,\
+                       "))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_arith" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_add,\
+                        neon_add_q,\
+                        neon_sub,\
+                        neon_sub_q,\
+                        neon_neg,\
+                        neon_neg_q,\
+                        neon_abs,\
+                        neon_abs_q,\
+                        neon_abd_q,\
+                        neon_arith_acc,\
+                        neon_arith_acc_q,\
+                        neon_reduc_add,\
+                        neon_reduc_add_q,\
+                        neon_add_halve,\
+                        neon_add_halve_q,\
+                        neon_sub_halve,\
+                        neon_sub_halve_q,\
+                        neon_qadd,\
+                        neon_qadd_q,\
+                        neon_compare,\
+                        neon_compare_q,\
+                        neon_compare_zero,\
+                        neon_compare_zero_q,\
+                        neon_tst,\
+                        neon_tst_q,\
+                       "))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_abs_diff" 6  ; NOTE(review): both types are also listed in xgene1_neon_arith (latency 3) -- confirm which reservation is intended.
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_arith_acc,neon_arith_acc_q"))
+  "xgene1_decode2op,xgene1_fsu*2")
+
+(define_insn_reservation "xgene1_neon_mul" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_mul_b,\
+                        neon_mul_b_q,\
+                        neon_mul_h,\
+                        neon_mul_h_q,\
+                        neon_mul_s,\
+                        neon_mul_s_q,\
+                        neon_fp_mul_s_scalar,\
+                        neon_fp_mul_s_scalar_q,\
+                        neon_fp_mul_d_scalar_q,\
+                        neon_mla_b,neon_mla_b_q,\
+                        neon_mla_h,neon_mla_h_q,\
+                        neon_mla_s,neon_mla_s_q,\
+                        neon_mla_h_scalar,\
+                        neon_mla_h_scalar_q,\
+                        neon_mla_s_scalar,\
+                        neon_mla_s_scalar_q,\
+                        neon_mla_b_long,\
+                        neon_mla_h_long,\
+                        neon_mla_s_long,\
+                        neon_fp_mul_s,\
+                        neon_fp_mul_s_q,\
+                        neon_fp_mul_d,\
+                        neon_fp_mul_d_q,\
+                        neon_fp_mla_s,\
+                        neon_fp_mla_s_q,\
+                        neon_fp_mla_d,\
+                        neon_fp_mla_d_q,\
+                        neon_fp_mla_s_scalar,\
+                        neon_fp_mla_s_scalar_q,\
+                        neon_fp_mla_d_scalar_q,\
+                        neon_sat_mul_b,\
+                        neon_sat_mul_b_q,\
+                        neon_sat_mul_h,\
+                        neon_sat_mul_h_q,\
+                        neon_sat_mul_s,\
+                        neon_sat_mul_s_q,\
+                        neon_sat_mul_h_scalar,\
+                        neon_sat_mul_h_scalar_q,\
+                        neon_sat_mul_s_scalar,\
+                        neon_sat_mul_s_scalar_q,\
+                        neon_sat_mul_h_scalar_long,\
+                        neon_sat_mul_s_scalar_long,\
+                        neon_sat_mla_b_long,\
+                        neon_sat_mla_h_long,\
+                        neon_sat_mla_s_long,\
+                        neon_sat_mla_h_scalar_long,\
+                        neon_sat_mla_s_scalar_long,\
+                       "))
+  "xgene1_decode2op,xgene1_fsu*2")
+
+(define_insn_reservation "xgene1_fp_abd_diff" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_abd_s,\
+                        neon_fp_abd_s_q,\
+                        neon_fp_abd_d,\
+                        neon_fp_abd_d_q,\
+                       "))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_f_add" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_addsub_s,\
+                        neon_fp_addsub_s_q,\
+                        neon_fp_addsub_d,\
+                        neon_fp_addsub_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_f_div" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_div_s,\
+                        neon_fp_div_s_q,\
+                        neon_fp_div_d,\
+                        neon_fp_div_d_q,\
+                       "))
+  "xgene1_decode1op,(xgene1_fsu+xgene1_fp_divide)")
+
+(define_insn_reservation "xgene1_neon_f_neg" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_neg_s,\
+                        neon_fp_neg_s_q,\
+                        neon_fp_neg_d,\
+                        neon_fp_neg_d_q,\
+                        neon_fp_abs_s,\
+                        neon_fp_abs_s_q,\
+                        neon_fp_abs_d,\
+                        neon_fp_abs_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+;; NEON floating-point rounding types (neon_fp_round_*), when tuning for
+;; xgene1.  Default latency 5.  Only xgene1_decode1op is reserved.
+(define_insn_reservation "xgene1_neon_f_round" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_round_s,\
+                        neon_fp_round_s_q,\
+                        neon_fp_round_d,\
+                        neon_fp_round_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+;; NEON conversion types listed below (integer-to-fp, plus the fp
+;; widen/narrow variants named in the list), when tuning for xgene1.
+;; Default latency 5.  Only xgene1_decode1op is reserved.
+(define_insn_reservation "xgene1_neon_f_cvt" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type"  "neon_int_to_fp_s,\
+                         neon_int_to_fp_s_q,\
+                         neon_int_to_fp_d,\
+                         neon_int_to_fp_d_q,\
+                         neon_fp_cvt_widen_s,\
+                         neon_fp_cvt_narrow_s_q,\
+                         neon_fp_cvt_narrow_d_q,\
+                        "))
+  "xgene1_decode1op")
+
+;; NEON floating-point add-reduction types (neon_fp_reduc_add_*), when
+;; tuning for xgene1.  Default latency 5.  Only xgene1_decode1op is
+;; reserved.
+(define_insn_reservation "xgene1_neon_f_reduc" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_reduc_add_s,\
+                        neon_fp_reduc_add_s_q,\
+                        neon_fp_reduc_add_d,\
+                        neon_fp_reduc_add_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+;; Types neon_cls and neon_cls_q, when tuning for xgene1.  Default
+;; latency 2.  Only xgene1_decode1op is reserved.
+(define_insn_reservation "xgene1_neon_cls" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_cls,neon_cls_q"))
+  "xgene1_decode1op")
+
+;; Single-lane NEON store types (neon_store1_one_lane, and its _q form),
+;; when tuning for xgene1.  Default latency 4.  Only xgene1_decode1op is
+;; reserved.
+(define_insn_reservation "xgene1_neon_st1" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_store1_one_lane,\
+                        neon_store1_one_lane_q,\
+                       "))
+  "xgene1_decode1op")
+
+;; Types neon_sub_halve_narrow_q and neon_add_halve_narrow_q, when tuning
+;; for xgene1.  Default latency 6.  Reserves xgene1_decodeIsolated
+;; (defined outside this chunk).
+(define_insn_reservation "xgene1_neon_halve_narrow" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_sub_halve_narrow_q,\
+                        neon_add_halve_narrow_q,\
+                       "))
+  "xgene1_decodeIsolated")
+
+;; Types neon_shift_acc and neon_shift_acc_q, when tuning for xgene1.
+;; Default latency 6.  Reserves xgene1_decode2op (defined outside this
+;; chunk).
+(define_insn_reservation "xgene1_neon_shift_acc" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_shift_acc,\
+                        neon_shift_acc_q,\
+                       "))
+  "xgene1_decode2op")
+
+;; NEON floating-point compare types (neon_fp_compare_*), when tuning for
+;; xgene1.  Default latency 3.  Only xgene1_decode1op is reserved.
+(define_insn_reservation "xgene1_neon_fp_compare" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_compare_s,\
+                        neon_fp_compare_s_q,\
+                        neon_fp_compare_d,\
+                        neon_fp_compare_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+;; NEON floating-point square-root types (neon_fp_sqrt_*), when tuning for
+;; xgene1.  Default latency 2.  Reservation: xgene1_decode1op, then
+;; xgene1_fsu and xgene1_fp_divide together on the following cycle.
+;; NOTE(review): latency 2 looks low for a square root -- confirm against
+;; the core's timing data.
+(define_insn_reservation "xgene1_neon_fp_sqrt" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_sqrt_s,\
+                        neon_fp_sqrt_s_q,\
+                        neon_fp_sqrt_d,\
+                        neon_fp_sqrt_d_q,\
+                       "))
+  "xgene1_decode1op,(xgene1_fsu+xgene1_fp_divide)")
+
+;; Types neon_tbl1 and neon_tbl1_q, when tuning for xgene1.  Default
+;; latency 4.  Reserves xgene1_decode2op (defined outside this chunk).
+(define_insn_reservation "xgene1_neon_tbl1" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_tbl1,\
+                        neon_tbl1_q,\
+                       "))
+  "xgene1_decode2op")
+
+;; Types neon_tbl2 and neon_tbl2_q, when tuning for xgene1.  Default
+;; latency 8.  Reserves xgene1_decodeIsolated (defined outside this
+;; chunk).
+(define_insn_reservation "xgene1_neon_tbl2" 8
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_tbl2,\
+                        neon_tbl2_q,\
+                       "))
+  "xgene1_decodeIsolated")
+
+;; Types neon_permute and neon_permute_q, when tuning for xgene1.  Default
+;; latency 3.  Reserves xgene1_decode2op (defined outside this chunk).
+(define_insn_reservation "xgene1_neon_permute" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_permute,\
+                        neon_permute_q,\
+                       "))
+  "xgene1_decode2op")
+
+;; Type neon_load1_all_lanes, when tuning for xgene1.  Default latency 10.
+;; Only xgene1_decode1op is reserved.
+;; NOTE(review): only the non-_q type is listed here -- confirm whether a
+;; _q variant exists and is covered by another reservation.
+(define_insn_reservation "xgene1_neon_ld1r" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_load1_all_lanes,\
+                       "))
+  "xgene1_decode1op")
+
+;; NEON types neon_fp_recpe_* and neon_fp_recpx_*, when tuning for xgene1.
+;; Default latency 3.  Only xgene1_decode1op is reserved.
+(define_insn_reservation "xgene1_neon_fp_recp" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_recpe_s,\
+                        neon_fp_recpe_s_q,\
+                        neon_fp_recpe_d,\
+                        neon_fp_recpe_d_q,\
+                        neon_fp_recpx_s,\
+                        neon_fp_recpx_s_q,\
+                        neon_fp_recpx_d,\
+                        neon_fp_recpx_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+
+;; NEON types neon_fp_recps_*, when tuning for xgene1.  Default latency 5.
+;; Only xgene1_decode1op is reserved.
+(define_insn_reservation "xgene1_neon_fp_recp_s" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_recps_s,\
+                        neon_fp_recps_s_q,\
+                        neon_fp_recps_d,\
+                        neon_fp_recps_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+;; Type neon_mul_d_long, when tuning for xgene1.  Default latency 5.
+;; Reserves xgene1_decode2op (defined outside this chunk).
+;; NOTE(review): the reservation name suggests PMULL, but the only type
+;; matched is neon_mul_d_long -- confirm this is the intended mapping.
+(define_insn_reservation "xgene1_neon_pmull" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_mul_d_long,\
+                       "))
+  "xgene1_decode2op")
index 1dcac76..ccf64a2 100644 (file)
@@ -11472,7 +11472,8 @@ architecture.
 @opindex mtune
 Specify the name of the target processor for which GCC should tune the
 performance of the code.  Permissible values for this option are:
-@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx}.
+@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx},
+@samp{xgene1}.
 
 Additionally, this option can specify that GCC should tune the performance
 of the code for a big.LITTLE system.  The only permissible value is
@@ -12357,7 +12358,8 @@ Permissible names are: @samp{arm2}, @samp{arm250},
 @samp{marvell-pj4},
 @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312},
 @samp{fa526}, @samp{fa626},
-@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te}.
+@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te},
+@samp{xgene1}.
 
 Additionally, this option can specify that GCC should tune the performance
 of the code for a big.LITTLE system.  Permissible names are: