From d6b7f0196dbc96c6c215b70bad0557d08f542f26 Mon Sep 17 00:00:00 2001 From: gretay Date: Fri, 21 Dec 2012 09:49:58 +0000 Subject: [PATCH] gcc/ 2012-12-21 Greta Yorsh * config/arm/cortex-a7.md: New file. * config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md. * config/arm/arm.md: Include cortex-a7.md. (generic_sched): Don't use generic scheduler for Cortex-A7. (generic_vfp): Likewise. * config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder. (arm_sched_reorder,cortexa7_sched_reorder): New function. (cortexa7_older_only,cortexa7_younger): Likewise. (arm_issue_rate): Add Cortex-A7. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@194656 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 12 ++ gcc/config/arm/arm.c | 163 ++++++++++++++++++++ gcc/config/arm/arm.md | 5 +- gcc/config/arm/cortex-a7.md | 353 ++++++++++++++++++++++++++++++++++++++++++++ gcc/config/arm/t-arm | 1 + 5 files changed, 532 insertions(+), 2 deletions(-) create mode 100644 gcc/config/arm/cortex-a7.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 29cab03..90ecae0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2012-12-21 Greta Yorsh + + * config/arm/cortex-a7.md: New file. + * config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md. + * config/arm/arm.md: Include cortex-a7.md. + (generic_sched): Don't use generic scheduler for Cortex-A7. + (generic_vfp): Likewise. + * config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder. + (arm_sched_reorder,cortexa7_sched_reorder): New function. + (cortexa7_older_only,cortexa7_younger): Likewise. + (arm_issue_rate): Add Cortex-A7. 
+ 2012-12-20 Ian Bolton * gcc/config/aarch64/aarch64.md diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4484bc9..13d745f 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); static int arm_comp_type_attributes (const_tree, const_tree); static void arm_set_default_type_attributes (tree); static int arm_adjust_cost (rtx, rtx, rtx, int); +static int arm_sched_reorder (FILE *, int, rtx *, int *, int); static int optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, struct four_ints *return_sequence); @@ -367,6 +368,9 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST arm_adjust_cost +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER arm_sched_reorder + #undef TARGET_REGISTER_MOVE_COST #define TARGET_REGISTER_MOVE_COST arm_register_move_cost @@ -8694,6 +8698,164 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass, } } + +/* Return true if and only if this insn can dual-issue only as older. */ +static bool +cortexa7_older_only (rtx insn) +{ + if (recog_memoized (insn) < 0) + return false; + + if (get_attr_insn (insn) == INSN_MOV) + return false; + + switch (get_attr_type (insn)) + { + case TYPE_ALU_REG: + case TYPE_LOAD_BYTE: + case TYPE_LOAD1: + case TYPE_STORE1: + case TYPE_FFARITHS: + case TYPE_FADDS: + case TYPE_FFARITHD: + case TYPE_FADDD: + case TYPE_FCPYS: + case TYPE_F_CVT: + case TYPE_FCMPS: + case TYPE_FCMPD: + case TYPE_FCONSTS: + case TYPE_FCONSTD: + case TYPE_FMULS: + case TYPE_FMACS: + case TYPE_FMULD: + case TYPE_FMACD: + case TYPE_FDIVS: + case TYPE_FDIVD: + case TYPE_F_2_R: + case TYPE_F_FLAG: + case TYPE_F_LOADS: + case TYPE_F_STORES: + return true; + default: + return false; + } +} + +/* Return true if and only if this insn can dual-issue as younger. 
*/ +static bool +cortexa7_younger (FILE *file, int verbose, rtx insn) +{ + if (recog_memoized (insn) < 0) + { + if (verbose > 5) + fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn)); + return false; + } + + if (get_attr_insn (insn) == INSN_MOV) + return true; + + switch (get_attr_type (insn)) + { + case TYPE_SIMPLE_ALU_IMM: + case TYPE_SIMPLE_ALU_SHIFT: + case TYPE_BRANCH: + return true; + default: + return false; + } +} + + +/* Look for an instruction that can dual issue only as an older + instruction, and move it in front of any instructions that can + dual-issue as younger, while preserving the relative order of all + other instructions in the ready list. This is a heuristic to help + dual-issue in later cycles, by postponing issue of more flexible + instructions. This heuristic may affect dual issue opportunities + in the current cycle. */ +static void +cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, + int clock) +{ + int i; + int first_older_only = -1, first_younger = -1; + + if (verbose > 5) + fprintf (file, + ";; sched_reorder for cycle %d with %d insns in ready list\n", + clock, + *n_readyp); + + /* Traverse the ready list from the head (the instruction to issue + first), looking for the first instruction that can issue as + younger and the first instruction that can dual-issue only as + older. */ + for (i = *n_readyp - 1; i >= 0; i--) + { + rtx insn = ready[i]; + if (cortexa7_older_only (insn)) + { + first_older_only = i; + if (verbose > 5) + fprintf (file, ";; reorder older found %d\n", INSN_UID (insn)); + break; + } + else if (cortexa7_younger (file, verbose, insn) && first_younger == -1) + first_younger = i; + } + + /* Nothing to reorder because either no younger insn found or insn + that can dual-issue only as older appears before any insn that + can dual-issue as younger. 
*/ + if (first_younger == -1) + { + if (verbose > 5) + fprintf (file, ";; sched_reorder nothing to reorder as no younger\n"); + return; + } + + /* Nothing to reorder because no older-only insn in the ready list. */ + if (first_older_only == -1) + { + if (verbose > 5) + fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n"); + return; + } + + /* Move first_older_only insn before first_younger. */ + if (verbose > 5) + fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n", + INSN_UID(ready [first_older_only]), + INSN_UID(ready [first_younger])); + rtx first_older_only_insn = ready [first_older_only]; + for (i = first_older_only; i < first_younger; i++) + { + ready[i] = ready[i+1]; + } + + ready[i] = first_older_only_insn; + return; +} + +/* Implement TARGET_SCHED_REORDER. */ +static int +arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, + int clock) +{ + switch (arm_tune) + { + case cortexa7: + cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock); + break; + default: + /* Do nothing for other cores. */ + break; + } + + return arm_issue_rate (); +} + /* This function implements the target macro TARGET_SCHED_ADJUST_COST. It corrects the value of COST based on the relationship between INSN and DEP through the dependence LINK. 
It returns the new @@ -25480,6 +25642,7 @@ arm_issue_rate (void) case cortexr5: case genericv7a: case cortexa5: + case cortexa7: case cortexa8: case cortexa9: case fa726te: diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 385a58d..1cb1515 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -502,7 +502,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4") + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4") (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) @@ -510,7 +510,7 @@ (define_attr "generic_vfp" "yes,no" (const (if_then_else (and (eq_attr "fpu" "vfp") - (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4") + (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4") (eq_attr "tune_cortexr4" "no")) (const_string "yes") (const_string "no")))) @@ -527,6 +527,7 @@ (include "fmp626.md") (include "fa726te.md") (include "cortex-a5.md") +(include "cortex-a7.md") (include "cortex-a8.md") (include "cortex-a9.md") (include "cortex-a15.md") diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md new file mode 100644 index 0000000..74d4ca0 --- /dev/null +++ b/gcc/config/arm/cortex-a7.md @@ -0,0 +1,353 @@ +;; ARM Cortex-A7 pipeline description +;; Copyright (C) 2012 Free Software Foundation, Inc. +;; +;; Contributed by ARM Ltd. +;; Based on cortex-a5.md which was originally contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "cortex_a7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The Cortex-A7 pipeline integer and vfp pipeline. +;; The decode is the same for all instructions, so do not model it. +;; We only model the first execution stage because +;; instructions always advance one stage per cycle in order. +;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together. + +(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7") + +(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2") + +(define_cpu_unit "cortex_a7_branch" "cortex_a7") + +;; Cortex-A7 is in order and can dual-issue under limited circumstances. +;; ex2 can be reserved only after ex1 is reserved. + +(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1") + +;; Pseudo-unit for blocking the multiply pipeline when a double-precision +;; multiply is in progress. + +(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7") + +;; The floating-point add pipeline (ex1/f1 stage), used to model the usage +;; of the add pipeline by fmac instructions, etc. + +(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7") + +;; Floating-point div/sqrt (long latency, out-of-order completion). 
+ +(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7") + +;; Neon pipeline +(define_cpu_unit "cortex_a7_neon" "cortex_a7") + +(define_reservation "cortex_a7_all" "cortex_a7_both+\ + cortex_a7_fpmul_pipe+\ + cortex_a7_fpadd_pipe+\ + cortex_a7_fp_div_sqrt+\ + cortex_a7_neon") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; A direct branch can dual issue either as younger or older instruction, +;; but branches cannot dual issue with branches. +;; No latency as there is no result. + +(define_insn_reservation "cortex_a7_branch" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "branch") + (eq_attr "neon_type" "none"))) + "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch") + +;; A call reserves all issue slots. The result is available the next cycle. +(define_insn_reservation "cortex_a7_call" 1 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "call") + (eq_attr "neon_type" "none"))) + "cortex_a7_all") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instruction with an immediate operand can dual-issue. +(define_insn_reservation "cortex_a7_alu_imm" 2 + (and (eq_attr "tune" "cortexa7") + (and (ior (eq_attr "type" "simple_alu_imm") + (ior (eq_attr "type" "simple_alu_shift") + (and (eq_attr "insn" "mov") + (not (eq_attr "length" "8"))))) + (eq_attr "neon_type" "none"))) + "cortex_a7_ex2|cortex_a7_ex1") + +;; ALU instruction with register operands can dual-issue +;; with a younger immediate-based instruction. 
+(define_insn_reservation "cortex_a7_alu_reg" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "alu_reg") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_alu_shift" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "alu_shift,alu_shift_reg") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +;; Forwarding path for unshifted operands. +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul") + +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_store*" + "arm_no_early_store_addr_dep") + +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results from wr stage only so +;; there's no need to specify bypasses. +;; Multiply instructions cannot dual-issue. + +(define_insn_reservation "cortex_a7_mul" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "mult") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +;; The latency depends on the operands, so we use an estimate here. +(define_insn_reservation "cortex_a7_idiv" 5 + (and (eq_attr "tune" "cortexa7") + (eq_attr "insn" "udiv,sdiv")) + "cortex_a7_all*5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage. +;; Double-word accesses can be issued in a single cycle, +;; and occupy only one pipeline stage. 
+ +(define_insn_reservation "cortex_a7_load1" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "load_byte,load1") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_store1" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "store1") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_load2" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "load2") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_store2" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "store2") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_load3" 3 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "load3") + (eq_attr "neon_type" "none"))) + "cortex_a7_both, cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_store3" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "store4") + (eq_attr "neon_type" "none"))) + "cortex_a7_both, cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_load4" 3 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "load4") + (eq_attr "neon_type" "none"))) + "cortex_a7_both, cortex_a7_both") + +(define_insn_reservation "cortex_a7_store4" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "store3") + (eq_attr "neon_type" "none"))) + "cortex_a7_both, cortex_a7_both") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_fpalu" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\ + f_cvt, fcmps, fcmpd") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpadd_pipe") + +;; For fconsts and fconstd, 8-bit immediate data is passed directly from +;; f1 to f3 (which I think reduces the latency by one cycle). 
+ +(define_insn_reservation "cortex_a7_fconst" 3 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fconsts,fconstd") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpadd_pipe") + +;; We should try not to attempt to issue a single-precision multiplication in +;; the middle of a double-precision multiplication operation (the usage of +;; cortex_a7_fpmul_pipe). + +(define_insn_reservation "cortex_a7_fpmuls" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fmuls") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe") + +;; For single-precision multiply-accumulate, the add (accumulate) is issued +;; whilst the multiply is in F4. The multiply result can then be forwarded +;; from F5 to F1. The issue unit is only used once (when we first start +;; processing the instruction), but the usage of the FP add pipeline could +;; block other instructions attempting to use it simultaneously. We try to +;; avoid that using cortex_a7_fpadd_pipe. + +(define_insn_reservation "cortex_a7_fpmacs" 8 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fmacs") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe") + +;; Non-multiply instructions can issue between two cycles of a +;; double-precision multiply. + +(define_insn_reservation "cortex_a7_fpmuld" 7 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fmuld") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\ + cortex_a7_ex1+cortex_a7_fpmul_pipe") + +(define_insn_reservation "cortex_a7_fpmacd" 11 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fmacd") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\ + cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_fdivs" 16 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fdivs") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14") + +(define_insn_reservation "cortex_a7_fdivd" 29 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fdivd") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Core-to-VFP transfers. + +(define_insn_reservation "cortex_a7_r2f" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "r_2_f") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_f2r" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_2_r") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Fixme: The flag forwarding from fmstat to the second instruction is +;; not modeled at present. + +(define_insn_reservation "cortex_a7_f_flags" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_flag") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_f_loads" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_loads") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_f_loadd" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_loadd") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_f_stores" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_stores") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_f_stored" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_stored") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd" + "cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\ + cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\ + cortex_a7_f2r") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NEON load/store. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +(define_insn_reservation "cortex_a7_neon" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "neon_type" "!none")) + "cortex_a7_both*2") diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 731b614..2ceb938 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -32,6 +32,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ $(srcdir)/config/arm/constraints.md \ $(srcdir)/config/arm/cortex-a15.md \ $(srcdir)/config/arm/cortex-a5.md \ + $(srcdir)/config/arm/cortex-a7.md \ $(srcdir)/config/arm/cortex-a8.md \ $(srcdir)/config/arm/cortex-a8-neon.md \ $(srcdir)/config/arm/cortex-a9.md \ -- 2.7.4