From 96b07b108fb7cfaeeddb3f5cede1fda8f772b614 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Sun, 13 May 2018 17:10:36 +0000 Subject: [PATCH] [NDS32] Implement n12/n13 pipeline. gcc/ * config.gcc (nds32*-*-*): Check that n12/n13 are valid to --with-cpu. * config/nds32/nds32-n13.md: New file. * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N12 and CPU_N13. * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n12/n13 pipeline. * config/nds32/nds32-protos.h: More declarations for n12/n13 pipeline. * config/nds32/nds32.md (pipeline_model): Add n13. * config/nds32/nds32.opt (mcpu): Support n12/n13 pipeline cpus. * config/nds32/pipelines.md: Include n13 settings. Co-Authored-By: Chung-Ju Wu From-SVN: r260212 --- gcc/ChangeLog | 13 + gcc/config.gcc | 4 +- gcc/config/nds32/nds32-n13.md | 401 +++++++++++++++++++++++++++ gcc/config/nds32/nds32-opts.h | 2 + gcc/config/nds32/nds32-pipelines-auxiliary.c | 271 ++++++++++++++++++ gcc/config/nds32/nds32-protos.h | 7 + gcc/config/nds32/nds32.md | 4 +- gcc/config/nds32/nds32.opt | 27 ++ gcc/config/nds32/pipelines.md | 6 + 9 files changed, 732 insertions(+), 3 deletions(-) create mode 100644 gcc/config/nds32/nds32-n13.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f39ff63..9fdc345 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,6 +1,19 @@ 2018-05-13 Kito Cheng Chung-Ju Wu + * config.gcc (nds32*-*-*): Check that n12/n13 are valid to --with-cpu. + * config/nds32/nds32-n13.md: New file. + * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N12 and CPU_N13. + * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n12/n13 + pipeline. + * config/nds32/nds32-protos.h: More declarations for n12/n13 pipeline. + * config/nds32/nds32.md (pipeline_model): Add n13. + * config/nds32/nds32.opt (mcpu): Support n12/n13 pipeline cpus. + * config/nds32/pipelines.md: Include n13 settings. + +2018-05-13 Kito Cheng + Chung-Ju Wu + * config.gcc (nds32*-*-*): Check that n10/d10 are valid to --with-cpu. 
* config/nds32/nds32-n10.md: New file. * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N10. diff --git a/gcc/config.gcc b/gcc/config.gcc index b729f3b..c8eda1d 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4367,11 +4367,11 @@ case "${target}" in "") with_cpu=n9 ;; - n6 | n7 |n8 | e8 | s8 | n9 | n10 | d10) + n6 | n7 |n8 | e8 | s8 | n9 | n10 | d10 | n12 | n13) # OK ;; *) - echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9 n10 d10" 1>&2 + echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9 n10 d10 n12 n13" 1>&2 exit 1 ;; esac diff --git a/gcc/config/nds32/nds32-n13.md b/gcc/config/nds32/nds32-n13.md new file mode 100644 index 0000000..ca7546b --- /dev/null +++ b/gcc/config/nds32/nds32-n13.md @@ -0,0 +1,401 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ------------------------------------------------------------------------ +;; Define N13 pipeline settings. 
+;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n13_machine") + +;; ------------------------------------------------------------------------ +;; Pipeline Stages +;; ------------------------------------------------------------------------ +;; F1 - Instruction Fetch First +;; Instruction Tag/Data Arrays +;; ITLB Address Translation +;; Branch Target Buffer Prediction +;; F2 - Instruction Fetch Second +;; Instruction Cache Hit Detection +;; Cache Way Selection +;; Instruction Alignment +;; I1 - Instruction Issue First / Instruction Decode +;; Instruction Cache Replay Triggering +;; 32/16-Bit Instruction Decode +;; Return Address Stack Prediction +;; I2 - Instruction Issue Second / Register File Access +;; Instruction Issue Logic +;; Register File Access +;; E1 - Instruction Execute First / Address Generation / MAC First +;; Data Access Address generation +;; Multiply Operation +;; E2 - Instruction Execute Second / Data Access First / MAC Second / +;; ALU Execute +;; Skewed ALU +;; Branch/Jump/Return Resolution +;; Data Tag/Data arrays +;; DTLB address translation +;; Accumulation Operation +;; E3 - Instruction Execute Third / Data Access Second +;; Data Cache Hit Detection +;; Cache Way Selection +;; Data Alignment +;; E4 - Instruction Execute Fourth / Write Back +;; Interruption Resolution +;; Instruction Retire +;; Register File Write Back + +(define_cpu_unit "n13_i1" "nds32_n13_machine") +(define_cpu_unit "n13_i2" "nds32_n13_machine") +(define_cpu_unit "n13_e1" "nds32_n13_machine") +(define_cpu_unit "n13_e2" "nds32_n13_machine") +(define_cpu_unit "n13_e3" "nds32_n13_machine") +(define_cpu_unit "n13_e4" "nds32_n13_machine") + +(define_insn_reservation "nds_n13_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n13")) + 
"n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_mmu" 1 + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_alu_shift" 1 + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_pbsad" 1 + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_pbsada" 1 + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*3, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load" 1 + (and (match_test "nds32::load_single_p (insn)") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_store" 1 + (and (match_test "nds32::store_single_p (insn)") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_2" 1 + (and (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::load_double_p (insn)")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, 
n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, 
(n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_1" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_2" 1 + (and (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::store_double_p (insn)")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, 
n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +;; The multiplier at E1 takes two cycles. 
+(define_insn_reservation "nds_n13_mul" 1 + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_mac" 1 + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") + +;; The cycles consumed at E2 are 32 - CLZ(abs(Ra)) + 2, +;; so the worst case is 34. +(define_insn_reservation "nds_n13_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*34, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +;; ------------------------------------------------------------------------ +;; Comment Notations and Bypass Rules +;; ------------------------------------------------------------------------ +;; Producers (LHS) +;; LD +;; Load data from the memory and produce the loaded data. The result is +;; ready at E3. +;; LMW(N, M) +;; There are N micro-operations within an instruction that loads multiple +;; words. The result produced by the M-th micro-operation is sent to +;; consumers. The result is ready at E3. +;; ADDR_OUT +;; Most load/store instructions can produce an address output if updating +;; the base register is required. The result is ready at E2, which is +;; produced by ALU. +;; ALU, ALU_SHIFT, SIMD +;; Compute data in ALU and produce the data. The result is ready at E2. +;; MUL, MAC +;; Compute data in the multiply-adder and produce the data. The result +;; is ready at E2. +;; DIV +;; Compute data in the divider and produce the data. The result is ready +;; at E2. +;; BR +;; Branch-with-link instructions produce a result containing the return +;; address. The result is ready at E2. +;; +;; Consumers (RHS) +;; ALU +;; General ALU instructions require operands at E2. 
+;; ALU_E1 +;; Some special ALU instructions, such as BSE, BSP and MOVD44, require +;; operand at E1. +;; MUL, DIV, PBSAD, MMU +;; Operands are required at E1. +;; PBSADA_Rt, PBSADA_RaRb +;; Operands Ra and Rb are required at E1, and the operand Rt is required +;; at E2. +;; ALU_SHIFT_Rb +;; An ALU-SHIFT instruction consists of a shift micro-operation followed +;; by an arithmetic micro-operation. The operand Rb is used by the first +;; micro-operation, and there are some latencies if data dependency occurs. +;; MAC_RaRb +;; A MAC instruction does multiplication at E1 and does accumulation at E2, +;; so the operand Rt is required at E2, and operands Ra and Rb are required +;; at E1. +;; ADDR_IN +;; If an instruction requires an address as its input operand, the address +;; is required at E1. +;; ST +;; A store instruction requires its data at E2. +;; SMW(N, M) +;; There are N micro-operations within an instruction that stores multiple +;; words. Each M-th micro-operation requires its data at E2. +;; BR +;; If a branch instruction is conditional, its input data is required at E2. 
+ +;; LD -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 3 + "nds_n13_load" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_load_to_e1_p" +) + +;; LD -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) +(define_bypass 2 + "nds_n13_load" + "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_load_to_e2_p" +) + +;; LMW(N, N) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 3 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + 
nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_load_to_e1_p") + +;; LMW(N, N) -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) +(define_bypass 2 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_load_to_e2_p" +) + +;; LMW(N, N - 1) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 2 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_two_load_to_e1_p") + +;; ALU, ALU_SHIFT, SIMD, BR, MUL, MAC, DIV, ADDR_OUT +;; -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 2 + 
"nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsad, nds_n13_pbsada, nds_n13_branch,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_e2_to_e1_p") diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h index 61e95f9..3c853e4 100644 --- a/gcc/config/nds32/nds32-opts.h +++ b/gcc/config/nds32/nds32-opts.h @@ -43,6 +43,8 @@ enum nds32_cpu_type CPU_E8, CPU_N9, CPU_N10, + CPU_N12, + CPU_N13, CPU_SIMPLE }; diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c index 7ab5539..560137c 100644 --- a/gcc/config/nds32/nds32-pipelines-auxiliary.c +++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c @@ -306,6 +306,19 @@ pbsada_insn_ra_rb_dep_reg_p (rtx pbsada_insn, rtx def_reg) return false; } +/* Determine if the latency is occured when the consumer PBSADA_INSN uses the + value of DEF_REG in its Rt field. 
+bool +pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg) +{ + rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn)); + + if (rtx_equal_p (def_reg, pbsada_rt)) + return true; + + return false; +} + /* Check if INSN is a movd44 insn consuming DEF_REG. */ bool movd44_even_dep_p (rtx_insn *insn, rtx def_reg) @@ -918,6 +931,153 @@ n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) return false; } +/* Check dependencies from any stages to ALU_E1 (E1). This is a helper + function of n13_consumed_by_e1_dep_p (). */ +bool +n13_alu_e1_insn_dep_reg_p (rtx_insn *alu_e1_insn, rtx def_reg) +{ + rtx unspec_rtx, operand_ra, operand_rb; + rtx src_rtx, dst_rtx; + + switch (INSN_CODE (alu_e1_insn)) + { + /* BSP and BSE are supported by built-in functions, the corresponding + patterns are formed by UNSPEC RTXs. We have to handle them + individually. */ + case CODE_FOR_unspec_bsp: + case CODE_FOR_unspec_bse: + unspec_rtx = SET_SRC (parallel_element (alu_e1_insn, 0)); + gcc_assert (GET_CODE (unspec_rtx) == UNSPEC); + + operand_ra = XVECEXP (unspec_rtx, 0, 0); + operand_rb = XVECEXP (unspec_rtx, 0, 1); + + if (rtx_equal_p (def_reg, operand_ra) + || rtx_equal_p (def_reg, operand_rb)) + return true; + + return false; + + /* Unlike general ALU instructions, MOVD44 requires operands at E1. */ + case CODE_FOR_move_di: + case CODE_FOR_move_df: + src_rtx = SET_SRC (PATTERN (alu_e1_insn)); + dst_rtx = SET_DEST (PATTERN (alu_e1_insn)); + + if (REG_P (dst_rtx) && REG_P (src_rtx) + && rtx_equal_p (src_rtx, def_reg)) + return true; + + return false; + + default: + return false; + } +} + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at E1. Because the address generation unit is + at E1, the address input should be ready at E1. Note that the branch + target is also a kind of address, so we have to check it. 
*/ +bool +n13_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + /* ALU_E1 */ + case TYPE_ALU: + return n13_alu_e1_insn_dep_reg_p (consumer, def_reg); + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_PBSAD: + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MAC: + use_rtx = extract_mac_non_acc_rtx (consumer); + break; + + case TYPE_DIV: + if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_BRANCH: + use_rtx = extract_branch_target_rtx (consumer); + break; + + case TYPE_LOAD: + case TYPE_STORE: + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + default: + return false; + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at E2. 
+bool +n13_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + case TYPE_STORE: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_rt_dep_reg_p (consumer, def_reg); + + case TYPE_STORE_MULTIPLE: + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + case TYPE_BRANCH: + use_rtx = extract_branch_condition_rtx (consumer); + break; + + default: + gcc_unreachable(); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} } // anonymous namespace /* ------------------------------------------------------------------------ */ @@ -1371,4 +1531,115 @@ nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) return n10_consumed_by_ex_dep_p (consumer, last_def_reg); } + +/* Guard functions for N12/N13 cores. */ + +/* Check dependencies from E2 to E1. */ +bool +nds32_n13_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + /* Only post-update load/store instructions are considered. These + instructions produce address output at E2. */ + case TYPE_LOAD: + case TYPE_STORE: + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + if (!post_update_insn_p (producer)) + return false; + + def_reg = extract_base_reg (producer); + break; + + case TYPE_ALU: + case TYPE_ALU_SHIFT: + case TYPE_PBSAD: + case TYPE_PBSADA: + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_BRANCH: + return true; + + case TYPE_DIV: + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. We have to handle them + individually. 
+ if (divmod_p (producer)) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (n13_consumed_by_e1_dep_p (consumer, def_reg1) + || n13_consumed_by_e1_dep_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n13_consumed_by_e1_dep_p (consumer, def_reg); +} + +/* Check dependencies from Load-Store Unit (E3) to E1. */ +bool +nds32_n13_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg = SET_DEST (PATTERN (producer)); + + gcc_assert (get_attr_type (producer) == TYPE_LOAD); + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + + return n13_consumed_by_e1_dep_p (consumer, def_reg); +} + +/* Check dependencies from Load-Store Unit (E3) to E2. */ +bool +nds32_n13_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg = SET_DEST (PATTERN (producer)); + + gcc_assert (get_attr_type (producer) == TYPE_LOAD); + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + + return n13_consumed_by_e2_dep_p (consumer, def_reg); +} + +/* Check dependencies from LMW(N, N) to E1. */ +bool +nds32_n13_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return n13_consumed_by_e1_dep_p (consumer, last_def_reg); +} + +/* Check dependencies from LMW(N, N) to E2. */ +bool +nds32_n13_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return n13_consumed_by_e2_dep_p (consumer, last_def_reg); +} + +/* Check dependencies from LMW(N, N-1) to E1. 
*/ +bool +nds32_n13_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_two_def_reg = extract_nth_access_reg (producer, -2); + + if (last_two_def_reg == NULL_RTX) + return false; + + return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg); +} /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index 8e66522..9debad3 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -125,6 +125,13 @@ extern bool nds32_n10_ex_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n10_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n10_last_load_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_e2_to_e1_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_load_to_e1_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_load_to_e2_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_last_load_to_e1_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_last_load_to_e2_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_last_two_load_to_e1_p (rtx_insn *, rtx_insn *); + /* Auxiliary functions for stack operation predicate checking. */ extern bool nds32_valid_stack_push_pop_p (rtx, bool); diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index d6c3515..0c1f301 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -56,13 +56,15 @@ ;; ------------------------------------------------------------------------ ;; CPU pipeline model. 
-(define_attr "pipeline_model" "n7,n8,e8,n9,n10,simple" +(define_attr "pipeline_model" "n7,n8,e8,n9,n10,n13,simple" (const (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7") (match_test "nds32_cpu_option == CPU_E8") (const_string "e8") (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8") (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") (match_test "nds32_cpu_option == CPU_N10") (const_string "n10") + (match_test "nds32_cpu_option == CPU_N12") (const_string "n13") + (match_test "nds32_cpu_option == CPU_N13") (const_string "n13") (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] (const_string "n9")))) diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index 4111278..f69bddc 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -288,6 +288,33 @@ EnumValue Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10) EnumValue +Enum(nds32_cpu_type) String(n12) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1213) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233-fpu) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233-spu) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n13) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337-fpu) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337-spu) Value(CPU_N13) + +EnumValue Enum(nds32_cpu_type) String(simple) Value(CPU_SIMPLE) mconfig-fpu= diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md index a382c75..29514ae 100644 --- a/gcc/config/nds32/pipelines.md +++ b/gcc/config/nds32/pipelines.md @@ -50,6 +50,12 @@ ;; ------------------------------------------------------------------------ +;; Include N12/N13 pipeline settings. 
+;; ------------------------------------------------------------------------ +(include "nds32-n13.md") + + +;; ------------------------------------------------------------------------ ;; Define simple pipeline settings. ;; ------------------------------------------------------------------------ -- 2.7.4