From 2f2ebf951ba2f5f85c53213e0756756ce45bd58f Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Sun, 13 May 2018 06:52:02 +0000 Subject: [PATCH] [NDS32] Implement n10 pipeline. gcc/ * config.gcc (nds32*-*-*): Check that n10/d10 are valid to --with-cpu. * config/nds32/nds32-n10.md: New file. * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N10. * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n10 pipeline. * config/nds32/nds32-protos.h: More declarations for n10 pipeline. * config/nds32/nds32-utils.c: More implementations for n10 pipeline. * config/nds32/nds32.md (pipeline_model): Add n10. * config/nds32/nds32.opt (mcpu): Support n10 pipeline cpus. * config/nds32/pipelines.md: Include n10 settings. Co-Authored-By: Chung-Ju Wu From-SVN: r260207 --- gcc/ChangeLog | 14 + gcc/config.gcc | 4 +- gcc/config/nds32/nds32-n10.md | 439 +++++++++++++++++++++++++++ gcc/config/nds32/nds32-opts.h | 1 + gcc/config/nds32/nds32-pipelines-auxiliary.c | 267 +++++++++++++++- gcc/config/nds32/nds32-protos.h | 5 + gcc/config/nds32/nds32-utils.c | 13 + gcc/config/nds32/nds32.md | 3 +- gcc/config/nds32/nds32.opt | 45 +++ gcc/config/nds32/pipelines.md | 6 + 10 files changed, 780 insertions(+), 17 deletions(-) create mode 100644 gcc/config/nds32/nds32-n10.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7b788ca..f39ff63 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2018-05-13 Kito Cheng + Chung-Ju Wu + + * config.gcc (nds32*-*-*): Check that n10/d10 are valid to --with-cpu. + * config/nds32/nds32-n10.md: New file. + * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N10. + * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n10 + pipeline. + * config/nds32/nds32-protos.h: More declarations for n10 pipeline. + * config/nds32/nds32-utils.c: More implementations for n10 pipeline. + * config/nds32/nds32.md (pipeline_model): Add n10. + * config/nds32/nds32.opt (mcpu): Support n10 pipeline cpus. + * config/nds32/pipelines.md: Include n10 settings.
+ 2018-05-13 Monk Chiang Kito Cheng Chung-Ju Wu diff --git a/gcc/config.gcc b/gcc/config.gcc index fec118c..b729f3b 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4367,11 +4367,11 @@ case "${target}" in "") with_cpu=n9 ;; - n6 | n7 | n8 | e8 | s8 | n9) + n6 | n7 |n8 | e8 | s8 | n9 | n10 | d10) # OK ;; *) - echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9" 1>&2 + echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9 n10 d10" 1>&2 exit 1 ;; esac diff --git a/gcc/config/nds32/nds32-n10.md b/gcc/config/nds32/nds32-n10.md new file mode 100644 index 0000000..0dd76da --- /dev/null +++ b/gcc/config/nds32/nds32-n10.md @@ -0,0 +1,439 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ------------------------------------------------------------------------ +;; Define N10 pipeline settings.
+;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n10_machine") + +;; ------------------------------------------------------------------------ +;; Pipeline Stages +;; ------------------------------------------------------------------------ +;; IF - Instruction Fetch +;; II - Instruction Issue / Instruction Decode +;; EX - Instruction Execution +;; MM - Memory Execution +;; WB - Instruction Retire / Result Write-Back + +(define_cpu_unit "n10_ii" "nds32_n10_machine") +(define_cpu_unit "n10_ex" "nds32_n10_machine") +(define_cpu_unit "n10_mm" "nds32_n10_machine") +(define_cpu_unit "n10_wb" "nds32_n10_machine") +(define_cpu_unit "n10f_iq" "nds32_n10_machine") +(define_cpu_unit "n10f_rf" "nds32_n10_machine") +(define_cpu_unit "n10f_e1" "nds32_n10_machine") +(define_cpu_unit "n10f_e2" "nds32_n10_machine") +(define_cpu_unit "n10f_e3" "nds32_n10_machine") +(define_cpu_unit "n10f_e4" "nds32_n10_machine") + +(define_insn_reservation "nds_n10_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_mmu" 1 + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_alu_shift" 1 + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_pbsad" 1 + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex*3, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_pbsada" 1 + (and (eq_attr "type" "pbsada") + (eq_attr 
"pipeline_model" "n10")) + "n10_ii, n10_ex*3, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_load" 1 + (and (match_test "nds32::load_single_p (insn)") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_store" 1 + (and (match_test "nds32::store_single_p (insn)") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_1" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1"))) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_2" 1 + (and (eq_attr "pipeline_model" "n10") + (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::load_double_p (insn)"))) + "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_3" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_4" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_5" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_6" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + 
+(define_insn_reservation "nds_n10_load_multiple_7" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_N" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (match_test "get_attr_combo (insn) >= 8"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_1" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1"))) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_2" 1 + (and (eq_attr "pipeline_model" "n10") + (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::store_double_p (insn)"))) + "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_3" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_4" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_5" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_6" 1 + (and (eq_attr 
"pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_7" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_N" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (match_test "get_attr_combo (insn) >= 8"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_mul" 1 + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_mac" 1 + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex*34, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_alu" 1 + (and (eq_attr "type" "dalu") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_alu64" 1 + (and (eq_attr "type" "dalu64") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_alu_round" 1 + (and (eq_attr "type" "daluround") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_cmp" 1 + (and (eq_attr "type" "dcmp") + (eq_attr "pipeline_model" "n10")) + 
"n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_clip" 1 + (and (eq_attr "type" "dclip") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_mul" 1 + (and (eq_attr "type" "dmul") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_mac" 1 + (and (eq_attr "type" "dmac") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_insb" 1 + (and (eq_attr "type" "dinsb") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_pack" 1 + (and (eq_attr "type" "dpack") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_bpick" 1 + (and (eq_attr "type" "dbpick") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_wext" 1 + (and (eq_attr "type" "dwext") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_fpu_alu" 4 + (and (eq_attr "type" "falu") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_muls" 4 + (and (eq_attr "type" "fmuls") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_muld" 4 + (and (eq_attr "type" "fmuld") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_macs" 4 + (and (eq_attr "type" "fmacs") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*3, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_macd" 4 + (and (eq_attr "type" "fmacd") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, 
n10f_e2*4, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_divs" 4 + (and (ior (eq_attr "type" "fdivs") + (eq_attr "type" "fsqrts")) + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*14, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_divd" 4 + (and (ior (eq_attr "type" "fdivd") + (eq_attr "type" "fsqrtd")) + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*28, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fast_alu" 2 + (and (ior (eq_attr "type" "fcmp") + (ior (eq_attr "type" "fabs") + (ior (eq_attr "type" "fcpy") + (eq_attr "type" "fcmov")))) + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fmtsr" 4 + (and (eq_attr "type" "fmtsr") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fmtdr" 4 + (and (eq_attr "type" "fmtdr") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fmfsr" 2 + (and (eq_attr "type" "fmfsr") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fmfdr" 2 + (and (eq_attr "type" "fmfdr") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_load" 3 + (and (eq_attr "type" "fload") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_store" 1 + (and (eq_attr "type" "fstore") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +;; 
------------------------------------------------------------------------ +;; Comment Notations and Bypass Rules +;; ------------------------------------------------------------------------ +;; Producers (LHS) +;; LD +;; Load data from the memory and produce the loaded data. The result is +;; ready at MM. +;; LMW(N, M) +;; There are N micro-operations within an instruction that loads multiple +;; words. The result produced by the M-th micro-operation is sent to +;; consumers. The result is ready at MM. +;; MUL, MAC +;; Compute data in the multiply-adder and produce the data. The result +;; is ready at MM. +;; DIV +;; Compute data in the divider and produce the data. The result is ready +;; at MM. +;; +;; Consumers (RHS) +;; ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU +;; Require operands at EX. +;; ALU_SHIFT_Rb +;; An ALU-SHIFT instruction consists of a shift micro-operation followed +;; by an arithmetic micro-operation. The operand Rb is used by the first +;; micro-operation, and there is extra latency if a data dependency occurs. +;; MAC_RaRb +;; A MAC instruction does multiplication at EX and does accumulation at MM, +;; so the operand Rt is required at MM, and operands Ra and Rb are required +;; at EX. +;; ADDR_IN +;; If an instruction requires an address as its input operand, the address +;; is required at EX. +;; ST +;; A store instruction requires its data at MM. +;; SMW(N, M) +;; There are N micro-operations within an instruction that stores multiple +;; words. Each M-th micro-operation requires its data at MM. +;; BR +;; If a branch instruction is conditional, its input data is required at EX. + +;; FPU_ADDR_OUT -> FPU_ADDR_IN +;; Main pipeline rules don't need this because their default latency is 1.
+(define_bypass 1 + "nds_n10_fpu_load, nds_n10_fpu_store" + "nds_n10_fpu_load, nds_n10_fpu_store" + "nds32_n10_ex_to_ex_p" +) + +;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU, +;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb +(define_bypass 2 + "nds_n10_load, nds_n10_mul, nds_n10_mac, nds_n10_div,\ + nds_n10_dsp_alu64, nds_n10_dsp_mul, nds_n10_dsp_mac,\ + nds_n10_dsp_alu_round, nds_n10_dsp_bpick, nds_n10_dsp_wext" + "nds_n10_alu, nds_n10_alu_shift,\ + nds_n10_pbsad, nds_n10_pbsada,\ + nds_n10_mul, nds_n10_mac, nds_n10_div,\ + nds_n10_branch,\ + nds_n10_load, nds_n10_store,\ + nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ + nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ + nds_n10_load_multiple_7, nds_n10_load_multiple_N,\ + nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\ + nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\ + nds_n10_store_multiple_7, nds_n10_store_multiple_N,\ + nds_n10_mmu,\ + nds_n10_dsp_alu, nds_n10_dsp_alu_round,\ + nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\ + nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\ + nds_n10_dsp_wext, nds_n10_dsp_bpick" + "nds32_n10_mm_to_ex_p" +) + +;; LMW(N, N) +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU +;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb +(define_bypass 2 + "nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ + nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ + nds_n10_load_multiple_7, nds_n10_load_multiple_N" + "nds_n10_alu, nds_n10_alu_shift,\ + nds_n10_pbsad, nds_n10_pbsada,\ + nds_n10_mul, nds_n10_mac, nds_n10_div,\ + nds_n10_branch,\ + nds_n10_load, nds_n10_store,\ + nds_n10_load_multiple_1, 
nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ + nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ + nds_n10_load_multiple_7, nds_n10_load_multiple_N,\ + nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\ + nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\ + nds_n10_store_multiple_7, nds_n10_store_multiple_N,\ + nds_n10_mmu,\ + nds_n10_dsp_alu, nds_n10_dsp_alu_round,\ + nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\ + nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\ + nds_n10_dsp_wext, nds_n10_dsp_bpick" + "nds32_n10_last_load_to_ex_p" +) diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h index 5d7e165..61e95f9 100644 --- a/gcc/config/nds32/nds32-opts.h +++ b/gcc/config/nds32/nds32-opts.h @@ -42,6 +42,7 @@ enum nds32_cpu_type CPU_N8, CPU_E8, CPU_N9, + CPU_N10, CPU_SIMPLE }; diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c index a983238..7ab5539 100644 --- a/gcc/config/nds32/nds32-pipelines-auxiliary.c +++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c @@ -335,6 +335,103 @@ movd44_even_dep_p (rtx_insn *insn, rtx def_reg) return false; } +/* Check if INSN is a wext insn consuming DEF_REG. 
*/ +bool +wext_odd_dep_p (rtx insn, rtx def_reg) +{ + rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0); + rtx use_reg = XEXP (shift_rtx, 0); + rtx pos_rtx = XEXP (shift_rtx, 1); + + if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx)) + return true; + + if (GET_MODE (def_reg) == DImode) + return reg_overlap_p (def_reg, use_reg); + + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + gcc_assert (REG_P (use_reg)); + + if (REG_P (def_reg)) + { + if (!TARGET_BIG_ENDIAN) + return REGNO (def_reg) == REGNO (use_reg) + 1; + else + return REGNO (def_reg) == REGNO (use_reg); + } + + if (GET_CODE (def_reg) == SUBREG) + { + if (!reg_overlap_p (def_reg, use_reg)) + return false; + + if (!TARGET_BIG_ENDIAN) + return SUBREG_BYTE (def_reg) == 4; + else + return SUBREG_BYTE (def_reg) == 0; + } + + return false; +} + +/* Check if INSN is a bpick insn consuming DEF_REG. */ +bool +bpick_ra_rb_dep_p (rtx insn, rtx def_reg) +{ + rtx ior_rtx = SET_SRC (PATTERN (insn)); + rtx and1_rtx = XEXP (ior_rtx, 0); + rtx and2_rtx = XEXP (ior_rtx, 1); + rtx reg1_0 = XEXP (and1_rtx, 0); + rtx reg1_1 = XEXP (and1_rtx, 1); + rtx reg2_0 = XEXP (and2_rtx, 0); + rtx reg2_1 = XEXP (and2_rtx, 1); + + if (GET_CODE (reg1_0) == NOT) + { + if (rtx_equal_p (reg1_0, reg2_0)) + return reg_overlap_p (def_reg, reg1_1) + || reg_overlap_p (def_reg, reg2_1); + + if (rtx_equal_p (reg1_0, reg2_1)) + return reg_overlap_p (def_reg, reg1_1) + || reg_overlap_p (def_reg, reg2_0); + } + + if (GET_CODE (reg1_1) == NOT) + { + if (rtx_equal_p (reg1_1, reg2_0)) + return reg_overlap_p (def_reg, reg1_0) + || reg_overlap_p (def_reg, reg2_1); + + if (rtx_equal_p (reg1_1, reg2_1)) + return reg_overlap_p (def_reg, reg1_0) + || reg_overlap_p (def_reg, reg2_0); + } + + if (GET_CODE (reg2_0) == NOT) + { + if (rtx_equal_p (reg2_0, reg1_0)) + return reg_overlap_p (def_reg, reg2_1) + || reg_overlap_p (def_reg, reg1_1); + + if (rtx_equal_p (reg2_0, reg1_1)) + return reg_overlap_p (def_reg, reg2_1) + || reg_overlap_p 
(def_reg, reg1_0); + } + + if (GET_CODE (reg2_1) == NOT) + { + if (rtx_equal_p (reg2_1, reg1_0)) + return reg_overlap_p (def_reg, reg2_0) + || reg_overlap_p (def_reg, reg1_1); + + if (rtx_equal_p (reg2_1, reg1_1)) + return reg_overlap_p (def_reg, reg2_0) + || reg_overlap_p (def_reg, reg1_0); + } + + gcc_unreachable (); +} } // namespace scheduling } // namespace nds32 @@ -375,8 +472,7 @@ n7_consumed_by_ii_dep_p (rtx_insn *consumer, rtx def_reg) operations in order to write two registers. We have to check the dependency from the producer to the first micro-operation. */ case TYPE_DIV: - if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 - || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + if (divmod_p (consumer)) use_rtx = SET_SRC (parallel_element (consumer, 0)); else use_rtx = SET_SRC (PATTERN (consumer)); @@ -506,8 +602,7 @@ n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg) operations in order to write two registers. We have to check the dependency from the producer to the first micro-operation. */ case TYPE_DIV: - if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 - || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + if (divmod_p (consumer)) use_rtx = SET_SRC (parallel_element (consumer, 0)); else use_rtx = SET_SRC (PATTERN (consumer)); @@ -606,8 +701,7 @@ n9_2r1w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) break; case TYPE_DIV: - if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 - || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + if (divmod_p (consumer)) use_rtx = SET_SRC (parallel_element (consumer, 0)); else use_rtx = SET_SRC (PATTERN (consumer)); @@ -706,8 +800,7 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) We have to check the dependency from the producer to the first micro-operation. 
*/ case TYPE_DIV: - if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 - || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + if (divmod_p (consumer)) use_rtx = SET_SRC (parallel_element (consumer, 0)); else use_rtx = SET_SRC (PATTERN (consumer)); @@ -744,6 +837,86 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) return false; } +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at EX. */ +bool +n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + case TYPE_PBSAD: + case TYPE_MUL: + case TYPE_DALU: + case TYPE_DALU64: + case TYPE_DMUL: + case TYPE_DPACK: + case TYPE_DINSB: + case TYPE_DCMP: + case TYPE_DCLIP: + case TYPE_DALUROUND: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_MAC: + case TYPE_DMAC: + use_rtx = extract_mac_non_acc_rtx (consumer); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. 
*/ + case TYPE_DIV: + if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_DWEXT: + return wext_odd_dep_p (consumer, def_reg); + + case TYPE_DBPICK: + return bpick_ra_rb_dep_p (consumer, def_reg); + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_LOAD: + case TYPE_STORE: + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} } // anonymous namespace @@ -837,8 +1010,7 @@ nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer) break; case TYPE_DIV: - if (INSN_CODE (producer) == CODE_FOR_divmodsi4 - || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + if (divmod_p (producer)) def_reg = SET_DEST (parallel_element (producer, 1)); else def_reg = SET_DEST (PATTERN (producer)); @@ -969,8 +1141,7 @@ nds32_e8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer) break; case TYPE_DIV: - if (INSN_CODE (producer) == CODE_FOR_divmodsi4 - || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + if (divmod_p (producer)) { rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); @@ -1073,8 +1244,7 @@ nds32_n9_3r2w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) results, the quotient and the remainder. We have to handle them individually. 
*/ case TYPE_DIV: - if (INSN_CODE (producer) == CODE_FOR_divmodsi4 - || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + if (divmod_p (producer)) { rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); @@ -1132,4 +1302,73 @@ nds32_n9_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg); } +/* Guard functions for N10 cores. */ + +/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN). */ +bool +nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + gcc_assert (get_attr_type (producer) == TYPE_FLOAD + || get_attr_type (producer) == TYPE_FSTORE); + gcc_assert (get_attr_type (consumer) == TYPE_FLOAD + || get_attr_type (consumer) == TYPE_FSTORE); + + if (!post_update_insn_p (producer)) + return false; + + return reg_overlap_p (extract_base_reg (producer), + extract_mem_rtx (consumer)); +} + +/* Check dependencies from MM to EX. */ +bool +nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_LOAD: + case TYPE_MUL: + case TYPE_MAC: + case TYPE_DALU64: + case TYPE_DMUL: + case TYPE_DMAC: + case TYPE_DALUROUND: + case TYPE_DBPICK: + case TYPE_DWEXT: + def_reg = SET_DEST (PATTERN (producer)); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. We have to handle them + individually. */ + case TYPE_DIV: + if (divmod_p (producer)) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (n10_consumed_by_ex_dep_p (consumer, def_reg1) + || n10_consumed_by_ex_dep_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n10_consumed_by_ex_dep_p (consumer, def_reg); +} + +/* Check dependencies from LMW(N, N) to EX. 
*/ +bool +nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return n10_consumed_by_ex_dep_p (consumer, last_def_reg); +} /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index 64500e3..8e66522 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -121,6 +121,9 @@ extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n10_ex_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n10_mm_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n10_last_load_to_ex_p (rtx_insn *, rtx_insn *); /* Auxiliary functions for stack operation predicate checking. */ @@ -300,6 +303,8 @@ rtx extract_movd44_odd_reg (rtx_insn *); rtx extract_mac_non_acc_rtx (rtx_insn *); +bool divmod_p (rtx_insn *); + rtx extract_branch_target_rtx (rtx_insn *); rtx extract_branch_condition_rtx (rtx_insn *); } // namespace nds32 diff --git a/gcc/config/nds32/nds32-utils.c b/gcc/config/nds32/nds32-utils.c index 203cd11..6c93774 100644 --- a/gcc/config/nds32/nds32-utils.c +++ b/gcc/config/nds32/nds32-utils.c @@ -424,6 +424,19 @@ extract_mac_non_acc_rtx (rtx_insn *insn) } } +/* Check if the DIV insn needs two write ports. */ +bool +divmod_p (rtx_insn *insn) +{ + gcc_assert (get_attr_type (insn) == TYPE_DIV); + + if (INSN_CODE (insn) == CODE_FOR_divmodsi4 + || INSN_CODE (insn) == CODE_FOR_udivmodsi4) + return true; + + return false; +} + /* Extract the rtx representing the branch target to help recognize data hazards. 
*/ rtx diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index 7249520..d6c3515 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -56,12 +56,13 @@ ;; ------------------------------------------------------------------------ ;; CPU pipeline model. -(define_attr "pipeline_model" "n7,n8,e8,n9,simple" +(define_attr "pipeline_model" "n7,n8,e8,n9,n10,simple" (const (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7") (match_test "nds32_cpu_option == CPU_E8") (const_string "e8") (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8") (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") + (match_test "nds32_cpu_option == CPU_N10") (const_string "n10") (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] (const_string "n9")))) diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index 2c72a01..4111278 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -243,6 +243,51 @@ EnumValue Enum(nds32_cpu_type) String(n968a) Value(CPU_N9) EnumValue +Enum(nds32_cpu_type) String(n10) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033a) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d10) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088-fpu) 
Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10) + +EnumValue Enum(nds32_cpu_type) String(simple) Value(CPU_SIMPLE) mconfig-fpu= diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md index 3428807..a382c75 100644 --- a/gcc/config/nds32/pipelines.md +++ b/gcc/config/nds32/pipelines.md @@ -44,6 +44,12 @@ ;; ------------------------------------------------------------------------ +;; Include N10 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n10.md") + + +;; ------------------------------------------------------------------------ ;; Define simple pipeline settings. ;; ------------------------------------------------------------------------ -- 2.7.4