From: Kito Cheng Date: Sun, 8 Apr 2018 08:12:19 +0000 (+0000) Subject: [NDS32] Implment n9 pipeline. X-Git-Tag: upstream/12.2.0~32443 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b99353a2aadd2de47694db415005ab137dd33959;p=platform%2Fupstream%2Fgcc.git [NDS32] Implment n9 pipeline. gcc/ * config.gcc (nds32*): Add nds32-utils.o into extra_objs. * config/nds32/nds32-n9-2r1w.md: New file. * config/nds32/nds32-n9-3r2w.md: New file. * config/nds32/nds32-opts.h (nds32_cpu_type, nds32_mul_type, nds32_register_ports): New or modify for cpu n9. * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n9 pipeline. * config/nds32/nds32-protos.h: More declarations for n9 pipeline. * config/nds32/nds32-utils.c: New file. * config/nds32/nds32.h (TARGET_PIPELINE_N9, TARGET_PIPELINE_SIMPLE, TARGET_MUL_SLOW): Define. * config/nds32/nds32.md (pipeline_model): New attribute. * config/nds32/nds32.opt (mcpu, mconfig-mul, mconfig-register-ports): New options that support cpu n9. * config/nds32/pipelines.md: Include n9 settings. * config/nds32/t-nds32 (nds32-utils.o): Add dependency. Co-Authored-By: Chung-Ju Wu From-SVN: r259218 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6f98ac7..96a8584 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2018-04-08 Kito Cheng + Chung-Ju Wu + + * config.gcc (nds32*): Add nds32-utils.o into extra_objs. + * config/nds32/nds32-n9-2r1w.md: New file. + * config/nds32/nds32-n9-3r2w.md: New file. + * config/nds32/nds32-opts.h (nds32_cpu_type, nds32_mul_type, + nds32_register_ports): New or modify for cpu n9. + * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n9 + pipeline. + * config/nds32/nds32-protos.h: More declarations for n9 pipeline. + * config/nds32/nds32-utils.c: New file. + * config/nds32/nds32.h (TARGET_PIPELINE_N9, TARGET_PIPELINE_SIMPLE, + TARGET_MUL_SLOW): Define. + * config/nds32/nds32.md (pipeline_model): New attribute. 
+ * config/nds32/nds32.opt (mcpu, mconfig-mul, mconfig-register-ports): + New options that support cpu n9. + * config/nds32/pipelines.md: Include n9 settings. + * config/nds32/t-nds32 (nds32-utils.o): Add dependency. + 2018-04-08 Chung-Ju Wu * config/nds32/nds32-md-auxiliary.c (output_cond_branch): Output align diff --git a/gcc/config.gcc b/gcc/config.gcc index 99c7ae1..fa15a8c 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -445,7 +445,7 @@ mips*-*-*) nds32*) cpu_type=nds32 extra_headers="nds32_intrinsic.h" - extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o nds32-relax-opt.o" + extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o nds32-relax-opt.o nds32-utils.o" ;; nios2-*-*) cpu_type=nios2 diff --git a/gcc/config/nds32/nds32-n9-2r1w.md b/gcc/config/nds32/nds32-n9-2r1w.md new file mode 100644 index 0000000..b742e55 --- /dev/null +++ b/gcc/config/nds32/nds32-n9-2r1w.md @@ -0,0 +1,362 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ + +;; ------------------------------------------------------------------------ +;; Define N9 2R1W pipeline settings. +;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n9_2r1w_machine") + +;; ------------------------------------------------------------------------ +;; Pipeline Stages +;; ------------------------------------------------------------------------ +;; IF - Instruction Fetch +;; II - Instruction Issue / Instruction Decode +;; EX - Instruction Execution +;; MM - Memory Execution +;; WB - Instruction Retire / Result Write-Back + +(define_cpu_unit "n9_2r1w_ii" "nds32_n9_2r1w_machine") +(define_cpu_unit "n9_2r1w_ex" "nds32_n9_2r1w_machine") +(define_cpu_unit "n9_2r1w_mm" "nds32_n9_2r1w_machine") +(define_cpu_unit "n9_2r1w_wb" "nds32_n9_2r1w_machine") + +(define_insn_reservation "nds_n9_2r1w_unknown" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_misc" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mmu" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_alu" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_alu_shift" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, 
n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_pbsad" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex*3, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_pbsada" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex*3, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (match_test "nds32::load_single_p (insn)") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (match_test "nds32::store_single_p (insn)") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::load_double_p (insn)")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_3" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, 
n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_4" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_5" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*2, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_6" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*3, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_7" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*4, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_8" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" 
"load_multiple") + (eq_attr "combo" "8")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*5, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_12" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*9, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::store_double_p (insn)")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_3" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_4" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, 
n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_5" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*2, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_6" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*3, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_7" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*4, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_8" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*5, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_12" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr 
"pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*9, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mul_fast" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mul_slow" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex*17, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mac_fast" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mac_slow" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, (n9_2r1w_ii+n9_2r1w_ex)*17, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_div" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, (n9_2r1w_ii+n9_2r1w_ex)*34, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_branch" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, 
n9_2r1w_mm, n9_2r1w_wb") + +;; ------------------------------------------------------------------------ +;; Comment Notations and Bypass Rules +;; ------------------------------------------------------------------------ +;; Producers (LHS) +;; LD_!bi +;; Load data from the memory (without updating the base register) and +;; produce the loaded data. The result is ready at MM. Because the register +;; port is 2R1W, two micro-operations are required if the base register +;; should be updated. In this case, the base register is updated by the +;; second micro-operation, and the updated result is ready at EX. +;; LMW(N, M) +;; There are N micro-operations within an instruction that loads multiple +;; words. The result produced by the M-th micro-operation is sent to +;; consumers. The result is ready at MM. If the base register should be +;; updated, an extra micro-operation is appended to the end of the +;; sequence, and the result is ready at EX. +;; MUL, MAC +;; Compute data in the multiply-adder and produce the data. The result +;; is ready at MM. +;; DIV +;; Compute data in the divider and produce the data. The result is ready +;; at MM. +;; +;; Consumers (RHS) +;; ALU, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU +;; Require operands at EX. +;; ALU_SHIFT_Rb +;; An ALU-SHIFT instruction consists of a shift micro-operation followed +;; by an arithmetic micro-operation. The operand Rb is used by the first +;; micro-operation, and there are some latencies if data dependency occurs. +;; MOVD44_E +;; A double-word move instruction needs two micro-operations because the +;; register port is 2R1W. The first micro-operation writes an even number +;; register, and the second micro-operation writes an odd number register. +;; Each input operand is required at EX for each micro-operation. MOVD44_E +;; stands for the first micro-operation. 
+;; MAC_RaRb, M2R +;; MAC instructions do multiplication at EX and do accumulation at MM, but +;; MAC instructions which operate on general purpose registers always +;; require operands at EX because MM stage cannot be forwarded in 2R1W mode. +;; ADDR_IN +;; If an instruction requires an address as its input operand, the address +;; is required at EX. +;; ST_bi +;; A post-increment store instruction requires its data at EX because MM +;; cannot be forwarded in 2R1W mode. +;; ST_!bi_RI +;; A store instruction with an immediate offset requires its data at EX +;; because MM cannot be forwarded in 2R1W mode. If the offset field is a +;; register (ST_!bi_RR), the instruction will be separated into two micro- +;; operations, and the second one requires the input operand at EX in order +;; to store it to the memory. +;; SMW(N, M) +;; There are N micro-operations within an instruction that stores multiple +;; words. Each M-th micro-operation requires its data at MM. +;; BR +;; If a branch instruction is conditional, its input data is required at EX. 
+ +;; LD_!bi, MUL, MAC +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44_E, MUL, MAC_RaRb, M2R, DIV, ADDR_IN_!bi, ADDR_IN_bi_Ra, ST_bi, ST_!bi_RI, BR, MMU +(define_bypass 2 + "nds_n9_2r1w_load,\ + nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ + nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow" + "nds_n9_2r1w_alu, nds_n9_2r1w_alu_shift,\ + nds_n9_2r1w_pbsad, nds_n9_2r1w_pbsada,\ + nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ + nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow,\ + nds_n9_2r1w_branch,\ + nds_n9_2r1w_div,\ + nds_n9_2r1w_load,nds_n9_2r1w_store,\ + nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ + nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\ + nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12,\ + nds_n9_2r1w_store_multiple_1,nds_n9_2r1w_store_multiple_2, nds_n9_2r1w_store_multiple_3,\ + nds_n9_2r1w_store_multiple_4,nds_n9_2r1w_store_multiple_5, nds_n9_2r1w_store_multiple_6,\ + nds_n9_2r1w_store_multiple_7,nds_n9_2r1w_store_multiple_8, nds_n9_2r1w_store_multiple_12,\ + nds_n9_2r1w_mmu" + "nds32_n9_2r1w_mm_to_ex_p" +) + +;; LMW(N, N) +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44_E, MUL, MAC_RaRb, M2R, DIV, ADDR_IN_!bi, ADDR_IN_bi_Ra, ST_bi, ST_!bi_RI, BR, MMU +(define_bypass 2 + "nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ + nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\ + nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12" + "nds_n9_2r1w_alu, nds_n9_2r1w_alu_shift,\ + nds_n9_2r1w_pbsad, nds_n9_2r1w_pbsada,\ + nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ + nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow,\ + nds_n9_2r1w_branch,\ + nds_n9_2r1w_div,\ + nds_n9_2r1w_load,nds_n9_2r1w_store,\ + nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ + nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, 
nds_n9_2r1w_load_multiple_6,\ + nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12,\ + nds_n9_2r1w_store_multiple_1,nds_n9_2r1w_store_multiple_2, nds_n9_2r1w_store_multiple_3,\ + nds_n9_2r1w_store_multiple_4,nds_n9_2r1w_store_multiple_5, nds_n9_2r1w_store_multiple_6,\ + nds_n9_2r1w_store_multiple_7,nds_n9_2r1w_store_multiple_8, nds_n9_2r1w_store_multiple_12,\ + nds_n9_2r1w_mmu" + "nds32_n9_last_load_to_ex_p" +) diff --git a/gcc/config/nds32/nds32-n9-3r2w.md b/gcc/config/nds32/nds32-n9-3r2w.md new file mode 100644 index 0000000..8439cc6 --- /dev/null +++ b/gcc/config/nds32/nds32-n9-3r2w.md @@ -0,0 +1,357 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ------------------------------------------------------------------------ +;; Define N9 3R2W pipeline settings. 
+;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n9_3r2w_machine") + +;; ------------------------------------------------------------------------ +;; Pipeline Stages +;; ------------------------------------------------------------------------ +;; IF - Instruction Fetch +;; II - Instruction Issue / Instruction Decode +;; EX - Instruction Execution +;; MM - Memory Execution +;; WB - Instruction Retire / Result Write-Back + +(define_cpu_unit "n9_3r2w_ii" "nds32_n9_3r2w_machine") +(define_cpu_unit "n9_3r2w_ex" "nds32_n9_3r2w_machine") +(define_cpu_unit "n9_3r2w_mm" "nds32_n9_3r2w_machine") +(define_cpu_unit "n9_3r2w_wb" "nds32_n9_3r2w_machine") + +(define_insn_reservation "nds_n9_3r2w_unknown" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_misc" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mmu" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_alu" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_alu_shift" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_pbsad" 1 + (and 
(match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*3, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_pbsada" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*3, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (match_test "nds32::load_single_p (insn)") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (match_test "nds32::store_single_p (insn)") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::load_double_p (insn)")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_3" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + 
+(define_insn_reservation "nds_n9_3r2w_load_multiple_4" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_5" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*2, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_6" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*3, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_7" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*4, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_8" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, 
(n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*5, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_12" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*9, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::store_double_p (insn)")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_3" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_4" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, 
n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_5" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*2, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_6" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*3, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_7" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*4, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_8" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*5, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_12" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")))) + "n9_3r2w_ii, 
n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*9, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mul_fast1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_1") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mul_fast2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_2") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*2, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mul_slow" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*17, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mac_fast1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_1") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mac_fast2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_2") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*2, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mac_slow" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*17, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_div" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + 
(and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*34, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_branch" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +;; ------------------------------------------------------------------------ +;; Comment Notations and Bypass Rules +;; ------------------------------------------------------------------------ +;; Producers (LHS) +;; LD +;; Load data from the memory and produce the loaded data. The result is +;; ready at MM. +;; LMW(N, M) +;; There are N micro-operations within an instruction that loads multiple +;; words. The result produced by the M-th micro-operation is sent to +;; consumers. The result is ready at MM. +;; MUL, MAC +;; Compute data in the multiply-adder and produce the data. The result +;; is ready at MM. +;; DIV +;; Compute data in the divider and produce the data. The result is ready +;; at MM. +;; +;; Consumers (RHS) +;; ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU +;; Require operands at EX. +;; ALU_SHIFT_Rb +;; An ALU-SHIFT instruction consists of a shift micro-operation followed +;; by an arithmetic micro-operation. The operand Rb is used by the first +;; micro-operation, and there are some latencies if data dependency occurs. +;; MAC_RaRb +;; A MAC instruction does multiplication at EX and does accumulation at MM, +;; so the operand Rt is required at MM, and operands Ra and Rb are required +;; at EX. +;; ADDR_IN +;; If an instruction requires an address as its input operand, the address +;; is required at EX. +;; ST +;; A store instruction requires its data at MM. +;; SMW(N, M) +;; There are N micro-operations within an instruction that stores multiple +;; words. Each M-th micro-operation requires its data at MM. 
+;; BR +;; If a branch instruction is conditional, its input data is required at EX. + +;; LD, MUL, MAC, DIV +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU +(define_bypass 2 + "nds_n9_3r2w_load,\ + nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ + nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ + nds_n9_3r2w_div" + "nds_n9_3r2w_alu, nds_n9_3r2w_alu_shift,\ + nds_n9_3r2w_pbsad, nds_n9_3r2w_pbsada,\ + nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ + nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ + nds_n9_3r2w_branch,\ + nds_n9_3r2w_div,\ + nds_n9_3r2w_load,nds_n9_3r2w_store,\ + nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ + nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ + nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12,\ + nds_n9_3r2w_store_multiple_1,nds_n9_3r2w_store_multiple_2, nds_n9_3r2w_store_multiple_3,\ + nds_n9_3r2w_store_multiple_4,nds_n9_3r2w_store_multiple_5, nds_n9_3r2w_store_multiple_6,\ + nds_n9_3r2w_store_multiple_7,nds_n9_3r2w_store_multiple_8, nds_n9_3r2w_store_multiple_12,\ + nds_n9_3r2w_mmu" + "nds32_n9_3r2w_mm_to_ex_p" +) + +;; LMW(N, N) +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU +(define_bypass 2 + "nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ + nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ + nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12" + "nds_n9_3r2w_alu, nds_n9_3r2w_alu_shift,\ + nds_n9_3r2w_pbsad, nds_n9_3r2w_pbsada,\ + nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ + nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ + nds_n9_3r2w_branch,\ + nds_n9_3r2w_div,\ + 
nds_n9_3r2w_load,nds_n9_3r2w_store,\ + nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ + nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ + nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12,\ + nds_n9_3r2w_store_multiple_1,nds_n9_3r2w_store_multiple_2, nds_n9_3r2w_store_multiple_3,\ + nds_n9_3r2w_store_multiple_4,nds_n9_3r2w_store_multiple_5, nds_n9_3r2w_store_multiple_6,\ + nds_n9_3r2w_store_multiple_7,nds_n9_3r2w_store_multiple_8, nds_n9_3r2w_store_multiple_12,\ + nds_n9_3r2w_mmu" + "nds32_n9_last_load_to_ex_p" +) diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h index 642ec28..fc80323 100644 --- a/gcc/config/nds32/nds32-opts.h +++ b/gcc/config/nds32/nds32-opts.h @@ -34,6 +34,13 @@ enum nds32_arch_type ARCH_V3S }; +/* The various ANDES CPU. */ +enum nds32_cpu_type +{ + CPU_N9, + CPU_SIMPLE +}; + /* The code model defines the address generation strategy. */ enum nds32_cmodel_type { @@ -42,10 +49,19 @@ enum nds32_cmodel_type CMODEL_LARGE }; -/* The various ANDES CPU. */ -enum nds32_cpu_type +/* Multiply instruction configuration. */ +enum nds32_mul_type +{ + MUL_TYPE_FAST_1, + MUL_TYPE_FAST_2, + MUL_TYPE_SLOW +}; + +/* Register ports configuration. */ +enum nds32_register_ports { - CPU_N9 + REG_PORT_3R2W, + REG_PORT_2R1W }; /* Which ABI to use. 
*/ diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c index e4b6949..c34b168 100644 --- a/gcc/config/nds32/nds32-pipelines-auxiliary.c +++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c @@ -27,10 +27,598 @@ #include "system.h" #include "coretypes.h" #include "backend.h" +#include "rtl.h" +#include "insn-attr.h" +#include "insn-codes.h" +#include "target.h" + +#include "nds32-protos.h" + +/* ------------------------------------------------------------------------ */ + +namespace nds32 { +namespace scheduling { + +/* Classify the memory access direction. It's unknown if the offset register + is not a constant value. */ +enum memory_access_direction +{ + MEM_ACCESS_DIR_POS, + MEM_ACCESS_DIR_NEG, + MEM_ACCESS_DIR_UNKNOWN +}; + +/* NULL-safe wrapper to reg_overlap_mentioned_p (): false if X or IN is NULL. */ +bool +reg_overlap_p (rtx x, rtx in) +{ + if (x == NULL_RTX || in == NULL_RTX) + return false; + + return static_cast <bool> (reg_overlap_mentioned_p (x, in)); +} + + +/* Determine the memory access direction of a load/store insn. 
*/ +memory_access_direction +determine_access_direction (rtx_insn *insn) +{ + int post_update_rtx_index; + rtx plus_rtx; + rtx mem_rtx; + rtx offset_rtx; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + gcc_assert (parallel_elements (insn) >= 2); + + post_update_rtx_index = find_post_update_rtx (insn); + if (post_update_rtx_index != -1) + plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index)); + else + { + /* (parallel + [(set (reg) (mem (reg))) : index 0 + (set (reg) (mem (plus (reg) (...)))) : index 1 + ...]) */ + mem_rtx = SET_SRC (parallel_element (insn, 1)); + if (GET_CODE (mem_rtx) == UNSPEC) + mem_rtx = XVECEXP (mem_rtx, 0, 0); + gcc_assert (MEM_P (mem_rtx)); + plus_rtx = XEXP (mem_rtx, 0); + } + break; + + case TYPE_STORE_MULTIPLE: + gcc_assert (parallel_elements (insn) >= 2); + + post_update_rtx_index = find_post_update_rtx (insn); + if (post_update_rtx_index != -1) + plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index)); + else + { + /* (parallel + [(set (mem (reg)) (reg)) : index 0 + (set (mem (plus (reg) (...))) (reg)) : index 1 + ...]) */ + mem_rtx = SET_DEST (parallel_element (insn, 1)); + if (GET_CODE (mem_rtx) == UNSPEC) + mem_rtx = XVECEXP (mem_rtx, 0, 0); + gcc_assert (MEM_P (mem_rtx)); + plus_rtx = XEXP (mem_rtx, 0); + } + break; + + case TYPE_LOAD: + case TYPE_STORE: + mem_rtx = extract_mem_rtx (insn); + + switch (GET_CODE (XEXP (mem_rtx, 0))) + { + case POST_INC: + /* (mem (post_inc (...))) */ + return MEM_ACCESS_DIR_POS; + + case POST_DEC: + /* (mem (post_dec (...))) */ + return MEM_ACCESS_DIR_NEG; + + case PLUS: + /* (mem (plus (reg) (...))) */ + plus_rtx = XEXP (mem_rtx, 0); + break; + + case POST_MODIFY: + /* (mem (post_modify (reg) (plus (reg) (...)))) */ + plus_rtx = XEXP (XEXP (mem_rtx, 0), 1); + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + gcc_assert (GET_CODE (plus_rtx) == PLUS); + + offset_rtx = XEXP (plus_rtx, 1); + if (GET_CODE 
(offset_rtx) == CONST_INT) + { + if (INTVAL (offset_rtx) < 0) + return MEM_ACCESS_DIR_NEG; + else + return MEM_ACCESS_DIR_POS; + } + + return MEM_ACCESS_DIR_UNKNOWN; +} + +/* Return the nth load/store operation in the real micro-operation + accessing order. */ +rtx +extract_nth_access_rtx (rtx_insn *insn, int n) +{ + int n_elems = parallel_elements (insn); + int post_update_rtx_index = find_post_update_rtx (insn); + memory_access_direction direction = determine_access_direction (insn); + + gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN); + + /* Reverse the order if the direction is negative. */ + if (direction == MEM_ACCESS_DIR_NEG) + n = -1 * n - 1; + + if (post_update_rtx_index != -1) + { + if (n >= 0 && post_update_rtx_index <= n) + ++n; + else if (n < 0 && post_update_rtx_index >= n + n_elems) + --n; + } + + return parallel_element (insn, n); +} + +/* Returns the register operated on by the nth load/store operation in the real + micro-operation accessing order. This function assumes INSN must be a + multiple-word load/store insn. */ +rtx +extract_nth_lmsw_access_reg (rtx_insn *insn, int n) +{ + rtx nth_rtx = extract_nth_access_rtx (insn, n); + + if (nth_rtx == NULL_RTX) + return NULL_RTX; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + return SET_DEST (nth_rtx); + + case TYPE_STORE_MULTIPLE: + return SET_SRC (nth_rtx); + + default: + gcc_unreachable (); + } +} + +/* Returns the register operated on by the nth load/store operation in the real + micro-operation accessing order. This function assumes INSN must be a + double-word load/store insn. */ +rtx +extract_nth_ls2_access_reg (rtx_insn *insn, int n) +{ + rtx reg; + machine_mode mode; + + if (post_update_insn_p (insn)) + { + memory_access_direction direction = determine_access_direction (insn); + gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN); + + /* Reverse the order if the direction is negative. */ + if (direction == MEM_ACCESS_DIR_NEG) + n = -1 * n - 1; + } + + /* Handle the out-of-range case. 
*/ + if (n < -2 || n > 1) + return NULL_RTX; + + /* Convert the index to a positive one. */ + if (n < 0) + n = 2 + n; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + reg = SET_DEST (PATTERN (insn)); + break; + + case TYPE_STORE: + reg = SET_SRC (PATTERN (insn)); + break; + + default: + gcc_unreachable (); + } + + gcc_assert (REG_P (reg) || GET_CODE (reg) == SUBREG); + + switch (GET_MODE (reg)) + { + case E_DImode: + mode = SImode; + break; + + case E_DFmode: + mode = SFmode; + break; + + default: + gcc_unreachable (); + } + + if (n == 0) + return gen_lowpart (mode, reg); + else + return gen_highpart (mode, reg); +} + +/* Returns the register operated on by the nth load/store operation in the real + micro-operation accessing order. */ +rtx +extract_nth_access_reg (rtx_insn *insn, int index) +{ + switch (GET_CODE (PATTERN (insn))) + { + case PARALLEL: + return extract_nth_lmsw_access_reg (insn, index); + + case SET: + return extract_nth_ls2_access_reg (insn, index); + + default: + gcc_unreachable (); + } +} + +/* Determine if latency occurs when the consumer PBSADA_INSN uses the + value of DEF_REG in its Ra or Rb fields. */ +bool +pbsada_insn_ra_rb_dep_reg_p (rtx pbsada_insn, rtx def_reg) +{ + rtx unspec_rtx = SET_SRC (PATTERN (pbsada_insn)); + gcc_assert (GET_CODE (unspec_rtx) == UNSPEC); + + rtx pbsada_ra = XVECEXP (unspec_rtx, 0, 0); + rtx pbsada_rb = XVECEXP (unspec_rtx, 0, 1); + + if (rtx_equal_p (def_reg, pbsada_ra) + || rtx_equal_p (def_reg, pbsada_rb)) + return true; + + return false; +} + +/* Check if INSN is a movd44 insn consuming DEF_REG. 
*/ +bool +movd44_even_dep_p (rtx_insn *insn, rtx def_reg) +{ + if (!movd44_insn_p (insn)) + return false; + + rtx use_rtx = SET_SRC (PATTERN (insn)); + + if (REG_P (def_reg)) + { + return rtx_equal_p (def_reg, use_rtx); + } + else if (GET_CODE (def_reg) == SUBREG + && GET_MODE (def_reg) == SImode + && rtx_equal_p (SUBREG_REG (def_reg), use_rtx)) + { + if (TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 4) + return true; + + if (!TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 0) + return true; + + return false; + } + + return false; +} + +} // namespace scheduling +} // namespace nds32 /* ------------------------------------------------------------------------ */ -/* This file is prepared for future implementation of precise - pipeline description for nds32 target. */ +using namespace nds32; +using namespace nds32::scheduling; + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at EX. */ +bool +n9_2r1w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + if (movd44_even_dep_p (consumer, def_reg)) + return true; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_PBSAD: + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_MAC: + use_rtx = PATTERN (consumer); + break; + + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_LOAD: + /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ + if (post_update_insn_p (consumer)) + use_rtx = 
extract_base_reg (consumer); + else + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_STORE: + /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ + if (post_update_insn_p (consumer)) + use_rtx = extract_base_reg (consumer); + else + use_rtx = extract_mem_rtx (consumer); + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + /* exclude ST_!bi_RR */ + if (!post_update_insn_p (consumer) + && !immed_offset_p (extract_mem_rtx (consumer))) + return false; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_LOAD_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_STORE_MULTIPLE: + /* ADDR_IN */ + use_rtx = extract_base_reg (consumer); + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + /* SMW (N, 1) */ + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at EX. */ +bool +n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + case TYPE_PBSAD: + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_MAC: + use_rtx = extract_mac_non_acc_rtx (consumer); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. In 2R1W configuration, + it requires two micro-operations in order to write two registers. + We have to check the dependency from the producer to the first + micro-operation. 
*/ + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_LOAD: + case TYPE_STORE: + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +/* Guard functions for N9 cores. */ + +/* Check dependencies from MM to EX. */ +bool +nds32_n9_2r1w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + /* LD_!bi */ + case TYPE_LOAD: + if (post_update_insn_p (producer)) + return false; + + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n9_2r1w_consumed_by_ex_dep_p (consumer, def_reg); +} + +/* Check dependencies from MM to EX. */ +bool +nds32_n9_3r2w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_LOAD: + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. We have to handle them + individually. 
*/ + case TYPE_DIV: + if (INSN_CODE (producer) == CODE_FOR_divmodsi4 + || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg1) + || n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg); +} + +/* Check dependencies from LMW(N, N) to EX. */ +bool +nds32_n9_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (nds32_register_ports_config == REG_PORT_2R1W) + { + /* The base-update micro operation occupies the last cycle. */ + if (post_update_insn_p (producer)) + return false; + + /* When the base register is in the list of a load multiple insn and the + access order of the base register is not the last one, we need an + additional micro operation to commit the load result to the base + register -- we can treat the base register as the last defined + register. 
*/ + size_t i; + size_t n_elems = parallel_elements (producer); + rtx base_reg = extract_base_reg (producer); + + for (i = 0; i < n_elems; ++i) + { + rtx load_rtx = extract_nth_access_rtx (producer, i); + rtx list_element = SET_DEST (load_rtx); + + if (rtx_equal_p (base_reg, list_element) && i != n_elems - 1) + { + last_def_reg = base_reg; + break; + } + } + + return n9_2r1w_consumed_by_ex_dep_p (consumer, last_def_reg); + } + else + return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg); +} /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index 2f07700..8debf4b 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -97,6 +97,13 @@ extern void nds32_expand_unaligned_store (rtx *, enum machine_mode); extern bool nds32_valid_multiple_load_store_p (rtx, bool, bool); +/* Auxiliary functions for guard function checking in pipelines.md. */ + +extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *); + + /* Auxiliary functions for stack operation predicate checking. 
*/ extern bool nds32_valid_stack_push_pop_p (rtx, bool); @@ -225,6 +232,29 @@ extern void nds32_cpu_cpp_builtins(struct cpp_reader *); extern bool nds32_split_double_word_load_store_p (rtx *,bool); +namespace nds32 { + +extern rtx extract_pattern_from_insn (rtx); + +size_t parallel_elements (rtx); +rtx parallel_element (rtx, int); +bool load_single_p (rtx_insn *); +bool store_single_p (rtx_insn *); +bool load_double_p (rtx_insn *); +bool store_double_p (rtx_insn *); +bool post_update_insn_p (rtx_insn *); +bool immed_offset_p (rtx); +int find_post_update_rtx (rtx_insn *); +rtx extract_mem_rtx (rtx_insn *); +rtx extract_base_reg (rtx_insn *); + +rtx extract_shift_reg (rtx); + +bool movd44_insn_p (rtx_insn *); + +rtx extract_mac_non_acc_rtx (rtx_insn *); +} // namespace nds32 + /* Functions for create nds32 specific optimization pass. */ extern rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *); diff --git a/gcc/config/nds32/nds32-utils.c b/gcc/config/nds32/nds32-utils.c new file mode 100644 index 0000000..55d4a91 --- /dev/null +++ b/gcc/config/nds32/nds32-utils.c @@ -0,0 +1,399 @@ +/* Auxiliary functions for pipeline descriptions pattern of Andes + NDS32 cpu for GNU compiler + Copyright (C) 2012-2018 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +/* ------------------------------------------------------------------------ */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "tm_p.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "recog.h" +#include "tm-constrs.h" +#include "insn-attr.h" + + +namespace nds32 { + +/* Get the rtx in the PATTERN field of an insn. If INSN is not an insn, + the function doesn't change anything and returns it directly. */ +rtx +extract_pattern_from_insn (rtx insn) +{ + if (INSN_P (insn)) + return PATTERN (insn); + + return insn; +} + +/* Get the number of elements in a parallel rtx. */ +size_t +parallel_elements (rtx parallel_rtx) +{ + parallel_rtx = extract_pattern_from_insn (parallel_rtx); + gcc_assert (GET_CODE (parallel_rtx) == PARALLEL); + + return XVECLEN (parallel_rtx, 0); +} + +/* Extract an rtx from a parallel rtx with index NTH. A negative NTH counts + from the end (-1 is the last rtx); an out-of-range NTH yields NULL_RTX. */ +rtx +parallel_element (rtx parallel_rtx, int nth) +{ + parallel_rtx = extract_pattern_from_insn (parallel_rtx); + gcc_assert (GET_CODE (parallel_rtx) == PARALLEL); + + int len = parallel_elements (parallel_rtx); + + if (nth >= 0) + { + if (nth >= len) + return NULL_RTX; + + return XVECEXP (parallel_rtx, 0, nth); + } + else + { + if (len + nth < 0) + return NULL_RTX; + + return XVECEXP (parallel_rtx, 0, len + nth); + } +} + +/* Functions to determine whether INSN is single-word, double-word + or partial-word load/store insn. 
*/ + +bool +load_single_p (rtx_insn *insn) +{ + if (get_attr_type (insn) != TYPE_LOAD) + return false; + + if (INSN_CODE (insn) == CODE_FOR_move_di || + INSN_CODE (insn) == CODE_FOR_move_df) + return false; + + return true; +} + +bool +store_single_p (rtx_insn *insn) +{ + if (get_attr_type (insn) != TYPE_STORE) + return false; + + if (INSN_CODE (insn) == CODE_FOR_move_di || + INSN_CODE (insn) == CODE_FOR_move_df) + return false; + + return true; +} + +bool +load_double_p (rtx_insn *insn) +{ + if (get_attr_type (insn) != TYPE_LOAD) + return false; + + if (INSN_CODE (insn) != CODE_FOR_move_di && + INSN_CODE (insn) != CODE_FOR_move_df) + return false; + + return true; +} + +bool +store_double_p (rtx_insn *insn) +{ + if (get_attr_type (insn) != TYPE_STORE) + return false; + + if (INSN_CODE (insn) != CODE_FOR_move_di && + INSN_CODE (insn) != CODE_FOR_move_df) + return false; + + return true; +} + +/* Determine if INSN is a post update insn. */ +bool +post_update_insn_p (rtx_insn *insn) +{ + if (find_post_update_rtx (insn) == -1) + return false; + else + return true; +} + +/* Check if the address of MEM_RTX consists of a base register and an + immediate offset. */ +bool +immed_offset_p (rtx mem_rtx) +{ + gcc_assert (MEM_P (mem_rtx)); + + rtx addr_rtx = XEXP (mem_rtx, 0); + + /* (mem (reg)) is equivalent to (mem (plus (reg) (const_int 0))) */ + if (REG_P (addr_rtx)) + return true; + + /* (mem (plus (reg) (const_int))) */ + if (GET_CODE (addr_rtx) == PLUS + && GET_CODE (XEXP (addr_rtx, 1)) == CONST_INT) + return true; + + return false; +} + +/* Find the post update rtx in INSN. If INSN is a load/store multiple insn, + the function returns the vector index of its parallel part. If INSN is a + single load/store insn, the function returns 0. If INSN is not a post- + update insn, the function returns -1. 
*/ +int +find_post_update_rtx (rtx_insn *insn) +{ + rtx mem_rtx; + int i, len; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + /* Find a pattern in a parallel rtx: + (set (reg) (plus (reg) (const_int))) */ + len = parallel_elements (insn); + for (i = 0; i < len; ++i) + { + rtx curr_insn = parallel_element (insn, i); + + if (GET_CODE (curr_insn) == SET + && REG_P (SET_DEST (curr_insn)) + && GET_CODE (SET_SRC (curr_insn)) == PLUS) + return i; + } + return -1; + + case TYPE_LOAD: + case TYPE_FLOAD: + case TYPE_STORE: + case TYPE_FSTORE: + mem_rtx = extract_mem_rtx (insn); + /* (mem (post_inc/post_dec/post_modify ...)) */ + switch (GET_CODE (XEXP (mem_rtx, 0))) + { + case POST_INC: + case POST_DEC: + case POST_MODIFY: + return 0; + + default: + return -1; + } + + default: + gcc_unreachable (); + } +} + +/* Extract the MEM rtx from a load/store insn. */ +rtx +extract_mem_rtx (rtx_insn *insn) +{ + rtx body = PATTERN (insn); + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + case TYPE_FLOAD: + if (MEM_P (SET_SRC (body))) + return SET_SRC (body); + + /* unaligned address: (unspec [(mem)]) */ + if (GET_CODE (SET_SRC (body)) == UNSPEC) + { + gcc_assert (MEM_P (XVECEXP (SET_SRC (body), 0, 0))); + return XVECEXP (SET_SRC (body), 0, 0); + } + + /* (sign_extend (mem)) */ + gcc_assert (MEM_P (XEXP (SET_SRC (body), 0))); + return XEXP (SET_SRC (body), 0); + + case TYPE_STORE: + case TYPE_FSTORE: + if (MEM_P (SET_DEST (body))) + return SET_DEST (body); + + /* unaligned address: (unspec [(mem)]) */ + if (GET_CODE (SET_DEST (body)) == UNSPEC) + { + gcc_assert (MEM_P (XVECEXP (SET_DEST (body), 0, 0))); + return XVECEXP (SET_DEST (body), 0, 0); + } + + /* otherwise operand 0 of the destination must be the MEM */ + gcc_assert (MEM_P (XEXP (SET_DEST (body), 0))); + return XEXP (SET_DEST (body), 0); + + default: + gcc_unreachable (); + } +} + +/* Extract the base register from load/store insns. The function returns + NULL_RTX if the address does not consist of any registers. 
*/ +rtx +extract_base_reg (rtx_insn *insn) +{ + int post_update_rtx_index; + rtx mem_rtx; + rtx plus_rtx; + + /* Find the MEM rtx. If we can find an insn updating the base register, + the base register will be returned directly. */ + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + post_update_rtx_index = find_post_update_rtx (insn); + + if (post_update_rtx_index != -1) + return SET_DEST (parallel_element (insn, post_update_rtx_index)); + + mem_rtx = SET_SRC (parallel_element (insn, 0)); + break; + + case TYPE_STORE_MULTIPLE: + post_update_rtx_index = find_post_update_rtx (insn); + + if (post_update_rtx_index != -1) + return SET_DEST (parallel_element (insn, post_update_rtx_index)); + + mem_rtx = SET_DEST (parallel_element (insn, 0)); + break; + + case TYPE_LOAD: + case TYPE_FLOAD: + case TYPE_STORE: + case TYPE_FSTORE: + mem_rtx = extract_mem_rtx (insn); + break; + + default: + gcc_unreachable (); + } + + gcc_assert (MEM_P (mem_rtx)); + + /* (mem (reg)) */ + if (REG_P (XEXP (mem_rtx, 0))) + return XEXP (mem_rtx, 0); + + plus_rtx = XEXP (mem_rtx, 0); + + if (GET_CODE (plus_rtx) == SYMBOL_REF + || GET_CODE (plus_rtx) == CONST) + return NULL_RTX; + + gcc_assert (GET_CODE (plus_rtx) == PLUS + || GET_CODE (plus_rtx) == POST_INC + || GET_CODE (plus_rtx) == POST_DEC + || GET_CODE (plus_rtx) == POST_MODIFY); + gcc_assert (REG_P (XEXP (plus_rtx, 0))); + /* (mem (plus (reg) (const_int))) or + (mem (post_inc (reg))) or + (mem (post_dec (reg))) or + (mem (post_modify (reg) (plus (reg) (reg)))) */ + return XEXP (plus_rtx, 0); +} + +/* Extract the register of the shift operand from an ALU_SHIFT rtx. */ +rtx +extract_shift_reg (rtx alu_shift_rtx) +{ + alu_shift_rtx = extract_pattern_from_insn (alu_shift_rtx); + + rtx alu_rtx = SET_SRC (alu_shift_rtx); + rtx shift_rtx; + + /* Various forms of ALU_SHIFT can be made by the combiner. + See the difference between add_slli and sub_slli in nds32.md. 
*/ + if (REG_P (XEXP (alu_rtx, 0))) + shift_rtx = XEXP (alu_rtx, 1); + else + shift_rtx = XEXP (alu_rtx, 0); + + return XEXP (shift_rtx, 0); +} + +/* Check if INSN is a movd44 insn. */ +bool +movd44_insn_p (rtx_insn *insn) +{ + if (get_attr_type (insn) == TYPE_ALU + && (INSN_CODE (insn) == CODE_FOR_move_di + || INSN_CODE (insn) == CODE_FOR_move_df)) + { + rtx body = PATTERN (insn); + gcc_assert (GET_CODE (body) == SET); + + rtx src = SET_SRC (body); + rtx dest = SET_DEST (body); + + if ((REG_P (src) || GET_CODE (src) == SUBREG) + && (REG_P (dest) || GET_CODE (dest) == SUBREG)) + return true; + + return false; + } + + return false; +} + +/* Extract the rtx representing non-accumulation operands of a MAC insn. */ +rtx +extract_mac_non_acc_rtx (rtx_insn *insn) +{ + rtx exp = SET_SRC (PATTERN (insn)); + + switch (get_attr_type (insn)) + { + case TYPE_MAC: + if (REG_P (XEXP (exp, 0))) + return XEXP (exp, 1); + else + return XEXP (exp, 0); + + default: + gcc_unreachable (); + } +} + +} // namespace nds32 diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h index dc82c0a..03deda9 100644 --- a/gcc/config/nds32/nds32.h +++ b/gcc/config/nds32/nds32.h @@ -502,6 +502,11 @@ enum nds32_builtins || nds32_arch_option == ARCH_V3S) #define TARGET_ISA_V3M (nds32_arch_option == ARCH_V3M) +#define TARGET_PIPELINE_N9 \ + (nds32_cpu_option == CPU_N9) +#define TARGET_PIPELINE_SIMPLE \ + (nds32_cpu_option == CPU_SIMPLE) + #define TARGET_CMODEL_SMALL \ (nds32_cmodel_option == CMODEL_SMALL) #define TARGET_CMODEL_MEDIUM \ @@ -515,6 +520,8 @@ enum nds32_builtins (nds32_cmodel_option == CMODEL_SMALL\ || nds32_cmodel_option == CMODEL_MEDIUM) +#define TARGET_MUL_SLOW \ + (nds32_mul_config == MUL_TYPE_SLOW) /* Run-time Target Specification. 
*/ #define TARGET_SOFT_FLOAT (nds32_abi == NDS32_ABI_V2) @@ -553,6 +560,7 @@ enum nds32_builtins " %{!mno-ext-fpu-dp:%{!mext-fpu-dp:-mext-fpu-dp}}}" \ " %{march=v3s:%{!mfloat-abi=*:-mfloat-abi=hard}" \ " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}}" }, \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \ {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" } #define CC1_SPEC \ diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index dd25e06..dabb97f 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -53,6 +53,15 @@ (include "nds32-peephole2.md") +;; ------------------------------------------------------------------------ + +;; CPU pipeline model. +(define_attr "pipeline_model" "n9,simple" + (const + (cond [(match_test "nds32_cpu_option == CPU_N9") (const_string "n9") + (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] + (const_string "n9")))) + ;; Insn type, it is used to default other attribute values. (define_attr "type" "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\ diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index ec6b9d4..304100f 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -177,6 +177,21 @@ Known cpu types (for use with the -mcpu= option): EnumValue Enum(nds32_cpu_type) String(n9) Value(CPU_N9) +EnumValue +Enum(nds32_cpu_type) String(n903) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(n903a) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(n968) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(n968a) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(simple) Value(CPU_SIMPLE) + mconfig-fpu= Target RejectNegative Joined Enum(float_reg_number) Var(nds32_fp_regnum) Init(TARGET_CONFIG_FPU_DEFAULT) Specify a fpu configuration value from 0 to 7; 0-3 is as FPU spec says, and 4-7 is corresponding to 0-3. 
@@ -209,6 +224,38 @@ Enum(float_reg_number) String(6) Value(NDS32_CONFIG_FPU_6) EnumValue Enum(float_reg_number) String(7) Value(NDS32_CONFIG_FPU_7) +mconfig-mul= +Target RejectNegative Joined Enum(nds32_mul_type) Var(nds32_mul_config) Init(MUL_TYPE_FAST_1) +Specify configuration of instruction mul: fast1, fast2 or slow. The default is fast1. + +Enum +Name(nds32_mul_type) Type(enum nds32_mul_type) + +EnumValue +Enum(nds32_mul_type) String(fast) Value(MUL_TYPE_FAST_1) + +EnumValue +Enum(nds32_mul_type) String(fast1) Value(MUL_TYPE_FAST_1) + +EnumValue +Enum(nds32_mul_type) String(fast2) Value(MUL_TYPE_FAST_2) + +EnumValue +Enum(nds32_mul_type) String(slow) Value(MUL_TYPE_SLOW) + +mconfig-register-ports= +Target RejectNegative Joined Enum(nds32_register_ports) Var(nds32_register_ports_config) Init(REG_PORT_3R2W) +Specify how many read/write ports for n9/n10 cores. The value should be 3r2w or 2r1w. + +Enum +Name(nds32_register_ports) Type(enum nds32_register_ports) + +EnumValue +Enum(nds32_register_ports) String(3r2w) Value(REG_PORT_3R2W) + +EnumValue +Enum(nds32_register_ports) String(2r1w) Value(REG_PORT_2R1W) + mctor-dtor Target Report Enable constructor/destructor feature. diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md index e60b4b7..433391e 100644 --- a/gcc/config/nds32/pipelines.md +++ b/gcc/config/nds32/pipelines.md @@ -18,12 +18,24 @@ ;; along with GCC; see the file COPYING3. If not see ;; . -(define_automaton "nds32_machine") -(define_cpu_unit "general_unit" "nds32_machine") +;; ------------------------------------------------------------------------ +;; Include N9/N10 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n9-3r2w.md") +(include "nds32-n9-2r1w.md") + + +;; ------------------------------------------------------------------------ +;; Define simple pipeline settings. 
+;; ------------------------------------------------------------------------ + +(define_automaton "nds32_simple_machine") + +(define_cpu_unit "simple_unit" "nds32_simple_machine") (define_insn_reservation "simple_insn" 1 - (eq_attr "type" "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,mul,mac,div,branch,mmu,misc") - "general_unit") + (eq_attr "pipeline_model" "simple") + "simple_unit") ;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/t-nds32 b/gcc/config/nds32/t-nds32 index 0f1c385..89092a5 100644 --- a/gcc/config/nds32/t-nds32 +++ b/gcc/config/nds32/t-nds32 @@ -131,3 +131,16 @@ nds32-relax-opt.o: \ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/nds32/nds32-relax-opt.c + +nds32-utils.o: \ + $(srcdir)/config/nds32/nds32-utils.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-utils.c