From 8fd5214127adf2ee03865aa89f671cadded3fb9c Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Sun, 8 Apr 2018 08:31:52 +0000 Subject: [PATCH] [NDS32] Implement n8 pipeline. gcc/ * config.gcc (nds32*-*-*): Check that n6/n8/s8 are valid to --with-cpu. * config/nds32/nds32-n8.md: New file. * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N6 and CPU_N8. * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n8 pipeline. * config/nds32/nds32-protos.h: More declarations for n8 pipeline. * config/nds32/nds32-utils.c: More implementations for n8 pipeline. * config/nds32/nds32.md (pipeline_model): Add n8. * config/nds32/nds32.opt (mcpu): Support n8 pipeline cpus. * config/nds32/pipelines.md: Include n8 settings. Co-Authored-By: Chung-Ju Wu From-SVN: r259219 --- gcc/ChangeLog | 14 + gcc/config.gcc | 4 +- gcc/config/nds32/nds32-n8.md | 389 +++++++++++++++++++++++++++ gcc/config/nds32/nds32-opts.h | 2 + gcc/config/nds32/nds32-pipelines-auxiliary.c | 248 +++++++++++++++++ gcc/config/nds32/nds32-protos.h | 12 + gcc/config/nds32/nds32-utils.c | 112 ++++++++ gcc/config/nds32/nds32.md | 5 +- gcc/config/nds32/nds32.opt | 24 ++ gcc/config/nds32/pipelines.md | 4 + 10 files changed, 810 insertions(+), 4 deletions(-) create mode 100644 gcc/config/nds32/nds32-n8.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 96a8584..c5fe967 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,6 +1,20 @@ 2018-04-08 Kito Cheng Chung-Ju Wu + * config.gcc (nds32*-*-*): Check that n6/n8/s8 are valid to --with-cpu. + * config/nds32/nds32-n8.md: New file. + * config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N6 and CPU_N8. + * config/nds32/nds32-pipelines-auxiliary.c: Implementation for n8 + pipeline. + * config/nds32/nds32-protos.h: More declarations for n8 pipeline. + * config/nds32/nds32-utils.c: More implementations for n8 pipeline. + * config/nds32/nds32.md (pipeline_model): Add n8. + * config/nds32/nds32.opt (mcpu): Support n8 pipeline cpus. 
+ * config/nds32/pipelines.md: Include n8 settings. + +2018-04-08 Kito Cheng + Chung-Ju Wu + * config.gcc (nds32*): Add nds32-utils.o into extra_objs. * config/nds32/nds32-n9-2r1w.md: New file. * config/nds32/nds32-n9-3r2w.md: New file. diff --git a/gcc/config.gcc b/gcc/config.gcc index fa15a8c..4df7201 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4315,11 +4315,11 @@ case "${target}" in "") with_cpu=n9 ;; - n9) + n6 | n8 | s8 | n9) # OK ;; *) - echo "Cannot accept --with-cpu=$with_cpu, available values are: n9" 1>&2 + echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n8 s8 n9" 1>&2 exit 1 ;; esac diff --git a/gcc/config/nds32/nds32-n8.md b/gcc/config/nds32/nds32-n8.md new file mode 100644 index 0000000..d6242f8 --- /dev/null +++ b/gcc/config/nds32/nds32-n8.md @@ -0,0 +1,389 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; ------------------------------------------------------------------------ +;; Define N8 pipeline settings. 
+;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n8_machine") + +;; ------------------------------------------------------------------------ +;; Pipeline Stages +;; ------------------------------------------------------------------------ +;; IF - Instruction Fetch +;; II - Instruction Issue / Address Generation +;; EX - Instruction Execution +;; EXD - Psuedo Stage / Load Data Completion + +(define_cpu_unit "n8_ii" "nds32_n8_machine") +(define_cpu_unit "n8_ex" "nds32_n8_machine") + +(define_insn_reservation "nds_n8_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_load" 1 + (and (match_test "nds32::load_single_p (insn)") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_store" 1 + (and (match_test "nds32::store_single_p (insn)") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_2" 1 + (and (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::load_double_p (insn)")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ii+n8_ex, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*2, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n8")) 
+ "n8_ii, (n8_ii+n8_ex)*3, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*4, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*5, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*6, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*7, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*11, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_1" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_2" 1 + (and (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::store_double_p (insn)")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ii+n8_ex, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*2, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*3, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n8")) + 
"n8_ii, (n8_ii+n8_ex)*4, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*5, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*6, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*7, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*11, n8_ex") + +(define_insn_reservation "nds_n8_mul_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n8"))) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_mul_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n8"))) + "n8_ii, n8_ex*16") + +(define_insn_reservation "nds_n8_mac_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n8"))) + "n8_ii, n8_ii+n8_ex, n8_ex") + +(define_insn_reservation "nds_n8_mac_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n8"))) + "n8_ii, (n8_ii+n8_ex)*16, n8_ex") + +(define_insn_reservation "nds_n8_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*36, n8_ex") + +(define_insn_reservation "nds_n8_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +;; ------------------------------------------------------------------------ +;; Comment Notations and Bypass 
Rules +;; ------------------------------------------------------------------------ +;; Producers (LHS) +;; LD_!bi +;; Load data from the memory (without updating the base register) and +;; produce the loaded data. The result is ready at EXD. +;; LD_bi +;; Load data from the memory (with updating the base register) and +;; produce the loaded data. The result is ready at EXD. Because the +;; register port is 2R1W, two micro-operations are required in order +;; to write two registers. The base register is updated by the second +;; micro-operation and the result is ready at EX. +;; LMW(N, M) +;; There are N micro-operations within an instruction that loads multiple +;; words. The result produced by the M-th micro-operation is sent to +;; consumers. The result is ready at EXD. If the base register should be +;; updated, an extra micro-operation is inserted to the sequence, and the +;; result is ready at EX. +;; ADDR_OUT +;; Most load/store instructions can produce an address output if updating +;; the base register is required. The result is ready at EX, which is +;; produced by ALU. +;; ALU, MUL, MAC +;; The result is ready at EX. +;; MOVD44_O +;; A double-word move instruction needs to write registers twice. Because +;; the register port is 2R1W, two micro-operations are required. The even +;; number register is updated by the first one, and the odd number register +;; is updated by the second one. Each of the results is ready at EX. +;; The letter 'O' stands for odd. +;; DIV_Rs +;; A division instruction saves the quotient result to Rt and saves the +;; remainder result to Rs. It requires two micro-operations because the +;; register port is 2R1W. The first micro-operation writes to Rt, and +;; the second one writes to Rs. Each of the results is ready at EX. +;; +;; Consumers (RHS) +;; ALU, MUL, DIV +;; Require operands at EX. +;; MOVD44_E +;; The letter 'E' stands for even, which is accessed by the first micro- +;; operation and a movd44 instruction.
The operand is required at EX. +;; MAC_RaRb +;; A MAC instruction is separated into two micro-operations. The first +;; micro-operation does the multiplication, which requires operands Ra +;; and Rb at EX. The second micro-operation does the accumulation, which +;; requires the operand Rt at EX. +;; ADDR_IN_MOP(N) +;; Because the register port is 2R1W, some load/store instructions are +;; separated into many micro-operations. N denotes the address input is +;; required by the N-th micro-operation. Such operand is required at II. +;; ST_bi +;; A post-increment store instruction requires its data at EX. +;; ST_!bi_RI +;; A store instruction with an immediate offset requires its data at EX. +;; If the offset field is a register (ST_!bi_RR), the instruction will be +;; separated into two micro-operations, and the second one requires the +;; input operand at EX in order to store it to the memory. +;; SMW(N, M) +;; There are N micro-operations within an instruction that stores multiple +;; words. Each M-th micro-operation requires its data at EX. If the base +;; register should be updated, an extra micro-operation is inserted to the +;; sequence. +;; BR_COND +;; If a branch instruction is conditional, its input data is required at EX.
+ +;; LD_!bi -> ADDR_IN_MOP(1) +(define_bypass 3 + "nds_n8_load" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_load_to_ii_p" +) + +;; LMW(N, N) -> ADDR_IN_MOP(1) +(define_bypass 3 + "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_last_load_to_ii_p" +) + +;; LMW(N, N - 1) -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + 
nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_last_load_two_to_ii_p" +) + +;; LD_bi -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_n8_load" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_load_bi_to_ii_p" +) + +;; LD_!bi -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR_COND, ST_bi, ST_!bi_RI, SMW(N, 1) +(define_bypass 2 + "nds_n8_load" + "nds_n8_alu, + nds_n8_mul_fast, nds_n8_mul_slow,\ + nds_n8_mac_fast, nds_n8_mac_slow,\ + nds_n8_div,\ + nds_n8_branch,\ + nds_n8_store,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_load_to_ex_p" +) + +;; ALU, MOVD44_O, MUL, MAC, DIV_Rs, LD_bi, ADDR_OUT -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_n8_alu, + nds_n8_mul_fast, nds_n8_mul_slow,\ + nds_n8_mac_fast, nds_n8_mac_slow,\ + nds_n8_div,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + 
nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_ex_to_ii_p" +) + +;; LMW(N, N) -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR_COND, ST_bi, ST_!bi_RI, SMW(N, 1) +(define_bypass 2 + "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" + "nds_n8_alu, + nds_n8_mul_fast, nds_n8_mul_slow,\ + nds_n8_mac_fast, nds_n8_mac_slow,\ + nds_n8_div,\ + nds_n8_branch,\ + nds_n8_store,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_last_load_to_ex_p" +) diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h index fc80323..9d3dd69 100644 --- a/gcc/config/nds32/nds32-opts.h +++ b/gcc/config/nds32/nds32-opts.h @@ -37,6 +37,8 @@ enum nds32_arch_type /* The various ANDES CPU. 
*/ enum nds32_cpu_type { + CPU_N6, + CPU_N8, CPU_N9, CPU_SIMPLE }; diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c index c34b168..c2330f0 100644 --- a/gcc/config/nds32/nds32-pipelines-auxiliary.c +++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c @@ -343,6 +343,108 @@ movd44_even_dep_p (rtx_insn *insn, rtx def_reg) using namespace nds32; using namespace nds32::scheduling; +namespace { // anonymous namespace +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at AG (II). */ +bool +n8_consumed_by_addr_in_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_BRANCH: + use_rtx = extract_branch_target_rtx (consumer); + break; + + case TYPE_LOAD: + if (load_single_p (consumer)) + use_rtx = extract_mem_rtx (consumer); + else + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_STORE: + if (store_single_p (consumer) + && (!post_update_insn_p (consumer) + || immed_offset_p (extract_mem_rtx (consumer)))) + use_rtx = extract_mem_rtx (consumer); + else + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + default: + gcc_unreachable (); + } + + return reg_overlap_p (def_reg, use_rtx); +} + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at EX. */ +bool +n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + if (movd44_even_dep_p (consumer, def_reg)) + return true; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MAC: + use_rtx = extract_mac_non_acc_rtx (consumer); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. 
It requires two micro- + operations in order to write two registers. We have to check the + dependency from the producer to the first micro-operation. */ + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_BRANCH: + use_rtx = extract_branch_condition_rtx (consumer); + break; + + case TYPE_STORE: + /* exclude ST_!bi_RR */ + if (!post_update_insn_p (consumer) + && !immed_offset_p (extract_mem_rtx (consumer))) + return false; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_STORE_MULTIPLE: + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + default: + gcc_unreachable (); + } + + return reg_overlap_p (def_reg, use_rtx); +} + /* Check the dependency between the producer defining DEF_REG and CONSUMER requiring input operand at EX. */ bool @@ -514,6 +616,152 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) return false; } + +} // anonymous namespace + +/* ------------------------------------------------------------------------ */ + +/* Guard functions for N8 core. 
*/ + +bool +nds32_n8_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) +{ + if (post_update_insn_p (producer)) + return false; + + rtx def_reg = SET_DEST (PATTERN (producer)); + + return n8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +nds32_n8_load_bi_to_ii_p (rtx_insn *producer, rtx_insn *consumer) +{ + if (!post_update_insn_p (producer)) + return false; + + rtx def_reg = SET_DEST (PATTERN (producer)); + + return n8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +nds32_n8_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + if (post_update_insn_p (producer)) + return false; + + rtx def_reg = SET_DEST (PATTERN (producer)); + + return n8_consumed_by_ex_p (consumer, def_reg); +} + +bool +nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_ALU: + if (movd44_insn_p (producer)) + def_reg = extract_movd44_odd_reg (producer); + else + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_DIV: + if (INSN_CODE (producer) == CODE_FOR_divmodsi4 + || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + def_reg = SET_DEST (parallel_element (producer, 1)); + else + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_LOAD: + case TYPE_STORE: + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + if (!post_update_insn_p (producer)) + return false; + + def_reg = extract_base_reg (producer); + break; + + default: + gcc_unreachable (); + } + + return n8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +nds32_n8_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) +{ + /* If PRODUCER is a post-update LMW insn, the last micro-operation updates + the base register and the result is ready in EX stage, so we don't need + to handle that case in this guard function and the corresponding bypass + rule. 
*/ + if (post_update_insn_p (producer)) + return false; + + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (last_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); + + return n8_consumed_by_addr_in_p (consumer, last_def_reg); +} + +bool +nds32_n8_last_load_two_to_ii_p (rtx_insn *producer, rtx_insn *consumer) +{ + int index = -2; + + /* If PRODUCER is a post-update insn, there is an additional one micro- + operation inserted in the end, so the last memory access operation should + be handled by this guard function and the corresponding bypass rule. */ + if (post_update_insn_p (producer)) + index = -1; + + rtx last_two_def_reg = extract_nth_access_reg (producer, index); + + if (last_two_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_two_def_reg) + || GET_CODE (last_two_def_reg) == SUBREG); + + return n8_consumed_by_addr_in_p (consumer, last_two_def_reg); +} + +bool +nds32_n8_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + /* If PRODUCER is a post-update LMW insn, the last micro-operation updates + the base register and the result is ready in EX stage, so we don't need + to handle that case in this guard function and the corresponding bypass + rule. */ + if (post_update_insn_p (producer)) + return false; + + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (last_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); + + return n8_consumed_by_ex_p (consumer, last_def_reg); +} + /* Guard functions for N9 cores. */ /* Check dependencies from MM to EX. */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index 8debf4b..f9bba8a 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -99,6 +99,14 @@ extern bool nds32_valid_multiple_load_store_p (rtx, bool, bool); /* Auxiliary functions for guard function checking in pipelines.md. 
*/ +extern bool nds32_n8_load_to_ii_p (rtx_insn *, rtx_insn *); +extern bool nds32_n8_load_bi_to_ii_p (rtx_insn *, rtx_insn *); +extern bool nds32_n8_load_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n8_ex_to_ii_p (rtx_insn *, rtx_insn *); +extern bool nds32_n8_last_load_to_ii_p (rtx_insn *, rtx_insn *); +extern bool nds32_n8_last_load_two_to_ii_p (rtx_insn *, rtx_insn *); +extern bool nds32_n8_last_load_to_ex_p (rtx_insn *, rtx_insn *); + extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *); @@ -251,8 +259,12 @@ rtx extract_base_reg (rtx_insn *); rtx extract_shift_reg (rtx); bool movd44_insn_p (rtx_insn *); +rtx extract_movd44_odd_reg (rtx_insn *); rtx extract_mac_non_acc_rtx (rtx_insn *); + +rtx extract_branch_target_rtx (rtx_insn *); +rtx extract_branch_condition_rtx (rtx_insn *); } // namespace nds32 /* Functions for create nds32 specific optimization pass. */ diff --git a/gcc/config/nds32/nds32-utils.c b/gcc/config/nds32/nds32-utils.c index 55d4a91..b0151be 100644 --- a/gcc/config/nds32/nds32-utils.c +++ b/gcc/config/nds32/nds32-utils.c @@ -377,6 +377,33 @@ movd44_insn_p (rtx_insn *insn) return false; } +/* Extract the second result (odd reg) of a movd44 insn. */ +rtx +extract_movd44_odd_reg (rtx_insn *insn) +{ + gcc_assert (movd44_insn_p (insn)); + + rtx def_reg = SET_DEST (PATTERN (insn)); + machine_mode mode; + + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + switch (GET_MODE (def_reg)) + { + case E_DImode: + mode = SImode; + break; + + case E_DFmode: + mode = SFmode; + break; + + default: + gcc_unreachable (); + } + + return gen_highpart (mode, def_reg); +} + /* Extract the rtx representing non-accumulation operands of a MAC insn. 
*/ rtx extract_mac_non_acc_rtx (rtx_insn *insn) @@ -396,4 +423,89 @@ extract_mac_non_acc_rtx (rtx_insn *insn) } } +/* Extract the rtx representing the branch target to help recognize + data hazards. */ +rtx +extract_branch_target_rtx (rtx_insn *insn) +{ + gcc_assert (CALL_P (insn) || JUMP_P (insn)); + + rtx body = PATTERN (insn); + + if (GET_CODE (body) == SET) + { + /* RTXs in IF_THEN_ELSE are branch conditions. */ + if (GET_CODE (SET_SRC (body)) == IF_THEN_ELSE) + return NULL_RTX; + + return SET_SRC (body); + } + + if (GET_CODE (body) == CALL) + return XEXP (body, 0); + + if (GET_CODE (body) == PARALLEL) + { + rtx first_rtx = parallel_element (body, 0); + + if (GET_CODE (first_rtx) == SET) + return SET_SRC (first_rtx); + + if (GET_CODE (first_rtx) == CALL) + return XEXP (first_rtx, 0); + } + + /* Handle special cases of bltzal, bgezal and jralnez. */ + if (GET_CODE (body) == COND_EXEC) + { + rtx addr_rtx = XEXP (body, 1); + + if (GET_CODE (addr_rtx) == SET) + return SET_SRC (addr_rtx); + + if (GET_CODE (addr_rtx) == PARALLEL) + { + rtx first_rtx = parallel_element (addr_rtx, 0); + + if (GET_CODE (first_rtx) == SET) + { + rtx call_rtx = SET_SRC (first_rtx); + gcc_assert (GET_CODE (call_rtx) == CALL); + + return XEXP (call_rtx, 0); + } + + if (GET_CODE (first_rtx) == CALL) + return XEXP (first_rtx, 0); + } + } + + gcc_unreachable (); +} + +/* Extract the rtx representing the branch condition to help recognize + data hazards. 
*/ +rtx +extract_branch_condition_rtx (rtx_insn *insn) +{ + gcc_assert (CALL_P (insn) || JUMP_P (insn)); + + rtx body = PATTERN (insn); + + if (GET_CODE (body) == SET) + { + rtx if_then_else_rtx = SET_SRC (body); + + if (GET_CODE (if_then_else_rtx) == IF_THEN_ELSE) + return XEXP (if_then_else_rtx, 0); + + return NULL_RTX; + } + + if (GET_CODE (body) == COND_EXEC) + return XEXP (body, 0); + + return NULL_RTX; +} + } // namespace nds32 diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index dabb97f..9ea72d7 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -56,9 +56,10 @@ ;; ------------------------------------------------------------------------ ;; CPU pipeline model. -(define_attr "pipeline_model" "n9,simple" +(define_attr "pipeline_model" "n8,n9,simple" (const - (cond [(match_test "nds32_cpu_option == CPU_N9") (const_string "n9") + (cond [(match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8") + (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] (const_string "n9")))) diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index 304100f..f7d98a8 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -175,6 +175,30 @@ Name(nds32_cpu_type) Type(enum nds32_cpu_type) Known cpu types (for use with the -mcpu= option): EnumValue +Enum(nds32_cpu_type) String(n6) Value(CPU_N6) + +EnumValue +Enum(nds32_cpu_type) String(n650) Value(CPU_N6) + +EnumValue +Enum(nds32_cpu_type) String(n8) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(n801) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(sn8) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(sn801) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(s8) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(s801) Value(CPU_N8) + +EnumValue Enum(nds32_cpu_type) String(n9) Value(CPU_N9) EnumValue diff --git 
a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md index 433391e..b31d7e6 100644 --- a/gcc/config/nds32/pipelines.md +++ b/gcc/config/nds32/pipelines.md @@ -18,6 +18,10 @@ ;; along with GCC; see the file COPYING3. If not see ;; . +;; ------------------------------------------------------------------------ +;; Include N8 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n8.md") ;; ------------------------------------------------------------------------ ;; Include N9/N10 pipeline settings. -- 2.7.4