From f758f2992f61a18b961a151e0e8b7bac68310356 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Fri, 16 Aug 2013 15:28:13 +0000 Subject: [PATCH] re PR target/58160 (Power8 fusion support has a bug that shows up in running spec 2006) 2013-08-14 Michael Meissner PR target/58160 * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the memory rtx to contain ZERO_EXTEND and SIGN_EXTEND. * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands array instead of each individual operand as a separate argument. (emit_fusion_gpr_load): Likewise. (expand_fusion_gpr_load): Add new function declaration. * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling signature to have the operands passed as an array, instead of as separate arguments. Allow ZERO_EXTEND to be in the memory address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not depend on the register live/dead flags when peepholes are run. (expand_fusion_gpr_load): New function to be called from the peephole2 pass, to change the register that addis sets to be the target register. (emit_fusion_gpr_load): Change the calling signature to have the operands passed as an array, instead of as separate arguments. Allow ZERO_EXTEND to be in the memory address, and also SIGN_EXTEND if -mpower8-fusion-sign. * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused unspec enumeration. (power8 fusion peephole/peephole2): Rework the fusion peepholes to adjust the register addis loads up in the peephole2 pass. Do not depend on the register live/dead state when the peephole pass is done. 
From-SVN: r201792 --- gcc/ChangeLog | 31 ++++++ gcc/config/rs6000/predicates.md | 10 +- gcc/config/rs6000/rs6000-protos.h | 5 +- gcc/config/rs6000/rs6000.c | 222 ++++++++++++++++++++++++++++---------- gcc/config/rs6000/rs6000.md | 99 +++-------------- 5 files changed, 224 insertions(+), 143 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e71ae3e..abe0d43 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,34 @@ +2013-08-16 Michael Meissner + + PR target/58160 + * config/rs6000/predicates.md (fusion_gpr_mem_load): Allow the + memory rtx to contain ZERO_EXTEND and SIGN_EXTEND. + + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Pass operands + array instead of each individual operand as a separate argument. + (emit_fusion_gpr_load): Likewise. + (expand_fusion_gpr_load): Add new function declaration. + + * config/rs6000/rs6000.c (fusion_gpr_load_p): Change the calling + signature to have the operands passed as an array, instead of as + separate arguments. Allow ZERO_EXTEND to be in the memory + address, and also SIGN_EXTEND if -mpower8-fusion-sign. Do not + depend on the register live/dead flags when peepholes are run. + (expand_fusion_gpr_load): New function to be called from the + peephole2 pass, to change the register that addis sets to be the + target register. + (emit_fusion_gpr_load): Change the calling signature to have the + operands passed as an array, instead of as separate arguments. + Allow ZERO_EXTEND to be in the memory address, and also + SIGN_EXTEND if -mpower8-fusion-sign. + + * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): Delete unused + unspec enumeration. + (power8 fusion peephole/peephole2): Rework the fusion peepholes to + adjust the register addis loads up in the peephole2 pass. Do not + depend on the register live/dead state when the peephole pass is + done. 
+ 2013-08-16 David Malcolm * gengtype.c (create_user_defined_type): Ensure that the kind diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 09013c3..7338e76 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1740,10 +1740,18 @@ ;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis ;; and loads to GPR registers on power8. (define_predicate "fusion_gpr_mem_load" - (match_code "mem") + (match_code "mem,sign_extend,zero_extend") { rtx addr; + /* Handle sign/zero extend. */ + if (GET_CODE (op) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND)) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + if (!MEM_P (op)) return 0; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 410042b..3ddabb8 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -73,8 +73,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); extern bool direct_move_p (rtx, rtx); extern bool quad_load_store_p (rtx, rtx); -extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx, rtx); -extern const char *emit_fusion_gpr_load (rtx, rtx, rtx, rtx); +extern bool fusion_gpr_load_p (rtx *, bool); +extern void expand_fusion_gpr_load (rtx *); +extern const char *emit_fusion_gpr_load (rtx *); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index c1d4b99..b469930 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -30429,16 +30429,25 @@ rs6000_split_logical (rtx operands[3], /* Return true if the peephole2 can combine a load involving a combination of an addis instruction and a load with an offset that can be fused together on - a power8. */ + a power8. 
+ + The operands are: + operands[0] register set with addis + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. + + In addition, we are passed a boolean that is true if this is a peephole2, + and we can see if the addis_reg is dead after the insn and can be + replaced by the target register. */ bool -fusion_gpr_load_p (rtx addis_reg, /* reg. to hold high value. */ - rtx addis_value, /* high value loaded. */ - rtx target, /* reg. that is loaded. */ - rtx mem, /* memory to load. */ - rtx insn) /* insn for looking up reg notes or - NULL_RTX if this is a peephole2. */ +fusion_gpr_load_p (rtx *operands, bool peep2_p) { + rtx addis_reg = operands[0]; + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx mem = operands[3]; rtx addr; rtx base_reg; @@ -30455,62 +30464,154 @@ fusion_gpr_load_p (rtx addis_reg, /* reg. to hold high value. */ if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) return false; + /* Allow sign/zero extension. */ + if (GET_CODE (mem) == ZERO_EXTEND + || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) + mem = XEXP (mem, 0); + + if (!MEM_P (mem)) + return false; + + addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return false; + /* Validate that the register used to load the high value is either the - register being loaded, or we can safely replace its use in a peephole. + register being loaded, or we can safely replace its use in a peephole2. If this is a peephole2, we assume that there are 2 instructions in the peephole (addis and load), so we want to check if the target register was - not used and the register to hold the addis result is dead after the - peephole. + not used in the memory address and the register to hold the addis result + is dead after the peephole. 
*/ if (REGNO (addis_reg) != REGNO (target)) { + if (!peep2_p) + return false; + if (reg_mentioned_p (target, mem)) return false; - if (insn) - { - if (!find_reg_note (insn, REG_DEAD, addis_reg)) - return false; - } - else - { - if (!peep2_reg_dead_p (2, addis_reg)) - return false; - } + if (!peep2_reg_dead_p (2, addis_reg)) + return false; } - /* Validate that the value being loaded in the addis is used in the load. */ - addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ - if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) - return false; - base_reg = XEXP (addr, 0); return REGNO (addis_reg) == REGNO (base_reg); } +/* During the peephole2 pass, adjust and expand the insns for a load fusion + sequence. We adjust the addis register to use the target register. If the + load sign extends, we adjust the code to do the zero extending load, and an + explicit sign extension later since the fusion only covers zero extending + loads. + + The operands are: + operands[0] register set with addis (to be replaced with target) + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. */ + +void +expand_fusion_gpr_load (rtx *operands) +{ + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx orig_mem = operands[3]; + rtx new_addr, new_mem, orig_addr, offset; + enum rtx_code plus_or_lo_sum; + enum machine_mode target_mode = GET_MODE (target); + enum machine_mode extend_mode = target_mode; + enum machine_mode ptr_mode = Pmode; + enum rtx_code extend = UNKNOWN; + rtx addis_reg = ((ptr_mode == target_mode) + ? 
target + : simplify_subreg (ptr_mode, target, target_mode, 0)); + + if (GET_CODE (orig_mem) == ZERO_EXTEND + || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) + { + extend = GET_CODE (orig_mem); + orig_mem = XEXP (orig_mem, 0); + target_mode = GET_MODE (orig_mem); + } + + gcc_assert (MEM_P (orig_mem)); + + orig_addr = XEXP (orig_mem, 0); + plus_or_lo_sum = GET_CODE (orig_addr); + gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); + + offset = XEXP (orig_addr, 1); + new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset); + new_mem = change_address (orig_mem, target_mode, new_addr); + + if (extend != UNKNOWN) + new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); + + emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value)); + emit_insn (gen_rtx_SET (VOIDmode, target, new_mem)); + + if (extend == SIGN_EXTEND) + { + int sub_off = ((BYTES_BIG_ENDIAN) + ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode) + : 0); + rtx sign_reg + = simplify_subreg (target_mode, target, extend_mode, sub_off); + + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_SIGN_EXTEND (extend_mode, sign_reg))); + } + + return; +} + /* Return a string to fuse an addis instruction with a gpr load to the same register that we loaded up the addis instruction. The code is complicated, - so we call output_asm_insn directly, and just return "". */ + so we call output_asm_insn directly, and just return "". + + The operands are: + operands[0] register set with addis (must be same reg as target). + operands[1] value set via addis + operands[2] target register being loaded + operands[3] D-form memory reference using operands[0]. 
*/ const char * -emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) +emit_fusion_gpr_load (rtx *operands) { + rtx addis_reg = operands[0]; + rtx addis_value = operands[1]; + rtx target = operands[2]; + rtx mem = operands[3]; rtx fuse_ops[10]; rtx addr; rtx load_offset; const char *addis_str = NULL; const char *load_str = NULL; + const char *extend_insn = NULL; const char *mode_name = NULL; char insn_template[80]; - enum machine_mode mode = GET_MODE (mem); + enum machine_mode mode; const char *comment_str = ASM_COMMENT_START; + bool sign_p = false; + + gcc_assert (REG_P (addis_reg) && REG_P (target)); + gcc_assert (REGNO (addis_reg) == REGNO (target)); if (*comment_str == ' ') comment_str++; - if (!MEM_P (mem)) - gcc_unreachable (); + /* Allow sign/zero extension. */ + if (GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + + else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN) + { + sign_p = true; + mem = XEXP (mem, 0); + } + gcc_assert (MEM_P (mem)); addr = XEXP (mem, 0); if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) gcc_unreachable (); @@ -30518,21 +30619,25 @@ emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) load_offset = XEXP (addr, 1); /* Now emit the load instruction to the same register. */ + mode = GET_MODE (mem); switch (mode) { case QImode: mode_name = "char"; load_str = "lbz"; + extend_insn = "extsb %0,%0"; break; case HImode: mode_name = "short"; load_str = "lhz"; + extend_insn = "extsh %0,%0"; break; case SImode: mode_name = "int"; load_str = "lwz"; + extend_insn = "extsw %0,%0"; break; case DImode: @@ -30541,22 +30646,20 @@ emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) mode_name = "long"; load_str = "ld"; } + else + gcc_unreachable (); break; default: - break; + gcc_unreachable (); } - if (!load_str) - gcc_unreachable (); - /* Emit the addis instruction. 
*/ fuse_ops[0] = target; - fuse_ops[1] = addis_reg; if (satisfies_constraint_L (addis_value)) { - fuse_ops[2] = addis_value; - addis_str = "lis %0,%v2"; + fuse_ops[1] = addis_value; + addis_str = "lis %0,%v1"; } else if (GET_CODE (addis_value) == PLUS) @@ -30567,9 +30670,9 @@ emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) if (REG_P (op0) && CONST_INT_P (op1) && satisfies_constraint_L (op1)) { - fuse_ops[2] = op0; - fuse_ops[3] = op1; - addis_str = "addis %0,%2,%v3"; + fuse_ops[1] = op0; + fuse_ops[2] = op1; + addis_str = "addis %0,%1,%v2"; } } @@ -30578,13 +30681,13 @@ emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) rtx value = XEXP (addis_value, 0); if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL) { - fuse_ops[2] = XVECEXP (value, 0, 0); /* symbol ref. */ - fuse_ops[3] = XVECEXP (value, 0, 1); /* TOC register. */ + fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */ + fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */ if (TARGET_ELF) - addis_str = "addis %0,%3,%2@toc@ha"; + addis_str = "addis %0,%2,%1@toc@ha"; else if (TARGET_XCOFF) - addis_str = "addis %0,%2@u(%3)"; + addis_str = "addis %0,%1@u(%2)"; else gcc_unreachable (); @@ -30599,14 +30702,14 @@ emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) && XINT (op0, 1) == UNSPEC_TOCREL && CONST_INT_P (op1)) { - fuse_ops[2] = XVECEXP (op0, 0, 0); /* symbol ref. */ - fuse_ops[3] = XVECEXP (op0, 0, 1); /* TOC register. */ - fuse_ops[4] = op1; + fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */ + fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. 
*/ + fuse_ops[3] = op1; if (TARGET_ELF) - addis_str = "addis %0,%3,%2+%4@toc@ha"; + addis_str = "addis %0,%2,%1+%3@toc@ha"; else if (TARGET_XCOFF) - addis_str = "addis %0,%2+%4@u(%3)"; + addis_str = "addis %0,%1+%3@u(%2)"; else gcc_unreachable (); @@ -30615,24 +30718,25 @@ emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) else if (satisfies_constraint_L (value)) { - fuse_ops[2] = value; - addis_str = "lis %0,%v2"; + fuse_ops[1] = value; + addis_str = "lis %0,%v1"; } else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value)) { - fuse_ops[2] = value; - addis_str = "lis %0,%2@ha"; + fuse_ops[1] = value; + addis_str = "lis %0,%1@ha"; } } if (!addis_str) fatal_insn ("Could not generate addis value for fusion", addis_value); - sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s, addis reg %%1", - addis_str, comment_str, mode_name); + sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str, + comment_str, mode_name); output_asm_insn (insn_template, fuse_ops); + /* Emit the D-form load instruction. */ if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset)) { sprintf (insn_template, "%s %%0,%%1(%%0)", load_str); @@ -30687,6 +30791,14 @@ emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) else fatal_insn ("Unable to generate load offset for fusion", load_offset); + /* Handle sign extension. The peephole2 pass generates this as a separate + insn, but we handle it just in case it got reattached. */ + if (sign_p) + { + gcc_assert (extend_insn != NULL); + output_asm_insn (extend_insn, fuse_ops); + } + return ""; } diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 98c43df..637c5d3 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -136,7 +136,6 @@ UNSPEC_P8V_MTVSRD UNSPEC_P8V_XXPERMDI UNSPEC_P8V_RELOAD_FROM_VSX - UNSPEC_FUSION_GPR ]) ;; @@ -15775,108 +15774,38 @@ ;; a GPR. 
The addis instruction must be adjacent to the load, and use the same ;; register that is being loaded. The fused ops must be physically adjacent. -;; GPR fusion for single word integer types +;; We use define_peephole for the actual addis/load, and the register used to +;; hold the addis value must be the same as the register being loaded. We use +;; define_peephole2 to change the register used for addis to be the register +;; being loaded, since we can look at whether it is dead after the load insn. (define_peephole [(set (match_operand:P 0 "base_reg_operand" "") (match_operand:P 1 "fusion_gpr_addis" "")) (set (match_operand:INT1 2 "base_reg_operand" "") (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] - "TARGET_P8_FUSION - && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], - insn)" + "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)" { - return emit_fusion_gpr_load (operands[0], operands[1], operands[2], - operands[3]); + return emit_fusion_gpr_load (operands); } [(set_attr "type" "load") (set_attr "length" "8")]) -(define_peephole - [(set (match_operand:DI 0 "base_reg_operand" "") - (match_operand:DI 1 "fusion_gpr_addis" "")) - (set (match_operand:DI 2 "base_reg_operand" "") - (zero_extend:DI (match_operand:QHSI 3 "fusion_gpr_mem_load" "")))] - "TARGET_P8_FUSION && TARGET_POWERPC64 - && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], - insn)" -{ - return emit_fusion_gpr_load (operands[0], operands[1], operands[2], - operands[3]); -} - [(set_attr "type" "load") - (set_attr "length" "8")]) - -;; Power8 does not fuse a sign extending load, so convert the sign extending -;; load into a zero extending load, and do an explicit sign extension. Don't -;; do this if we are trying to optimize for space. Do this as a peephole2 to -;; allow final rtl optimizations and scheduling to move the sign extend. 
(define_peephole2 - [(set (match_operand:DI 0 "base_reg_operand" "") - (match_operand:DI 1 "fusion_gpr_addis" "")) - (set (match_operand:DI 2 "base_reg_operand" "") - (sign_extend:DI (match_operand:HSI 3 "fusion_gpr_mem_load" "")))] - "TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN && TARGET_POWERPC64 - && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], - NULL_RTX)" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 4) (match_dup 3)) - (set (match_dup 2) (sign_extend:DI (match_dup 4)))] -{ - unsigned int offset - = (BYTES_BIG_ENDIAN ? 8 - GET_MODE_SIZE (mode) : 0); - - operands[4] = simplify_subreg (mode, operands[2], DImode, - offset); -}) - -(define_peephole [(set (match_operand:P 0 "base_reg_operand" "") (match_operand:P 1 "fusion_gpr_addis" "")) - (set (match_operand:SI 2 "base_reg_operand" "") - (zero_extend:SI (match_operand:QHI 3 "fusion_gpr_mem_load" "")))] + (set (match_operand:INT1 2 "base_reg_operand" "") + (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] "TARGET_P8_FUSION - && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], - insn)" -{ - return emit_fusion_gpr_load (operands[0], operands[1], operands[2], - operands[3]); -} - [(set_attr "type" "load") - (set_attr "length" "8")]) - -(define_peephole2 - [(set (match_operand:P 0 "base_reg_operand" "") - (match_operand:P 1 "fusion_gpr_addis" "")) - (set (match_operand:SI 2 "base_reg_operand" "") - (sign_extend:SI (match_operand:HI 3 "fusion_gpr_mem_load" "")))] - "TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN - && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], - NULL_RTX)" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 4) (match_dup 3)) - (set (match_dup 2) (sign_extend:SI (match_dup 4)))] + && (REGNO (operands[0]) != REGNO (operands[2]) + || GET_CODE (operands[3]) == SIGN_EXTEND) + && fusion_gpr_load_p (operands, true)" + [(const_int 0)] { - unsigned int offset = (BYTES_BIG_ENDIAN ? 
2 : 0); - - operands[4] = simplify_subreg (HImode, operands[2], SImode, offset); + expand_fusion_gpr_load (operands); + DONE; }) -(define_peephole - [(set (match_operand:P 0 "base_reg_operand" "") - (match_operand:P 1 "fusion_gpr_addis" "")) - (set (match_operand:HI 2 "base_reg_operand" "") - (zero_extend:HI (match_operand:QI 3 "fusion_gpr_mem_load" "")))] - "TARGET_P8_FUSION - && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], - insn)" -{ - return emit_fusion_gpr_load (operands[0], operands[1], operands[2], - operands[3]); -} - [(set_attr "type" "load") - (set_attr "length" "8")]) - (include "sync.md") -- 2.7.4