From b0f86a7e6ba69a2638b00efa758aec522046760a Mon Sep 17 00:00:00 2001
From: Andreas Krebbel
Date: Thu, 20 Aug 2009 09:21:13 +0000
Subject: [PATCH] s390.c (Z10_PREDICT_DISTANCE): New macro.

2009-08-20  Andreas Krebbel

	* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
	(s390_z10_fix_long_loop_prediction): New function.
	(s390_z10_optimize_cmp): INSN walk moved to caller - s390_reorg.
	(s390_reorg): Walk over the INSNs and invoke
	s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.

From-SVN: r150955
---
 gcc/ChangeLog           |   8 ++
 gcc/config/s390/s390.c  | 231 ++++++++++++++++++++++++++++++++----------------
 gcc/config/s390/s390.md |  58 ++++++++++++
 3 files changed, 220 insertions(+), 77 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f35ed54..76a3fe4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
 2009-08-20  Andreas Krebbel
 
+	* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
+	(s390_z10_fix_long_loop_prediction): New function.
+	(s390_z10_optimize_cmp): INSN walk moved to caller - s390_reorg.
+	(s390_reorg): Walk over the INSNs and invoke
+	s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.
+
+2009-08-20  Andreas Krebbel
+
 	* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
 	"*brx_64bit", "*brx_31bit"): New patterns.
 	* config/s390/s390.c ('E'): New output modifier.

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 1431bfd..47b939c 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -345,6 +345,10 @@ struct GTY(()) machine_function
 #define REGNO_PAIR_OK(REGNO, MODE)                             \
   (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
 
+/* That's the read ahead of the dynamic branch prediction unit in
+   bytes on a z10 CPU.  */
+#define Z10_PREDICT_DISTANCE 384
+
 static enum machine_mode
 s390_libgcc_cmp_return_mode (void)
 {
@@ -9661,6 +9665,66 @@ s390_optimize_prologue (void)
     }
 }
 
+/* On z10 the dynamic branch prediction must see the backward jump in
+   a window of 384 bytes.  If not, it falls back to the static
+   prediction.  This function rearranges the loop backward branch in a
+   way which makes the static prediction always correct.  The function
+   returns true if it added an instruction.  */
+static bool
+s390_z10_fix_long_loop_prediction (rtx insn)
+{
+  rtx set = single_set (insn);
+  rtx code_label, label_ref, new_label;
+  rtx uncond_jump;
+  rtx cur_insn;
+  rtx tmp;
+  int distance;
+
+  /* This will exclude branch on count and branch on index patterns
+     since these are correctly statically predicted.  */
+  if (!set
+      || SET_DEST (set) != pc_rtx
+      || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+    return false;
+
+  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
+               XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
+
+  gcc_assert (GET_CODE (label_ref) == LABEL_REF);
+
+  code_label = XEXP (label_ref, 0);
+
+  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
+      || INSN_ADDRESSES (INSN_UID (insn)) == -1
+      || (INSN_ADDRESSES (INSN_UID (insn))
+          - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
+    return false;
+
+  for (distance = 0, cur_insn = PREV_INSN (insn);
+       distance < Z10_PREDICT_DISTANCE - 6;
+       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
+    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
+      return false;
+
+  new_label = gen_label_rtx ();
+  uncond_jump = emit_jump_insn_after (
+                  gen_rtx_SET (VOIDmode, pc_rtx,
+                               gen_rtx_LABEL_REF (VOIDmode, code_label)),
+                  insn);
+  emit_label_after (new_label, uncond_jump);
+
+  tmp = XEXP (SET_SRC (set), 1);
+  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
+  XEXP (SET_SRC (set), 2) = tmp;
+  INSN_CODE (insn) = -1;
+
+  XEXP (label_ref, 0) = new_label;
+  JUMP_LABEL (insn) = new_label;
+  JUMP_LABEL (uncond_jump) = code_label;
+
+  return true;
+}
+
 /* Returns 1 if INSN reads the value of REG for purposes not related
    to addressing of memory, and 0 otherwise.  */
 static int
@@ -9743,97 +9807,87 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
    if that register's value is delivered via a bypass, then the
    pipeline recycles, thereby causing significant performance decline.
    This function locates such situations and exchanges the two
-   operands of the compare.  */
-static void
-s390_z10_optimize_cmp (void)
+   operands of the compare.  The function returns true whenever it
+   added an insn.  */
+static bool
+s390_z10_optimize_cmp (rtx insn)
 {
-  rtx insn, prev_insn, next_insn;
-  int added_NOPs = 0;
+  rtx prev_insn, next_insn;
+  bool insn_added_p = false;
+  rtx cond, *op0, *op1;
 
-  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+  if (GET_CODE (PATTERN (insn)) == PARALLEL)
     {
-      rtx cond, *op0, *op1;
+      /* Handle compare and branch and branch on count
+         instructions.  */
+      rtx pattern = single_set (insn);
 
-      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
-        continue;
-
-      if (GET_CODE (PATTERN (insn)) == PARALLEL)
-        {
-          /* Handle compare and branch and branch on count
-             instructions.  */
-          rtx pattern = single_set (insn);
-
-          if (!pattern
-              || SET_DEST (pattern) != pc_rtx
-              || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
-            continue;
+      if (!pattern
+          || SET_DEST (pattern) != pc_rtx
+          || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
+        return false;
 
-          cond = XEXP (SET_SRC (pattern), 0);
-          op0 = &XEXP (cond, 0);
-          op1 = &XEXP (cond, 1);
-        }
-      else if (GET_CODE (PATTERN (insn)) == SET)
-        {
-          rtx src, dest;
+      cond = XEXP (SET_SRC (pattern), 0);
+      op0 = &XEXP (cond, 0);
+      op1 = &XEXP (cond, 1);
+    }
+  else if (GET_CODE (PATTERN (insn)) == SET)
+    {
+      rtx src, dest;
 
-          /* Handle normal compare instructions.  */
-          src = SET_SRC (PATTERN (insn));
-          dest = SET_DEST (PATTERN (insn));
+      /* Handle normal compare instructions.  */
+      src = SET_SRC (PATTERN (insn));
+      dest = SET_DEST (PATTERN (insn));
 
-          if (!REG_P (dest)
-              || !CC_REGNO_P (REGNO (dest))
-              || GET_CODE (src) != COMPARE)
-            continue;
+      if (!REG_P (dest)
+          || !CC_REGNO_P (REGNO (dest))
+          || GET_CODE (src) != COMPARE)
+        return false;
 
-          /* s390_swap_cmp will try to find the conditional
-             jump when passing NULL_RTX as condition.  */
-          cond = NULL_RTX;
-          op0 = &XEXP (src, 0);
-          op1 = &XEXP (src, 1);
-        }
-      else
-        continue;
+      /* s390_swap_cmp will try to find the conditional
+         jump when passing NULL_RTX as condition.  */
+      cond = NULL_RTX;
+      op0 = &XEXP (src, 0);
+      op1 = &XEXP (src, 1);
+    }
+  else
+    return false;
 
-      if (!REG_P (*op0) || !REG_P (*op1))
-        continue;
+  if (!REG_P (*op0) || !REG_P (*op1))
+    return false;
 
-      /* Swap the COMPARE arguments and its mask if there is a
-         conflicting access in the previous insn.  */
-      prev_insn = PREV_INSN (insn);
+  /* Swap the COMPARE arguments and its mask if there is a
+     conflicting access in the previous insn.  */
+  prev_insn = PREV_INSN (insn);
+  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+      && reg_referenced_p (*op1, PATTERN (prev_insn)))
+    s390_swap_cmp (cond, op0, op1, insn);
+
+  /* Check if there is a conflict with the next insn.  If there
+     was no conflict with the previous insn, then swap the
+     COMPARE arguments and its mask.  If we already swapped
+     the operands, or if swapping them would cause a conflict
+     with the previous insn, issue a NOP after the COMPARE in
+     order to separate the two instructions.  */
+  next_insn = NEXT_INSN (insn);
+  if (next_insn != NULL_RTX && INSN_P (next_insn)
+      && s390_non_addr_reg_read_p (*op1, next_insn))
+    {
       if (prev_insn != NULL_RTX && INSN_P (prev_insn)
-          && reg_referenced_p (*op1, PATTERN (prev_insn)))
-        s390_swap_cmp (cond, op0, op1, insn);
-
-      /* Check if there is a conflict with the next insn.  If there
-         was no conflict with the previous insn, then swap the
-         COMPARE arguments and its mask.  If we already swapped
-         the operands, or if swapping them would cause a conflict
-         with the previous insn, issue a NOP after the COMPARE in
-         order to separate the two instuctions.  */
-      next_insn = NEXT_INSN (insn);
-      if (next_insn != NULL_RTX && INSN_P (next_insn)
-          && s390_non_addr_reg_read_p (*op1, next_insn))
+          && s390_non_addr_reg_read_p (*op0, prev_insn))
         {
-          if (prev_insn != NULL_RTX && INSN_P (prev_insn)
-              && s390_non_addr_reg_read_p (*op0, prev_insn))
-            {
-              if (REGNO (*op1) == 0)
-                emit_insn_after (gen_nop1 (), insn);
-              else
-                emit_insn_after (gen_nop (), insn);
-              added_NOPs = 1;
-            }
+          if (REGNO (*op1) == 0)
+            emit_insn_after (gen_nop1 (), insn);
           else
-            s390_swap_cmp (cond, op0, op1, insn);
+            emit_insn_after (gen_nop (), insn);
+          insn_added_p = true;
         }
+      else
+        s390_swap_cmp (cond, op0, op1, insn);
     }
-
-  /* Adjust branches if we added new instructions.  */
-  if (added_NOPs)
-    shorten_branches (get_insns ());
+  return insn_added_p;
 }
-
 /* Perform machine-dependent processing.  */
 static void
@@ -9944,10 +9998,33 @@ s390_reorg (void)
   /* Try to optimize prologue and epilogue further.  */
   s390_optimize_prologue ();
 
-  /* Eliminate z10-specific pipeline recycles related to some compare
-     instructions.  */
+  /* Walk over the insns and do some z10-specific changes.  */
   if (s390_tune == PROCESSOR_2097_Z10)
-    s390_z10_optimize_cmp ();
+    {
+      rtx insn;
+      bool insn_added_p = false;
+
+      /* The insn lengths and addresses have to be up to date for the
+         following manipulations.  */
+      shorten_branches (get_insns ());
+
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+        {
+          if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
+            continue;
+
+          if (JUMP_P (insn))
+            insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
+
+          if (GET_CODE (PATTERN (insn)) == PARALLEL
+              || GET_CODE (PATTERN (insn)) == SET)
+            insn_added_p |= s390_z10_optimize_cmp (insn);
+        }
+
+      /* Adjust branches if we added new instructions.  */
+      if (insn_added_p)
+        shorten_branches (get_insns ());
+    }
 }

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 7898dc5..0a4361f 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1046,6 +1046,64 @@
                       (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
                                                        ; 10 byte for clgr/jg
 
+; And now the same two patterns as above but with a negated CC mask.
+
+; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): crj, cgrj, cr, cgr
+(define_insn "*icmp_and_br_signed_<mode>"
+  [(set (pc)
+        (if_then_else (match_operator 0 "s390_signed_integer_comparison"
+                                      [(match_operand:GPR 1 "register_operand"  "d,d")
+                                       (match_operand:GPR 2 "nonmemory_operand" "d,C")])
+                      (pc)
+                      (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_Z10"
+{
+  if (get_attr_length (insn) == 6)
+    return which_alternative ?
+      "cij%D0\t%1,%c2,%l3" : "crj%D0\t%1,%2,%l3";
+  else
+    return which_alternative ?
+      "cfi\t%1,%c2\;jg%D0\t%l3" : "cr\t%1,%2\;jg%D0\t%l3";
+}
+  [(set_attr "op_type" "RIE")
+   (set_attr "type"    "branch")
+   (set_attr "z10prop" "z10_super_c,z10_super")
+   (set (attr "length")
+        (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+                      (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
+                                                       ; 10 byte for cgr/jg
+
+; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): clrj, clgrj, clr, clgr
+(define_insn "*icmp_and_br_unsigned_<mode>"
+  [(set (pc)
+        (if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
+                                      [(match_operand:GPR 1 "register_operand"  "d,d")
+                                       (match_operand:GPR 2 "nonmemory_operand" "d,I")])
+                      (pc)
+                      (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_Z10"
+{
+  if (get_attr_length (insn) == 6)
+    return which_alternative ?
+      "clij%D0\t%1,%b2,%l3" : "clrj%D0\t%1,%2,%l3";
+  else
+    return which_alternative ?
+      "clfi\t%1,%b2\;jg%D0\t%l3" : "clr\t%1,%2\;jg%D0\t%l3";
+}
+  [(set_attr "op_type" "RIE")
+   (set_attr "type"    "branch")
+   (set_attr "z10prop" "z10_super_c,z10_super")
+   (set (attr "length")
+        (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+                      (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
+                                                       ; 10 byte for clgr/jg
+
 ;;
 ;;- Move instructions.
 ;;
-- 
2.7.4
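
Note (illustration only, not part of the patch): the rewrite performed by
s390_z10_fix_long_loop_prediction is easiest to see at the assembly level.
The following sketch uses hypothetical labels and registers; crjl/crjnl are
the compare-and-branch extended mnemonics for "less" and "not less".

	# Before: the conditional backward branch sits more than 384
	# bytes away from its target, so the z10 prediction unit cannot
	# see .Lloop within its window and falls back to static
	# prediction.
	.Lloop:
		...			# > 384 bytes of loop body
		crjl	%r1,%r2,.Lloop	# conditional backward branch

	# After: the condition is inverted and retargeted to a new local
	# label, and the backward branch becomes unconditional, which
	# static prediction always gets right.
	.Lloop:
		...
		crjnl	%r1,%r2,.Lnew	# inverted condition, short forward branch
		j	.Lloop		# unconditional backward branch
	.Lnew: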
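
A corresponding sketch for s390_z10_optimize_cmp, again with hypothetical
registers: when the second compare operand is written by the directly
preceding instruction, its value arrives via a bypass and the z10 pipeline
recycles.

	# Before: %r3 is set immediately before being read as the second
	# operand of the compare.
		lr	%r3,%r5
		cr	%r2,%r3
		je	.Ltarget

	# After: the compare operands are swapped (for asymmetric
	# conditions s390_swap_cmp also adjusts the CC mask of the
	# consuming branch).  If swapping is not possible, a NOP is
	# emitted between the two instructions instead.
		lr	%r3,%r5
		cr	%r3,%r2
		je	.Ltarget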