From 01b1efaa1439e6cbf44566354dcce9a96d5c6f5e Mon Sep 17 00:00:00 2001 From: Vladimir Makarov Date: Fri, 25 Oct 2013 21:06:38 +0000 Subject: [PATCH] 2013-10-25 Vladimir Makarov * config/rs6000/rs6000-protos.h (rs6000_secondary_memory_needed_mode): New prototype. * config/rs6000/rs6000.c: Include ira.h. (TARGET_LRA_P): Redefine. (rs6000_legitimate_offset_address_p): Call legitimate_constant_pool_address_p in strict mode for LRA. (rs6000_legitimate_address_p): Ditto. (legitimate_lo_sum_address_p): Add code for LRA. Use lra_in_progress. (rs6000_emit_move): Add LRA version of code to generate load/store of SDmode values. (rs6000_secondary_memory_needed_mode): New. (rs6000_alloc_sdmode_stack_slot): Do nothing for LRA. (rs6000_secondary_reload_class): Return NO_REGS for LRA for constants, memory, and FP registers. (rs6000_lra_p): New. * config/rs6000/rs6000.h (SECONDARY_MEMORY_NEEDED_MODE): New macro. * config/rs6000/rs6000.opt (mlra): New option. * lra-spills.c (lra_final_code_change): Remove useless move insns. From-SVN: r204079 --- gcc/ChangeLog | 23 ++++++++ gcc/config/rs6000/rs6000-protos.h | 2 + gcc/config/rs6000/rs6000.c | 114 ++++++++++++++++++++++++++++++++++++-- gcc/config/rs6000/rs6000.h | 7 +++ gcc/config/rs6000/rs6000.opt | 4 ++ gcc/lra-spills.c | 16 +++++- 6 files changed, 159 insertions(+), 7 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8b5eec8..d120771 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2013-10-25 Vladimir Makarov + + * config/rs6000/rs6000-protos.h + (rs6000_secondary_memory_needed_mode): New prototype. + * config/rs6000/rs6000.c: Include ira.h. + (TARGET_LRA_P): Redefine. + (rs6000_legitimate_offset_address_p): Call + legitimate_constant_pool_address_p in strict mode for LRA. + (rs6000_legitimate_address_p): Ditto. + (legitimate_lo_sum_address_p): Add code for LRA. + Use lra_in_progress. + (rs6000_emit_move): Add LRA version of code to generate load/store + of SDmode values. 
+ (rs6000_secondary_memory_needed_mode): New. + (rs6000_alloc_sdmode_stack_slot): Do nothing for LRA. + (rs6000_secondary_reload_class): Return NO_REGS for LRA for + constants, memory, and FP registers. + (rs6000_lra_p): New. + * config/rs6000/rs6000.h (SECONDARY_MEMORY_NEEDED_MODE): New + macro. + * config/rs6000/rs6000.opt (mlra): New option. + * lra-spills.c (lra_final_code_change): Remove useless move insns. + 2013-10-25 Yufeng Zhang * tree-ssa-math-opts.c (convert_plusminus_to_widen): Call diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 1fc8903..c35e44d 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -126,6 +126,8 @@ extern void rs6000_split_multireg_move (rtx, rtx); extern void rs6000_emit_le_vsx_move (rtx, rtx, enum machine_mode); extern void rs6000_emit_move (rtx, rtx, enum machine_mode); extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode); +extern enum machine_mode rs6000_secondary_memory_needed_mode (enum + machine_mode); extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode, int, int, int, int *); extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index afd6db2..1006eec 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -55,6 +55,7 @@ #include "intl.h" #include "params.h" #include "tm-constrs.h" +#include "ira.h" #include "opts.h" #include "tree-vectorizer.h" #include "dumpfile.h" @@ -1554,6 +1555,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_MODE_DEPENDENT_ADDRESS_P #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p +#undef TARGET_LRA_P +#define TARGET_LRA_P rs6000_lra_p + #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE rs6000_can_eliminate @@ -6226,7 +6230,7 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x, return false; if 
(!reg_offset_addressing_ok_p (mode)) return virtual_stack_registers_memory_p (x); - if (legitimate_constant_pool_address_p (x, mode, strict)) + if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress)) return true; if (GET_CODE (XEXP (x, 1)) != CONST_INT) return false; @@ -6366,19 +6370,31 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict) if (TARGET_ELF || TARGET_MACHO) { + bool large_toc_ok; + if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic) return false; - if (TARGET_TOC) + /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls + push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS + recognizes some LO_SUM addresses as valid although this + function says the opposite. In most cases, LRA through different + transformations can generate correct code for address reloads. + The only cases it cannot manage are some LO_SUM ones. So we need to add + code analogous to that in rs6000_legitimize_reload_address for + LO_SUM here saying that some addresses are still valid. */ + large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL + && small_toc_ref (x, VOIDmode)); + if (TARGET_TOC && ! large_toc_ok) return false; if (GET_MODE_NUNITS (mode) != 1) return false; - if (GET_MODE_SIZE (mode) > UNITS_PER_WORD + if (! lra_in_progress && GET_MODE_SIZE (mode) > UNITS_PER_WORD && !(/* ??? Assume floating point reg based on mode? 
*/ TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (mode == DFmode || mode == DDmode))) return false; - return CONSTANT_P (x); + return CONSTANT_P (x) || large_toc_ok; } return false; @@ -7368,7 +7384,8 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) if (reg_offset_p && legitimate_small_data_p (mode, x)) return 1; if (reg_offset_p - && legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) + && legitimate_constant_pool_address_p (x, mode, + reg_ok_strict || lra_in_progress)) return 1; /* For TImode, if we have load/store quad and TImode in VSX registers, only allow register indirect addresses. This will allow the values to go in @@ -7654,6 +7671,7 @@ rs6000_conditional_register_usage (void) fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; } } + /* Try to output insns to set TARGET equal to the constant C if it can be done in less than N insns. Do all computations in MODE. @@ -8058,6 +8076,68 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) cfun->machine->sdmode_stack_slot = eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX); + + if (lra_in_progress + && mode == SDmode + && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER + && reg_preferred_class (REGNO (operands[0])) == NO_REGS + && (REG_P (operands[1]) + || (GET_CODE (operands[1]) == SUBREG + && REG_P (SUBREG_REG (operands[1]))))) + { + int regno = REGNO (GET_CODE (operands[1]) == SUBREG + ? 
SUBREG_REG (operands[1]) : operands[1]); + enum reg_class cl; + + if (regno >= FIRST_PSEUDO_REGISTER) + { + cl = reg_preferred_class (regno); + gcc_assert (cl != NO_REGS); + regno = ira_class_hard_regs[cl][0]; + } + if (FP_REGNO_P (regno)) + { + if (GET_MODE (operands[0]) != DDmode) + operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0); + emit_insn (gen_movsd_store (operands[0], operands[1])); + } + else if (INT_REGNO_P (regno)) + emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); + else + gcc_unreachable(); + return; + } + if (lra_in_progress + && mode == SDmode + && (REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && REG_P (SUBREG_REG (operands[0])))) + && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER + && reg_preferred_class (REGNO (operands[1])) == NO_REGS) + { + int regno = REGNO (GET_CODE (operands[0]) == SUBREG + ? SUBREG_REG (operands[0]) : operands[0]); + enum reg_class cl; + + if (regno >= FIRST_PSEUDO_REGISTER) + { + cl = reg_preferred_class (regno); + gcc_assert (cl != NO_REGS); + regno = ira_class_hard_regs[cl][0]; + } + if (FP_REGNO_P (regno)) + { + if (GET_MODE (operands[1]) != DDmode) + operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0); + emit_insn (gen_movsd_load (operands[0], operands[1])); + } + else if (INT_REGNO_P (regno)) + emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); + else + gcc_unreachable(); + return; + } + if (reload_in_progress && mode == SDmode && cfun->machine->sdmode_stack_slot != NULL_RTX @@ -14905,6 +14985,17 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode) return ret; } +/* Return the mode to be used for memory when a secondary memory + location is needed. For SDmode values we need to use DDmode, in + all other cases we can use the same mode. 
*/ +enum machine_mode +rs6000_secondary_memory_needed_mode (enum machine_mode mode) +{ + if (mode == SDmode) + return DDmode; + return mode; +} + static tree rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) { @@ -15798,6 +15889,10 @@ rs6000_alloc_sdmode_stack_slot (void) gimple_stmt_iterator gsi; gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX); + /* We use a different approach for dealing with the secondary + memory in LRA. */ + if (ira_use_lra_p) + return; if (TARGET_NO_SDMODE_STACK) return; @@ -16019,7 +16114,7 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode, /* Constants, memory, and FP registers can go into FP registers. */ if ((regno == -1 || FP_REGNO_P (regno)) && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS)) - return (mode != SDmode) ? NO_REGS : GENERAL_REGS; + return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS; /* Memory, and FP/altivec registers can go into fp/altivec registers under VSX. However, for scalar variables, use the traditional floating point @@ -29364,6 +29459,13 @@ rs6000_libcall_value (enum machine_mode mode) } +/* Return true if we use LRA instead of reload pass. */ +static bool +rs6000_lra_p (void) +{ + return rs6000_lra_flag; +} + /* Given FROM and TO register numbers, say whether this elimination is allowed. Frame pointer elimination is automatically handled. diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 745437e..f13951e 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1488,6 +1488,13 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; #define SECONDARY_MEMORY_NEEDED_RTX(MODE) \ rs6000_secondary_memory_needed_rtx (MODE) +/* Specify the mode to be used for memory when a secondary memory + location is needed. For cpus that cannot load/store SDmode values + from the 64-bit FP registers without using a full 64-bit + load/store, we need a wider mode. 
*/ +#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \ + rs6000_secondary_memory_needed_mode (MODE) + /* Return the maximum number of consecutive registers needed to represent mode MODE in a register of class CLASS. diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index c3f9c2d..94e4b38 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -449,6 +449,10 @@ mlong-double- Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save -mlong-double- Specify size of long double (64 or 128 bits) +mlra +Target Report Var(rs6000_lra_flag) Init(0) Save +Use LRA instead of reload + msched-costly-dep= Target RejectNegative Joined Var(rs6000_sched_costly_dep_str) Determine which dependences between insns are considered costly diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c index 7c0c630..73a90a8 100644 --- a/gcc/lra-spills.c +++ b/gcc/lra-spills.c @@ -625,7 +625,7 @@ lra_final_code_change (void) { int i, hard_regno; basic_block bb; - rtx insn, curr; + rtx insn, curr, set; int max_regno = max_reg_num (); for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) @@ -661,5 +661,19 @@ lra_final_code_change (void) } if (insn_change_p) lra_update_operator_dups (id); + + if ((set = single_set (insn)) != NULL + && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)) + && REGNO (SET_SRC (set)) == REGNO (SET_DEST (set))) + { + /* Remove a useless move insn. IRA can generate move + insns involving pseudos. It is better to remove them + earlier to speed up the compiler a bit. It is also + better to do it here as they might not pass the final RTL + check in LRA (e.g. an insn moving a control register + into itself). */ + lra_invalidate_insn_data (insn); + delete_insn (insn); + } } } -- 2.7.4