From d37c3c6271c02adcc159e84630e33de8ee8471b7 Mon Sep 17 00:00:00 2001 From: Maxim Kuvyrkov Date: Tue, 27 Jul 2010 19:44:51 +0000 Subject: [PATCH] re PR target/42495 (redundant memory load) PR target/42495 PR middle-end/42574 * config/arm/arm.c (legitimize_pic_address): Use gen_calculate_pic_address pattern to emit calculation of PIC address. (will_be_in_index_register): New function. (arm_legitimate_address_outer_p, thumb2_legitimate_address_p,) (thumb1_legitimate_address_p): Use it provided !strict_p. * config/arm/arm.md (calculate_pic_address): New expand and split. From-SVN: r162595 --- gcc/ChangeLog | 11 +++++++++++ gcc/config/arm/arm.c | 49 +++++++++++++++++++++++++++++++++---------------- gcc/config/arm/arm.md | 28 ++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 16 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c06e7d5..0deef9c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -2,6 +2,17 @@ PR target/42495 PR middle-end/42574 + * config/arm/arm.c (legitimize_pic_address): Use + gen_calculate_pic_address pattern to emit calculation of PIC address. + (will_be_in_index_register): New function. + (arm_legitimate_address_outer_p, thumb2_legitimate_address_p,) + (thumb1_legitimate_address_p): Use it provided !strict_p. + * config/arm/arm.md (calculate_pic_address): New expand and split. + +2010-07-27 Maxim Kuvyrkov + + PR target/42495 + PR middle-end/42574 * config/arm/arm.c (thumb1_size_rtx_costs): Add cost for "J" constants. * config/arm/arm.md (define_split "J", define_split "K"): Make IRA/reload friendly. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 03c1506..1146418 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -4977,17 +4977,13 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) if (GET_CODE (orig) == SYMBOL_REF || GET_CODE (orig) == LABEL_REF) { - rtx pic_ref, address; rtx insn; if (reg == 0) { gcc_assert (can_create_pseudo_p ()); reg = gen_reg_rtx (Pmode); - address = gen_reg_rtx (Pmode); } - else - address = reg; /* VxWorks does not impose a fixed gap between segments; the run-time gap can be different from the object-file gap. We therefore can't @@ -5003,18 +4999,21 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) insn = arm_pic_static_addr (orig, reg); else { + rtx pat; + rtx mem; + /* If this function doesn't have a pic register, create one now. */ require_pic_register (); - if (TARGET_32BIT) - emit_insn (gen_pic_load_addr_32bit (address, orig)); - else /* TARGET_THUMB1 */ - emit_insn (gen_pic_load_addr_thumb1 (address, orig)); + pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); - pic_ref = gen_const_mem (Pmode, - gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, - address)); - insn = emit_move_insn (reg, pic_ref); + /* Make the MEM as close to a constant as possible. */ + mem = SET_SRC (pat); + gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); + MEM_READONLY_P (mem) = 1; + MEM_NOTRAP_P (mem) = 1; + + insn = emit_insn (pat); } /* Put a REG_EQUAL note on this insn, so that it can be optimized @@ -5294,6 +5293,15 @@ pcrel_constant_p (rtx x) return FALSE; } +/* Return true if X will surely end up in an index register after next + splitting pass. */ +static bool +will_be_in_index_register (const_rtx x) +{ + /* arm.md: calculate_pic_address will split this into a register. */ + return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM; +} + /* Return nonzero if X is a valid ARM state address operand. */ int arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, @@ -5351,8 +5359,9 @@ arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, rtx xop1 = XEXP (x, 1); return ((arm_address_register_rtx_p (xop0, strict_p) - && GET_CODE(xop1) == CONST_INT - && arm_legitimate_index_p (mode, xop1, outer, strict_p)) + && ((GET_CODE(xop1) == CONST_INT + && arm_legitimate_index_p (mode, xop1, outer, strict_p)) + || (!strict_p && will_be_in_index_register (xop1)))) || (arm_address_register_rtx_p (xop1, strict_p) && arm_legitimate_index_p (mode, xop0, outer, strict_p))); } @@ -5438,7 +5447,8 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) rtx xop1 = XEXP (x, 1); return ((arm_address_register_rtx_p (xop0, strict_p) - && thumb2_legitimate_index_p (mode, xop1, strict_p)) + && (thumb2_legitimate_index_p (mode, xop1, strict_p) + || (!strict_p && will_be_in_index_register (xop1)))) || (arm_address_register_rtx_p (xop1, strict_p) && thumb2_legitimate_index_p (mode, xop0, strict_p))); } @@ -5741,7 +5751,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) && XEXP (x, 0) != frame_pointer_rtx && XEXP (x, 1) != frame_pointer_rtx && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) - && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)) + && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) + || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) return 1; /* REG+const has 5-7 bit offset for non-SP registers. */ @@ -7110,6 +7121,12 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, a single register, otherwise it costs one insn per word. */ if (REG_P (XEXP (x, 0))) *total = COSTS_N_INSNS (1); + else if (flag_pic + && GET_CODE (XEXP (x, 0)) == PLUS + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) + /* This will be split into two instructions. + See arm.md:calculate_pic_address. */ + *total = COSTS_N_INSNS (2); else *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); return true; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 5438b3c..885d6ed 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5133,6 +5133,34 @@ ;; we use an unspec. The offset will be loaded from a constant pool entry, ;; since that is the only type of relocation we can use. +;; Wrap calculation of the whole PIC address in a single pattern for the +;; benefit of optimizers, particularly, PRE and HOIST. Calculation of +;; a PIC address involves two loads from memory, so we want to CSE it +;; as often as possible. +;; This pattern will be split into one of the pic_load_addr_* patterns +;; and a move after GCSE optimizations. +;; +;; Note: Update arm.c: legitimize_pic_address() when changing this pattern. +(define_expand "calculate_pic_address" + [(set (match_operand:SI 0 "register_operand" "") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_PIC_SYM))))] + "flag_pic" +) + +;; Split calculate_pic_address into pic_load_addr_* and a move. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_PIC_SYM))))] + "flag_pic" + [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM)) + (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))] + "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];" +) + ;; The rather odd constraints on the following are to force reload to leave ;; the insn alone, and to force the minipool generation pass to then move ;; the GOT symbol to memory. -- 2.7.4