From 8261494856f9478e0393d5bd9be26961d16f266f Mon Sep 17 00:00:00 2001
From: Ramana Radhakrishnan
Date: Fri, 14 Nov 2014 09:44:17 +0000
Subject: [PATCH] re PR target/63724 ([AArch64] Inefficient immediate
 expansion and hoisting.)

Fix PR target/63724

2014-11-14  Ramana Radhakrishnan

	PR target/63724
	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
	numerical immediate handling to...
	(aarch64_internal_mov_immediate): ...this. New.
	(aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
	(aarch64_mov_operand_p): Relax predicate.
	* config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
	(*movsi_aarch64): Turn into define_insn_and_split and new alternative
	for 'n'.
	(*movdi_aarch64): Likewise.

From-SVN: r217546
---
 gcc/ChangeLog                 |  13 ++
 gcc/config/aarch64/aarch64.c  | 364 ++++++++++++++++++++++++------------------
 gcc/config/aarch64/aarch64.md |  49 ++++--
 3 files changed, 259 insertions(+), 167 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 49e1b7b..4a20001 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2014-11-14  Ramana Radhakrishnan
+
+	PR target/63724
+	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
+	numerical immediate handling to...
+	(aarch64_internal_mov_immediate): ...this. New.
+	(aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
+	(aarch64_mov_operand_p): Relax predicate.
+	* config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
+	(*movsi_aarch64): Turn into define_insn_and_split and new alternative
+	for 'n'.
+	(*movdi_aarch64): Likewise.
+
 2014-11-14  Richard Biener
 
 	* match.pd: Implement more binary patterns exercised by

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 07f75e1..92937bb 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1046,10 +1046,10 @@ aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
   return plus_constant (mode, reg, offset);
 }
 
-void
-aarch64_expand_mov_immediate (rtx dest, rtx imm)
+static int
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
+				machine_mode mode)
 {
-  machine_mode mode = GET_MODE (dest);
   unsigned HOST_WIDE_INT mask;
   int i;
   bool first;
@@ -1057,85 +1057,14 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   bool subtargets;
   rtx subtarget;
   int one_match, zero_match, first_not_ffff_match;
-
-  gcc_assert (mode == SImode || mode == DImode);
-
-  /* Check on what type of symbol it is.  */
-  if (GET_CODE (imm) == SYMBOL_REF
-      || GET_CODE (imm) == LABEL_REF
-      || GET_CODE (imm) == CONST)
-    {
-      rtx mem, base, offset;
-      enum aarch64_symbol_type sty;
-
-      /* If we have (const (plus symbol offset)), separate out the offset
-	 before we start classifying the symbol.  */
-      split_const (imm, &base, &offset);
-
-      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
-      switch (sty)
-	{
-	case SYMBOL_FORCE_TO_MEM:
-	  if (offset != const0_rtx
-	      && targetm.cannot_force_const_mem (mode, imm))
-	    {
-	      gcc_assert (can_create_pseudo_p ());
-	      base = aarch64_force_temporary (mode, dest, base);
-	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-	      aarch64_emit_move (dest, base);
-	      return;
-	    }
-	  mem = force_const_mem (ptr_mode, imm);
-	  gcc_assert (mem);
-	  if (mode != ptr_mode)
-	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-	  return;
-
-	case SYMBOL_SMALL_TLSGD:
-	case SYMBOL_SMALL_TLSDESC:
-	case SYMBOL_SMALL_GOTTPREL:
-	case SYMBOL_SMALL_GOT:
-	case SYMBOL_TINY_GOT:
-	  if (offset != const0_rtx)
-	    {
-	      gcc_assert(can_create_pseudo_p ());
-	      base = aarch64_force_temporary (mode, dest, base);
-	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-	      aarch64_emit_move (dest, base);
-	      return;
-	    }
-	  /* FALLTHRU */
-
-	case SYMBOL_SMALL_TPREL:
-	case SYMBOL_SMALL_ABSOLUTE:
-	case SYMBOL_TINY_ABSOLUTE:
-	  aarch64_load_symref_appropriately (dest, imm, sty);
-	  return;
-
-	default:
-	  gcc_unreachable ();
-	}
-    }
+  int num_insns = 0;
 
   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
     {
-      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      return;
-    }
-
-  if (!CONST_INT_P (imm))
-    {
-      if (GET_CODE (imm) == HIGH)
+      if (generate)
 	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      else
-	{
-	  rtx mem = force_const_mem (mode, imm);
-	  gcc_assert (mem);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-	}
-
-      return;
+      num_insns++;
+      return num_insns;
     }
 
   if (mode == SImode)
@@ -1143,10 +1072,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
       /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
-      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-      return;
+      if (generate)
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, dest,
+				  GEN_INT (INTVAL (imm) & 0xffff)));
+	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
+				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
+	}
+      num_insns += 2;
+      return num_insns;
     }
 
   /* Remaining cases are all for DImode.  */
@@ -1176,11 +1110,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       /* Set one of the quarters and then insert back into result.  */
       mask = 0xffffll << first_not_ffff_match;
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
-				 GEN_INT ((val >> first_not_ffff_match)
-					  & 0xffff)));
-      return;
+      if (generate)
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+				     GEN_INT ((val >> first_not_ffff_match)
+					      & 0xffff)));
+	}
+      num_insns += 2;
+      return num_insns;
     }
 
   if (zero_match == 2)
@@ -1193,42 +1131,55 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 
       if (aarch64_uimm12_shift (val - (val & mask)))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - (val & mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (val & mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - (val & mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT ((val + comp) & mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - ((val + comp) & mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT ((val + comp) & mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - ((val + comp) & mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT ((val - comp) | ~mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - ((val - comp) | ~mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT ((val - comp) | ~mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - ((val - comp) | ~mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT (val | ~mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - (val | ~mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (val | ~mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - (val | ~mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
     }
@@ -1242,23 +1193,31 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT (aarch64_bitmasks[i])));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - aarch64_bitmasks[i])));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (aarch64_bitmasks[i])));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - aarch64_bitmasks[i])));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
 
       for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest,
-				      GEN_INT (aarch64_bitmasks[i])));
-	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-					 GEN_INT ((val >> j) & 0xffff)));
-	      return;
+	      if (generate)
+		{
+		  emit_insn (gen_rtx_SET (VOIDmode, dest,
+					  GEN_INT (aarch64_bitmasks[i])));
+		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
+					     GEN_INT ((val >> j) & 0xffff)));
+		}
+	      num_insns += 2;
+	      return num_insns;
	    }
	}
     }
@@ -1273,12 +1232,16 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
-		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-					GEN_INT (aarch64_bitmasks[i])));
-		emit_insn (gen_iordi3 (dest, subtarget,
-				       GEN_INT (aarch64_bitmasks[j])));
-		return;
+		if (generate)
+		  {
+		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
+		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+					    GEN_INT (aarch64_bitmasks[i])));
+		    emit_insn (gen_iordi3 (dest, subtarget,
+					   GEN_INT (aarch64_bitmasks[j])));
+		  }
+		num_insns += 2;
+		return num_insns;
	      }
	}
       else if ((val & aarch64_bitmasks[i]) == val)
@@ -1288,13 +1251,16 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
-
-		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-					GEN_INT (aarch64_bitmasks[j])));
-		emit_insn (gen_anddi3 (dest, subtarget,
-				       GEN_INT (aarch64_bitmasks[i])));
-		return;
+		if (generate)
+		  {
+		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
+		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+					    GEN_INT (aarch64_bitmasks[j])));
+		    emit_insn (gen_anddi3 (dest, subtarget,
+					   GEN_INT (aarch64_bitmasks[i])));
+		  }
+		num_insns += 2;
+		return num_insns;
	      }
	}
     }
@@ -1303,18 +1269,24 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       /* Set either first three quarters or all but the third.  */
       mask = 0xffffll << (16 - first_not_ffff_match);
-      emit_insn (gen_rtx_SET (VOIDmode, dest,
-			      GEN_INT (val | mask | 0xffffffff00000000ull)));
+      if (generate)
+	emit_insn (gen_rtx_SET (VOIDmode, dest,
+				GEN_INT (val | mask | 0xffffffff00000000ull)));
+      num_insns ++;
 
       /* Now insert other two quarters.  */
       for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
	   i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
-	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-				       GEN_INT ((val >> i) & 0xffff)));
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					   GEN_INT ((val >> i) & 0xffff)));
+	      num_insns ++;
+	    }
	}
-      return;
+      return num_insns;
     }
 
  simple_sequence:
@@ -1326,15 +1298,106 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
	{
	  if (first)
	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest,
-				      GEN_INT (val & mask)));
+	      if (generate)
+		emit_insn (gen_rtx_SET (VOIDmode, dest,
+					GEN_INT (val & mask)));
+	      num_insns ++;
	      first = false;
	    }
	  else
-	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-				       GEN_INT ((val >> i) & 0xffff)));
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					   GEN_INT ((val >> i) & 0xffff)));
+	      num_insns ++;
+	    }
+	}
+    }
+
+  return num_insns;
+}
+
+
+void
+aarch64_expand_mov_immediate (rtx dest, rtx imm)
+{
+  machine_mode mode = GET_MODE (dest);
+
+  gcc_assert (mode == SImode || mode == DImode);
+
+  /* Check on what type of symbol it is.  */
+  if (GET_CODE (imm) == SYMBOL_REF
+      || GET_CODE (imm) == LABEL_REF
+      || GET_CODE (imm) == CONST)
+    {
+      rtx mem, base, offset;
+      enum aarch64_symbol_type sty;
+
+      /* If we have (const (plus symbol offset)), separate out the offset
+	 before we start classifying the symbol.  */
+      split_const (imm, &base, &offset);
+
+      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
+      switch (sty)
+	{
+	case SYMBOL_FORCE_TO_MEM:
+	  if (offset != const0_rtx
+	      && targetm.cannot_force_const_mem (mode, imm))
+	    {
+	      gcc_assert (can_create_pseudo_p ());
+	      base = aarch64_force_temporary (mode, dest, base);
+	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+	      aarch64_emit_move (dest, base);
+	      return;
+	    }
+	  mem = force_const_mem (ptr_mode, imm);
+	  gcc_assert (mem);
+	  if (mode != ptr_mode)
+	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+	  return;
+
+	case SYMBOL_SMALL_TLSGD:
+	case SYMBOL_SMALL_TLSDESC:
+	case SYMBOL_SMALL_GOTTPREL:
+	case SYMBOL_SMALL_GOT:
+	case SYMBOL_TINY_GOT:
+	  if (offset != const0_rtx)
+	    {
+	      gcc_assert(can_create_pseudo_p ());
+	      base = aarch64_force_temporary (mode, dest, base);
+	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+	      aarch64_emit_move (dest, base);
+	      return;
+	    }
+	  /* FALLTHRU */
+
+	case SYMBOL_SMALL_TPREL:
+	case SYMBOL_SMALL_ABSOLUTE:
+	case SYMBOL_TINY_ABSOLUTE:
+	  aarch64_load_symref_appropriately (dest, imm, sty);
+	  return;
+
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  if (!CONST_INT_P (imm))
+    {
+      if (GET_CODE (imm) == HIGH)
+	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
+      else
+	{
+	  rtx mem = force_const_mem (mode, imm);
+	  gcc_assert (mem);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}
+
+      return;
     }
+
+  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
 }
 
 static bool
@@ -5240,9 +5303,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
	     proportionally expensive to the number of instructions
	     required to build that constant.  This is true whether we
	     are compiling for SPEED or otherwise.  */
-	  *cost = COSTS_N_INSNS (aarch64_build_constant (0,
-							 INTVAL (x),
-							 false));
+	  *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
+				  (NULL_RTX, x, false, mode));
	}
       return true;
 
@@ -8041,7 +8103,7 @@ aarch64_mov_operand_p (rtx x,
       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
     return true;
 
-  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
+  if (CONST_INT_P (x))
     return true;
 
   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 17570ba..142e8b1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -746,17 +746,20 @@
     if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
       operands[1] = force_reg (<MODE>mode, operands[1]);
 
-    if (CONSTANT_P (operands[1]))
-      {
-	aarch64_expand_mov_immediate (operands[0], operands[1]);
-	DONE;
-      }
+    /* FIXME: RR we still need to fix up what we are doing with
+       symbol_refs and other types of constants.  */
+    if (CONSTANT_P (operands[1])
+	&& !CONST_INT_P (operands[1]))
+      {
+	aarch64_expand_mov_immediate (operands[0], operands[1]);
+	DONE;
+      }
   "
 )
 
-(define_insn "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r ,*w, r,*w")
-	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
+(define_insn_and_split "*movsi_aarch64"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -764,6 +767,7 @@
    mov\\t%w0, %w1
    mov\\t%w0, %w1
    mov\\t%w0, %1
+   #
    ldr\\t%w0, %1
    ldr\\t%s0, %1
    str\\t%w1, %0
@@ -773,14 +777,20 @@
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)"
+  [(const_int 0)]
+  "{
+      aarch64_expand_mov_immediate (operands[0], operands[1]);
+      DONE;
+   }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
 )
 
-(define_insn "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
+(define_insn_and_split "*movdi_aarch64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w, r,*w,w")
+	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
   "@
@@ -788,6 +798,7 @@
    mov\\t%0, %x1
    mov\\t%x0, %1
    mov\\t%x0, %1
+   #
    ldr\\t%x0, %1
    ldr\\t%d0, %1
    str\\t%x1, %0
@@ -798,10 +809,16 @@
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    movi\\t%d0, %1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+  "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))"
+  [(const_int 0)]
+  "{
+      aarch64_expand_mov_immediate (operands[0], operands[1]);
+      DONE;
+   }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
 
 (define_insn "insv_imm"
-- 
2.7.4
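
Editor's note: the heart of this patch is that aarch64_internal_mov_immediate now serves two callers with one body of logic: the move expander, which wants the instructions emitted, and aarch64_rtx_costs, which only wants to know how many instructions the constant would take. Below is a minimal standalone C sketch of that generate-or-count pattern, not GCC code: internal_mov_immediate and its MOV/MOVK printing are hypothetical stand-ins, and only the 16-bit-chunk fallback (the patch's simple_sequence case) is modelled.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Either print a MOV/MOVK sequence building VAL in x0 or, when
   GENERATE is false, merely return how many instructions it would
   take.  One routine, so costing and expansion can never disagree.  */
static int
internal_mov_immediate (uint64_t val, bool generate)
{
  int num_insns = 1;		/* The initial MOV of the low 16 bits.  */
  if (generate)
    printf ("mov  x0, #0x%" PRIx64 "\n", val & 0xffff);
  for (int shift = 16; shift < 64; shift += 16)
    {
      uint64_t chunk = (val >> shift) & 0xffff;
      if (chunk == 0)
	continue;		/* All-zero chunks need no MOVK.  */
      if (generate)
	printf ("movk x0, #0x%" PRIx64 ", lsl %d\n", chunk, shift);
      num_insns++;
    }
  return num_insns;
}

int
main (void)
{
  uint64_t val = 0x12340000ffffULL;
  /* Costing pass: count only, emit nothing; this corresponds to the
     generate == false call the patched aarch64_rtx_costs makes.  */
  printf ("cost = %d insns\n", internal_mov_immediate (val, false));
  /* Expansion pass: identical logic now emits the sequence.  */
  internal_mov_immediate (val, true);
  return 0;
}

For 0x12340000ffff the sketch reports a cost of 2 (one MOV plus one MOVK) and then prints exactly that two-instruction sequence, which is the property the patch relies on: the cost hook prices a CONST_INT at precisely the length of the sequence the splitter will later generate.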