From 33eb364e159226c13a421320d67c1f8e1a8a15ac Mon Sep 17 00:00:00 2001
From: yroux
Date: Sun, 11 Jan 2015 18:36:42 +0000
Subject: [PATCH] gcc/

2015-01-11  Yvan Roux

	Backport from trunk r217362, r217546.
	2014-11-14  Ramana Radhakrishnan

	PR target/63724
	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
	numerical immediate handling to...
	(aarch64_internal_mov_immediate): ...this. New.
	(aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
	(aarch64_mov_operand_p): Relax predicate.
	* config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
	(*movsi_aarch64): Turn into define_insn_and_split and add new
	alternative for 'n'.
	(*movdi_aarch64): Likewise.

	2014-11-11  James Greenhalgh

	* config/aarch64/aarch64-simd.md
	(aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize.
	(aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we
	are punning between float vectors and integer vectors.

gcc/testsuite
2015-01-11  Yvan Roux

	Backport from trunk r217362.
	2014-11-11  James Greenhalgh

	* gcc.target/aarch64/vbslq_f64_1.c: New.
	* gcc.target/aarch64/vbslq_f64_2.c: Likewise.
	* gcc.target/aarch64/vbslq_u64_1.c: Likewise.
	* gcc.target/aarch64/vbslq_u64_2.c: Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@219433 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.linaro                           |  23 ++
 gcc/config/aarch64/aarch64-simd.md             |  32 ++-
 gcc/config/aarch64/aarch64.c                   | 364 +++++++++++++++----------
 gcc/config/aarch64/aarch64.md                  |  49 ++--
 gcc/testsuite/ChangeLog.linaro                 |  10 +
 gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c |  21 ++
 gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c |  24 ++
 gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c |  17 ++
 gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c |  22 ++
 9 files changed, 385 insertions(+), 177 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c

diff --git a/gcc/ChangeLog.linaro b/gcc/ChangeLog.linaro
index ed15c9d..e9778eb 100644
--- a/gcc/ChangeLog.linaro
+++ b/gcc/ChangeLog.linaro
@@ -1,3 +1,26 @@
+2015-01-11  Yvan Roux
+
+	Backport from trunk r217362, r217546.
+	2014-11-14  Ramana Radhakrishnan
+
+	PR target/63724
+	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
+	numerical immediate handling to...
+	(aarch64_internal_mov_immediate): ...this. New.
+	(aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
+	(aarch64_mov_operand_p): Relax predicate.
+	* config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
+	(*movsi_aarch64): Turn into define_insn_and_split and add new
+	alternative for 'n'.
+	(*movdi_aarch64): Likewise.
+
+	2014-11-11  James Greenhalgh
+
+	* config/aarch64/aarch64-simd.md
+	(aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize.
+	(aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we
+	are punning between float vectors and integer vectors.
+
 2014-12-11  Yvan Roux
 
 	* LINARO-VERSION: Bump version.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 51324e0..566f3db 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1870,15 +1870,15 @@
 ;; bif op0, op1, mask
 
 (define_insn "aarch64_simd_bsl<mode>_internal"
-  [(set (match_operand:VALLDIF 0 "register_operand"             "=w,w,w")
-	(ior:VALLDIF
-	   (and:VALLDIF
-	     (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
-	     (match_operand:VALLDIF 2 "register_operand"        " w,w,0"))
-	   (and:VALLDIF
+  [(set (match_operand:VSDQ_I_DI 0 "register_operand"           "=w,w,w")
+	(ior:VSDQ_I_DI
+	   (and:VSDQ_I_DI
 	     (not:<V_cmp_result>
-		(match_dup:<V_cmp_result> 1))
-	     (match_operand:VALLDIF 3 "register_operand"        " w,0,w"))
+		(match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w"))
+	     (match_operand:VSDQ_I_DI 3 "register_operand"      " w,0,w"))
+	   (and:VSDQ_I_DI
+	     (match_dup:<V_cmp_result> 1)
+	     (match_operand:VSDQ_I_DI 2 "register_operand"      " w,w,0"))
 	))]
   "TARGET_SIMD"
   "@
@@ -1896,9 +1896,21 @@
   "TARGET_SIMD"
 {
   /* We can't alias operands together if they have different modes.  */
+  rtx tmp = operands[0];
+  if (FLOAT_MODE_P (<MODE>mode))
+    {
+      operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
+      operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
+      tmp = gen_reg_rtx (<V_cmp_result>mode);
+    }
   operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
-  emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (operands[0], operands[1],
-							   operands[2], operands[3]));
+  emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
+							   operands[1],
+							   operands[2],
+							   operands[3]));
+  if (tmp != operands[0])
+    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+
   DONE;
 })
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 04b3670..aa78638 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1027,10 +1027,10 @@ aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT off
   return plus_constant (mode, reg, offset);
 }
 
-void
-aarch64_expand_mov_immediate (rtx dest, rtx imm)
+static int
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
+				machine_mode mode)
 {
-  enum machine_mode mode = GET_MODE (dest);
   unsigned HOST_WIDE_INT mask;
   int i;
   bool first;
@@ -1038,85 +1038,14 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   bool subtargets;
   rtx subtarget;
   int one_match, zero_match, first_not_ffff_match;
-
-  gcc_assert (mode == SImode || mode == DImode);
-
-  /* Check on what type of symbol it is.  */
-  if (GET_CODE (imm) == SYMBOL_REF
-      || GET_CODE (imm) == LABEL_REF
-      || GET_CODE (imm) == CONST)
-    {
-      rtx mem, base, offset;
-      enum aarch64_symbol_type sty;
-
-      /* If we have (const (plus symbol offset)), separate out the offset
-	 before we start classifying the symbol.  */
-      split_const (imm, &base, &offset);
-
-      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
-      switch (sty)
-	{
-	case SYMBOL_FORCE_TO_MEM:
-	  if (offset != const0_rtx
-	      && targetm.cannot_force_const_mem (mode, imm))
-	    {
-	      gcc_assert (can_create_pseudo_p ());
-	      base = aarch64_force_temporary (mode, dest, base);
-	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-	      aarch64_emit_move (dest, base);
-	      return;
-	    }
-	  mem = force_const_mem (ptr_mode, imm);
-	  gcc_assert (mem);
-	  if (mode != ptr_mode)
-	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-	  return;
-
-	case SYMBOL_SMALL_TLSGD:
-	case SYMBOL_SMALL_TLSDESC:
-	case SYMBOL_SMALL_GOTTPREL:
-	case SYMBOL_SMALL_GOT:
-	case SYMBOL_TINY_GOT:
-	  if (offset != const0_rtx)
-	    {
-	      gcc_assert(can_create_pseudo_p ());
-	      base = aarch64_force_temporary (mode, dest, base);
-	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-	      aarch64_emit_move (dest, base);
-	      return;
-	    }
-	  /* FALLTHRU */
-
-	case SYMBOL_SMALL_TPREL:
-	case SYMBOL_SMALL_ABSOLUTE:
-	case SYMBOL_TINY_ABSOLUTE:
-	  aarch64_load_symref_appropriately (dest, imm, sty);
-	  return;
-
-	default:
-	  gcc_unreachable ();
-	}
-    }
+  int num_insns = 0;
 
   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
     {
-      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      return;
-    }
-
-  if (!CONST_INT_P (imm))
-    {
-      if (GET_CODE (imm) == HIGH)
+      if (generate)
 	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      else
-	{
-	  rtx mem = force_const_mem (mode, imm);
-	  gcc_assert (mem);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-	}
-
-      return;
+      num_insns++;
+      return num_insns;
     }
 
   if (mode == SImode)
@@ -1124,10 +1053,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
       /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
-      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-      return;
+      if (generate)
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, dest,
+				  GEN_INT (INTVAL (imm) & 0xffff)));
+	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
+				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
+	}
+      num_insns += 2;
+      return num_insns;
     }
 
   /* Remaining cases are all for DImode.  */
@@ -1157,11 +1091,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       /* Set one of the quarters and then insert back into result.  */
       mask = 0xffffll << first_not_ffff_match;
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
-				 GEN_INT ((val >> first_not_ffff_match)
-					  & 0xffff)));
-      return;
+      if (generate)
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+				     GEN_INT ((val >> first_not_ffff_match)
					      & 0xffff)));
+	}
+      num_insns += 2;
+      return num_insns;
     }
 
   if (zero_match == 2)
@@ -1174,42 +1112,55 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 
       if (aarch64_uimm12_shift (val - (val & mask)))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - (val & mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (val & mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - (val & mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT ((val + comp) & mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - ((val + comp) & mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT ((val + comp) & mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - ((val + comp) & mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT ((val - comp) | ~mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - ((val - comp) | ~mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT ((val - comp) | ~mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - ((val - comp) | ~mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT (val | ~mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - (val | ~mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (val | ~mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - (val | ~mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
     }
 
@@ -1223,23 +1174,31 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT (aarch64_bitmasks[i])));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - aarch64_bitmasks[i])));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (aarch64_bitmasks[i])));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - aarch64_bitmasks[i])));
+	    }
+	  num_insns += 2;
+	  return num_insns;
	}
 
       for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest,
-				      GEN_INT (aarch64_bitmasks[i])));
-	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-					 GEN_INT ((val >> j) & 0xffff)));
-	      return;
+	      if (generate)
+		{
+		  emit_insn (gen_rtx_SET (VOIDmode, dest,
+					  GEN_INT (aarch64_bitmasks[i])));
+		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
+					     GEN_INT ((val >> j) & 0xffff)));
+		}
+	      num_insns += 2;
+	      return num_insns;
	    }
	}
     }
@@ -1254,12 +1213,16 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
-		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-					GEN_INT (aarch64_bitmasks[i])));
-		emit_insn (gen_iordi3 (dest, subtarget,
-				       GEN_INT (aarch64_bitmasks[j])));
-		return;
+		if (generate)
+		  {
+		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
+		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+					    GEN_INT (aarch64_bitmasks[i])));
+		    emit_insn (gen_iordi3 (dest, subtarget,
+					   GEN_INT (aarch64_bitmasks[j])));
+		  }
+		num_insns += 2;
+		return num_insns;
	      }
	}
       else if ((val & aarch64_bitmasks[i]) == val)
@@ -1269,13 +1232,16 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
-
-		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-					GEN_INT (aarch64_bitmasks[j])));
-		emit_insn (gen_anddi3 (dest, subtarget,
-				       GEN_INT (aarch64_bitmasks[i])));
-		return;
+		if (generate)
+		  {
+		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
+		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+					    GEN_INT (aarch64_bitmasks[j])));
+		    emit_insn (gen_anddi3 (dest, subtarget,
+					   GEN_INT (aarch64_bitmasks[i])));
+		  }
+		num_insns += 2;
+		return num_insns;
	      }
	}
     }
@@ -1284,18 +1250,24 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       /* Set either first three quarters or all but the third.  */
       mask = 0xffffll << (16 - first_not_ffff_match);
-      emit_insn (gen_rtx_SET (VOIDmode, dest,
-			      GEN_INT (val | mask | 0xffffffff00000000ull)));
+      if (generate)
+	emit_insn (gen_rtx_SET (VOIDmode, dest,
+				GEN_INT (val | mask | 0xffffffff00000000ull)));
+      num_insns ++;
 
       /* Now insert other two quarters.  */
       for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
	   i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
-	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-				       GEN_INT ((val >> i) & 0xffff)));
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					   GEN_INT ((val >> i) & 0xffff)));
+	      num_insns ++;
+	    }
	}
-      return;
+      return num_insns;
     }
 
  simple_sequence:
@@ -1307,15 +1279,106 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
     {
       if (first)
	{
-	  emit_insn (gen_rtx_SET (VOIDmode, dest,
-				  GEN_INT (val & mask)));
+	  if (generate)
+	    emit_insn (gen_rtx_SET (VOIDmode, dest,
+				    GEN_INT (val & mask)));
+	  num_insns ++;
	  first = false;
	}
       else
-	emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-				   GEN_INT ((val >> i) & 0xffff)));
+	{
+	  if (generate)
+	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+				       GEN_INT ((val >> i) & 0xffff)));
+	  num_insns ++;
+	}
	}
     }
+
+  return num_insns;
+}
+
+
+void
+aarch64_expand_mov_immediate (rtx dest, rtx imm)
+{
+  machine_mode mode = GET_MODE (dest);
+
+  gcc_assert (mode == SImode || mode == DImode);
+
+  /* Check on what type of symbol it is.  */
+  if (GET_CODE (imm) == SYMBOL_REF
+      || GET_CODE (imm) == LABEL_REF
+      || GET_CODE (imm) == CONST)
+    {
+      rtx mem, base, offset;
+      enum aarch64_symbol_type sty;
+
+      /* If we have (const (plus symbol offset)), separate out the offset
+	 before we start classifying the symbol.  */
+      split_const (imm, &base, &offset);
+
+      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
+      switch (sty)
+	{
+	case SYMBOL_FORCE_TO_MEM:
+	  if (offset != const0_rtx
+	      && targetm.cannot_force_const_mem (mode, imm))
+	    {
+	      gcc_assert (can_create_pseudo_p ());
+	      base = aarch64_force_temporary (mode, dest, base);
+	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+	      aarch64_emit_move (dest, base);
+	      return;
+	    }
+	  mem = force_const_mem (ptr_mode, imm);
+	  gcc_assert (mem);
+	  if (mode != ptr_mode)
+	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+	  return;
+
+	case SYMBOL_SMALL_TLSGD:
+	case SYMBOL_SMALL_TLSDESC:
+	case SYMBOL_SMALL_GOTTPREL:
+	case SYMBOL_SMALL_GOT:
+	case SYMBOL_TINY_GOT:
+	  if (offset != const0_rtx)
+	    {
+	      gcc_assert(can_create_pseudo_p ());
+	      base = aarch64_force_temporary (mode, dest, base);
+	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+	      aarch64_emit_move (dest, base);
+	      return;
+	    }
+	  /* FALLTHRU */
+
+	case SYMBOL_SMALL_TPREL:
+	case SYMBOL_SMALL_ABSOLUTE:
+	case SYMBOL_TINY_ABSOLUTE:
+	  aarch64_load_symref_appropriately (dest, imm, sty);
+	  return;
+
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  if (!CONST_INT_P (imm))
+    {
+      if (GET_CODE (imm) == HIGH)
+	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
+      else
+	{
+	  rtx mem = force_const_mem (mode, imm);
+	  gcc_assert (mem);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+	}
+
+      return;
+    }
+
+  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
 }
 
 static bool
@@ -5171,9 +5234,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
	 proportionally expensive to the number of instructions
	 required to build that constant.  This is true whether we
	 are compiling for SPEED or otherwise.  */
-      *cost = COSTS_N_INSNS (aarch64_build_constant (0,
-						     INTVAL (x),
-						     false));
+      *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
+			       (NULL_RTX, x, false, mode));
     }
   return true;
 
@@ -8009,7 +8071,7 @@ aarch64_mov_operand_p (rtx x,
       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
     return true;
 
-  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
+  if (CONST_INT_P (x))
     return true;
 
   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c909466..512646d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -672,17 +672,20 @@
     if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
       operands[1] = force_reg (<MODE>mode, operands[1]);
 
-    if (CONSTANT_P (operands[1]))
-      {
-	aarch64_expand_mov_immediate (operands[0], operands[1]);
-	DONE;
-      }
+    /* FIXME: RR we still need to fix up what we are doing with
+       symbol_refs and other types of constants.  */
+    if (CONSTANT_P (operands[1])
+	&& !CONST_INT_P (operands[1]))
+      {
+	aarch64_expand_mov_immediate (operands[0], operands[1]);
+	DONE;
+      }
   "
 )
 
-(define_insn "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r  ,*w, r,*w")
-	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
+(define_insn_and_split "*movsi_aarch64"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r  ,*w, r,*w")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -690,6 +693,7 @@
    mov\\t%w0, %w1
    mov\\t%w0, %w1
    mov\\t%w0, %1
+   #
    ldr\\t%w0, %1
    ldr\\t%s0, %1
    str\\t%w1, %0
@@ -699,14 +703,20 @@
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)"
+  [(const_int 0)]
+  "{
+     aarch64_expand_mov_immediate (operands[0], operands[1]);
+     DONE;
+  }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
 )
 
-(define_insn "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r,  *w, r,*w,w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
+(define_insn_and_split "*movdi_aarch64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r,  *w, r,*w,w")
+	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
   "(register_operand (operands[0], DImode)
    || aarch64_reg_or_zero (operands[1], DImode))"
   "@
@@ -714,6 +724,7 @@
    mov\\t%0, %x1
    mov\\t%x0, %1
    mov\\t%x0, %1
+   #
    ldr\\t%x0, %1
    ldr\\t%d0, %1
    str\\t%x1, %0
@@ -724,10 +735,16 @@
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    movi\\t%d0, %1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+  "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))"
+  [(const_int 0)]
+  "{
+     aarch64_expand_mov_immediate (operands[0], operands[1]);
+     DONE;
+  }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
 
 (define_insn "insv_imm<mode>"
diff --git a/gcc/testsuite/ChangeLog.linaro b/gcc/testsuite/ChangeLog.linaro
index fe80507..538c082 100644
--- a/gcc/testsuite/ChangeLog.linaro
+++ b/gcc/testsuite/ChangeLog.linaro
@@ -1,3 +1,13 @@
+2015-01-11  Yvan Roux
+
+	Backport from trunk r217362.
+	2014-11-11  James Greenhalgh
+
+	* gcc.target/aarch64/vbslq_f64_1.c: New.
+	* gcc.target/aarch64/vbslq_f64_2.c: Likewise.
+	* gcc.target/aarch64/vbslq_u64_1.c: Likewise.
+	* gcc.target/aarch64/vbslq_u64_2.c: Likewise.
+
 2014-12-11  Yvan Roux
 
	GCC Linaro 4.9-2014.12 released.
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
new file mode 100644
index 0000000..128a1db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
@@ -0,0 +1,21 @@
+/* Test vbslq_f64 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Folds to ret.  */
+
+float32x4_t
+fold_me (float32x4_t a, float32x4_t b)
+{
+  uint32x4_t mask = {-1, -1, -1, -1};
+  return vbslq_f32 (mask, a, b);
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
new file mode 100644
index 0000000..62358bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
@@ -0,0 +1,24 @@
+/* Test vbslq_f64 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Should fold out one half of the BSL, leaving just a BIC.  */
+
+float32x4_t
+half_fold_me (uint32x4_t mask)
+{
+  float32x4_t a = {0.0, 0.0, 0.0, 0.0};
+  float32x4_t b = {2.0, 4.0, 8.0, 16.0};
+  return vbslq_f32 (mask, a, b);
+
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+/* { dg-final { scan-assembler "bic\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
new file mode 100644
index 0000000..7a4892e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
@@ -0,0 +1,17 @@
+/* Test if a BSL-like instruction can be generated from a C idiom.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Folds to BIF.  */
+
+uint32x4_t
+vbslq_dummy_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t mask)
+{
+  return (mask & a) | (~mask & b);
+}
+
+/* { dg-final { scan-assembler-times "bif\\tv" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
new file mode 100644
index 0000000..5b70168
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
@@ -0,0 +1,22 @@
+/* Test vbslq_u64 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+#include <arm_neon.h>
+
+/* Folds to BIC.  */
+
+int32x4_t
+half_fold_int (uint32x4_t mask)
+{
+  int32x4_t a = {0, 0, 0, 0};
+  int32x4_t b = {2, 4, 8, 16};
+  return vbslq_s32 (mask, a, b);
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+/* { dg-final { scan-assembler "bic\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
-- 
2.7.4
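
Note on the PR target/63724 change (everything below the signature line is
commentary and is ignored by git am): aarch64_internal_mov_immediate takes a
'generate' flag so the same walk over the MOVZ/MOVK/ADD/ORR/AND strategies can
either emit the sequence or merely count it, and aarch64_rtx_costs calls it
with generate == false (and NULL_RTX as dest) so a CONST_INT is costed at
exactly the number of instructions the new *movsi/*movdi splitters will later
emit.  The standalone C sketch below illustrates the simplest such count, the
SImode path.  It is not GCC code: si_single_insn_p and count_si_mov_insns are
hypothetical helpers, and si_single_insn_p is a deliberately simplified
stand-in for aarch64_move_imm (bitmask/logical immediates are ignored).

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for aarch64_move_imm on SImode constants: a single
   MOVZ (one 16-bit half all zeros) or MOVN (one half all ones) suffices.
   Real GCC additionally accepts bitmask immediates (ORR with wzr), which
   this sketch ignores.  */
static int
si_single_insn_p (uint32_t val)
{
  return (val & 0xffff0000u) == 0               /* movz w0, #lo16 */
         || (val & 0x0000ffffu) == 0            /* movz w0, #hi16, lsl 16 */
         || (val & 0xffff0000u) == 0xffff0000u  /* movn w0, #imm16 */
         || (val & 0x0000ffffu) == 0x0000ffffu; /* movn w0, #imm16, lsl 16 */
}

/* Mirror of the SImode branch of aarch64_internal_mov_immediate: anything
   that is not a one-instruction immediate is built with MOV of the low
   half plus MOVK of the high half, i.e. exactly two instructions.  */
static int
count_si_mov_insns (uint32_t val)
{
  return si_single_insn_p (val) ? 1 : 2;
}

int
main (void)
{
  uint32_t v = 0x12345678u;
  printf ("mov\tw0, #0x%x\n", v & 0xffffu);       /* low 16 bits */
  printf ("movk\tw0, #0x%x, lsl 16\n", v >> 16);  /* high 16 bits */
  printf ("insns: %d\n", count_si_mov_insns (v)); /* prints 2 */
  return 0;
}

Compiled with any C compiler, this prints the two-instruction MOV/MOVK
sequence for 0x12345678 and a count of 2, matching the "num_insns += 2"
accounting in the SImode branch of the patched function.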
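Note on the vbslq tests: aarch64_simd_bsl<mode>_internal now matches only
integer vector modes, in the canonical form (ior (and (not mask) op3)
(and mask op2)), and float uses are punned to the corresponding integer mode
via gen_lowpart in the expander, with the result moved back afterwards.  The
scalar C model below shows the selection semantics the tests check; it is an
illustration only (bsl_u32 and bsl_f32 are hypothetical helpers, and memcpy
plays the role gen_lowpart plays in the expander).

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Per-bit select, the operation BSL/BIT/BIF implement on each vector lane:
   the result takes bits of A where MASK is 1 and bits of B where it is 0 --
   the exact idiom vbslq_u64_1.c expects to fold to a single BIF.  */
static uint32_t
bsl_u32 (uint32_t mask, uint32_t a, uint32_t b)
{
  return (mask & a) | (~mask & b);
}

/* Float variant: the values are punned to integer bits first, selected,
   then punned back, as the expander now arranges with gen_lowpart.  */
static float
bsl_f32 (uint32_t mask, float a, float b)
{
  uint32_t ia, ib, ir;
  memcpy (&ia, &a, sizeof ia);
  memcpy (&ib, &b, sizeof ib);
  ir = bsl_u32 (mask, ia, ib);
  memcpy (&a, &ir, sizeof a);
  return a;
}

int
main (void)
{
  /* An all-ones mask selects A outright, which is why vbslq_f64_1.c
     expects the whole BSL to fold away; an all-zeros mask selects B.  */
  printf ("%g\n", bsl_f32 (0xffffffffu, 2.0f, 4.0f)); /* prints 2 */
  printf ("%g\n", bsl_f32 (0x00000000u, 2.0f, 4.0f)); /* prints 4 */
  return 0;
}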