From 541e350d51ba4016a752fce095ec62f89f2fbc1a Mon Sep 17 00:00:00 2001 From: jakub Date: Tue, 20 Mar 2012 16:25:54 +0000 Subject: [PATCH] PR target/52607 * config/i386/i386.md ("isa" attribute): Add avx2 and noavx2. ("enabled" attribute): Handle avx2 and noavx2 isas. * config/i386/sse.md (avx2_vec_dupv8sf_1, avx2_pbroadcast<mode>_1): New insns. (vec_dup<mode>): Add avx2 =x,x alternative. (vec_dup<mode> splitter): Don't split if TARGET_AVX2. (*avx_vperm_broadcast_<mode>): Don't split V4DFmode if TARGET_AVX2. For TARGET_AVX2, V8SFmode and elt == 0 split into vbroadcastss. * config/i386/i386.c (expand_vec_perm_pshufb): Emit also vpermps for V8SFmode. (expand_vec_perm_1): For broadcasts, use avx2_pbroadcast<mode>_1 if possible, handle also V8SFmode. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@185577 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 16 ++++++++++++++++ gcc/config/i386/i386.c | 31 +++++++++++++++++++++---------- gcc/config/i386/i386.md | 4 +++- gcc/config/i386/sse.md | 42 ++++++++++++++++++++++++++++++++++++++---- 4 files changed, 78 insertions(+), 15 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0269a51..bc9a5b8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2012-03-20 Jakub Jelinek + + PR target/52607 + * config/i386/i386.md ("isa" attribute): Add avx2 and noavx2. + ("enabled" attribute): Handle avx2 and noavx2 isas. + * config/i386/sse.md (avx2_vec_dupv8sf_1, avx2_pbroadcast<mode>_1): + New insns. + (vec_dup<mode>): Add avx2 =x,x alternative. + (vec_dup<mode> splitter): Don't split if TARGET_AVX2. + (*avx_vperm_broadcast_<mode>): Don't split V4DFmode if TARGET_AVX2. + For TARGET_AVX2, V8SFmode and elt == 0 split into vbroadcastss. + * config/i386/i386.c (expand_vec_perm_pshufb): Emit also vpermps + for V8SFmode. + (expand_vec_perm_1): For broadcasts, use avx2_pbroadcast<mode>_1 + if possible, handle also V8SFmode. + 2012-03-20 Richard Earnshaw * arm/predicates.md (zero_operand, reg_or_zero_operand): New predicates. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2410236..30ed616 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -35836,7 +35836,7 @@ valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d) } /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D - in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */ + in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */ static bool expand_vec_perm_pshufb (struct expand_vec_perm_d *d) @@ -35910,6 +35910,9 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) if (valid_perm_using_mode_p (V8SImode, d)) vmode = V8SImode; } + /* Or if vpermps can be used. */ + else if (d->vmode == V8SFmode) + vmode = V8SImode; if (vmode == V32QImode) { @@ -35952,6 +35955,12 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm)); vperm = force_reg (vmode, vperm); + if (vmode == V8SImode && d->vmode == V8SFmode) + { + vmode = V8SFmode; + vperm = gen_lowpart (vmode, vperm); + } + target = gen_lowpart (vmode, d->target); op0 = gen_lowpart (vmode, d->op0); if (d->op0 == d->op1) @@ -35960,6 +35969,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); else if (vmode == V32QImode) emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); + else if (vmode == V8SFmode) + emit_insn (gen_avx2_permvarv8sf (target, vperm, op0)); else emit_insn (gen_avx2_permvarv8si (target, vperm, op0)); } @@ -36008,20 +36019,17 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) else if (broadcast_perm && TARGET_AVX2) { /* Use vpbroadcast{b,w,d}. 
*/ - rtx op = d->op0, (*gen) (rtx, rtx) = NULL; + rtx (*gen) (rtx, rtx) = NULL; switch (d->vmode) { case V32QImode: - op = gen_lowpart (V16QImode, op); - gen = gen_avx2_pbroadcastv32qi; + gen = gen_avx2_pbroadcastv32qi_1; break; case V16HImode: - op = gen_lowpart (V8HImode, op); - gen = gen_avx2_pbroadcastv16hi; + gen = gen_avx2_pbroadcastv16hi_1; break; case V8SImode: - op = gen_lowpart (V4SImode, op); - gen = gen_avx2_pbroadcastv8si; + gen = gen_avx2_pbroadcastv8si_1; break; case V16QImode: gen = gen_avx2_pbroadcastv16qi; @@ -36029,13 +36037,16 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) case V8HImode: gen = gen_avx2_pbroadcastv8hi; break; + case V8SFmode: + gen = gen_avx2_vec_dupv8sf_1; + break; /* For other modes prefer other shuffles this function creates. */ default: break; } if (gen != NULL) { if (!d->testing_p) - emit_insn (gen (d->target, op)); + emit_insn (gen (d->target, d->op0)); return true; } } @@ -36103,7 +36114,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) return true; /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128, - vpshufb, vpermd or vpermq variable permutation. */ + vpshufb, vpermd, vpermps or vpermq variable permutation. */ if (expand_vec_perm_pshufb (d)) return true; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4bcb7d2..2d20a52 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -639,7 +639,7 @@ (define_attr "movu" "0,1" (const_string "0")) ;; Used to control the "enabled" attribute on a per-instruction basis. 
-(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,bmi2" +(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,avx2,noavx2,bmi2" (const_string "base")) (define_attr "enabled" "" @@ -652,6 +652,8 @@ (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX") (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX") (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX") + (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2") + (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2") (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2") ] (const_int 1))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 6b79a08..9a4728c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3808,6 +3808,18 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) +(define_insn "avx2_vec_dupv8sf_1" + [(set (match_operand:V8SF 0 "register_operand" "=x") + (vec_duplicate:V8SF + (vec_select:SF + (match_operand:V8SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_AVX2" + "vbroadcastss\t{%x1, %0|%0, %x1}" + [(set_attr "type" "sselog1") + (set_attr "prefix" "vex") + (set_attr "mode" "V8SF")]) + (define_insn "vec_dupv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") (vec_duplicate:V4SF @@ -11876,6 +11888,19 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) +(define_insn "avx2_pbroadcast_1" + [(set (match_operand:VI_256 0 "register_operand" "=x") + (vec_duplicate:VI_256 + (vec_select: + (match_operand:VI_256 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_AVX2" + "vpbroadcast\t{%x1, %0|%0, %x1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + (define_insn "avx2_permvarv8si" [(set (match_operand:V8SI 0 "register_operand" "=x") (unspec:V8SI @@ -11967,16 +11992,18 @@ [V8SI V8SF V4DI V4DF]) (define_insn "vec_dup" - [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x") + [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" 
"=x,x,x") (vec_duplicate:AVX_VEC_DUP_MODE - (match_operand: 1 "nonimmediate_operand" "m,?x")))] + (match_operand: 1 "nonimmediate_operand" "m,x,?x")))] "TARGET_AVX" "@ vbroadcast\t{%1, %0|%0, %1} + vbroadcast\t{%x1, %0|%0, %x1} #" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") + (set_attr "isa" "*,avx2,noavx2") (set_attr "mode" "V8SF")]) (define_insn "avx2_vbroadcasti128_" @@ -11995,7 +12022,7 @@ [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand") (vec_duplicate:AVX_VEC_DUP_MODE (match_operand: 1 "register_operand")))] - "TARGET_AVX && reload_completed" + "TARGET_AVX && !TARGET_AVX2 && reload_completed" [(set (match_dup 2) (vec_duplicate: (match_dup 1))) (set (match_dup 0) @@ -12057,7 +12084,7 @@ [(match_operand 3 "const_int_operand" "C,n,n")])))] "TARGET_AVX" "#" - "&& reload_completed" + "&& reload_completed && (mode != V4DFmode || !TARGET_AVX2)" [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))] { rtx op0 = operands[0], op1 = operands[1]; @@ -12067,6 +12094,13 @@ { int mask; + if (TARGET_AVX2 && elt == 0) + { + emit_insn (gen_vec_dup (op0, gen_lowpart (mode, + op1))); + DONE; + } + /* Shuffle element we care about into all elements of the 128-bit lane. The other lane gets shuffled too, but we don't care. */ if (mode == V4DFmode) -- 2.7.4