From: Richard Henderson
Date: Mon, 30 Nov 2009 17:36:07 +0000 (-0800)
Subject: i386.c (avx_vperm2f128_parallel): New.
X-Git-Tag: upstream/12.2.0~95759
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ca659f6ed947ad43caf21917dca2aced344dc35b;p=platform%2Fupstream%2Fgcc.git

i386.c (avx_vperm2f128_parallel): New.

	* config/i386/i386.c (avx_vperm2f128_parallel): New.
	* config/i386/i386-protos.h: Declare it.
	* config/i386/predicates.md (avx_vperm2f128_v8sf_operand,
	avx_vperm2f128_v8si_operand, avx_vperm2f128_v4df_operand): New.
	* config/i386/sse.md (avx_vperm2f128<mode>3): Change to expander.
	(*avx_vperm2f128_full): Renamed from avx_vperm2f128<mode>3.
	(*avx_vperm2f128_nozero): New.

From-SVN: r154832
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8260bb6..96cacc0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
 2009-11-30 Richard Henderson
 
+	* config/i386/i386.c (avx_vperm2f128_parallel): New.
+	* config/i386/i386-protos.h: Declare it.
+	* config/i386/predicates.md (avx_vperm2f128_v8sf_operand,
+	avx_vperm2f128_v8si_operand, avx_vperm2f128_v4df_operand): New.
+	* config/i386/sse.md (avx_vperm2f128<mode>3): Change to expander.
+	(*avx_vperm2f128_full): Renamed from avx_vperm2f128<mode>3.
+	(*avx_vperm2f128_nozero): New.
+
+2009-11-30 Richard Henderson
+
 	* config/i386/i386-builtin-types.def (V4DF_FTYPE_V4DF_V4DF_V4DI): New.
 	(V8SF_FTYPE_V8SF_V8SF_V8SI): New.
 	* config/i386/i386.c (ix86_vectorize_builtin_vec_perm): Support
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 88acc1f..1e94cde 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -48,6 +48,7 @@ extern bool x86_extended_reg_mentioned_p (rtx);
 extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
 
 extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
+extern int avx_vperm2f128_parallel (rtx par, enum machine_mode mode);
 
 extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
 extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index be8f38b..fa84e32 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -24646,6 +24646,58 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
   /* Make sure success has a non-zero value by adding one.  */
   return mask + 1;
 }
+
+/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
+   the expansion functions to turn the parallel back into a mask.
+   The return value is 0 for no match and the imm8+1 for a match.  */
+
+int
+avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
+{
+  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
+  unsigned mask = 0;
+  unsigned char ipar[8];
+
+  if (XVECLEN (par, 0) != (int) nelt)
+    return 0;
+
+  /* Validate that all of the elements are constants, and not totally
+     out of range.  Copy the data into an integral array to make the
+     subsequent checks easier.  */
+  for (i = 0; i < nelt; ++i)
+    {
+      rtx er = XVECEXP (par, 0, i);
+      unsigned HOST_WIDE_INT ei;
+
+      if (!CONST_INT_P (er))
+        return 0;
+      ei = INTVAL (er);
+      if (ei >= 2 * nelt)
+        return 0;
+      ipar[i] = ei;
+    }
+
+  /* Validate that the halves of the permute are halves.  */
+  for (i = 0; i < nelt2 - 1; ++i)
+    if (ipar[i] + 1 != ipar[i + 1])
+      return 0;
+  for (i = nelt2; i < nelt - 1; ++i)
+    if (ipar[i] + 1 != ipar[i + 1])
+      return 0;
+
+  /* Reconstruct the mask.  */
+  for (i = 0; i < 2; ++i)
+    {
+      unsigned e = ipar[i * nelt2];
+      if (e % nelt2)
+        return 0;
+      e /= nelt2;
+      mask |= e << (i * 4);
+    }
+
+  /* Make sure success has a non-zero value by adding one.  */
+  return mask + 1;
+}
 
 
 /* Store OPERAND to the memory after reload is completed.  This means
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 7200a6a..50a68d9 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1227,3 +1227,17 @@
 (define_predicate "avx_vpermilp_v2df_operand"
   (and (match_code "parallel")
        (match_test "avx_vpermilp_parallel (op, V2DFmode)")))
+
+;; Return 1 if OP is a parallel for a vperm2f128 permute.
+
+(define_predicate "avx_vperm2f128_v8sf_operand"
+  (and (match_code "parallel")
+       (match_test "avx_vperm2f128_parallel (op, V8SFmode)")))
+
+(define_predicate "avx_vperm2f128_v8si_operand"
+  (and (match_code "parallel")
+       (match_test "avx_vperm2f128_parallel (op, V8SImode)")))
+
+(define_predicate "avx_vperm2f128_v4df_operand"
+  (and (match_code "parallel")
+       (match_test "avx_vperm2f128_parallel (op, V4DFmode)")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 27c7a8b..b73820b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11917,7 +11917,44 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "")])
 
-(define_insn "avx_vperm2f128<mode>3"
+(define_expand "avx_vperm2f128<mode>3"
+  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
+        (unspec:AVX256MODE2P
+          [(match_operand:AVX256MODE2P 1 "register_operand" "")
+           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
+           (match_operand:SI 3 "const_0_to_255_operand" "")]
+          UNSPEC_VPERMIL2F128))]
+  "TARGET_AVX"
+{
+  int mask = INTVAL (operands[3]);
+  if ((mask & 0x88) == 0)
+    {
+      rtx perm[<ssescalarnum>], t1, t2;
+      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
+
+      base = (mask & 3) * nelt2;
+      for (i = 0; i < nelt2; ++i)
+        perm[i] = GEN_INT (base + i);
+
+      base = ((mask >> 4) & 3) * nelt2;
+      for (i = 0; i < nelt2; ++i)
+        perm[i + nelt2] = GEN_INT (base + i);
+
+      t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
+                               operands[1], operands[2]);
+      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
+      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
+      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
+      emit_insn (t2);
+      DONE;
+    }
+})
+
+;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
+;; means that in order to represent this properly in rtl we'd have to
+;; nest *another* vec_concat with a zero operand and do the select from
+;; a 4x wide vector.  That doesn't seem very nice.
+(define_insn "*avx_vperm2f128_full"
   [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
         (unspec:AVX256MODE2P
           [(match_operand:AVX256MODE2P 1 "register_operand" "x")
@@ -11932,6 +11969,26 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
+(define_insn "*avx_vperm2f128_nozero"
+  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
+        (vec_select:AVX256MODE2P
+          (vec_concat:<ssedoublesizemode>
+            (match_operand:AVX256MODE2P 1 "register_operand" "x")
+            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
+          (match_parallel 3 "avx_vperm2f128_<mode>_operand"
+            [(match_operand 4 "const_int_operand" "")])))]
+  "TARGET_AVX"
+{
+  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
+  operands[3] = GEN_INT (mask);
+  return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
 (define_insn "avx_vbroadcasts"
   [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
         (vec_concat:AVXMODEF4P