From: rth Date: Mon, 23 Nov 2009 00:01:47 +0000 (+0000) Subject: * i386.c (avx_vpermilp_parallel): New function. X-Git-Tag: upstream/4.9.2~32472 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=af6d29272faacdee49679a61b88ad8915959947f;p=platform%2Fupstream%2Flinaro-gcc.git * i386.c (avx_vpermilp_parallel): New function. * i386-protos.h: Declare it. * predicates.md (avx_vpermilp_v8sf_operand, avx_vpermilp_v4df_operand, avx_vpermilp_v4sf_operand, avx_vpermilp_v2df_operand): New. * sse.md (AVXMODEFDP, AVXMODEFSP): New iterators. (ssescalarnum, ssedoublesizemode): Add AVX modes. (vpermilbits): Remove. (avx_vpermil): Change insns to expanders. (*avx_vpermil): New. Use vec_select. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@154427 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a2bc0f8..9a26636 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2009-11-22 Richard Henderson + + * i386.c (avx_vpermilp_parallel): New function. + * i386-protos.h: Declare it. + * predicates.md (avx_vpermilp_v8sf_operand, avx_vpermilp_v4df_operand, + avx_vpermilp_v4sf_operand, avx_vpermilp_v2df_operand): New. + * sse.md (AVXMODEFDP, AVXMODEFSP): New iterators. + (ssescalarnum, ssedoublesizemode): Add AVX modes. + (vpermilbits): Remove. + (avx_vpermil): Change insns to expanders. + (*avx_vpermil): New. Use vec_select. 
+ 2009-11-22 Richard Earnshaw * opts.c (decode_options): Don't enable flag_schedule_insns diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 58da131..d36b269 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -47,6 +47,8 @@ extern bool x86_extended_QIreg_mentioned_p (rtx); extern bool x86_extended_reg_mentioned_p (rtx); extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx); +extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode); + extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx); extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx); extern int ix86_expand_strlen (rtx, rtx, rtx, rtx); @@ -275,3 +277,4 @@ extern int asm_preferred_eh_data_format (int, int); #ifdef HAVE_ATTR_cpu extern enum attr_cpu ix86_schedule; #endif + diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b85ef63..b149924 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -24527,6 +24527,82 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn, return NULL_TREE; } } + +/* Helper for avx_vpermilps256_operand et al. This is also used by + the expansion functions to turn the parallel back into a mask. + The return value is 0 for no match and the imm8+1 for a match. */ + +int +avx_vpermilp_parallel (rtx par, enum machine_mode mode) +{ + unsigned i, nelt = GET_MODE_NUNITS (mode); + unsigned mask = 0; + unsigned char ipar[8]; + + if (XVECLEN (par, 0) != nelt) + return 0; + + /* Validate that all of the elements are constants, and not totally + out of range. Copy the data into an integral array to make the + subsequent checks easier. */ + for (i = 0; i < nelt; ++i) + { + rtx er = XVECEXP (par, 0, i); + unsigned HOST_WIDE_INT ei; + + if (!CONST_INT_P (er)) + return 0; + ei = INTVAL (er); + if (ei >= nelt) + return 0; + ipar[i] = ei; + } + + switch (mode) + { + case V4DFmode: + /* In the 256-bit DFmode case, we can only move elements within + a 128-bit lane. 
*/ + for (i = 0; i < 2; ++i) + { + if (ipar[i] >= 2) + return 0; + mask |= ipar[i] << i; + } + for (i = 2; i < 4; ++i) + { + if (ipar[i] < 2) + return 0; + mask |= (ipar[i] - 2) << i; + } + break; + + case V8SFmode: + /* In the 256-bit SFmode case, we have full freedom of movement + within the low 128-bit lane, but the high 128-bit lane must + mirror the exact same pattern. */ + for (i = 0; i < 4; ++i) + if (ipar[i] + 4 != ipar[i + 4]) + return 0; + nelt = 4; + /* FALLTHRU */ + + case V2DFmode: + case V4SFmode: + /* In the 128-bit case, we've full freedom in the placement of + the elements from the source operand. */ + for (i = 0; i < nelt; ++i) + mask |= ipar[i] << (i * (nelt / 2)); + break; + + default: + gcc_unreachable (); + } + + /* Make sure success has a non-zero value by adding one. */ + return mask + 1; +} + /* Store OPERAND to the memory after reload is completed. This means that we can't easily use assign_stack_local. */ diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 31a2037..1e8200a 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1148,3 +1148,24 @@ return 1; }) + +;; Return 1 if OP is a parallel for a vpermilp[ds] permute. +;; ??? It would be much easier if the PARALLEL for a VEC_SELECT +;; had a mode, but it doesn't. So we have 4 copies and install +;; the mode by hand. 
+ +(define_predicate "avx_vpermilp_v8sf_operand" + (and (match_code "parallel") + (match_test "avx_vpermilp_parallel (op, V8SFmode)"))) + +(define_predicate "avx_vpermilp_v4df_operand" + (and (match_code "parallel") + (match_test "avx_vpermilp_parallel (op, V4DFmode)"))) + +(define_predicate "avx_vpermilp_v4sf_operand" + (and (match_code "parallel") + (match_test "avx_vpermilp_parallel (op, V4SFmode)"))) + +(define_predicate "avx_vpermilp_v2df_operand" + (and (match_code "parallel") + (match_test "avx_vpermilp_parallel (op, V2DFmode)"))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7144799..79adc77 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -58,6 +58,8 @@ (define_mode_iterator AVX256MODE8P [V8SI V8SF]) (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF]) (define_mode_iterator AVXMODEF4P [V4SF V4DF]) +(define_mode_iterator AVXMODEFDP [V2DF V4DF]) +(define_mode_iterator AVXMODEFSP [V4SF V8SF]) (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF]) (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI]) @@ -95,13 +97,16 @@ (V4SI "SI") (V2DI "DI")]) ;; Mapping of vector modes to a vector mode of double size -(define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI") - (V4SF "V8SF") (V4SI "V8SI")]) +(define_mode_attr ssedoublesizemode + [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI") + (V8HI "V16HI") (V16QI "V32QI") + (V4DF "V8DF") (V8SF "V16SF") + (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")]) ;; Number of scalar elements in each vector type -(define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2") - (V16QI "16") (V8HI "8") - (V4SI "4") (V2DI "2")]) +(define_mode_attr ssescalarnum + [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2") + (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")]) ;; Mapping for AVX (define_mode_attr avxvecmode @@ -134,10 +139,6 @@ (define_mode_attr blendbits [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) -;; Mapping of immediate bits 
for vpermil instructions
-(define_mode_attr vpermilbits
-  [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
-
 ;; Mapping of immediate bits for pinsr instructions
 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
@@ -12088,14 +12089,66 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
-(define_insn "avx_vpermil<mode>"
+(define_expand "avx_vpermil<mode>"
+  [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
+	(vec_select:AVXMODEFDP
+	  (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
+	  (match_operand:SI 2 "const_0_to_255_operand" "")))]
+  "TARGET_AVX"
+{
+  int mask = INTVAL (operands[2]);
+  rtx perm[4];
+
+  perm[0] = GEN_INT (mask & 1);
+  perm[1] = GEN_INT ((mask >> 1) & 1);
+  if (<MODE>mode == V4DFmode)
+    {
+      perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
+      perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
+    }
+
+  operands[2]
+    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
+})
+
+(define_expand "avx_vpermil<mode>"
+  [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
+	(vec_select:AVXMODEFSP
+	  (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
+	  (match_operand:SI 2 "const_0_to_255_operand" "")))]
+  "TARGET_AVX"
+{
+  int mask = INTVAL (operands[2]);
+  rtx perm[8];
+
+  perm[0] = GEN_INT (mask & 3);
+  perm[1] = GEN_INT ((mask >> 2) & 3);
+  perm[2] = GEN_INT ((mask >> 4) & 3);
+  perm[3] = GEN_INT ((mask >> 6) & 3);
+  if (<MODE>mode == V8SFmode)
+    {
+      perm[4] = GEN_INT ((mask & 3) + 4);
+      perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
+      perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
+      perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
+    }
+
+  operands[2]
+    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
+})
+
+(define_insn "*avx_vpermilp<mode>"
   [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
-	(unspec:AVXMODEF2P
-	  [(match_operand:AVXMODEF2P 1 "register_operand" "xm")
-	   (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
-	  UNSPEC_VPERMIL))]
+	(vec_select:AVXMODEF2P
+	  (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
+	  (match_parallel 2 "avx_vpermilp_<mode>_operand"
+	    [(match_operand 3 "const_int_operand" "")])))]
   "TARGET_AVX"
-  "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+{
+  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
+  operands[2] = GEN_INT (mask);
+  return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
+}
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")