From 0cbc5fc82ab47014e6c29cb90c79c6133aaa6483 Mon Sep 17 00:00:00 2001 From: sje Date: Fri, 4 Feb 2011 21:46:45 +0000 Subject: [PATCH] 2011-02-04 Richard Henderson Steve Ellcey PR target/46997 * config/ia64/predicates.md (mux1_brcst_element): New. * config/ia64/ia64-protos.h (ia64_unpack_assemble): New. * config/ia64/ia64.c (ia64_unpack_assemble): New. (ia64_unpack_sign): New. (ia64_expand_unpack): Rewrite using new routines. (ia64_expand_widen_sum): Ditto. (ia64_expand_dot_prod_v8qi): Ditto. * config/ia64/vect.md (mulv8qi3): Rewrite to use new routines, add endian check. (pmpy2_even): Rename from pmpy2_r, add endian check. (pmpy2_odd): Rename from pmpy2_l, add endian check. (vec_widen_smult_lo_v4hi): Rewrite using new routines. (vec_widen_smult_hi_v4hi): Ditto. (vec_widen_umult_lo_v4hi): Ditto. (vec_widen_umult_hi_v4hi): Ditto. (mulv2si3): Change endian checks. (sdot_prodv4hi): Rewrite with new calls. (udot_prodv4hi): New. (vec_pack_ssat_v4hi): Add endian check. (vec_pack_usat_v4hi): Ditto. (vec_pack_ssat_v2si): Ditto. (mix1_even): Rename from mix1_r, add endian check. (mix1_odd): Rename from mix1_l, add endian check. (*mux1_rev): Format change. (*mux1_mix): Ditto. (*mux1_shuf): Ditto. (*mux1_alt): Ditto. (*mux1_brcst_v8qi): Use new predicate. (vec_extract_evenv8qi): Remove endian check. (vec_extract_oddv8qi): Ditto. (vec_interleave_lowv4hi): Format change. (vec_interleave_highv4hi): Ditto. (mix2_even): Rename from mix2_r, add endian check. (mix2_odd): Rename from mix2_l, add endian check. (*mux2): Fix mask setting for TARGET_BIG_ENDIAN. (vec_extract_evenodd_helper): Format change. (vec_extract_evenv4hi): Remove endian check. (vec_extract_oddv4hi): Remove endian check. (vec_interleave_lowv2si): Format change. (vec_interleave_highv2si): Format change. (vec_initv2si): Remove endian check. (vecinit_v2si): Add endian check. (reduc_splus_v2sf): Add endian check. (reduc_smax_v2sf): Ditto. (reduc_smin_v2sf): Ditto. (vec_initv2sf): Remove endian check. 
(fpack): Add endian check. (fswap): Add endian check. (vec_interleave_highv2sf): Add endian check. (vec_interleave_lowv2sf): Add endian check. (fmix_lr): Add endian check. (vec_setv2sf): Format change. (*vec_extractv2sf_0_be): Use shift to extract operand. (*vec_extractv2sf_1_be): New. (vec_pack_trunc_v4hi): Add endian check. (vec_pack_trunc_v2si): Format change. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@169840 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 61 +++++ gcc/config/ia64/ia64-protos.h | 2 +- gcc/config/ia64/ia64.c | 184 +++++---------- gcc/config/ia64/predicates.md | 4 + gcc/config/ia64/vect.md | 506 ++++++++++++++++++++++++------------------ 5 files changed, 416 insertions(+), 341 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e94de88..f1cc0ed 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,64 @@ +2011-02-04 Richard Henderson + Steve Ellcey + + PR target/46997 + * config/ia64/predicates.md (mux1_brcst_element): New. + * config/ia64/ia64-protos.h (ia64_unpack_assemble): New. + * config/ia64/ia64.c (ia64_unpack_assemble): New. + (ia64_unpack_sign): New. + (ia64_expand_unpack): Rewrite using new routines. + (ia64_expand_widen_sum): Ditto. + (ia64_expand_dot_prod_v8qi): Ditto. + * config/ia64/vect.md (mulv8qi3): Rewrite to use new + routines, add endian check. + (pmpy2_even): Rename from pmpy2_r, add endian check. + (pmpy2_odd): Rename from pmpy2_l, add endian check. + (vec_widen_smult_lo_v4hi): Rewrite using new routines. + (vec_widen_smult_hi_v4hi): Ditto. + (vec_widen_umult_lo_v4hi): Ditto. + (vec_widen_umult_hi_v4hi): Ditto. + (mulv2si3): Change endian checks. + (sdot_prodv4hi): Rewrite with new calls. + (udot_prodv4hi): New. + (vec_pack_ssat_v4hi): Add endian check. + (vec_pack_usat_v4hi): Ditto. + (vec_pack_ssat_v2si): Ditto. + (mix1_even): Rename from mix1_r, add endian check. + (mix1_odd): Rename from mix1_l, add endian check. + (*mux1_rev): Format change. + (*mux1_mix): Ditto. + (*mux1_shuf): Ditto. 
+ (*mux1_alt): Ditto. + (*mux1_brcst_v8qi): Use new predicate. + (vec_extract_evenv8qi): Remove endian check. + (vec_extract_oddv8qi): Ditto. + (vec_interleave_lowv4hi): Format change. + (vec_interleave_highv4hi): Ditto. + (mix2_even): Rename from mix2_r, add endian check. + (mix2_odd): Rename from mix2_l, add endian check. + (*mux2): Fix mask setting for TARGET_BIG_ENDIAN. + (vec_extract_evenodd_helper): Format change. + (vec_extract_evenv4hi): Remove endian check. + (vec_extract_oddv4hi): Remove endian check. + (vec_interleave_lowv2si): Format change. + (vec_interleave_highv2si): Format change. + (vec_initv2si): Remove endian check. + (vecinit_v2si): Add endian check. + (reduc_splus_v2sf): Add endian check. + (reduc_smax_v2sf): Ditto. + (reduc_smin_v2sf): Ditto. + (vec_initv2sf): Remove endian check. + (fpack): Add endian check. + (fswap): Add endian check. + (vec_interleave_highv2sf): Add endian check. + (vec_interleave_lowv2sf): Add endian check. + (fmix_lr): Add endian check. + (vec_setv2sf): Format change. + (*vec_extractv2sf_0_be): Use shift to extract operand. + (*vec_extractv2sf_1_be): New. + (vec_pack_trunc_v4hi): Add endian check. + (vec_pack_trunc_v2si): Format change. 
+ 2011-02-04 Jakub Jelinek PR inline-asm/23200 diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index 87e04a2..107a7cc 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -39,9 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]); extern void ia64_expand_compare (rtx *, rtx *, rtx *); extern void ia64_expand_vecint_cmov (rtx[]); extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]); +extern void ia64_unpack_assemble (rtx, rtx, rtx, bool); extern void ia64_expand_unpack (rtx [], bool, bool); extern void ia64_expand_widen_sum (rtx[], bool); -extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool); extern void ia64_expand_dot_prod_v8qi (rtx[], bool); extern void ia64_expand_call (rtx, rtx, rtx, int); extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 1842555..1d3f8b1 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -1972,12 +1972,13 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode, return true; } -/* Emit an integral vector unpack operation. */ +/* The vectors LO and HI each contain N halves of a double-wide vector. + Reassemble either the first N/2 or the second N/2 elements. */ void -ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) +ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp) { - enum machine_mode mode = GET_MODE (operands[1]); + enum machine_mode mode = GET_MODE (lo); rtx (*gen) (rtx, rtx, rtx); rtx x; @@ -1993,110 +1994,66 @@ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) gcc_unreachable (); } - /* Fill in x with the sign extension of each element in op1. 
*/ - if (unsignedp) - x = CONST0_RTX (mode); - else - { - bool neg; - - x = gen_reg_rtx (mode); - - neg = ia64_expand_vecint_compare (LT, mode, x, operands[1], - CONST0_RTX (mode)); - gcc_assert (!neg); - } - + x = gen_lowpart (mode, out); if (TARGET_BIG_ENDIAN) - emit_insn (gen (gen_lowpart (mode, operands[0]), x, operands[1])); + x = gen (x, hi, lo); else - emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x)); + x = gen (x, lo, hi); + emit_insn (x); } -/* Emit an integral vector widening sum operations. */ +/* Return a vector of the sign-extension of VEC. */ -void -ia64_expand_widen_sum (rtx operands[3], bool unsignedp) +static rtx +ia64_unpack_sign (rtx vec, bool unsignedp) { - rtx l, h, x, s; - enum machine_mode wmode, mode; - rtx (*unpack_l) (rtx, rtx, rtx); - rtx (*unpack_h) (rtx, rtx, rtx); - rtx (*plus) (rtx, rtx, rtx); - - wmode = GET_MODE (operands[0]); - mode = GET_MODE (operands[1]); + enum machine_mode mode = GET_MODE (vec); + rtx zero = CONST0_RTX (mode); - switch (mode) - { - case V8QImode: - unpack_l = gen_vec_interleave_lowv8qi; - unpack_h = gen_vec_interleave_highv8qi; - plus = gen_addv4hi3; - break; - case V4HImode: - unpack_l = gen_vec_interleave_lowv4hi; - unpack_h = gen_vec_interleave_highv4hi; - plus = gen_addv2si3; - break; - default: - gcc_unreachable (); - } - - /* Fill in x with the sign extension of each element in op1. */ if (unsignedp) - x = CONST0_RTX (mode); + return zero; else { + rtx sign = gen_reg_rtx (mode); bool neg; - x = gen_reg_rtx (mode); - - neg = ia64_expand_vecint_compare (LT, mode, x, operands[1], - CONST0_RTX (mode)); + neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero); gcc_assert (!neg); + + return sign; } +} - l = gen_reg_rtx (wmode); - h = gen_reg_rtx (wmode); - s = gen_reg_rtx (wmode); +/* Emit an integral vector unpack operation. 
*/ - if (TARGET_BIG_ENDIAN) - { - emit_insn (unpack_l (gen_lowpart (mode, l), x, operands[1])); - emit_insn (unpack_h (gen_lowpart (mode, h), x, operands[1])); - } - else - { - emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x)); - emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x)); - } - emit_insn (plus (s, l, operands[2])); - emit_insn (plus (operands[0], h, s)); +void +ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) +{ + rtx sign = ia64_unpack_sign (operands[1], unsignedp); + ia64_unpack_assemble (operands[0], operands[1], sign, highp); } +/* Emit an integral vector widening sum operations. */ + void -ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp) +ia64_expand_widen_sum (rtx operands[3], bool unsignedp) { - rtx l = gen_reg_rtx (V4HImode); - rtx h = gen_reg_rtx (V4HImode); - rtx (*mulhigh)(rtx, rtx, rtx, rtx); - rtx (*interl)(rtx, rtx, rtx); + enum machine_mode wmode; + rtx l, h, t, sign; - emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + sign = ia64_unpack_sign (operands[1], unsignedp); + + wmode = GET_MODE (operands[0]); + l = gen_reg_rtx (wmode); + h = gen_reg_rtx (wmode); - /* For signed, pmpy2.r would appear to more closely match this operation. - However, the vectorizer is more likely to use the LO and HI patterns - in pairs. At which point, with this formulation, the first two insns - of each can be CSEd. */ - mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2; - emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (l, operands[1], sign, false); + ia64_unpack_assemble (h, operands[1], sign, true); - interl = highp ? 
gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi; - if (TARGET_BIG_ENDIAN) - emit_insn (interl (gen_lowpart (V4HImode, operands[0]), h, l)); - else - emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h)); + t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT); + t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT); + if (t != operands[0]) + emit_move_insn (operands[0], t); } /* Emit a signed or unsigned V8QI dot product operation. */ @@ -2104,62 +2061,31 @@ ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp) void ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp) { - rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3; + rtx op1, op2, sn1, sn2, l1, l2, h1, h2; + rtx p1, p2, p3, p4, s1, s2, s3; - /* Fill in x1 and x2 with the sign extension of each element. */ - if (unsignedp) - x1 = x2 = CONST0_RTX (V8QImode); - else - { - bool neg; - - x1 = gen_reg_rtx (V8QImode); - x2 = gen_reg_rtx (V8QImode); - - neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1], - CONST0_RTX (V8QImode)); - gcc_assert (!neg); - neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2], - CONST0_RTX (V8QImode)); - gcc_assert (!neg); - } + op1 = operands[1]; + op2 = operands[2]; + sn1 = ia64_unpack_sign (op1, unsignedp); + sn2 = ia64_unpack_sign (op2, unsignedp); l1 = gen_reg_rtx (V4HImode); l2 = gen_reg_rtx (V4HImode); h1 = gen_reg_rtx (V4HImode); h2 = gen_reg_rtx (V4HImode); - - if (TARGET_BIG_ENDIAN) - { - emit_insn (gen_vec_interleave_lowv8qi - (gen_lowpart (V8QImode, l1), x1, operands[1])); - emit_insn (gen_vec_interleave_lowv8qi - (gen_lowpart (V8QImode, l2), x2, operands[2])); - emit_insn (gen_vec_interleave_highv8qi - (gen_lowpart (V8QImode, h1), x1, operands[1])); - emit_insn (gen_vec_interleave_highv8qi - (gen_lowpart (V8QImode, h2), x2, operands[2])); - } - else - { - emit_insn (gen_vec_interleave_lowv8qi - (gen_lowpart (V8QImode, l1), operands[1], x1)); - emit_insn 
(gen_vec_interleave_lowv8qi - (gen_lowpart (V8QImode, l2), operands[2], x2)); - emit_insn (gen_vec_interleave_highv8qi - (gen_lowpart (V8QImode, h1), operands[1], x1)); - emit_insn (gen_vec_interleave_highv8qi - (gen_lowpart (V8QImode, h2), operands[2], x2)); - } + ia64_unpack_assemble (l1, op1, sn1, false); + ia64_unpack_assemble (l2, op2, sn2, false); + ia64_unpack_assemble (h1, op1, sn1, true); + ia64_unpack_assemble (h2, op2, sn2, true); p1 = gen_reg_rtx (V2SImode); p2 = gen_reg_rtx (V2SImode); p3 = gen_reg_rtx (V2SImode); p4 = gen_reg_rtx (V2SImode); - emit_insn (gen_pmpy2_r (p1, l1, l2)); - emit_insn (gen_pmpy2_l (p2, l1, l2)); - emit_insn (gen_pmpy2_r (p3, h1, h2)); - emit_insn (gen_pmpy2_l (p4, h1, h2)); + emit_insn (gen_pmpy2_even (p1, l1, l2)); + emit_insn (gen_pmpy2_even (p2, h1, h2)); + emit_insn (gen_pmpy2_odd (p3, l1, l2)); + emit_insn (gen_pmpy2_odd (p4, h1, h2)); s1 = gen_reg_rtx (V2SImode); s2 = gen_reg_rtx (V2SImode); diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md index e06c521..6622b20 100644 --- a/gcc/config/ia64/predicates.md +++ b/gcc/config/ia64/predicates.md @@ -624,3 +624,7 @@ return REG_P (op) && REG_POINTER (op); }) +;; True if this is the right-most vector element; for mux1 @brcst. +(define_predicate "mux1_brcst_element" + (and (match_code "const_int") + (match_test "INTVAL (op) == (TARGET_BIG_ENDIAN ? 7 : 0)"))) diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md index 4e5977c..ad36721 100644 --- a/gcc/config/ia64/vect.md +++ b/gcc/config/ia64/vect.md @@ -172,35 +172,14 @@ (match_operand:V8QI 2 "gr_register_operand" "r")))] "" { - rtx r1, l1, r2, l2, rm, lm; - - r1 = gen_reg_rtx (V4HImode); - l1 = gen_reg_rtx (V4HImode); - r2 = gen_reg_rtx (V4HImode); - l2 = gen_reg_rtx (V4HImode); - - /* Zero-extend the QImode elements into two words of HImode elements - by interleaving them with zero bytes. 
*/ - emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1), - operands[1], CONST0_RTX (V8QImode))); - emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2), - operands[2], CONST0_RTX (V8QImode))); - emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1), - operands[1], CONST0_RTX (V8QImode))); - emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2), - operands[2], CONST0_RTX (V8QImode))); - - /* Multiply. */ - rm = gen_reg_rtx (V4HImode); - lm = gen_reg_rtx (V4HImode); - emit_insn (gen_mulv4hi3 (rm, r1, r2)); - emit_insn (gen_mulv4hi3 (lm, l1, l2)); - - /* Zap the high order bytes of the HImode elements by overwriting those - in one part with the low order bytes of the other. */ - emit_insn (gen_mix1_r (operands[0], - gen_lowpart (V8QImode, rm), - gen_lowpart (V8QImode, lm))); + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_widen_umult_lo_v8qi (l, operands[1], operands[2])); + emit_insn (gen_vec_widen_umult_hi_v8qi (h, operands[1], operands[2])); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_pack_trunc_v4hi (operands[0], h, l)); + else + emit_insn (gen_vec_pack_trunc_v4hi (operands[0], l, h)); DONE; }) @@ -296,7 +275,7 @@ "pmpyshr2.u %0 = %1, %2, %3" [(set_attr "itanium_class" "mmmul")]) -(define_insn "pmpy2_r" +(define_insn "pmpy2_even" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (mult:V2SI (vec_select:V2SI @@ -308,10 +287,16 @@ (match_operand:V4HI 2 "gr_register_operand" "r")) (parallel [(const_int 0) (const_int 2)]))))] "" - "pmpy2.r %0 = %1, %2" +{ + /* Recall that vector elements are numbered in memory order. 
*/ + if (TARGET_BIG_ENDIAN) + return "%,pmpy2.l %0 = %1, %2"; + else + return "%,pmpy2.r %0 = %1, %2"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "pmpy2_l" +(define_insn "pmpy2_odd" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (mult:V2SI (vec_select:V2SI @@ -323,7 +308,13 @@ (match_operand:V4HI 2 "gr_register_operand" "r")) (parallel [(const_int 1) (const_int 3)]))))] "" - "pmpy2.l %0 = %1, %2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,pmpy2.r %0 = %1, %2"; + else + return "%,pmpy2.l %0 = %1, %2"; +} [(set_attr "itanium_class" "mmshf")]) (define_expand "vec_widen_smult_lo_v4hi" @@ -332,7 +323,11 @@ (match_operand:V4HI 2 "gr_register_operand" "")] "" { - ia64_expand_widen_mul_v4hi (operands, false, false); + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (operands[0], l, h, false); DONE; }) @@ -342,7 +337,11 @@ (match_operand:V4HI 2 "gr_register_operand" "")] "" { - ia64_expand_widen_mul_v4hi (operands, false, true); + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (operands[0], l, h, true); DONE; }) @@ -352,7 +351,11 @@ (match_operand:V4HI 2 "gr_register_operand" "")] "" { - ia64_expand_widen_mul_v4hi (operands, true, false); + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (operands[0], l, h, false); DONE; }) @@ -362,7 +365,11 @@ (match_operand:V4HI 2 "gr_register_operand" "")] "" { - ia64_expand_widen_mul_v4hi (operands, true, true); + rtx l = gen_reg_rtx (V4HImode); + 
rtx h = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16))); + ia64_unpack_assemble (operands[0], l, h, true); DONE; }) @@ -390,12 +397,8 @@ of the full 32-bit product. */ /* T0 = CDBA. */ - if (TARGET_BIG_ENDIAN) - x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, GEN_INT (3), const2_rtx, - const1_rtx, const0_rtx)); - else - x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx, - GEN_INT (3), const2_rtx)); + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx, + GEN_INT (3), const2_rtx)); x = gen_rtx_VEC_SELECT (V4HImode, op1h, x); emit_insn (gen_rtx_SET (VOIDmode, t0, x)); @@ -409,15 +412,28 @@ emit_insn (gen_mulv4hi3 (t3, t0, op2h)); /* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW. */ - emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t4), t1, t2)); + x = gen_lowpart (V4HImode, t4); + if (TARGET_BIG_ENDIAN) + x = gen_mix2_odd (x, t2, t1); + else + x = gen_mix2_even (x, t1, t2); + emit_insn (x); /* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16. */ - emit_insn (gen_mix2_l (gen_lowpart (V4HImode, t5), - CONST0_RTX (V4HImode), t3)); + x = gen_lowpart (V4HImode, t5); + if (TARGET_BIG_ENDIAN) + x = gen_mix2_even (x, t3, CONST0_RTX (V4HImode)); + else + x = gen_mix2_odd (x, CONST0_RTX (V4HImode), t3); + emit_insn (x); /* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16. 
*/ - emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t6), - CONST0_RTX (V4HImode), t3)); + x = gen_lowpart (V4HImode, t6); + if (TARGET_BIG_ENDIAN) + x = gen_mix2_odd (x, t3, CONST0_RTX (V4HImode)); + else + x = gen_mix2_even (x, CONST0_RTX (V4HImode), t3); + emit_insn (x); emit_insn (gen_addv2si3 (t7, t4, t5)); emit_insn (gen_addv2si3 (operands[0], t6, t7)); @@ -612,16 +628,36 @@ (match_operand:V2SI 3 "gr_register_operand" "")] "" { - rtx l, r, t; + rtx e, o, t; + + e = gen_reg_rtx (V2SImode); + o = gen_reg_rtx (V2SImode); + t = gen_reg_rtx (V2SImode); + + emit_insn (gen_pmpy2_even (e, operands[1], operands[2])); + emit_insn (gen_pmpy2_odd (o, operands[1], operands[2])); + emit_insn (gen_addv2si3 (t, e, operands[3])); + emit_insn (gen_addv2si3 (operands[0], t, o)); + DONE; +}) + +(define_expand "udot_prodv4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "") + (match_operand:V2SI 3 "gr_register_operand" "")] + "" +{ + rtx l, h, t; - r = gen_reg_rtx (V2SImode); l = gen_reg_rtx (V2SImode); + h = gen_reg_rtx (V2SImode); t = gen_reg_rtx (V2SImode); - emit_insn (gen_pmpy2_r (r, operands[1], operands[2])); - emit_insn (gen_pmpy2_l (l, operands[1], operands[2])); - emit_insn (gen_addv2si3 (t, r, operands[3])); - emit_insn (gen_addv2si3 (operands[0], t, l)); + emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2])); + emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2])); + emit_insn (gen_addv2si3 (t, l, operands[3])); + emit_insn (gen_addv2si3 (operands[0], t, h)); DONE; }) @@ -677,7 +713,13 @@ (ss_truncate:V4QI (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))] "" - "pack2.sss %0 = %r1, %r2" +{ + /* Recall that vector elements are numbered in memory order. 
*/ + if (TARGET_BIG_ENDIAN) + return "%,pack2.sss %0 = %r2, %r1"; + else + return "%,pack2.sss %0 = %r1, %r2"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "vec_pack_usat_v4hi" @@ -688,7 +730,13 @@ (us_truncate:V4QI (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))] "" - "pack2.uss %0 = %r1, %r2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,pack2.uss %0 = %r2, %r1"; + else + return "%,pack2.uss %0 = %r1, %r2"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "vec_pack_ssat_v2si" @@ -699,7 +747,13 @@ (ss_truncate:V2HI (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))] "" - "pack4.sss %0 = %r1, %r2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,pack4.sss %0 = %r2, %r1"; + else + return "%,pack4.sss %0 = %r1, %r2"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "vec_interleave_lowv8qi" @@ -742,54 +796,54 @@ } [(set_attr "itanium_class" "mmshf")]) -(define_insn "mix1_r" +(define_insn "mix1_even" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 8) - (const_int 2) - (const_int 10) - (const_int 4) - (const_int 12) - (const_int 6) - (const_int 14)])))] - "" - "mix1.r %0 = %r2, %r1" + (parallel [(const_int 0) (const_int 8) + (const_int 2) (const_int 10) + (const_int 4) (const_int 12) + (const_int 6) (const_int 14)])))] + "" +{ + /* Recall that vector elements are numbered in memory order. 
*/ + if (TARGET_BIG_ENDIAN) + return "%,mix1.l %0 = %r1, %r2"; + else + return "%,mix1.r %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "mix1_l" +(define_insn "mix1_odd" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 1) - (const_int 9) - (const_int 3) - (const_int 11) - (const_int 5) - (const_int 13) - (const_int 7) - (const_int 15)])))] - "" - "mix1.l %0 = %r2, %r1" + (parallel [(const_int 1) (const_int 9) + (const_int 3) (const_int 11) + (const_int 5) (const_int 13) + (const_int 7) (const_int 15)])))] + "" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,mix1.r %0 = %r1, %r2"; + else + return "%,mix1.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "*mux1_rev" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 7) - (const_int 6) - (const_int 5) - (const_int 4) - (const_int 3) - (const_int 2) - (const_int 1) - (const_int 0)])))] + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "" "mux1 %0 = %1, @rev" [(set_attr "itanium_class" "mmshf")]) @@ -798,14 +852,10 @@ [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 0) - (const_int 4) - (const_int 2) - (const_int 6) - (const_int 1) - (const_int 5) - (const_int 3) - (const_int 7)])))] + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6) + (const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] "" "mux1 %0 = %1, @mix" [(set_attr "itanium_class" "mmshf")]) @@ -814,14 +864,10 @@ [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI 
(match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])))] + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5) + (const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "" "mux1 %0 = %1, @shuf" [(set_attr "itanium_class" "mmshf")]) @@ -830,14 +876,10 @@ [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))] + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 1) (const_int 3) + (const_int 5) (const_int 7)])))] "" "mux1 %0 = %1, @alt" [(set_attr "itanium_class" "mmshf")]) @@ -846,14 +888,14 @@ [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") - (parallel [(const_int 0) - (const_int 0) - (const_int 0) - (const_int 0) - (const_int 0) - (const_int 0) - (const_int 0) - (const_int 0)])))] + (parallel [(match_operand 2 "mux1_brcst_element" "") + (match_dup 2) + (match_dup 2) + (match_dup 2) + (match_dup 2) + (match_dup 2) + (match_dup 2) + (match_dup 2)])))] "" "mux1 %0 = %1, @brcst" [(set_attr "itanium_class" "mmshf")]) @@ -873,10 +915,7 @@ "" { rtx temp = gen_reg_rtx (V8QImode); - if (TARGET_BIG_ENDIAN) - emit_insn (gen_mix1_l (temp, operands[2], operands[1])); - else - emit_insn (gen_mix1_r (temp, operands[1], operands[2])); + emit_insn (gen_mix1_even (temp, operands[1], operands[2])); emit_insn (gen_mux1_alt (operands[0], temp)); DONE; }) @@ -888,10 +927,7 @@ "" { rtx temp = gen_reg_rtx (V8QImode); - if (TARGET_BIG_ENDIAN) - emit_insn (gen_mix1_r (temp, operands[2], operands[1])); - else - emit_insn (gen_mix1_l (temp, operands[1], operands[2])); + emit_insn (gen_mix1_odd (temp, operands[1], 
operands[2])); emit_insn (gen_mux1_alt (operands[0], temp)); DONE; }) @@ -902,10 +938,8 @@ (vec_concat:V8HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5)])))] + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] "" { /* Recall that vector elements are numbered in memory order. */ @@ -922,10 +956,8 @@ (vec_concat:V8HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])))] + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "" { /* Recall that vector elements are numbered in memory order. */ @@ -936,32 +968,40 @@ } [(set_attr "itanium_class" "mmshf")]) -(define_insn "mix2_r" +(define_insn "mix2_even" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 4) - (const_int 2) - (const_int 6)])))] + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] "" - "mix2.r %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,mix2.l %0 = %r1, %r2"; + else + return "%,mix2.r %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) -(define_insn "mix2_l" +(define_insn "mix2_odd" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 1) - (const_int 5) - (const_int 3) - (const_int 7)])))] + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] "" - "mix2.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. 
*/ + if (TARGET_BIG_ENDIAN) + return "%,mix2.r %0 = %r1, %r2"; + else + return "%,mix2.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) (define_insn "*mux2" @@ -974,17 +1014,17 @@ (match_operand 5 "const_int_2bit_operand" "")])))] "" { - int mask; + int mask = 0; if (TARGET_BIG_ENDIAN) { - mask = INTVAL (operands[2]) << 4; - mask |= INTVAL (operands[3]) << 6; - mask |= INTVAL (operands[4]); - mask |= INTVAL (operands[5]) << 2; + mask |= (3 - INTVAL (operands[2])) << 6; + mask |= (3 - INTVAL (operands[3])) << 4; + mask |= (3 - INTVAL (operands[4])) << 2; + mask |= 3 - INTVAL (operands[5]); } else { - mask = INTVAL (operands[2]); + mask |= INTVAL (operands[2]); mask |= INTVAL (operands[3]) << 2; mask |= INTVAL (operands[4]) << 4; mask |= INTVAL (operands[5]) << 6; @@ -998,10 +1038,8 @@ [(set (match_operand:V4HI 0 "gr_register_operand" "") (vec_select:V4HI (match_operand:V4HI 1 "gr_register_operand" "") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])))] + (parallel [(const_int 0) (const_int 2) + (const_int 1) (const_int 3)])))] "") (define_expand "vec_extract_evenv4hi" @@ -1011,10 +1049,7 @@ "" { rtx temp = gen_reg_rtx (V4HImode); - if (TARGET_BIG_ENDIAN) - emit_insn (gen_mix2_l (temp, operands[1], operands[2])); - else - emit_insn (gen_mix2_r (temp, operands[1], operands[2])); + emit_insn (gen_mix2_even (temp, operands[1], operands[2])); emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp)); DONE; }) @@ -1026,10 +1061,7 @@ "" { rtx temp = gen_reg_rtx (V4HImode); - if (TARGET_BIG_ENDIAN) - emit_insn (gen_mix2_r (temp, operands[1], operands[2])); - else - emit_insn (gen_mix2_l (temp, operands[1], operands[2])); + emit_insn (gen_mix2_odd (temp, operands[1], operands[2])); emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp)); DONE; }) @@ -1042,15 +1074,13 @@ "mux2 %0 = %1, 0" [(set_attr "itanium_class" "mmshf")]) -;; Note that mix4.r performs the exact same operation. 
(define_insn "vec_interleave_lowv2si" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (vec_select:V2SI (vec_concat:V4SI (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU") (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 2)])))] + (parallel [(const_int 0) (const_int 2)])))] "" { /* Recall that vector elements are numbered in memory order. */ @@ -1061,15 +1091,13 @@ } [(set_attr "itanium_class" "mmshf")]) -;; Note that mix4.l performs the exact same operation. (define_insn "vec_interleave_highv2si" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (vec_select:V2SI (vec_concat:V4SI (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU") (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 1) - (const_int 3)])))] + (parallel [(const_int 1) (const_int 3)])))] "" { /* Recall that vector elements are numbered in memory order. */ @@ -1088,7 +1116,7 @@ { if (TARGET_BIG_ENDIAN) emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1], - operands[2])); + operands[2])); else emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1], operands[2])); @@ -1103,7 +1131,7 @@ { if (TARGET_BIG_ENDIAN) emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1], - operands[2])); + operands[2])); else emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1], operands[2])); @@ -1131,10 +1159,7 @@ if (!gr_reg_or_0_operand (op2, SImode)) op2 = force_reg (SImode, op2); - if (TARGET_BIG_ENDIAN) - x = gen_rtx_VEC_CONCAT (V2SImode, op2, op1); - else - x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2); + x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2); emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); DONE; }) @@ -1145,7 +1170,13 @@ (match_operand:SI 1 "gr_reg_or_0_operand" "rO") (match_operand:SI 2 "gr_reg_or_0_operand" "rO")))] "" - "unpack4.l %0 = %r2, %r1" +{ + /* Recall that vector elements are numbered in memory order. 
*/ + if (TARGET_BIG_ENDIAN) + return "%,unpack4.l %0 = %r1, %r2"; + else + return "%,unpack4.l %0 = %r2, %r1"; +} [(set_attr "itanium_class" "mmshf")]) ;; Missing operations @@ -1315,7 +1346,10 @@ "" { rtx tmp = gen_reg_rtx (V2SFmode); - emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1])); + else + emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); emit_insn (gen_addv2sf3 (operands[0], operands[1], tmp)); DONE; }) @@ -1326,7 +1360,10 @@ "" { rtx tmp = gen_reg_rtx (V2SFmode); - emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1])); + else + emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); emit_insn (gen_smaxv2sf3 (operands[0], operands[1], tmp)); DONE; }) @@ -1337,7 +1374,10 @@ "" { rtx tmp = gen_reg_rtx (V2SFmode); - emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1])); + else + emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode))); emit_insn (gen_sminv2sf3 (operands[0], operands[1], tmp)); DONE; }) @@ -1403,10 +1443,7 @@ if (!fr_reg_or_fp01_operand (op2, SFmode)) op2 = force_reg (SFmode, op2); - if (TARGET_BIG_ENDIAN) - emit_insn (gen_fpack (operands[0], op2, op1)); - else - emit_insn (gen_fpack (operands[0], op1, op2)); + emit_insn (gen_fpack (operands[0], op1, op2)); DONE; }) @@ -1416,7 +1453,13 @@ (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))] "" - "fpack %0 = %F2, %F1" +{ + /* Recall that vector elements are numbered in memory order. 
*/ + if (TARGET_BIG_ENDIAN) + return "%,fpack %0 = %F1, %F2"; + else + return "%,fpack %0 = %F2, %F1"; +} [(set_attr "itanium_class" "fmisc")]) (define_insn "fswap" @@ -1427,7 +1470,13 @@ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")) (parallel [(const_int 1) (const_int 2)])))] "" - "fswap %0 = %F1, %F2" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,fswap %0 = %F2, %F1"; + else + return "%,fswap %0 = %F1, %F2"; +} [(set_attr "itanium_class" "fmisc")]) (define_insn "vec_interleave_highv2sf" @@ -1438,7 +1487,13 @@ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")) (parallel [(const_int 1) (const_int 3)])))] "" - "fmix.l %0 = %F2, %F1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,fmix.r %0 = %F1, %F2"; + else + return "%,fmix.l %0 = %F2, %F1"; +} [(set_attr "itanium_class" "fmisc")]) (define_insn "vec_interleave_lowv2sf" @@ -1449,7 +1504,13 @@ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")) (parallel [(const_int 0) (const_int 2)])))] "" - "fmix.r %0 = %F2, %F1" +{ + /* Recall that vector elements are numbered in memory order. */ + if (TARGET_BIG_ENDIAN) + return "%,fmix.l %0 = %F1, %F2"; + else + return "%,fmix.r %0 = %F2, %F1"; +} [(set_attr "itanium_class" "fmisc")]) (define_insn "fmix_lr" @@ -1460,7 +1521,13 @@ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")) (parallel [(const_int 0) (const_int 3)])))] "" - "fmix.lr %0 = %F2, %F1" +{ + /* Recall that vector elements are numbered in memory order. 
*/ + if (TARGET_BIG_ENDIAN) + return "%,fmix.lr %0 = %F1, %F2"; + else + return "%,fmix.lr %0 = %F2, %F1"; +} [(set_attr "itanium_class" "fmisc")]) (define_expand "vec_extract_evenv2sf" @@ -1485,23 +1552,24 @@ DONE; }) - (define_expand "vec_setv2sf" [(match_operand:V2SF 0 "fr_register_operand" "") (match_operand:SF 1 "fr_register_operand" "") (match_operand 2 "const_int_operand" "")] "" { + rtx op0 = operands[0]; rtx tmp = gen_reg_rtx (V2SFmode); + emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode))); switch (INTVAL (operands[2])) { case 0: - emit_insn (gen_fmix_lr (operands[0], tmp, operands[0])); + emit_insn (gen_fmix_lr (op0, tmp, op0)); break; case 1: - emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[0], tmp)); + emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp)); break; default: gcc_unreachable (); @@ -1528,8 +1596,8 @@ }) (define_insn_and_split "*vec_extractv2sf_0_be" - [(set (match_operand:SF 0 "register_operand" "=r,f") - (unspec:SF [(match_operand:V2SF 1 "register_operand" "rf,r") + [(set (match_operand:SF 0 "register_operand" "=rf,r") + (unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "m,r") (const_int 0)] UNSPEC_VECT_EXTR))] "TARGET_BIG_ENDIAN" @@ -1537,31 +1605,44 @@ "reload_completed" [(set (match_dup 0) (match_dup 1))] { - if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1]))) - operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0])); + if (MEM_P (operands[1])) + operands[1] = adjust_address (operands[1], SFmode, 0); else - operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1])); + { + emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32))); + DONE; + } }) -(define_insn_and_split "*vec_extractv2sf_1" +(define_insn_and_split "*vec_extractv2sf_1_le" [(set (match_operand:SF 0 "register_operand" "=r") (unspec:SF [(match_operand:V2SF 1 "register_operand" "r") (const_int 1)] UNSPEC_VECT_EXTR))] - "" + "!TARGET_BIG_ENDIAN" "#" - "reload_completed" + "&& reload_completed" [(const_int 0)] { operands[0] = 
gen_rtx_REG (DImode, REGNO (operands[0])); operands[1] = gen_rtx_REG (DImode, REGNO (operands[1])); - if (TARGET_BIG_ENDIAN) - emit_move_insn (operands[0], operands[1]); - else - emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32))); + emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32))); DONE; }) +(define_insn_and_split "*vec_extractv2sf_1_be" + [(set (match_operand:SF 0 "register_operand" "=rf") + (unspec:SF [(match_operand:V2SF 1 "register_operand" "r") + (const_int 1)] + UNSPEC_VECT_EXTR))] + "TARGET_BIG_ENDIAN" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1])); +}) + (define_expand "vec_extractv2sf" [(set (match_operand:SF 0 "register_operand" "") (unspec:SF [(match_operand:V2SF 1 "register_operand" "") @@ -1610,11 +1691,14 @@ [(match_operand:V8QI 0 "gr_register_operand" "") (match_operand:V4HI 1 "gr_register_operand" "") (match_operand:V4HI 2 "gr_register_operand" "")] - "!TARGET_BIG_ENDIAN" + "" { - rtx op1 = gen_lowpart(V8QImode, operands[1]); - rtx op2 = gen_lowpart(V8QImode, operands[2]); - emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2)); + rtx op1 = gen_lowpart (V8QImode, operands[1]); + rtx op2 = gen_lowpart (V8QImode, operands[2]); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2)); + else + emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2)); DONE; }) @@ -1624,8 +1708,8 @@ (match_operand:V2SI 2 "gr_register_operand" "")] "" { - rtx op1 = gen_lowpart(V4HImode, operands[1]); - rtx op2 = gen_lowpart(V4HImode, operands[2]); + rtx op1 = gen_lowpart (V4HImode, operands[1]); + rtx op2 = gen_lowpart (V4HImode, operands[2]); if (TARGET_BIG_ENDIAN) emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2)); else -- 2.7.4