From d3b735c8fb61172ab23221c7c994c0ac4aefec2d Mon Sep 17 00:00:00 2001 From: rth Date: Mon, 19 Sep 2005 17:20:02 +0000 Subject: [PATCH] * config/ia64/ia64.c (ia64_expand_widen_sum): New. (ia64_expand_dot_prod_v8qi): New. * config/ia64/ia64-protos.h: Update. * config/ia64/vect.md (pmpy2_r, pmpy2_l, widen_usumv8qi3, widen_usumv4hi3, widen_ssumv8qi3, widen_ssumv4hi3, udot_prodv8qi, sdot_prodv8qi, sdot_prodv4hi): New. (reduc_splus_v2sf): Rename from reduc_plus_v2sf. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@104426 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 10 ++++ gcc/config/ia64/ia64-protos.h | 2 + gcc/config/ia64/ia64.c | 107 ++++++++++++++++++++++++++++++++++++++ gcc/config/ia64/vect.md | 118 ++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 232 insertions(+), 5 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index addadd5..6969211 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,15 @@ 2005-09-19 Richard Henderson + * config/ia64/ia64.c (ia64_expand_widen_sum): New. + (ia64_expand_dot_prod_v8qi): New. + * config/ia64/ia64-protos.h: Update. + * config/ia64/vect.md (pmpy2_r, pmpy2_l, widen_usumv8qi3, + widen_usumv4hi3, widen_ssumv8qi3, widen_ssumv4hi3, udot_prodv8qi, + sdot_prodv8qi, sdot_prodv4hi): New. + (reduc_splus_v2sf): Rename from reduc_plus_v2sf. + +2005-09-19 Richard Henderson + PR 23941 * real.c (exact_real_truncate): Return false if the format cannot represent the number as a normal. 
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index 0c6355a..6de20c8 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -50,6 +50,8 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]); extern rtx ia64_expand_compare (enum rtx_code, enum machine_mode); extern void ia64_expand_vecint_cmov (rtx[]); extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]); +extern void ia64_expand_widen_sum (rtx[], bool); +extern void ia64_expand_dot_prod_v8qi (rtx[], bool); extern void ia64_expand_call (rtx, rtx, rtx, int); extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int); extern void ia64_reload_gp (void); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index f4b8cee..4dafbd2 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -1766,6 +1766,113 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode, return true; } +/* Emit an integral vector widening sum operation. OPERANDS[1] (V8QI or V4HI) is unpacked with the unpack_l and unpack_h generators into two registers of the mode of OPERANDS[0], paired with zeros if UNSIGNEDP and otherwise with the result of an LT-zero compare of OPERANDS[1]; OPERANDS[0] is then set to the sum of those two registers and OPERANDS[2]. */ + +void +ia64_expand_widen_sum (rtx operands[3], bool unsignedp) +{ + rtx l, h, x, s; + enum machine_mode wmode, mode; + rtx (*unpack_l) (rtx, rtx, rtx); + rtx (*unpack_h) (rtx, rtx, rtx); + rtx (*plus) (rtx, rtx, rtx); + + wmode = GET_MODE (operands[0]); + mode = GET_MODE (operands[1]); + + switch (mode) + { + case V8QImode: + unpack_l = gen_unpack1_l; + unpack_h = gen_unpack1_h; + plus = gen_addv4hi3; + break; + case V4HImode: + unpack_l = gen_unpack2_l; + unpack_h = gen_unpack2_h; + plus = gen_addv2si3; + break; + default: + gcc_unreachable (); + } + + /* Fill in x with the sign extension of each element in op1. 
*/ + if (unsignedp) + x = CONST0_RTX (mode); + else + { + bool neg; + + x = gen_reg_rtx (mode); + + neg = ia64_expand_vecint_compare (LT, mode, x, operands[1], + CONST0_RTX (mode)); + gcc_assert (!neg); + } + + l = gen_reg_rtx (wmode); + h = gen_reg_rtx (wmode); + s = gen_reg_rtx (wmode); + + emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x)); + emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x)); + emit_insn (plus (s, l, operands[2])); + emit_insn (plus (operands[0], h, s)); +} + +/* Emit a signed or unsigned V8QI dot product operation. OPERANDS[1] and OPERANDS[2] are each unpacked into two V4HI registers (paired with zeros if UNSIGNEDP, otherwise with the result of an LT-zero compare), multiplied with pmpy2_r and pmpy2_l into four V2SI products, and OPERANDS[0] is set to the sum of those products and OPERANDS[3]. */ + +void +ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp) +{ + rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3; + + /* Fill in x1 and x2 with the sign extension of each element. */ + if (unsignedp) + x1 = x2 = CONST0_RTX (V8QImode); + else + { + bool neg; + + x1 = gen_reg_rtx (V8QImode); + x2 = gen_reg_rtx (V8QImode); + + neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1], + CONST0_RTX (V8QImode)); + gcc_assert (!neg); + neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2], + CONST0_RTX (V8QImode)); + gcc_assert (!neg); + } + + l1 = gen_reg_rtx (V4HImode); + l2 = gen_reg_rtx (V4HImode); + h1 = gen_reg_rtx (V4HImode); + h2 = gen_reg_rtx (V4HImode); + + emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1)); + emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2)); + emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1)); + emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2)); + + p1 = gen_reg_rtx (V2SImode); + p2 = gen_reg_rtx (V2SImode); + p3 = gen_reg_rtx (V2SImode); + p4 = gen_reg_rtx (V2SImode); + emit_insn (gen_pmpy2_r (p1, l1, l2)); + emit_insn (gen_pmpy2_l (p2, l1, l2)); + emit_insn (gen_pmpy2_r (p3, h1, h2)); + emit_insn (gen_pmpy2_l (p4, h1, h2)); + + s1 = gen_reg_rtx (V2SImode); + s2 = gen_reg_rtx (V2SImode); + s3 = gen_reg_rtx (V2SImode); + emit_insn (gen_addv2si3 (s1, p1, p2)); + emit_insn 
(gen_addv2si3 (s2, p3, p4)); + emit_insn (gen_addv2si3 (s3, s1, operands[3])); + emit_insn (gen_addv2si3 (operands[0], s2, s3)); +} + /* Emit the appropriate sequence for a call. */ void diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md index 88e9eb2..66295ad 100644 --- a/gcc/config/ia64/vect.md +++ b/gcc/config/ia64/vect.md @@ -212,6 +212,36 @@ "pmpyshr2 %0 = %1, %2, 0" [(set_attr "itanium_class" "mmmul")]) +(define_insn "pmpy2_r" + [(set (match_operand:V2SI 0 "gr_register_operand" "=r") + (mult:V2SI + (vec_select:V2SI + (sign_extend:V4SI + (match_operand:V4HI 1 "gr_register_operand" "r")) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI + (sign_extend:V4SI + (match_operand:V4HI 2 "gr_register_operand" "r")) + (parallel [(const_int 0) (const_int 2)]))))] + "" + "pmpy2.r %0 = %1, %2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "pmpy2_l" + [(set (match_operand:V2SI 0 "gr_register_operand" "=r") + (mult:V2SI + (vec_select:V2SI + (sign_extend:V4SI + (match_operand:V4HI 1 "gr_register_operand" "r")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI + (sign_extend:V4SI + (match_operand:V4HI 2 "gr_register_operand" "r")) + (parallel [(const_int 1) (const_int 3)]))))] + "" + "pmpy2.l %0 = %1, %2" + [(set_attr "itanium_class" "mmshf")]) + (define_expand "umax3" [(set (match_operand:VECINT 0 "gr_register_operand" "") (umax:VECINT (match_operand:VECINT 1 "gr_register_operand" "") @@ -331,6 +361,88 @@ operands[1] = gen_lowpart (DImode, operands[1]); }) +(define_expand "widen_usumv8qi3" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + ia64_expand_widen_sum (operands, true); + DONE; +}) + +(define_expand "widen_usumv4hi3" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V2SI 2 "gr_register_operand" "")] + "" +{ + ia64_expand_widen_sum (operands, 
true); + DONE; +}) + +(define_expand "widen_ssumv8qi3" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + ia64_expand_widen_sum (operands, false); + DONE; +}) + +(define_expand "widen_ssumv4hi3" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V2SI 2 "gr_register_operand" "")] + "" +{ + ia64_expand_widen_sum (operands, false); + DONE; +}) + +(define_expand "udot_prodv8qi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "") + (match_operand:V2SI 3 "gr_register_operand" "")] + "" +{ + ia64_expand_dot_prod_v8qi (operands, true); + DONE; +}) + +(define_expand "sdot_prodv8qi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "") + (match_operand:V2SI 3 "gr_register_operand" "")] + "" +{ + ia64_expand_dot_prod_v8qi (operands, false); + DONE; +}) + +(define_expand "sdot_prodv4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "") + (match_operand:V2SI 3 "gr_register_operand" "")] + "" +{ + rtx l, r, t; + + r = gen_reg_rtx (V2SImode); + l = gen_reg_rtx (V2SImode); + t = gen_reg_rtx (V2SImode); + + emit_insn (gen_pmpy2_r (r, operands[1], operands[2])); + emit_insn (gen_pmpy2_l (l, operands[1], operands[2])); + emit_insn (gen_addv2si3 (t, r, operands[3])); + emit_insn (gen_addv2si3 (operands[0], t, l)); + DONE; +}) + (define_expand "vcond" [(set (match_operand:VECINT 0 "gr_register_operand" "") (if_then_else:VECINT @@ -717,15 +829,11 @@ ;; padd.uus ;; pavg ;; pavgsub -;; pmpy ;; pmpyshr, general form ;; psad ;; pshladd ;; pshradd ;; psub.uus -;; vec_set -;; vec_extract -;; vec_init ;; Floating point vector 
operations @@ -947,7 +1055,7 @@ "fpmin %0 = %1, %2" [(set_attr "itanium_class" "fmisc")]) -(define_expand "reduc_plus_v2sf" +(define_expand "reduc_splus_v2sf" [(match_operand:V2SF 0 "fr_register_operand" "") (match_operand:V2SF 1 "fr_register_operand" "")] "" -- 2.7.4