From d3b735c8fb61172ab23221c7c994c0ac4aefec2d Mon Sep 17 00:00:00 2001
From: rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Mon, 19 Sep 2005 17:20:02 +0000
Subject: [PATCH]         * config/ia64/ia64.c (ia64_expand_widen_sum): New.   
      (ia64_expand_dot_prod_v8qi): New.         * config/ia64/ia64-protos.h:
 Update.         * config/ia64/vect.md (pmpy2_r, pmpy2_l, widen_usumv8qi3,    
     widen_usumv4hi3, widen_ssumv8qi3, widen_ssumv4hi3, udot_prodv8qi,        
 sdot_prodv8qi, sdot_prodv4hi): New.         (reduc_splus_v2sf): Rename from
 reduc_plus_v2sf.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@104426 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                 |  10 ++++
 gcc/config/ia64/ia64-protos.h |   2 +
 gcc/config/ia64/ia64.c        | 107 ++++++++++++++++++++++++++++++++++++++
 gcc/config/ia64/vect.md       | 118 ++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 232 insertions(+), 5 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index addadd5..6969211 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
 2005-09-19  Richard Henderson  <rth@redhat.com>
 
+	* config/ia64/ia64.c (ia64_expand_widen_sum): New.
+	(ia64_expand_dot_prod_v8qi): New.
+	* config/ia64/ia64-protos.h: Update.
+	* config/ia64/vect.md (pmpy2_r, pmpy2_l, widen_usumv8qi3,
+	widen_usumv4hi3, widen_ssumv8qi3, widen_ssumv4hi3, udot_prodv8qi,
+	sdot_prodv8qi, sdot_prodv4hi): New.
+	(reduc_splus_v2sf): Rename from reduc_plus_v2sf.
+
+2005-09-19  Richard Henderson  <rth@redhat.com>
+
 	PR 23941
 	* real.c (exact_real_truncate): Return false if the format cannot
 	represent the number as a normal.
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index 0c6355a..6de20c8 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -50,6 +50,8 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
 extern rtx ia64_expand_compare (enum rtx_code, enum machine_mode);
 extern void ia64_expand_vecint_cmov (rtx[]);
 extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
+extern void ia64_expand_widen_sum (rtx[], bool);
+extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
 extern void ia64_expand_call (rtx, rtx, rtx, int);
 extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
 extern void ia64_reload_gp (void);
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index f4b8cee..4dafbd2 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -1766,6 +1766,113 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
   return true;
 }
 
+/* Emit an integral vector widening sum operations.  */
+
+void
+ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
+{
+  rtx l, h, x, s;
+  enum machine_mode wmode, mode;
+  rtx (*unpack_l) (rtx, rtx, rtx);
+  rtx (*unpack_h) (rtx, rtx, rtx);
+  rtx (*plus) (rtx, rtx, rtx);
+
+  wmode = GET_MODE (operands[0]);
+  mode = GET_MODE (operands[1]);
+
+  switch (mode)
+    {
+    case V8QImode:
+      unpack_l = gen_unpack1_l;
+      unpack_h = gen_unpack1_h;
+      plus = gen_addv4hi3;
+      break;
+    case V4HImode:
+      unpack_l = gen_unpack2_l;
+      unpack_h = gen_unpack2_h;
+      plus = gen_addv2si3;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Fill in x with the sign extension of each element in op1.  */
+  if (unsignedp)
+    x = CONST0_RTX (mode);
+  else
+    {
+      bool neg;
+
+      x = gen_reg_rtx (mode);
+
+      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
+					CONST0_RTX (mode));
+      gcc_assert (!neg);
+    }
+
+  l = gen_reg_rtx (wmode);
+  h = gen_reg_rtx (wmode);
+  s = gen_reg_rtx (wmode);
+
+  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
+  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
+  emit_insn (plus (s, l, operands[2]));
+  emit_insn (plus (operands[0], h, s));
+}
+
+/* Emit a signed or unsigned V8QI dot product operation.  */
+
+void
+ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
+{
+  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
+
+  /* Fill in x1 and x2 with the sign extension of each element.  */
+  if (unsignedp)
+    x1 = x2 = CONST0_RTX (V8QImode);
+  else
+    {
+      bool neg;
+
+      x1 = gen_reg_rtx (V8QImode);
+      x2 = gen_reg_rtx (V8QImode);
+
+      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
+					CONST0_RTX (V8QImode));
+      gcc_assert (!neg);
+      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
+					CONST0_RTX (V8QImode));
+      gcc_assert (!neg);
+    }
+
+  l1 = gen_reg_rtx (V4HImode);
+  l2 = gen_reg_rtx (V4HImode);
+  h1 = gen_reg_rtx (V4HImode);
+  h2 = gen_reg_rtx (V4HImode);
+
+  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
+  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
+  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
+  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
+
+  p1 = gen_reg_rtx (V2SImode);
+  p2 = gen_reg_rtx (V2SImode);
+  p3 = gen_reg_rtx (V2SImode);
+  p4 = gen_reg_rtx (V2SImode);
+  emit_insn (gen_pmpy2_r (p1, l1, l2));
+  emit_insn (gen_pmpy2_l (p2, l1, l2));
+  emit_insn (gen_pmpy2_r (p3, h1, h2));
+  emit_insn (gen_pmpy2_l (p4, h1, h2));
+
+  s1 = gen_reg_rtx (V2SImode);
+  s2 = gen_reg_rtx (V2SImode);
+  s3 = gen_reg_rtx (V2SImode);
+  emit_insn (gen_addv2si3 (s1, p1, p2));
+  emit_insn (gen_addv2si3 (s2, p3, p4));
+  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
+  emit_insn (gen_addv2si3 (operands[0], s2, s3));
+}
+
 /* Emit the appropriate sequence for a call.  */
 
 void
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
index 88e9eb2..66295ad 100644
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -212,6 +212,36 @@
   "pmpyshr2 %0 = %1, %2, 0"
   [(set_attr "itanium_class" "mmmul")])
 
+(define_insn "pmpy2_r"
+  [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+	(mult:V2SI
+	  (vec_select:V2SI
+	    (sign_extend:V4SI
+	      (match_operand:V4HI 1 "gr_register_operand" "r"))
+	    (parallel [(const_int 0) (const_int 2)]))
+	  (vec_select:V2SI
+	    (sign_extend:V4SI
+	      (match_operand:V4HI 2 "gr_register_operand" "r"))
+	    (parallel [(const_int 0) (const_int 2)]))))]
+  ""
+  "pmpy2.r %0 = %1, %2"
+  [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "pmpy2_l"
+  [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+	(mult:V2SI
+	  (vec_select:V2SI
+	    (sign_extend:V4SI
+	      (match_operand:V4HI 1 "gr_register_operand" "r"))
+	    (parallel [(const_int 1) (const_int 3)]))
+	  (vec_select:V2SI
+	    (sign_extend:V4SI
+	      (match_operand:V4HI 2 "gr_register_operand" "r"))
+	    (parallel [(const_int 1) (const_int 3)]))))]
+  ""
+  "pmpy2.l %0 = %1, %2"
+  [(set_attr "itanium_class" "mmshf")])
+
 (define_expand "umax<mode>3"
   [(set (match_operand:VECINT 0 "gr_register_operand" "")
 	(umax:VECINT (match_operand:VECINT 1 "gr_register_operand" "")
@@ -331,6 +361,88 @@
   operands[1] = gen_lowpart (DImode, operands[1]);
 })
 
+(define_expand "widen_usumv8qi3"
+  [(match_operand:V4HI 0 "gr_register_operand" "")
+   (match_operand:V8QI 1 "gr_register_operand" "")
+   (match_operand:V4HI 2 "gr_register_operand" "")]
+  ""
+{
+  ia64_expand_widen_sum (operands, true);
+  DONE;
+})
+
+(define_expand "widen_usumv4hi3"
+  [(match_operand:V2SI 0 "gr_register_operand" "")
+   (match_operand:V4HI 1 "gr_register_operand" "")
+   (match_operand:V2SI 2 "gr_register_operand" "")]
+  ""
+{
+  ia64_expand_widen_sum (operands, true);
+  DONE;
+})
+
+(define_expand "widen_ssumv8qi3"
+  [(match_operand:V4HI 0 "gr_register_operand" "")
+   (match_operand:V8QI 1 "gr_register_operand" "")
+   (match_operand:V4HI 2 "gr_register_operand" "")]
+  ""
+{
+  ia64_expand_widen_sum (operands, false);
+  DONE;
+})
+
+(define_expand "widen_ssumv4hi3"
+  [(match_operand:V2SI 0 "gr_register_operand" "")
+   (match_operand:V4HI 1 "gr_register_operand" "")
+   (match_operand:V2SI 2 "gr_register_operand" "")]
+  ""
+{
+  ia64_expand_widen_sum (operands, false);
+  DONE;
+})
+
+(define_expand "udot_prodv8qi"
+  [(match_operand:V2SI 0 "gr_register_operand" "")
+   (match_operand:V8QI 1 "gr_register_operand" "")
+   (match_operand:V8QI 2 "gr_register_operand" "")
+   (match_operand:V2SI 3 "gr_register_operand" "")]
+  ""
+{
+  ia64_expand_dot_prod_v8qi (operands, true);
+  DONE;
+})
+
+(define_expand "sdot_prodv8qi"
+  [(match_operand:V2SI 0 "gr_register_operand" "")
+   (match_operand:V8QI 1 "gr_register_operand" "")
+   (match_operand:V8QI 2 "gr_register_operand" "")
+   (match_operand:V2SI 3 "gr_register_operand" "")]
+  ""
+{
+  ia64_expand_dot_prod_v8qi (operands, false);
+  DONE;
+})
+
+(define_expand "sdot_prodv4hi"
+  [(match_operand:V2SI 0 "gr_register_operand" "")
+   (match_operand:V4HI 1 "gr_register_operand" "")
+   (match_operand:V4HI 2 "gr_register_operand" "")
+   (match_operand:V2SI 3 "gr_register_operand" "")]
+  ""
+{
+  rtx l, r, t;
+
+  r = gen_reg_rtx (V2SImode);
+  l = gen_reg_rtx (V2SImode);
+  t = gen_reg_rtx (V2SImode);
+
+  emit_insn (gen_pmpy2_r (r, operands[1], operands[2]));
+  emit_insn (gen_pmpy2_l (l, operands[1], operands[2]));
+  emit_insn (gen_addv2si3 (t, r, operands[3]));
+  emit_insn (gen_addv2si3 (operands[0], t, l));
+  DONE;
+})
+
 (define_expand "vcond<mode>"
   [(set (match_operand:VECINT 0 "gr_register_operand" "")
 	(if_then_else:VECINT
@@ -717,15 +829,11 @@
 ;; padd.uus
 ;; pavg
 ;; pavgsub
-;; pmpy
 ;; pmpyshr, general form
 ;; psad
 ;; pshladd
 ;; pshradd
 ;; psub.uus
-;; vec_set<mode>
-;; vec_extract<mode>
-;; vec_init<mode>
 
 ;; Floating point vector operations
 
@@ -947,7 +1055,7 @@
   "fpmin %0 = %1, %2"
   [(set_attr "itanium_class" "fmisc")])
 
-(define_expand "reduc_plus_v2sf"
+(define_expand "reduc_splus_v2sf"
   [(match_operand:V2SF 0 "fr_register_operand" "")
    (match_operand:V2SF 1 "fr_register_operand" "")]
   ""
-- 
2.7.4