sse.md (mulv4si3): New.

author Richard Henderson <rth@redhat.com>

Tue, 17 May 2005 04:33:42 +0000 (21:33 -0700)

committer Richard Henderson <rth@gcc.gnu.org>

Tue, 17 May 2005 04:33:42 +0000 (21:33 -0700)
author Richard Henderson <rth@redhat.com>
Tue, 17 May 2005 04:33:42 +0000 (21:33 -0700)
committer Richard Henderson <rth@gcc.gnu.org>
Tue, 17 May 2005 04:33:42 +0000 (21:33 -0700)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 28d0a13..f42032b 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2005-05-16  Richard Henderson  <rth@redhat.com>
+
+       * config/i386/sse.md (mulv4si3): New.
+
  2005-05-17  Hans-Peter Nilsson  <hp@axis.com>
  
         * config/cris/cris.h (EXTRA_CONSTRAINT_T): Remove FIXME and
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

index ee2e614..76efe5f 100644 (file)
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2490,6 +2490,52 @@
    [(set_attr "type" "sseiadd")
     (set_attr "mode" "TI")])
  
+;; Expand a V4SI multiply as two V2SI->V2DI multiplies plus shuffles.
+(define_expand "mulv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "")
+       (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
+                  (match_operand:V4SI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+{
+  rtx op0, op1, op2, t1, t2, t3, t4, t5, t6, thirtytwo;
+
+  /* Inputs may be MEMs; load them so the insns below get registers.  */
+  op0 = operands[0];
+  op1 = force_reg (V4SImode, operands[1]);
+  op2 = force_reg (V4SImode, operands[2]);
+  t1 = gen_reg_rtx (V4SImode);
+  t2 = gen_reg_rtx (V4SImode);
+  t3 = gen_reg_rtx (V4SImode);
+  t4 = gen_reg_rtx (V4SImode);
+  t5 = gen_reg_rtx (V4SImode);
+  t6 = gen_reg_rtx (V4SImode);
+  thirtytwo = GEN_INT (32);
+
+  /* Multiply elements 2 and 0; t1 holds the V2DI result.  */
+  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
+
+  /* Shift both inputs down one element, moving elements 3 and 1 into the
+     slots of elements 2 and 0.  On K8, at least, this beats a shuffle.  */
+  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+                              gen_lowpart (TImode, op1), thirtytwo));
+  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+                              gen_lowpart (TImode, op2), thirtytwo));
+
+  /* Multiply elements 3 and 1; t4 holds the V2DI result.  */
+  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
+
+  /* The low halves of the products sit in elements 0 and 2.  Move element 2
+     down to element 1; we don't care what goes in elements 2 and 3.  */
+  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
+                               const0_rtx, const0_rtx));
+  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+                               const0_rtx, const0_rtx));
+
+  /* Merge the parts back together: t5 has the even products, t6 the odd.  */
+  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
+  DONE;
+})
+
  (define_insn "ashr<mode>3"
    [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
         (ashiftrt:SSEMODE24
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index b94841e..c9f0b74 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-16  Richard Henderson  <rth@redhat.com>
+
+       * lib/target-supports.exp (check_effective_target_vect_int_mult): Add
+       i?86 and x86_64.
+
  2005-05-16  Mark Mitchell  <mark@codesourcery.com>
  
         * gcc.dg/compat/generate-random.c (config.h): Do not include.
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp

index 9306790..ac6dda5 100644 (file)
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -955,7 +955,9 @@ proc check_effective_target_vect_int_mult { } {
         verbose "check_effective_target_vect_int_mult: using cached result" 2
      } else {
         set et_vect_int_mult_saved 0
-       if { [istarget powerpc*-*-*] } {
+       if { [istarget powerpc*-*-*]
+            || [istarget i?86-*-*]
+            || [istarget x86_64-*-*] } {
            set et_vect_int_mult_saved 1
         }
      }
author	Richard Henderson <rth@redhat.com>
	Tue, 17 May 2005 04:33:42 +0000 (21:33 -0700)
committer	Richard Henderson <rth@gcc.gnu.org>
	Tue, 17 May 2005 04:33:42 +0000 (21:33 -0700)
gcc/ChangeLog		patch \| blob \| history
gcc/config/i386/sse.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/lib/target-supports.exp		patch \| blob \| history