* config/rs6000/altivec.md (mulv4si3): New pattern.
* gcc.dg/vect/vect-11.c: Require effective target vect_int_mult.
* gcc.dg/vect/vect-11a.c: New.
* gcc.dg/vect/vect-none.c: Update.
* lib/target-supports.exp (check_effective_target_vect_int_mult): New.
From-SVN: r98323
+2005-04-18 Devang Patel <dpatel@apple.com>
+
+ * config/rs6000/altivec.md (mulv4si3): New pattern.
+
2005-04-18 James A. Morrison <phython@gcc.gnu.org>
PR tree-optimization/20922
DONE;
}")
+;; 32 bit integer multiplication
+;; A_high = (Operand_1 & 0xFFFF0000) >> 16
+;; A_low = Operand_1 & 0xFFFF
+;; B_high = (Operand_2 & 0xFFFF0000) >> 16
+;; B_low = Operand_2 & 0xFFFF
+;; result = A_low * B_low + ((A_high * B_low + B_high * A_low) << 16)
+
+;; (define_insn "mulv4si3"
+;; [(set (match_operand:V4SI 0 "register_operand" "=v")
+;; (mult:V4SI (match_operand:V4SI 1 "register_operand" "v")
+;; (match_operand:V4SI 2 "register_operand" "v")))]
+(define_expand "mulv4si3"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:V4SI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+ "
+ {
+ /* Expand a V4SI multiply of operands[1] by operands[2] into operands[0]
+    as a sequence of halfword operations:
+      low_product  = vmulouh (op1, op2)
+      high_product = vmsumuhm (op1, rotated op2, 0) shifted by sixteen
+      operands[0]  = high_product + low_product.  */
+ rtx zero;
+ rtx swap;
+ rtx small_swap;
+ rtx sixteen;
+ rtx one;
+ rtx two;
+ rtx low_product;
+ rtx high_product;
+
+ /* All-zero vector, used as the addend of the vmsumuhm below.  */
+ zero = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
+
+ /* Splat of -16, used as the rotate and shift count.  A const_int carries
+    no machine mode, so build it with GEN_INT rather than passing V4SImode.
+    NOTE(review): presumably -16 is used because only the low five bits of
+    each count element matter and +16 does not fit the vspltisw immediate;
+    confirm against the AltiVec instruction descriptions.  */
+ sixteen = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltisw (sixteen, GEN_INT (-16)));
+
+ /* operands[2] with each word rotated by the count above.  */
+ swap = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vrlw (swap, operands[2], sixteen));
+
+ /* V8HI copies of both inputs and of the rotated operand, as required
+    by the halfword multiply patterns used below.  */
+ one = gen_reg_rtx (V8HImode);
+ convert_move (one, operands[1], 0);
+
+ two = gen_reg_rtx (V8HImode);
+ convert_move (two, operands[2], 0);
+
+ small_swap = gen_reg_rtx (V8HImode);
+ convert_move (small_swap, swap, 0);
+
+ low_product = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vmulouh (low_product, one, two));
+
+ high_product = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero));
+
+ /* Shift the cross-product sum in place before the final add.  */
+ emit_insn (gen_altivec_vslw (high_product, high_product, sixteen));
+
+ emit_insn (gen_addv4si3 (operands[0], high_product, low_product));
+
+ DONE;
+ }")
+
+
;; Fused multiply subtract
(define_insn "altivec_vnmsubfp"
[(set (match_operand:V4SF 0 "register_operand" "=v")
+2005-04-18 Devang Patel <dpatel@apple.com>
+
+ * gcc.dg/vect/vect-11.c: Require effective target vect_int_mult.
+ * gcc.dg/vect/vect-11a.c: New.
+ * gcc.dg/vect/vect-none.c: Update.
+ * lib/target-supports.exp (check_effective_target_vect_int_mult): New.
+
2005-04-18 James A. Morrison <phython@gcc.gnu.org>
PR tree-optimization/20922
/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_int_mult } */
#include <stdarg.h>
#include "tree-vect.h"
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_int_mult } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort (void);
+void u ()  /* Unsigned case: multiplies A by B element-wise and aborts if any product differs from Answer. */
+{
+ unsigned int A[4] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001};
+ unsigned int B[4] = {0x08000000,0x08000001,0xff0000ff,0xf0000001};
+ unsigned int Answer[4] = {0,0xf7ffffff,0x0200fe01,0xe0000001};  /* A[i] * B[i] reduced modulo 2^32 (assumes 32-bit unsigned int) */
+ unsigned int C[4];
+ int i, j;  /* j is never used in this function */
+
+ for (i=0; i<4; i++)  /* the multiply loop; the dg-final scan in this file expects "vectorized 1 loops" twice */
+ C[i] = A[i] * B[i];
+ for (i=0; i<4; i++)  /* verification loop */
+ if (C[i] != Answer[i])
+ abort ();
+}
+void s()  /* Signed case: same operand bit patterns as u (), compared against the same expected bit patterns. */
+{
+ signed int A[4] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001};  /* several initializers exceed INT_MAX for a 32-bit int and are converted on initialization */
+ signed int B[4] = {0x08000000,0x08000001,0xff0000ff,0xf0000001};
+ signed int Answer[4] = {0,0xf7ffffff,0x0200fe01, 0xe0000001};
+ signed int C[4];
+ int i, j;  /* j is never used in this function */
+
+ for (i=0; i<4; i++)  /* NOTE(review): e.g. 0x08000000 * 0x08000000 overflows a 32-bit signed int; the expected values assume wrapping - confirm this is intended */
+ C[i] = A[i] * B[i];
+ for (i=0; i<4; i++)  /* verification loop */
+ if (C[i] != Answer[i])
+ abort ();
+}
+
+int main1 ()  /* Runs the unsigned and then the signed check; returns 0 when neither aborts. */
+{
+ u();
+ s();
+ return 0;
+}
+
+int main (void)  /* Test entry point: calls check_vect () and then returns the result of main1 (). */
+{
+ check_vect ();  /* not defined in this file; presumably provided by tree-vect.h - confirm */
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
/* Test 3 - no target support for integer mult. */
+ /* This loop is vectorized on platforms that support vect_int_mult. */
for (i = 0; i < N; i++)
{
ia[i] = ib[i] * ic[i];
/* Test 6 - condition in loop. */
+ /* This loop is vectorized on platforms that support vect_condition. */
for (i = 0; i < N; i++){
a[i] = (b[i] > 0 ? b[i] : 0);
}
}
/* { dg-final { scan-tree-dump-times "vectorized " 3 "vect"} } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"} } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect"} } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { xfail powerpc*-*-* i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target i?86-*-* x86_64-*-* ia64-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target i?86-*-* x86_64-*-* ia64-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
return $et_vect_cond_saved
}
+# Return 1 if the target supports vector int multiplication, 0 otherwise.
+
+proc check_effective_target_vect_int_mult { } {
+    # The result is kept in a global so it is computed only once.
+    global et_vect_int_mult_saved
+
+    # Test the same variable that is assigned below; checking any other
+    # name would never find the cached value.
+    if [info exists et_vect_int_mult_saved] {
+	verbose "check_effective_target_vect_int_mult: using cached result" 2
+    } else {
+	# Default to unsupported; only powerpc targets are reported as
+	# supporting vector int multiplication.
+	set et_vect_int_mult_saved 0
+	if { [istarget powerpc*-*-*] } {
+	    set et_vect_int_mult_saved 1
+	}
+    }
+
+    verbose "check_effective_target_vect_int_mult: returning $et_vect_int_mult_saved" 2
+    return $et_vect_int_mult_saved
+}
# Return 1 if the target matches the effective target 'arg', 0 otherwise.
# This can be used with any check_* proc that takes no argument and