From 962b96688689a73ddf5fb97d9c63514f98b49d27 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 6 Nov 2017 13:47:46 +0000 Subject: [PATCH] [gcc] 2017-11-06 Bill Schmidt * config/rs6000/altivec.md (*p9_vadu<mode>3): Rename to p9_vadu<mode>3. (usadv16qi): New define_expand. (usadv8hi): New define_expand. [gcc/testsuite] 2017-11-06 Bill Schmidt * gcc.target/powerpc/sad-vectorize-1.c: New file. * gcc.target/powerpc/sad-vectorize-2.c: New file. * gcc.target/powerpc/sad-vectorize-3.c: New file. * gcc.target/powerpc/sad-vectorize-4.c: New file. From-SVN: r254453 --- gcc/ChangeLog | 7 +++ gcc/config/rs6000/altivec.md | 45 ++++++++++++++++- gcc/testsuite/ChangeLog | 7 +++ gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c | 36 ++++++++++++++ gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c | 36 ++++++++++++++ gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c | 57 ++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c | 57 ++++++++++++++++++++++ 7 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 26afa77..6ba0a80 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2017-11-06 Bill Schmidt + + * config/rs6000/altivec.md (*p9_vadu<mode>3): Rename to + p9_vadu<mode>3. + (usadv16qi): New define_expand. + (usadv8hi): New define_expand. 
+ 2017-11-06 Jan Hubicka PR bootstrap/82832 diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index d0fcd1c..651f6c9 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -4020,7 +4020,7 @@ "TARGET_P9_VECTOR") ;; Vector absolute difference unsigned -(define_insn "*p9_vadu<mode>3" +(define_insn "p9_vadu<mode>3" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] @@ -4184,6 +4184,49 @@ "vbpermd %0,%1,%2" [(set_attr "type" "vecsimple")]) +;; Support for SAD (sum of absolute differences). + +;; Due to saturating semantics, we can't combine the sum-across +;; with the vector accumulate in vsum4ubs. A vadduwm is needed. +(define_expand "usadv16qi" + [(use (match_operand:V4SI 0 "register_operand")) + (use (match_operand:V16QI 1 "register_operand")) + (use (match_operand:V16QI 2 "register_operand")) + (use (match_operand:V4SI 3 "register_operand"))] + "TARGET_P9_VECTOR" +{ + rtx absd = gen_reg_rtx (V16QImode); + rtx zero = gen_reg_rtx (V4SImode); + rtx psum = gen_reg_rtx (V4SImode); + + emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2])); + emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); + emit_insn (gen_altivec_vsum4ubs (psum, absd, zero)); + emit_insn (gen_addv4si3 (operands[0], psum, operands[3])); + DONE; +}) + +;; Since vsum4shs is saturating and further performs signed +;; arithmetic, we can't combine the sum-across with the vector +;; accumulate in vsum4shs. A vadduwm is needed. 
+(define_expand "usadv8hi" + [(use (match_operand:V4SI 0 "register_operand")) + (use (match_operand:V8HI 1 "register_operand")) + (use (match_operand:V8HI 2 "register_operand")) + (use (match_operand:V4SI 3 "register_operand"))] + "TARGET_P9_VECTOR" +{ + rtx absd = gen_reg_rtx (V8HImode); + rtx zero = gen_reg_rtx (V4SImode); + rtx psum = gen_reg_rtx (V4SImode); + + emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2])); + emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); + emit_insn (gen_altivec_vsum4shs (psum, absd, zero)); + emit_insn (gen_addv4si3 (operands[0], psum, operands[3])); + DONE; +}) + ;; Decimal Integer operations (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1d5ba34..dde8a35 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2017-11-06 Bill Schmidt + + * gcc.target/powerpc/sad-vectorize-1.c: New file. + * gcc.target/powerpc/sad-vectorize-2.c: New file. + * gcc.target/powerpc/sad-vectorize-3.c: New file. + * gcc.target/powerpc/sad-vectorize-4.c: New file. + 2017-11-06 Martin Liska * c-c++-common/cilk-plus/AN/pr57541-2.c (foo1): Return a value diff --git a/gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c new file mode 100644 index 0000000..b122bf5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c @@ -0,0 +1,36 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-O3 -mcpu=power9" } */ + +/* Verify that we vectorize this SAD loop using vabsdub. 
*/ + +extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__)); + +static int +foo (unsigned char *w, int i, unsigned char *x, int j) +{ + int tot = 0; + for (int a = 0; a < 16; a++) + { + for (int b = 0; b < 16; b++) + tot += abs (w[b] - x[b]); + w += i; + x += j; + } + return tot; +} + +void +bar (unsigned char *w, unsigned char *x, int i, int *result) +{ + *result = foo (w, 16, x, i); +} + +/* { dg-final { scan-assembler-times "vabsdub" 16 } } */ +/* { dg-final { scan-assembler-times "vsum4ubs" 16 } } */ +/* { dg-final { scan-assembler-times "vadduwm" 17 } } */ + +/* Note: One of the 16 adds is optimized out (add with zero), + leaving 15. The extra two adds are for the final reduction. */ diff --git a/gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c new file mode 100644 index 0000000..b1b6de9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c @@ -0,0 +1,36 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-O3 -mcpu=power9" } */ + +/* Verify that we vectorize this SAD loop using vabsduh. 
*/ + +extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__)); + +static int +foo (unsigned short *w, int i, unsigned short *x, int j) +{ + int tot = 0; + for (int a = 0; a < 16; a++) + { + for (int b = 0; b < 8; b++) + tot += abs (w[b] - x[b]); + w += i; + x += j; + } + return tot; +} + +void +bar (unsigned short *w, unsigned short *x, int i, int *result) +{ + *result = foo (w, 8, x, i); +} + +/* { dg-final { scan-assembler-times "vabsduh" 16 } } */ +/* { dg-final { scan-assembler-times "vsum4shs" 16 } } */ +/* { dg-final { scan-assembler-times "vadduwm" 17 } } */ + +/* Note: One of the 16 adds is optimized out (add with zero), + leaving 15. The extra two adds are for the final reduction. */ diff --git a/gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c new file mode 100644 index 0000000..0513a50 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c @@ -0,0 +1,57 @@ +/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-O3 -mcpu=power9" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ + +/* Verify that we get correct code when we vectorize this SAD loop using + vabsdub. 
*/ + +extern void abort (); +extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__)); + +static int +foo (unsigned char *w, int i, unsigned char *x, int j) +{ + int tot = 0; + for (int a = 0; a < 16; a++) + { + for (int b = 0; b < 16; b++) + tot += abs (w[b] - x[b]); + w += i; + x += j; + } + return tot; +} + +void +bar (unsigned char *w, unsigned char *x, int i, int *result) +{ + *result = foo (w, 16, x, i); +} + +int +main () +{ + unsigned char m[256]; + unsigned char n[256]; + int sum, i; + + for (i = 0; i < 256; ++i) + if (i % 2 == 0) + { + m[i] = (i % 8) * 2 + 1; + n[i] = -(i % 8); + } + else + { + m[i] = -((i % 8) * 2 + 2); + n[i] = -((i % 8) >> 1); + } + + bar (m, n, 16, &sum); + + if (sum != 32384) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c new file mode 100644 index 0000000..2db0165 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c @@ -0,0 +1,57 @@ +/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-O3 -mcpu=power9" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ + +/* Verify that we get correct code when we vectorize this SAD loop using + vabsduh. 
*/ + +extern void abort (); +extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__)); + +static int +foo (unsigned short *w, int i, unsigned short *x, int j) +{ + int tot = 0; + for (int a = 0; a < 16; a++) + { + for (int b = 0; b < 8; b++) + tot += abs (w[b] - x[b]); + w += i; + x += j; + } + return tot; +} + +void +bar (unsigned short *w, unsigned short *x, int i, int *result) +{ + *result = foo (w, 8, x, i); +} + +int +main () +{ + unsigned short m[128]; + unsigned short n[128]; + int sum, i; + + for (i = 0; i < 128; ++i) + if (i % 2 == 0) + { + m[i] = (i % 8) * 2 + 1; + n[i] = i % 8; + } + else + { + m[i] = (i % 8) * 4 - 3; + n[i] = (i % 8) >> 1; + } + + bar (m, n, 8, &sum); + + if (sum != 992) + abort (); + + return 0; +} -- 2.7.4