From f9ea11ac194e5129ff90145a73b91b08a244174b Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 4 Mar 2016 15:45:56 +0100 Subject: [PATCH] re PR target/70059 (Invalid codegen on AVX-512 when using _mm512_inserti64x4(x, y, 0)) PR target/70059 * config/i386/sse.md (vec_set_lo_, _vinsert_mask): Formatting fixes. (vec_set_hi_): Likewise. Swap VEC_CONCAT operands. * gcc.target/i386/avx512f-pr70059.c: New test. * gcc.target/i386/avx512dq-pr70059.c: New test. From-SVN: r233968 --- gcc/ChangeLog | 8 ++++++ gcc/config/i386/sse.md | 32 +++++++++++------------ gcc/testsuite/ChangeLog | 6 +++++ gcc/testsuite/gcc.target/i386/avx512dq-pr70059.c | 33 ++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/avx512f-pr70059.c | 33 ++++++++++++++++++++++++ 5 files changed, 96 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-pr70059.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr70059.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 976e6fa..a65e347 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2016-03-04 Jakub Jelinek + + PR target/70059 + * config/i386/sse.md (vec_set_lo_, + _vinsert_mask): Formatting + fixes. + (vec_set_hi_): Likewise. Swap VEC_CONCAT operands. + 2016-03-04 Bernd Schmidt PR rtl-optimization/57676 diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 49b99e4..3dd787c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -12426,13 +12426,13 @@ { int mask = INTVAL (operands[3]); if (mask == 0) - emit_insn (gen_vec_set_lo__mask - (operands[0], operands[1], operands[2], - operands[4], operands[5])); + emit_insn (gen_vec_set_lo__mask (operands[0], operands[1], + operands[2], operands[4], + operands[5])); else - emit_insn (gen_vec_set_hi__mask - (operands[0], operands[1], operands[2], - operands[4], operands[5])); + emit_insn (gen_vec_set_hi__mask (operands[0], operands[1], + operands[2], operands[4], + operands[5])); DONE; }) @@ -12443,9 +12443,9 @@ (vec_select: (match_operand:V16FI 1 "register_operand" "v") (parallel [(const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)]))))] "TARGET_AVX512DQ" "vinsert32x8\t{$0x0, %2, %1, %0|%0, %1, %2, $0x0}" [(set_attr "type" "sselog") @@ -12456,13 +12456,13 @@ (define_insn "vec_set_hi_" [(set (match_operand:V16FI 0 "register_operand" "=v") (vec_concat:V16FI - (match_operand: 2 "nonimmediate_operand" "vm") (vec_select: (match_operand:V16FI 1 "register_operand" "v") (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7)]))))] + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (match_operand: 2 "nonimmediate_operand" "vm")))] "TARGET_AVX512DQ" "vinsert32x8\t{$0x1, %2, %1, %0|%0, %1, %2, $0x1}" [(set_attr "type" "sselog") @@ -12477,7 +12477,7 @@ (vec_select: (match_operand:V8FI 1 "register_operand" "v") (parallel [(const_int 4) (const_int 5) - (const_int 6) (const_int 7)]))))] + (const_int 6) (const_int 7)]))))] "TARGET_AVX512F" "vinsert64x4\t{$0x0, %2, %1, %0|%0, %1, %2, $0x0}" [(set_attr "type" "sselog") @@ -12488,11 +12488,11 @@ (define_insn "vec_set_hi_" [(set (match_operand:V8FI 0 "register_operand" "=v") (vec_concat:V8FI - (match_operand: 2 "nonimmediate_operand" "vm") (vec_select: (match_operand:V8FI 1 "register_operand" "v") (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3)]))))] + (const_int 2) (const_int 3)])) + (match_operand: 2 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" "vinsert64x4\t{$0x1, %2, %1, %0|%0, %1, %2, $0x1}" [(set_attr "type" "sselog") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e4770e7..baf683a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-03-04 Jakub Jelinek + + PR target/70059 + * gcc.target/i386/avx512f-pr70059.c: New test. + * gcc.target/i386/avx512dq-pr70059.c: New test. + 2016-03-04 Bernd Schmidt PR rtl-optimization/57676 diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr70059.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr70059.c new file mode 100644 index 0000000..932ff6a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr70059.c @@ -0,0 +1,33 @@ +/* PR target/70059 */ +/* { dg-do run } */ +/* { dg-require-effective-target avx512dq } */ +/* { dg-options "-O2 -mavx512dq" } */ + +#include "avx512dq-check.h" + +__attribute__((noinline, noclone)) __m512i +foo (__m256i a, __m256i b) +{ + __m512i r = _mm512_undefined_si512 (); + r = _mm512_inserti32x8 (r, a, 0); + r = _mm512_inserti32x8 (r, b, 1); + return r; +} + +static void +avx512dq_test (void) +{ + union256i_q a, b; + union512i_q r; + long long r_ref[8]; + int i; + for (i = 0; i < 4; i++) + { + a.a[i] = 0x0101010101010101ULL * i; + b.a[i] = 0x1010101010101010ULL * i; + r_ref[i] = a.a[i]; + r_ref[i + 4] = b.a[i]; + } + r.x = foo (a.x, b.x); + check_union512i_q (r, r_ref); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr70059.c b/gcc/testsuite/gcc.target/i386/avx512f-pr70059.c new file mode 100644 index 0000000..95c8915 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr70059.c @@ -0,0 +1,33 @@ +/* PR target/70059 */ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512f" } */ + +#include "avx512f-check.h" + +__attribute__((noinline, noclone)) __m512i +foo (__m256i a, __m256i b) +{ + __m512i r = _mm512_undefined_si512 (); + r = _mm512_inserti64x4 (r, a, 0); + r = _mm512_inserti64x4 (r, b, 1); + return r; +} + +static void +avx512f_test (void) +{ + union256i_q a, b; + union512i_q r; + long long r_ref[8]; + int i; + for (i = 0; i < 4; i++) + { + a.a[i] = 0x0101010101010101ULL * i; + b.a[i] = 0x1010101010101010ULL * i; + r_ref[i] = a.a[i]; + r_ref[i + 4] = b.a[i]; + } + r.x = foo (a.x, b.x); + check_union512i_q (r, r_ref); +} -- 2.7.4