From 5358e8f5800daa0012fc9d06705d64bbb21fa07b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 5 Mar 2020 16:45:05 -0800 Subject: [PATCH] i386: Properly encode vector registers in vector move On x86, when AVX and AVX512 are enabled, vector move instructions can be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512): 0: c5 f9 6f d1 vmovdqa %xmm1,%xmm2 4: 62 f1 fd 08 6f d1 vmovdqa64 %xmm1,%xmm2 We prefer VEX encoding over EVEX since VEX is shorter. Also AVX512F only supports 512-bit vector moves. AVX512F + AVX512VL supports 128-bit and 256-bit vector moves. xmm16-xmm31 and ymm16-ymm31 are disallowed in 128-bit and 256-bit modes when AVX512VL is disabled. Mode attributes on x86 vector move patterns indicate target preferences of vector move encoding. For scalar register to register move, we can use 512-bit vector move instructions to move 32-bit/64-bit scalar if AVX512VL isn't available. With AVX512F and AVX512VL, we should use VEX encoding for 128-bit/256-bit vector moves if upper 16 vector registers aren't used. This patch adds a function, ix86_output_ssemov, to generate vector moves: 1. If zmm registers are used, use EVEX encoding. 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding will be generated. 3. If xmm16-xmm31/ymm16-ymm31 registers are used: a. With AVX512VL, AVX512VL vector moves will be generated. b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register move will be done with zmm register move. There is no need to set mode attribute to XImode explicitly since ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers with and without AVX512VL. Tested on AVX2 and AVX512 with and without --with-arch=native. gcc/ PR target/89229 PR target/89346 * config/i386/i386-protos.h (ix86_output_ssemov): New prototype. * config/i386/i386.c (ix86_get_ssemov): New function. (ix86_output_ssemov): Likewise. * config/i386/sse.md (VMOVE:mov_internal): Call ix86_output_ssemov for TYPE_SSEMOV. 
Remove TARGET_AVX512VL check. (*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV. (*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV. Remove ext_sse_reg_operand and TARGET_AVX512VL check. (*movti_internal): Likewise. (*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV. gcc/testsuite/ PR target/89229 PR target/89346 * gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated. * gcc.target/i386/pr89229-2a.c: New test. * gcc.target/i386/pr89229-2b.c: Likewise. * gcc.target/i386/pr89229-2c.c: Likewise. * gcc.target/i386/pr89229-3a.c: Likewise. * gcc.target/i386/pr89229-3b.c: Likewise. * gcc.target/i386/pr89229-3c.c: Likewise. * gcc.target/i386/pr89346.c: Likewise. --- gcc/ChangeLog | 16 ++ gcc/config/i386/i386-protos.h | 2 + gcc/config/i386/i386.c | 208 +++++++++++++++++++++ gcc/config/i386/i386.md | 86 +-------- gcc/config/i386/sse.md | 98 +--------- gcc/testsuite/ChangeLog | 13 ++ .../gcc.target/i386/avx512vl-vmovdqa64-1.c | 7 +- gcc/testsuite/gcc.target/i386/pr89229-2a.c | 15 ++ gcc/testsuite/gcc.target/i386/pr89229-2b.c | 13 ++ gcc/testsuite/gcc.target/i386/pr89229-2c.c | 6 + gcc/testsuite/gcc.target/i386/pr89229-3a.c | 16 ++ gcc/testsuite/gcc.target/i386/pr89229-3b.c | 12 ++ gcc/testsuite/gcc.target/i386/pr89229-3c.c | 6 + gcc/testsuite/gcc.target/i386/pr89346.c | 15 ++ 14 files changed, 332 insertions(+), 181 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89346.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 14e90fb..78a1189 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2020-03-05 H.J. 
Lu + + PR target/89229 + PR target/89346 + * config/i386/i386-protos.h (ix86_output_ssemov): New prototype. + * config/i386/i386.c (ix86_get_ssemov): New function. + (ix86_output_ssemov): Likewise. + * config/i386/sse.md (VMOVE:mov_internal): Call + ix86_output_ssemov for TYPE_SSEMOV. Remove TARGET_AVX512VL + check. + (*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV. + (*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV. + Remove ext_sse_reg_operand and TARGET_AVX512VL check. + (*movti_internal): Likewise. + (*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV. + 2020-03-05 Jeff Law PR tree-optimization/91890 diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 266381c..39fcaa0 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -38,6 +38,8 @@ extern void ix86_expand_split_stack_prologue (void); extern void ix86_output_addr_vec_elt (FILE *, int); extern void ix86_output_addr_diff_elt (FILE *, int, int); +extern const char *ix86_output_ssemov (rtx_insn *, rtx *); + extern enum calling_abi ix86_cfun_abi (void); extern enum calling_abi ix86_function_type_abi (const_tree); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index dac7a3f..7bbfbb4 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4915,6 +4915,214 @@ ix86_pre_reload_split (void) && !(cfun->curr_properties & PROP_rtl_split_insns)); } +/* Return the opcode of the TYPE_SSEMOV instruction. To move from + or to xmm16-xmm31/ymm16-ymm31 registers, we either require + TARGET_AVX512VL or it is a register to register move which can + be done with zmm register move. 
*/ + +static const char * +ix86_get_ssemov (rtx *operands, unsigned size, + enum attr_mode insn_mode, machine_mode mode) +{ + char buf[128]; + bool misaligned_p = (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)); + bool evex_reg_p = (size == 64 + || EXT_REX_SSE_REG_P (operands[0]) + || EXT_REX_SSE_REG_P (operands[1])); + machine_mode scalar_mode; + + const char *opcode = NULL; + enum + { + opcode_int, + opcode_float, + opcode_double + } type = opcode_int; + + switch (insn_mode) + { + case MODE_V16SF: + case MODE_V8SF: + case MODE_V4SF: + scalar_mode = E_SFmode; + type = opcode_float; + break; + case MODE_V8DF: + case MODE_V4DF: + case MODE_V2DF: + scalar_mode = E_DFmode; + type = opcode_double; + break; + case MODE_XI: + case MODE_OI: + case MODE_TI: + scalar_mode = GET_MODE_INNER (mode); + break; + default: + gcc_unreachable (); + } + + /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL, + we can only use zmm register move without memory operand. */ + if (evex_reg_p + && !TARGET_AVX512VL + && GET_MODE_SIZE (mode) < 64) + { + /* NB: Since ix86_hard_regno_mode_ok only allows xmm16-xmm31 or + ymm16-ymm31 in 128/256 bit modes when AVX512VL is enabled, + we get here only for xmm16-xmm31 or ymm16-ymm31 in 32/64 bit + modes. */ + if (GET_MODE_SIZE (mode) >= 16 + || memory_operand (operands[0], mode) + || memory_operand (operands[1], mode)) + gcc_unreachable (); + size = 64; + switch (type) + { + case opcode_int: + opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32"; + break; + case opcode_float: + opcode = misaligned_p ? "vmovups" : "vmovaps"; + break; + case opcode_double: + opcode = misaligned_p ? "vmovupd" : "vmovapd"; + break; + } + } + else if (SCALAR_FLOAT_MODE_P (scalar_mode)) + { + switch (scalar_mode) + { + case E_SFmode: + opcode = misaligned_p ? "%vmovups" : "%vmovaps"; + break; + case E_DFmode: + opcode = misaligned_p ? 
"%vmovupd" : "%vmovapd"; + break; + case E_TFmode: + if (evex_reg_p) + opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64"; + else + opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa"; + break; + default: + gcc_unreachable (); + } + } + else if (SCALAR_INT_MODE_P (scalar_mode)) + { + switch (scalar_mode) + { + case E_QImode: + if (evex_reg_p) + opcode = (misaligned_p + ? (TARGET_AVX512BW + ? "vmovdqu8" + : "vmovdqu64") + : "vmovdqa64"); + else + opcode = (misaligned_p + ? (TARGET_AVX512BW + ? "vmovdqu8" + : "%vmovdqu") + : "%vmovdqa"); + break; + case E_HImode: + if (evex_reg_p) + opcode = (misaligned_p + ? (TARGET_AVX512BW + ? "vmovdqu16" + : "vmovdqu64") + : "vmovdqa64"); + else + opcode = (misaligned_p + ? (TARGET_AVX512BW + ? "vmovdqu16" + : "%vmovdqu") + : "%vmovdqa"); + break; + case E_SImode: + if (evex_reg_p) + opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32"; + else + opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa"; + break; + case E_DImode: + case E_TImode: + case E_OImode: + if (evex_reg_p) + opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64"; + else + opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa"; + break; + case E_XImode: + opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64"; + break; + default: + gcc_unreachable (); + } + } + else + gcc_unreachable (); + + switch (size) + { + case 64: + snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}", + opcode); + break; + case 32: + snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}", + opcode); + break; + case 16: + snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}", + opcode); + break; + default: + gcc_unreachable (); + } + output_asm_insn (buf, operands); + return ""; +} + +/* Return the template of the TYPE_SSEMOV instruction to move + operands[1] into operands[0]. 
*/ + +const char * +ix86_output_ssemov (rtx_insn *insn, rtx *operands) +{ + machine_mode mode = GET_MODE (operands[0]); + if (get_attr_type (insn) != TYPE_SSEMOV + || mode != GET_MODE (operands[1])) + gcc_unreachable (); + + enum attr_mode insn_mode = get_attr_mode (insn); + + switch (insn_mode) + { + case MODE_XI: + case MODE_V8DF: + case MODE_V16SF: + return ix86_get_ssemov (operands, 64, insn_mode, mode); + + case MODE_OI: + case MODE_V4DF: + case MODE_V8SF: + return ix86_get_ssemov (operands, 32, insn_mode, mode); + + case MODE_TI: + case MODE_V2DF: + case MODE_V4SF: + return ix86_get_ssemov (operands, 16, insn_mode, mode); + + default: + gcc_unreachable (); + } +} + /* Returns true if OP contains a symbol reference */ bool diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8e29dff..a4ee549 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1902,11 +1902,7 @@ return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - if (misaligned_operand (operands[0], XImode) - || misaligned_operand (operands[1], XImode)) - return "vmovdqu32\t{%1, %0|%0, %1}"; - else - return "vmovdqa32\t{%1, %0|%0, %1}"; + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); @@ -1929,21 +1925,7 @@ return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - if (misaligned_operand (operands[0], OImode) - || misaligned_operand (operands[1], OImode)) - { - if (get_attr_mode (insn) == MODE_XI) - return "vmovdqu32\t{%1, %0|%0, %1}"; - else - return "vmovdqu\t{%1, %0|%0, %1}"; - } - else - { - if (get_attr_mode (insn) == MODE_XI) - return "vmovdqa32\t{%1, %0|%0, %1}"; - else - return "vmovdqa\t{%1, %0|%0, %1}"; - } + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); @@ -1952,15 +1934,7 @@ [(set_attr "isa" "*,avx2,*,*") (set_attr "type" "sselog1,sselog1,ssemov,ssemov") (set_attr "prefix" "vex") - (set (attr "mode") - (cond [(ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 
"ext_sse_reg_operand")) - (const_string "XI") - (and (eq_attr "alternative" "1") - (match_test "TARGET_AVX512VL")) - (const_string "XI") - ] - (const_string "OI")))]) + (set_attr "mode" "OI")]) (define_insn "*movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd") @@ -1981,27 +1955,7 @@ return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - /* TDmode values are passed as TImode on the stack. Moving them - to stack may result in unaligned memory access. */ - if (misaligned_operand (operands[0], TImode) - || misaligned_operand (operands[1], TImode)) - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovups\t{%1, %0|%0, %1}"; - else if (get_attr_mode (insn) == MODE_XI) - return "vmovdqu32\t{%1, %0|%0, %1}"; - else - return "%vmovdqu\t{%1, %0|%0, %1}"; - } - else - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - else if (get_attr_mode (insn) == MODE_XI) - return "vmovdqa32\t{%1, %0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - } + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); @@ -2028,12 +1982,6 @@ (set (attr "mode") (cond [(eq_attr "alternative" "0,1") (const_string "DI") - (ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand")) - (const_string "XI") - (and (eq_attr "alternative" "3") - (match_test "TARGET_AVX512VL")) - (const_string "XI") (match_test "TARGET_AVX") (const_string "TI") (ior (not (match_test "TARGET_SSE2")) @@ -3254,31 +3202,7 @@ return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - /* Handle misaligned load/store since we - don't have movmisaligntf pattern. 
*/ - if (misaligned_operand (operands[0], TFmode) - || misaligned_operand (operands[1], TFmode)) - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovups\t{%1, %0|%0, %1}"; - else if (TARGET_AVX512VL - && (EXT_REX_SSE_REG_P (operands[0]) - || EXT_REX_SSE_REG_P (operands[1]))) - return "vmovdqu64\t{%1, %0|%0, %1}"; - else - return "%vmovdqu\t{%1, %0|%0, %1}"; - } - else - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - else if (TARGET_AVX512VL - && (EXT_REX_SSE_REG_P (operands[0]) - || EXT_REX_SSE_REG_P (operands[1]))) - return "vmovdqa64\t{%1, %0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - } + return ix86_output_ssemov (insn, operands); case TYPE_MULTI: return "#"; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ee1f138..8f59022 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1013,98 +1013,7 @@ return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - /* There is no evex-encoded vmov* for sizes smaller than 64-bytes - in avx512f, so we need to use workarounds, to access sse registers - 16-31, which are evex-only. In avx512vl we don't need workarounds. */ - if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL - && (EXT_REX_SSE_REG_P (operands[0]) - || EXT_REX_SSE_REG_P (operands[1]))) - { - if (memory_operand (operands[0], <MODE>mode)) - { - if (<MODE_SIZE> == 32) - return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; - else if (<MODE_SIZE> == 16) - return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; - else - gcc_unreachable (); - } - else if (memory_operand (operands[1], <MODE>mode)) - { - if (<MODE_SIZE> == 32) - return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}"; - else if (<MODE_SIZE> == 16) - return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}"; - else - gcc_unreachable (); - } - else - /* Reg -> reg move is always aligned. Just use wider move.
*/ - switch (get_attr_mode (insn)) - { - case MODE_V8SF: - case MODE_V4SF: - return "vmovaps\t{%g1, %g0|%g0, %g1}"; - case MODE_V4DF: - case MODE_V2DF: - return "vmovapd\t{%g1, %g0|%g0, %g1}"; - case MODE_OI: - case MODE_TI: - return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; - default: - gcc_unreachable (); - } - } - - switch (get_attr_mode (insn)) - { - case MODE_V16SF: - case MODE_V8SF: - case MODE_V4SF: - if (misaligned_operand (operands[0], <MODE>mode) - || misaligned_operand (operands[1], <MODE>mode)) - return "%vmovups\t{%1, %0|%0, %1}"; - else - return "%vmovaps\t{%1, %0|%0, %1}"; - - case MODE_V8DF: - case MODE_V4DF: - case MODE_V2DF: - if (misaligned_operand (operands[0], <MODE>mode) - || misaligned_operand (operands[1], <MODE>mode)) - return "%vmovupd\t{%1, %0|%0, %1}"; - else - return "%vmovapd\t{%1, %0|%0, %1}"; - - case MODE_OI: - case MODE_TI: - if (misaligned_operand (operands[0], <MODE>mode) - || misaligned_operand (operands[1], <MODE>mode)) - return TARGET_AVX512VL - && (<MODE>mode == V4SImode - || <MODE>mode == V2DImode - || <MODE>mode == V8SImode - || <MODE>mode == V4DImode - || TARGET_AVX512BW) - ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}" - : "%vmovdqu\t{%1, %0|%0, %1}"; - else - return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}" - : "%vmovdqa\t{%1, %0|%0, %1}"; - case MODE_XI: - if (misaligned_operand (operands[0], <MODE>mode) - || misaligned_operand (operands[1], <MODE>mode)) - return (<MODE>mode == V16SImode - || <MODE>mode == V8DImode - || TARGET_AVX512BW) - ?
"vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}" - : "vmovdqu64\t{%1, %0|%0, %1}"; - else - return "vmovdqa64\t{%1, %0|%0, %1}"; - - default: - gcc_unreachable (); - } + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); @@ -1113,10 +1022,7 @@ [(set_attr "type" "sselog1,sselog1,ssemov,ssemov") (set_attr "prefix" "maybe_vex") (set (attr "mode") - (cond [(and (eq_attr "alternative" "1") - (match_test "TARGET_AVX512VL")) - (const_string "<sseinsnmode>") - (match_test "TARGET_AVX") + (cond [(match_test "TARGET_AVX") (const_string "<sseinsnmode>") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c4f161a..6b4f301 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2020-03-05 H.J. Lu + + PR target/89229 + PR target/89346 + * gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated. + * gcc.target/i386/pr89229-2a.c: New test. + * gcc.target/i386/pr89229-2b.c: Likewise. + * gcc.target/i386/pr89229-2c.c: Likewise. + * gcc.target/i386/pr89229-3a.c: Likewise. + * gcc.target/i386/pr89229-3b.c: Likewise. + * gcc.target/i386/pr89229-3c.c: Likewise. + * gcc.target/i386/pr89346.c: Likewise. + 2020-03-05 Andre Vieira * g++.dg/pr80481.C: Disable epilogue vectorization.
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c index 14fe4b8..db4d9d1 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c @@ -4,14 +4,13 @@ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */ -/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */ +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */ +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */ -/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\nxy\]*\\((?:\n|\[ \\t\]+#)" 
1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2a.c b/gcc/testsuite/gcc.target/i386/pr89229-2a.c new file mode 100644 index 0000000..0cf7803 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-2a.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +typedef __int128 __m128t __attribute__ ((__vector_size__ (16), + __may_alias__)); + +__m128t +foo1 (void) +{ + register __int128 xmm16 __asm ("xmm16") = (__int128) -1; + asm volatile ("" : "+v" (xmm16)); + return (__m128t) xmm16; +} + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2b.c b/gcc/testsuite/gcc.target/i386/pr89229-2b.c new file mode 100644 index 0000000..8d5d6c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-2b.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */ + +typedef __int128 __m128t __attribute__ ((__vector_size__ (16), + __may_alias__)); + +__m128t +foo1 (void) +{ + register __int128 xmm16 __asm ("xmm16") = (__int128) -1; /* { dg-error "register specified for 'xmm16'" } */ + asm volatile ("" : "+v" (xmm16)); + return (__m128t) xmm16; +} diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2c.c b/gcc/testsuite/gcc.target/i386/pr89229-2c.c new file mode 100644 index 0000000..218da46 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-2c.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */ + +#include "pr89229-2a.c" + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3a.c b/gcc/testsuite/gcc.target/i386/pr89229-3a.c new file mode 100644 index 0000000..fcb85c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-3a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +extern __float128 d; + +void +foo1 (__float128 x) +{ + register __float128 xmm16 __asm ("xmm16") = x; + asm volatile ("" : "+v" (xmm16)); + register __float128 xmm17 __asm ("xmm17") = xmm16; + asm volatile ("" : "+v" (xmm17)); + d = xmm17; +} + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3b.c b/gcc/testsuite/gcc.target/i386/pr89229-3b.c new file mode 100644 index 0000000..37eb83c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-3b.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */ + +extern __float128 d; + +void +foo1 (__float128 x) +{ + register __float128 xmm16 __asm ("xmm16") = x; /* { dg-error "register specified for 'xmm16'" } */ + asm volatile ("" : "+v" (xmm16)); + d = xmm16; +} diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3c.c b/gcc/testsuite/gcc.target/i386/pr89229-3c.c new file mode 100644 index 0000000..529a520 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-3c.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */ + +#include "pr89229-3a.c" + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89346.c b/gcc/testsuite/gcc.target/i386/pr89346.c new file mode 100644 index 0000000..cdc9acc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89346.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +#include <immintrin.h> + +long long *p; +volatile __m256i y; + +void +foo (void) +{ + _mm256_store_epi64 (p, y); +} + +/* { dg-final { scan-assembler-not "vmovdqa64" } } */ -- 2.7.4