From 266f44a91c0c9705d3d18e82d7c5bab32927a18f Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 17 May 2020 10:10:34 -0700 Subject: [PATCH] x86: Allow V1TI vector register pushes Add V1TI vector register push and split it after reload to a sequence of: (set (reg:P SP_REG) (plus:P SP_REG) (const_int -8))) (set (match_dup 0) (match_dup 1)) so that STV pass can convert TI mode integer push to V1TI vector register push. Rename has_non_address_hard_reg to pseudo_reg_set, combine calls of single_set and has_non_address_hard_reg to pseudo_reg_set, to ignore pseudo register push. Remove c-c++-common/dfp/func-vararg-mixed-2.c since it is compiled with -mpreferred-stack-boundary=2 and leads to segfault: Dump of assembler code for function __bid_nesd2: 0x08049210 <+0>: endbr32 0x08049214 <+4>: push %esi 0x08049215 <+5>: push %ebx 0x08049216 <+6>: call 0x8049130 <__x86.get_pc_thunk.bx> 0x0804921b <+11>: add $0x8de5,%ebx 0x08049221 <+17>: sub $0x20,%esp 0x08049224 <+20>: mov 0x30(%esp),%esi 0x08049228 <+24>: pushl 0x2c(%esp) 0x0804922c <+28>: call 0x804e600 <__bid32_to_bid64> 0x08049231 <+33>: mov %esi,(%esp) 0x08049234 <+36>: movd %edx,%xmm1 0x08049238 <+40>: movd %eax,%xmm0 0x0804923c <+44>: punpckldq %xmm1,%xmm0 => 0x08049240 <+48>: movaps %xmm0,0x10(%esp) 0x08049245 <+53>: call 0x804e600 <__bid32_to_bid64> 0x0804924a <+58>: push %edx 0x0804924b <+59>: push %eax 0x0804924c <+60>: pushl 0x1c(%esp) 0x08049250 <+64>: pushl 0x1c(%esp) 0x08049254 <+68>: call 0x804b260 <__bid64_quiet_not_equal> 0x08049259 <+73>: add $0x34,%esp 0x0804925c <+76>: pop %ebx 0x0804925d <+77>: pop %esi 0x0804925e <+78>: ret when libgcc is compiled with -msse2. According to GCC manual: '-mpreferred-stack-boundary=NUM' Attempt to keep the stack boundary aligned to a 2 raised to NUM byte boundary. If '-mpreferred-stack-boundary' is not specified, the default is 4 (16 bytes or 128-bits). *Warning:* If you use this switch, then you must build all modules with the same value, including any libraries. This includes the system libraries and startup modules. c-c++-common/dfp/func-vararg-mixed-2.c, which was added by commit 3b2488ca6ece182f2136a20ee5fa0bb92f935b0f Author: H.J. Lu Date: Wed Jul 30 19:24:02 2008 +0000 func-vararg-alternate-d128-2.c: New. 2008-07-30 H.J. Lu Joey Ye * gcc.dg/dfp/func-vararg-alternate-d128-2.c: New. * gcc.dg/dfp/func-vararg-mixed-2.c: Likewise. isn't expected to work with libgcc. gcc/ PR target/95021 * config/i386/i386-features.c (has_non_address_hard_reg): Renamed to ... (pseudo_reg_set): This. Return the SET expression. Ignore pseudo register push. (general_scalar_to_vector_candidate_p): Combine single_set and has_non_address_hard_reg calls to pseudo_reg_set. (timode_scalar_to_vector_candidate_p): Likewise. * config/i386/i386.md (*pushv1ti2): New pattern. gcc/testsuite/ PR target/95021 * c-c++-common/dfp/func-vararg-mixed-2.c: Removed. * gcc.target/i386/pr95021-1.c: New test. * gcc.target/i386/pr95021-2.c: Likewise. * gcc.target/i386/pr95021-3.c: Likewise. * gcc.target/i386/pr95021-4.c: Likewise. * gcc.target/i386/pr95021-5.c: Likewise. --- gcc/ChangeLog | 12 +++ gcc/config/i386/i386-features.c | 37 ++++---- gcc/config/i386/i386.md | 16 ++++ gcc/testsuite/ChangeLog | 10 ++ .../c-c++-common/dfp/func-vararg-mixed-2.c | 105 --------------------- gcc/testsuite/gcc.target/i386/pr95021-1.c | 27 ++++++ gcc/testsuite/gcc.target/i386/pr95021-2.c | 39 ++++++++ gcc/testsuite/gcc.target/i386/pr95021-3.c | 5 + gcc/testsuite/gcc.target/i386/pr95021-4.c | 28 ++++++ gcc/testsuite/gcc.target/i386/pr95021-5.c | 45 +++++++++ 10 files changed, 203 insertions(+), 121 deletions(-) delete mode 100644 gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-5.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 60ff1ff..f03b996 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2020-05-17 H.J. Lu + + PR target/95021 + * config/i386/i386-features.c (has_non_address_hard_reg): + Renamed to ... + (pseudo_reg_set): This. Return the SET expression. Ignore + pseudo register push. + (general_scalar_to_vector_candidate_p): Combine single_set and + has_non_address_hard_reg calls to pseudo_reg_set. + (timode_scalar_to_vector_candidate_p): Likewise. + * config/i386/i386.md (*pushv1ti2): New pattern. + 2020-05-17 Aldy Hernandez Revert: diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index 78fb373..b9b764c 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -1253,25 +1253,36 @@ scalar_chain::convert () return converted_insns; } -/* Return 1 if INSN uses or defines a hard register. - Hard register uses in a memory address are ignored. - Clobbers and flags definitions are ignored. */ +/* Return the SET expression if INSN doesn't reference hard register. + Return NULL if INSN uses or defines a hard register, excluding + pseudo register pushes, hard register uses in a memory address, + clobbers and flags definitions. */ -static bool -has_non_address_hard_reg (rtx_insn *insn) +static rtx +pseudo_reg_set (rtx_insn *insn) { + rtx set = single_set (insn); + if (!set) + return NULL; + + /* Check pseudo register push first. */ + if (REG_P (SET_SRC (set)) + && !HARD_REGISTER_P (SET_SRC (set)) + && push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))) + return set; + df_ref ref; FOR_EACH_INSN_DEF (ref, insn) if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) && DF_REF_REGNO (ref) != FLAGS_REG) - return true; + return NULL; FOR_EACH_INSN_USE (ref, insn) if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) - return true; + return NULL; - return false; + return set; } /* Check if comparison INSN may be transformed @@ -1345,14 +1356,11 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode) static bool general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) { - rtx def_set = single_set (insn); + rtx def_set = pseudo_reg_set (insn); if (!def_set) return false; - if (has_non_address_hard_reg (insn)) - return false; - rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); @@ -1442,14 +1450,11 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) static bool timode_scalar_to_vector_candidate_p (rtx_insn *insn) { - rtx def_set = single_set (insn); + rtx def_set = pseudo_reg_set (insn); if (!def_set) return false; - if (has_non_address_hard_reg (insn)) - return false; - rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1bf0c1a..9fd32f2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1674,6 +1674,22 @@ ;; Push/pop instructions. +(define_insn_and_split "*pushv1ti2" + [(set (match_operand:V1TI 0 "push_operand" "=<") + (match_operand:V1TI 1 "register_operand" "v"))] + "TARGET_64BIT && TARGET_STV" + "#" + "&& reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode))); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +} + [(set_attr "type" "multi") + (set_attr "mode" "TI")]) + (define_insn "*push2" [(set (match_operand:DWI 0 "push_operand" "=<,<") (match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))] diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index fa3d018..54fbdb0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,15 @@ 2020-05-17 H.J. Lu + PR target/95021 + * c-c++-common/dfp/func-vararg-mixed-2.c: Removed. + * gcc.target/i386/pr95021-1.c: New test. + * gcc.target/i386/pr95021-2.c: Likewise. + * gcc.target/i386/pr95021-3.c: Likewise. + * gcc.target/i386/pr95021-4.c: Likewise. + * gcc.target/i386/pr95021-5.c: Likewise. + +2020-05-17 H.J. Lu + * gcc.target/i386/strncmp-1.c: New test. 2020-05-16 Iain Sandoe diff --git a/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c b/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c deleted file mode 100644 index 02cafb0..0000000 --- a/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c +++ /dev/null @@ -1,105 +0,0 @@ -/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ -/* { dg-options "-mpreferred-stack-boundary=2" } */ - -/* C99 6.5.2.2 Function calls. - Test passing varargs of the combination of decimal float types and - other types. */ - -#include -#include "dfp-dbg.h" - -/* Supposing the list of varying number of arguments is: - unsigned int, _Decimal128, double, _Decimal32, _Decimal64. */ - -static _Decimal32 -vararg_d32 (unsigned arg, ...) -{ - va_list ap; - _Decimal32 result; - - va_start (ap, arg); - - va_arg (ap, unsigned int); - va_arg (ap, _Decimal128); - va_arg (ap, double); - result = va_arg (ap, _Decimal32); - - va_end (ap); - return result; -} - -static _Decimal32 -vararg_d64 (unsigned arg, ...) -{ - va_list ap; - _Decimal64 result; - - va_start (ap, arg); - - va_arg (ap, unsigned int); - va_arg (ap, _Decimal128); - va_arg (ap, double); - va_arg (ap, _Decimal32); - result = va_arg (ap, _Decimal64); - - va_end (ap); - return result; -} - -static _Decimal128 -vararg_d128 (unsigned arg, ...) -{ - va_list ap; - _Decimal128 result; - - va_start (ap, arg); - - va_arg (ap, unsigned int); - result = va_arg (ap, _Decimal128); - - va_end (ap); - return result; -} - -static unsigned int -vararg_int (unsigned arg, ...) -{ - va_list ap; - unsigned int result; - - va_start (ap, arg); - - result = va_arg (ap, unsigned int); - - va_end (ap); - return result; -} - -static double -vararg_double (unsigned arg, ...) -{ - va_list ap; - float result; - - va_start (ap, arg); - - va_arg (ap, unsigned int); - va_arg (ap, _Decimal128); - result = va_arg (ap, double); - - va_end (ap); - return result; -} - - -int -main () -{ - if (vararg_d32 (3, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 3.0df) FAILURE - if (vararg_d64 (4, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 4.0dd) FAILURE - if (vararg_d128 (1, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 1.0dl) FAILURE - if (vararg_int (0, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 0) FAILURE - if (vararg_double (2, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 2.0) FAILURE - - FINISH -} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-1.c b/gcc/testsuite/gcc.target/i386/pr95021-1.c new file mode 100644 index 0000000..a0b9a26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-1.c @@ -0,0 +1,27 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ +/* { dg-final { scan-assembler "movq\[ \t\]%xmm\[0-9\]+, \\(%esp\\)" } } */ +/* { dg-final { scan-assembler-not "psrlq" } } */ + +#include + +extern jmp_buf buf; + +extern long long *target_p; +extern long long *c; + +extern void foo (long long); + +__attribute__ ((noclone, noinline)) +void +bar (void) +{ + if (setjmp (buf)) + { + long long target = *target_p; + *c = target; + foo (target); + } + else + foo (0); +} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-2.c b/gcc/testsuite/gcc.target/i386/pr95021-2.c new file mode 100644 index 0000000..53247e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-2.c @@ -0,0 +1,39 @@ +/* { dg-do run { target ia32 } } */ +/* { dg-require-effective-target sse2_runtime } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ + +#include +#include "pr95021-1.c" + +jmp_buf buf; + +long long *target_p; +long long *c; + +int count; + +__attribute__ ((noclone, noinline)) +void +foo (long long x) +{ + if (x != *c) + abort (); + if (!count) + { + count++; + longjmp (buf, 1); + } +} + +int +main () +{ + long long val = 30; + long long local = 0; + target_p = &val; + c = &local; + bar (); + if (val != local) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-3.c b/gcc/testsuite/gcc.target/i386/pr95021-3.c new file mode 100644 index 0000000..1748161 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-3.c @@ -0,0 +1,5 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */ +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */ + +#include "pr95021-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr95021-4.c b/gcc/testsuite/gcc.target/i386/pr95021-4.c new file mode 100644 index 0000000..d5bb28c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-4.c @@ -0,0 +1,28 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ +/* { dg-final { scan-assembler "(movaps|vmovdqa)\[ \t\]%xmm\[0-9\]+, \\(%rsp\\)" } } */ + +#include + +extern jmp_buf buf; + +extern __int128 *target_p; +__int128 *c; + +extern int count; + +extern void foo (__int128, __int128, __int128, __int128); + +__attribute__ ((noclone, noinline)) +void +bar (void) +{ + if (setjmp (buf)) + { + __int128 target = *target_p; + *c = target; + foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, target); + } + else + foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, 0); +} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-5.c b/gcc/testsuite/gcc.target/i386/pr95021-5.c new file mode 100644 index 0000000..d865809 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-5.c @@ -0,0 +1,45 @@ +/* { dg-do run { target int128 } } */ +/* { dg-require-effective-target sse2_runtime } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ + +#include +#include "pr95021-4.c" + +jmp_buf buf; + +__int128 *target_p; +__int128 *c; + +int count; + +__attribute__ ((noclone, noinline)) +void +foo (__int128 i1, __int128 i2, __int128 i3, __int128 x) +{ + if (i1 != 0xbadbeef1) + abort (); + if (i2 != 0x2badbeef) + abort (); + if (i3 != 0xbad3beef) + abort (); + if (x != *c) + abort (); + if (!count) + { + count++; + longjmp (buf, 1); + } +} + +int +main () +{ + __int128 val = 30; + __int128 local = 0; + target_p = &val; + c = &local; + bar (); + if (val != local) + abort (); + return 0; +} -- 2.7.4