From 3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 20 Jul 2020 20:34:46 +0200 Subject: [PATCH] i386: Use lock prefixed insn instead of MFENCE [PR95750] MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently, __atomic_thread_fence(seq_cst) on x86 and x86-64 generates an mfence instruction. A dummy atomic instruction (a lock-prefixed instruction or xchg with a memory operand) would provide the same sequential consistency guarantees while being more efficient on most current CPUs. The mfence instruction additionally orders non-temporal stores, which are not relevant for atomic operations and are not ordered by seq_cst atomic operations anyway. 2020-07-20 Uroš Bizjak gcc/ChangeLog: PR target/95750 * config/i386/i386.h (TARGET_AVOID_MFENCE): Rename from TARGET_USE_XCHG_FOR_ATOMIC_STORE. * config/i386/sync.md (mfence_sse2): Disable for TARGET_AVOID_MFENCE. (mfence_nosse): Enable also for TARGET_AVOID_MFENCE. Emit stack referred memory in word_mode. (mem_thread_fence): Do not generate mfence_sse2 pattern when TARGET_AVOID_MFENCE is true. (atomic_store): Update for rename. * config/i386/x86-tune.def (X86_TUNE_AVOID_MFENCE): Rename from X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE. gcc/testsuite/ChangeLog: PR target/95750 * gcc.target/i386/pr95750.c: New test. 
--- gcc/config/i386/i386.h | 3 +-- gcc/config/i386/sync.md | 21 ++++++++++++++------- gcc/config/i386/x86-tune.def | 4 ++-- gcc/testsuite/gcc.target/i386/pr95750.c | 19 +++++++++++++++++++ 4 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr95750.c diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f4a8f13..114967e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -598,8 +598,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] #define TARGET_ONE_IF_CONV_INSN \ ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN] -#define TARGET_USE_XCHG_FOR_ATOMIC_STORE \ - ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE] +#define TARGET_AVOID_MFENCE ix86_tune_features[X86_TUNE_AVOID_MFENCE] #define TARGET_EMIT_VZEROUPPER \ ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] #define TARGET_EXPAND_ABS \ diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index e221090..c682703 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -89,7 +89,8 @@ (define_insn "mfence_sse2" [(set (match_operand:BLK 0) (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] - "TARGET_64BIT || TARGET_SSE2" + "(TARGET_64BIT || TARGET_SSE2) + && !TARGET_AVOID_MFENCE" "mfence" [(set_attr "type" "sse") (set_attr "length_address" "0") @@ -100,8 +101,14 @@ [(set (match_operand:BLK 0) (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE)) (clobber (reg:CC FLAGS_REG))] - "!(TARGET_64BIT || TARGET_SSE2)" - "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}" + "!(TARGET_64BIT || TARGET_SSE2) + || TARGET_AVOID_MFENCE" +{ + rtx mem = gen_rtx_MEM (word_mode, stack_pointer_rtx); + + output_asm_insn ("lock{%;} or%z0\t{$0, %0|%0, 0}", &mem); + return ""; +} [(set_attr "memory" "unknown")]) (define_expand "mem_thread_fence" @@ -117,7 +124,8 @@ rtx (*mfence_insn)(rtx); rtx mem; - if (TARGET_64BIT || TARGET_SSE2) + if ((TARGET_64BIT || TARGET_SSE2) + && !TARGET_AVOID_MFENCE) 
mfence_insn = gen_mfence_sse2; else mfence_insn = gen_mfence_nosse; @@ -306,11 +314,10 @@ { operands[1] = force_reg (mode, operands[1]); - /* For seq-cst stores, use XCHG when we lack MFENCE - or when target prefers XCHG. */ + /* For seq-cst stores, use XCHG when we lack MFENCE. */ if (is_mm_seq_cst (model) && (!(TARGET_64BIT || TARGET_SSE2) - || TARGET_USE_XCHG_FOR_ATOMIC_STORE)) + || TARGET_AVOID_MFENCE)) { emit_insn (gen_atomic_exchange (gen_reg_rtx (mode), operands[0], operands[1], diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 1776aba..6eff825 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -313,8 +313,8 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC) -/* X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE: Use xchg instead of mov+mfence. */ -DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store", +/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */ +DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence", m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC) /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by diff --git a/gcc/testsuite/gcc.target/i386/pr95750.c b/gcc/testsuite/gcc.target/i386/pr95750.c new file mode 100644 index 0000000..c47108f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95750.c @@ -0,0 +1,19 @@ +/* PR target/95750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=core2" } */ + +void +foo (void) +{ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +} + +int x; + +void +bar (void) +{ + __atomic_store_n (&x, -1, __ATOMIC_SEQ_CST); +} + +/* { dg-final { scan-assembler-not "mfence" } } */ -- 2.7.4