*rem_p = rem;
}
+void
+ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val,
+ enum rtx_code code, bool after,
+ bool doubleword)
+{
+ rtx old_reg, new_reg, old_mem, success, oldval, new_mem;
+ rtx_code_label *loop_label, *pause_label, *done_label;
+ machine_mode mode = GET_MODE (target);
+
+ old_reg = gen_reg_rtx (mode);
+ new_reg = old_reg;
+ loop_label = gen_label_rtx ();
+ pause_label = gen_label_rtx ();
+ done_label = gen_label_rtx ();
+ old_mem = copy_to_reg (mem);
+ emit_label (loop_label);
+ emit_move_insn (old_reg, old_mem);
+
+ /* Return value for atomic_fetch_op.  */
+ if (!after)
+ emit_move_insn (target, old_reg);
+
+ if (code == NOT)
+ {
+ new_reg = expand_simple_binop (mode, AND, new_reg, val, NULL_RTX,
+ true, OPTAB_LIB_WIDEN);
+ new_reg = expand_simple_unop (mode, code, new_reg, NULL_RTX, true);
+ }
+ else
+ new_reg = expand_simple_binop (mode, code, new_reg, val, NULL_RTX,
+ true, OPTAB_LIB_WIDEN);
+
+ /* Return value for atomic_op_fetch.  */
+ if (after)
+ emit_move_insn (target, new_reg);
+
+ /* Load memory again inside loop.  */
+ new_mem = copy_to_reg (mem);
+
+ /* Compare mem value with expected value.  */
+ if (doubleword)
+ {
+ machine_mode half_mode = (mode == DImode) ? SImode : DImode;
+ rtx low_new_mem = gen_lowpart (half_mode, new_mem);
+ rtx low_old_mem = gen_lowpart (half_mode, old_mem);
+ rtx high_new_mem = gen_highpart (half_mode, new_mem);
+ rtx high_old_mem = gen_highpart (half_mode, old_mem);
+ emit_cmp_and_jump_insns (low_new_mem, low_old_mem, NE, NULL_RTX,
+ half_mode, 1, pause_label,
+ profile_probability::guessed_never ());
+ emit_cmp_and_jump_insns (high_new_mem, high_old_mem, NE, NULL_RTX,
+ half_mode, 1, pause_label,
+ profile_probability::guessed_never ());
+ }
+ else
+ emit_cmp_and_jump_insns (new_mem, old_mem, NE, NULL_RTX,
+ GET_MODE (old_mem), 1, pause_label,
+ profile_probability::guessed_never ());
+
+ success = NULL_RTX;
+ oldval = old_mem;
+ expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
+ new_reg, false, MEMMODEL_SYNC_SEQ_CST,
+ MEMMODEL_RELAXED);
+ if (oldval != old_mem)
+ emit_move_insn (old_mem, oldval);
+
+ /* If the CAS failed, retry with the refreshed old_mem.  */
+ emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
+ GET_MODE (success), 1, loop_label,
+ profile_probability::guessed_never ());
+
+ /* The CAS succeeded; skip the pause block.  */
+ emit_jump_insn (gen_jump (done_label));
+ emit_barrier ();
+
+ /* If mem is not expected, refresh the expected value, pause and
+ loop back.  */
+ emit_label (pause_label);
+ emit_move_insn (old_mem, new_mem);
+ emit_insn (gen_pause ());
+ emit_jump_insn (gen_jump (loop_label));
+ emit_barrier ();
+
+ emit_label (done_label);
+}
+
#include "gt-i386-expand.h"
{ "-mstv", MASK_STV },
{ "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
{ "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
- { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
+ { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES },
+ { "-mrelax-cmpxchg-loop", MASK_RELAX_CMPXCHG_LOOP }
};
/* Additional flag options. */
IX86_ATTR_IX86_YES ("general-regs-only",
OPT_mgeneral_regs_only,
OPTION_MASK_GENERAL_REGS_ONLY),
+
+ IX86_ATTR_YES ("relax-cmpxchg-loop",
+ OPT_mrelax_cmpxchg_loop,
+ MASK_RELAX_CMPXCHG_LOOP),
};
location_t loc
extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
extern void ix86_split_mmx_punpck (rtx[], bool);
extern void ix86_expand_avx_vzeroupper (void);
+extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, rtx, enum rtx_code,
+ bool, bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
Target Mask(OMIT_LEAF_FRAME_POINTER) Save
Omit the frame pointer in leaf functions.
+mrelax-cmpxchg-loop
+Target Mask(RELAX_CMPXCHG_LOOP) Save
+Relax cmpxchg loop for atomic_fetch_{or,xor,and,nand} by adding a load and compare before cmpxchg; if the loaded value is not the expected one, pause and loop back to the load and compare.
+
mpc32
Target RejectNegative
Set 80387 floating-point precision to 32-bit.
(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
+(define_expand "atomic_fetch_<logic><mode>"
+ [(match_operand:SWI124 0 "register_operand")
+ (any_logic:SWI124
+ (match_operand:SWI124 1 "memory_operand")
+ (match_operand:SWI124 2 "register_operand"))
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
+{
+ ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
+ operands[2], <CODE>, false,
+ false);
+ DONE;
+})
+
+(define_expand "atomic_<logic>_fetch<mode>"
+ [(match_operand:SWI124 0 "register_operand")
+ (any_logic:SWI124
+ (match_operand:SWI124 1 "memory_operand")
+ (match_operand:SWI124 2 "register_operand"))
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
+{
+ ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
+ operands[2], <CODE>, true,
+ false);
+ DONE;
+})
+
+(define_expand "atomic_fetch_nand<mode>"
+ [(match_operand:SWI124 0 "register_operand")
+ (match_operand:SWI124 1 "memory_operand")
+ (match_operand:SWI124 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
+{
+ ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
+ operands[2], NOT, false,
+ false);
+ DONE;
+})
+
+(define_expand "atomic_nand_fetch<mode>"
+ [(match_operand:SWI124 0 "register_operand")
+ (match_operand:SWI124 1 "memory_operand")
+ (match_operand:SWI124 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
+{
+ ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
+ operands[2], NOT, true,
+ false);
+ DONE;
+})
+
+(define_expand "atomic_fetch_<logic><mode>"
+ [(match_operand:CASMODE 0 "register_operand")
+ (any_logic:CASMODE
+ (match_operand:CASMODE 1 "memory_operand")
+ (match_operand:CASMODE 2 "register_operand"))
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
+{
+ bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
+ || (<MODE>mode == TImode);
+ ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
+ operands[2], <CODE>, false,
+ doubleword);
+ DONE;
+})
+
+(define_expand "atomic_<logic>_fetch<mode>"
+ [(match_operand:CASMODE 0 "register_operand")
+ (any_logic:CASMODE
+ (match_operand:CASMODE 1 "memory_operand")
+ (match_operand:CASMODE 2 "register_operand"))
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
+{
+ bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
+ || (<MODE>mode == TImode);
+ ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
+ operands[2], <CODE>, true,
+ doubleword);
+ DONE;
+})
+
+(define_expand "atomic_fetch_nand<mode>"
+ [(match_operand:CASMODE 0 "register_operand")
+ (match_operand:CASMODE 1 "memory_operand")
+ (match_operand:CASMODE 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
+{
+ bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
+ || (<MODE>mode == TImode);
+ ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
+ operands[2], NOT, false,
+ doubleword);
+ DONE;
+})
+
+(define_expand "atomic_nand_fetch<mode>"
+ [(match_operand:CASMODE 0 "register_operand")
+ (match_operand:CASMODE 1 "memory_operand")
+ (match_operand:CASMODE 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
+{
+ bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
+ || (<MODE>mode == TImode);
+ ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
+ operands[2], NOT, true,
+ doubleword);
+ DONE;
+})
+
;; For operand 2 nonmemory_operand predicate is used instead of
;; register_operand to allow combiner to better optimize atomic
;; additions of constants.
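A note on coverage of the expanders above: the SWI124 patterns handle the 1-, 2- and 4-byte cases, while the CASMODE patterns handle DImode (doubleword on 32-bit targets) and, with cmpxchg16b, TImode (always doubleword). The two functions below are an illustrative sketch of built-ins that reach them, assuming -mrelax-cmpxchg-loop and, for the 16-byte case, -mcx16 on x86-64:

/* 2-byte case: expands through the SWI124 atomic_fetch_or pattern.  */
short
fetch_or16 (short *p, short v)
{
  return __atomic_fetch_or (p, v, __ATOMIC_SEQ_CST);
}

/* 16-byte case: expands through the CASMODE pattern with
   doubleword == true, so the early compare is split into two
   DImode compares.  */
__int128
fetch_and128 (__int128 *p, __int128 v)
{
  return __atomic_fetch_and (p, v, __ATOMIC_SEQ_CST);
}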
-mstack-protector-guard-reg=@var{reg} @gol
-mstack-protector-guard-offset=@var{offset} @gol
-mstack-protector-guard-symbol=@var{symbol} @gol
--mgeneral-regs-only -mcall-ms2sysv-xlogues @gol
+-mgeneral-regs-only -mcall-ms2sysv-xlogues -mrelax-cmpxchg-loop @gol
-mindirect-branch=@var{choice} -mfunction-return=@var{choice} @gol
-mindirect-branch-register -mneeded}
prevents the compiler from using floating-point, vector, mask and bound
registers.
+@item -mrelax-cmpxchg-loop
+@opindex mrelax-cmpxchg-loop
+Relax the cmpxchg loop by emitting an early load and compare before the
+cmpxchg; if the loaded value is not the expected one, execute a pause
+before looping back.  This reduces excessive cache-line bouncing and
+applies to all the atomic logic fetch built-ins that generate a
+compare-and-swap loop.
+
@item -mindirect-branch=@var{choice}
@opindex mindirect-branch
Convert indirect call and jump with @var{choice}. The default is
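A usage sketch for the documentation above (the file name and flags in the comment are illustrative): the relaxation can be enabled globally on the command line or per function through the target attribute wired up in i386-options.c.

/* Compile with:  gcc -O2 -mrelax-cmpxchg-loop example.c
   or annotate a single function as below.  */
__attribute__ ((target ("relax-cmpxchg-loop")))
int
relaxed_xor_fetch (int *p, int v)
{
  return __atomic_xor_fetch (p, v, __ATOMIC_RELAXED);
}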
--- /dev/null
+/* PR target/103069 */
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -march=x86-64 -mtune=generic -mrelax-cmpxchg-loop" } */
+/* { dg-final { scan-assembler-times "rep;?\[ \\t\]+nop" 32 } } */
+
+#include <stdint.h>
+
+#define FUNC_ATOMIC(TYPE, OP) \
+__attribute__ ((noinline, noclone)) \
+TYPE f_##TYPE##_##OP##_fetch (TYPE *a, TYPE b) \
+{ \
+ return __atomic_##OP##_fetch (a, b, __ATOMIC_RELAXED); \
+} \
+__attribute__ ((noinline, noclone)) \
+TYPE f_##TYPE##_fetch_##OP (TYPE *a, TYPE b) \
+{ \
+ return __atomic_fetch_##OP (a, b, __ATOMIC_RELAXED); \
+}
+
+FUNC_ATOMIC (int64_t, and)
+FUNC_ATOMIC (int64_t, nand)
+FUNC_ATOMIC (int64_t, or)
+FUNC_ATOMIC (int64_t, xor)
+FUNC_ATOMIC (int, and)
+FUNC_ATOMIC (int, nand)
+FUNC_ATOMIC (int, or)
+FUNC_ATOMIC (int, xor)
+FUNC_ATOMIC (short, and)
+FUNC_ATOMIC (short, nand)
+FUNC_ATOMIC (short, or)
+FUNC_ATOMIC (short, xor)
+FUNC_ATOMIC (char, and)
+FUNC_ATOMIC (char, nand)
+FUNC_ATOMIC (char, or)
+FUNC_ATOMIC (char, xor)
--- /dev/null
+/* PR target/103069 */
+/* { dg-do run } */
+/* { dg-additional-options "-O2 -march=x86-64 -mtune=generic" } */
+
+#include <stdlib.h>
+#include "pr103069-1.c"
+
+#define FUNC_ATOMIC_RELAX(TYPE, OP) \
+__attribute__ ((noinline, noclone, target ("relax-cmpxchg-loop"))) \
+TYPE relax_##TYPE##_##OP##_fetch (TYPE *a, TYPE b) \
+{ \
+ return __atomic_##OP##_fetch (a, b, __ATOMIC_RELAXED); \
+} \
+__attribute__ ((noinline, noclone, target ("relax-cmpxchg-loop"))) \
+TYPE relax_##TYPE##_fetch_##OP (TYPE *a, TYPE b) \
+{ \
+ return __atomic_fetch_##OP (a, b, __ATOMIC_RELAXED); \
+}
+
+FUNC_ATOMIC_RELAX (int64_t, and)
+FUNC_ATOMIC_RELAX (int64_t, nand)
+FUNC_ATOMIC_RELAX (int64_t, or)
+FUNC_ATOMIC_RELAX (int64_t, xor)
+FUNC_ATOMIC_RELAX (int, and)
+FUNC_ATOMIC_RELAX (int, nand)
+FUNC_ATOMIC_RELAX (int, or)
+FUNC_ATOMIC_RELAX (int, xor)
+FUNC_ATOMIC_RELAX (short, and)
+FUNC_ATOMIC_RELAX (short, nand)
+FUNC_ATOMIC_RELAX (short, or)
+FUNC_ATOMIC_RELAX (short, xor)
+FUNC_ATOMIC_RELAX (char, and)
+FUNC_ATOMIC_RELAX (char, nand)
+FUNC_ATOMIC_RELAX (char, or)
+FUNC_ATOMIC_RELAX (char, xor)
+
+#define TEST_ATOMIC_FETCH_LOGIC(TYPE, OP) \
+{ \
+ TYPE a = 11, b = 101, res, exp; \
+ res = relax_##TYPE##_##OP##_fetch (&a, b); \
+ a = 11; \
+ exp = f_##TYPE##_##OP##_fetch (&a, b); \
+ if (res != exp) \
+ abort (); \
+ a = 21, b = 92; \
+ res = relax_##TYPE##_fetch_##OP (&a, b); \
+ a = 21; \
+ exp = f_##TYPE##_fetch_##OP (&a, b); \
+ if (res != exp) \
+ abort (); \
+}
+
+int main (void)
+{
+ TEST_ATOMIC_FETCH_LOGIC (int64_t, and)
+ TEST_ATOMIC_FETCH_LOGIC (int64_t, nand)
+ TEST_ATOMIC_FETCH_LOGIC (int64_t, or)
+ TEST_ATOMIC_FETCH_LOGIC (int64_t, xor)
+ TEST_ATOMIC_FETCH_LOGIC (int, and)
+ TEST_ATOMIC_FETCH_LOGIC (int, nand)
+ TEST_ATOMIC_FETCH_LOGIC (int, or)
+ TEST_ATOMIC_FETCH_LOGIC (int, xor)
+ TEST_ATOMIC_FETCH_LOGIC (short, and)
+ TEST_ATOMIC_FETCH_LOGIC (short, nand)
+ TEST_ATOMIC_FETCH_LOGIC (short, or)
+ TEST_ATOMIC_FETCH_LOGIC (short, xor)
+ TEST_ATOMIC_FETCH_LOGIC (char, and)
+ TEST_ATOMIC_FETCH_LOGIC (char, nand)
+ TEST_ATOMIC_FETCH_LOGIC (char, or)
+ TEST_ATOMIC_FETCH_LOGIC (char, xor)
+ return 0;
+}