/* If SET subexpression of INSN sets a register, emit a shuffle instruction to
propagate its value from lane MASTER to current lane. Return true if a
shuffle was emitted after INSN, false otherwise. */
-static void
+static bool
nvptx_unisimt_handle_set (rtx set, rtx_insn *insn, rtx master)
{
rtx reg;
if (GET_CODE (set) == SET && REG_P (reg = SET_DEST (set)))
- emit_insn_after (nvptx_gen_shuffle (reg, reg, master, SHUFFLE_IDX), insn);
+ {
+ emit_insn_after (nvptx_gen_shuffle (reg, reg, master, SHUFFLE_IDX),
+ insn);
+ return true; /* A shuffle was emitted right after INSN. */
+ }
+
+ return false; /* Not a SET with a register destination: nothing emitted. */
}
/* Adjust code for uniform-simt code generation variant by making atomics and
continue;
rtx pat = PATTERN (insn);
rtx master = nvptx_get_unisimt_master ();
+ bool shuffle_p = false;
for (int i = 0; i < XVECLEN (pat, 0); i++)
- nvptx_unisimt_handle_set (XVECEXP (pat, 0, i), insn, master);
+ shuffle_p
+ |= nvptx_unisimt_handle_set (XVECEXP (pat, 0, i), insn, master);
+ if (shuffle_p && TARGET_PTX_6_0)
+ {
+ /* The shuffle is a sync, so uniformity is guaranteed. */
+ }
+ else
+ {
+ if (TARGET_PTX_6_0)
+ {
+ gcc_assert (!shuffle_p);
+ /* Emit after the insn, to guarantee uniformity. */
+ emit_insn_after (gen_nvptx_warpsync (), insn);
+ }
+ else
+ {
+ /* Emit after the insn (and before the shuffle, if there are any)
+ to check uniformity. */
+ emit_insn_after (gen_nvptx_uniform_warp_check (), insn);
+ }
+ }
+
rtx pred = nvptx_get_unisimt_predicate ();
pred = gen_rtx_NE (BImode, pred, const0_rtx);
pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat);
--- /dev/null
+/* { dg-options "-O2 -muniform-simt -mptx=3.1" } */
+
+enum memmodel
+{
+ MEMMODEL_RELAXED = 0, /* The only memory order this test passes to the builtin. */
+};
+
+int a = 0; /* Global object operated on by the compare-and-exchange in f. */
+
+int
+f (void)
+{
+ int expected = 1; /* Value the builtin compares against A. */
+ return __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED,
+ MEMMODEL_RELAXED); /* Desired value 0; weak flag 0, i.e. a strong CAS. */
+}
+
+/* { dg-final { scan-assembler-times "@%r\[0-9\]*\tatom.global.cas" 1 } } */
+/* { dg-final { scan-assembler-times "shfl.idx.b32" 1 } } */
+/* { dg-final { scan-assembler-times "vote.ballot.b32" 1 } } */