int master = REGNO (cfun->machine->unisimt_master);
int pred = REGNO (cfun->machine->unisimt_predicate);
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc);
+ if (cfun->machine->unisimt_outside_simt_predicate)
+ {
+ int pred_outside_simt
+ = REGNO (cfun->machine->unisimt_outside_simt_predicate);
+ fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, 0;\n",
+ pred_outside_simt, master);
+ }
fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n");
/* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
fprintf (file, "\t{\n");
fprintf (file, "\t\t.reg.u32 %%ustmp2;\n");
fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0);
+ if (cfun->machine->unisimt_outside_simt_predicate)
+ {
+ int pred_outside_simt
+ = REGNO (cfun->machine->unisimt_outside_simt_predicate);
+ fprintf (file, "\t\tmov.pred %%r%d, %d;\n", pred_outside_simt,
+ entering ? 0 : 1);
+ }
if (!crtl->is_leaf)
{
int loc = REGNO (cfun->machine->unisimt_location);
return pred ? pred : pred = gen_reg_rtx (BImode);
}
+/* Return the register recorded in
+   cfun->machine->unisimt_outside_simt_predicate.  The first request
+   allocates a fresh BImode register with gen_reg_rtx and stores it there;
+   later requests return that same register.  */
+static rtx
+nvptx_get_unisimt_outside_simt_predicate ()
+{
+  rtx &slot = cfun->machine->unisimt_outside_simt_predicate;
+  if (!slot)
+    slot = gen_reg_rtx (BImode);
+  return slot;
+}
+
/* Return true if given call insn references one of the functions provided by
the CUDA runtime: malloc, free, vprintf. */
return false;
}
+/* Make INSN conditional on PRED: wrap its current pattern in a COND_EXEC
+   whose condition is (ne PRED 0) and install the result through
+   validate_change.  Asserts that the change was accepted.  */
+static void
+predicate_insn (rtx_insn *insn, rtx pred)
+{
+  rtx body = PATTERN (insn);
+  rtx cond = gen_rtx_NE (BImode, pred, const0_rtx);
+  rtx guarded = gen_rtx_COND_EXEC (VOIDmode, cond, body);
+
+  /* Not part of a change group (last argument false), so the result is
+     known immediately.  */
+  bool accepted = validate_change (insn, &PATTERN (insn), guarded, false);
+  gcc_assert (accepted);
+}
+
/* Adjust code for uniform-simt code generation variant by making atomics and
"syscalls" conditionally executed, and inserting shuffle-based propagation
for registers being set. */
}
rtx pred = nvptx_get_unisimt_predicate ();
- pred = gen_rtx_NE (BImode, pred, const0_rtx);
- pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat);
- bool changed_p = validate_change (insn, &PATTERN (insn), pat, false);
- gcc_assert (changed_p);
+ predicate_insn (insn, pred);
+
+ pred = NULL_RTX;
+ for (rtx_insn *post = NEXT_INSN (insn); post != next;
+ post = NEXT_INSN (post))
+ {
+ if (pred == NULL_RTX)
+ pred = nvptx_get_unisimt_outside_simt_predicate ();
+ predicate_insn (post, pred);
+ }
}
}
(define_insn "nvptx_warpsync"
[(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)]
"TARGET_PTX_6_0"
- "\\tbar.warp.sync\\t0xffffffff;"
- [(set_attr "predicable" "false")])
+ "%.\\tbar.warp.sync\\t0xffffffff;") /* Emits a single bar.warp.sync
+     with operand 0xffffffff; the insn is only enabled when TARGET_PTX_6_0
+     holds.  NOTE(review): the leading %. is presumably the backend's
+     punctuation for printing a guard predicate, needed once the insn no
+     longer sets predicable to false -- confirm against print_operand.  */
(define_insn "nvptx_uniform_warp_check"
[(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)]
""
{
- output_asm_insn ("{", NULL);
- output_asm_insn ("\\t" ".reg.b32" "\\t" "act;", NULL);
- output_asm_insn ("\\t" "vote.ballot.b32" "\\t" "act,1;", NULL);
- output_asm_insn ("\\t" ".reg.pred" "\\t" "uni;", NULL);
- output_asm_insn ("\\t" "setp.eq.b32" "\\t" "uni,act,0xffffffff;",
- NULL);
- output_asm_insn ("@ !uni\\t" "trap;", NULL);
- output_asm_insn ("@ !uni\\t" "exit;", NULL);
- output_asm_insn ("}", NULL);
+ const char *insns[] = { /* Assembly lines for this insn, handed to
+     output_asm_insn in array order; the NULL entry stops the loop
+     below.  The braces open and close a block in which act and
+     do_abort are declared.  */
+ "{",
+ "\\t" ".reg.b32" "\\t" "act;",
+ "%.\\t" "vote.ballot.b32" "\\t" "act,1;", /* NOTE(review): only this
+     line and the setp.ne line carry the %. prefix; presumably that is
+     where a guard predicate gets printed -- confirm.  */
+ "\\t" ".reg.pred" "\\t" "do_abort;",
+ "\\t" "mov.pred" "\\t" "do_abort,0;", /* do_abort starts as 0 and is
+     written again only by the setp.ne line that follows.  */
+ "%.\\t" "setp.ne.b32" "\\t" "do_abort,act,0xffffffff;", /* do_abort is
+     the result of comparing act against 0xffffffff for inequality.  */
+ "@ do_abort\\t" "trap;", /* trap and exit are both guarded by
+     do_abort.  */
+ "@ do_abort\\t" "exit;",
+ "}",
+ NULL
+ };
+ for (const char **p = &insns[0]; *p != NULL; p++) /* One
+     output_asm_insn call per array entry, with no operands.  */
+ output_asm_insn (*p, NULL);
return "";
- }
- [(set_attr "predicable" "false")])
+ })
(define_expand "memory_barrier"
[(set (match_dup 0)