In ptx isa 6.0, a new barrier instruction was added, and bar.sync was
redefined as barrier.sync.aligned.
The aligned modifier indicates that all threads in a CTA will execute the same
barrier instruction.
This seems fine for the form "bar.sync 0".
But a "bar.sync %rx,64" (as used for vector length > 32) may execute a
different barrier depending on the value of %rx, so we can't assume it's
aligned.
Fix this by using "barrier.sync %rx,64" instead.
Tested on x86_64 with nvptx accelerator.
gcc/ChangeLog:
2022-01-27 Tom de Vries <tdevries@suse.de>
* config/nvptx/nvptx-opts.h (enum ptx_version): Add PTX_VERSION_6_0.
* config/nvptx/nvptx.h (TARGET_PTX_6_0): New macro.
* config/nvptx/nvptx.md (define_insn "nvptx_barsync"): Use barrier
insn for TARGET_PTX_6_0.
enum ptx_version
{
PTX_VERSION_3_1,
+ PTX_VERSION_6_0,
PTX_VERSION_6_3,
PTX_VERSION_7_0
};
#define TARGET_SM75 (ptx_isa_option >= PTX_ISA_SM75)
#define TARGET_SM80 (ptx_isa_option >= PTX_ISA_SM80)
+#define TARGET_PTX_6_0 (ptx_version_option >= PTX_VERSION_6_0)
#define TARGET_PTX_6_3 (ptx_version_option >= PTX_VERSION_6_3)
#define TARGET_PTX_7_0 (ptx_version_option >= PTX_VERSION_7_0)
""
{
if (INTVAL (operands[1]) == 0)
- return "\\tbar.sync\\t%0;";
+ return (TARGET_PTX_6_0
+ ? "\\tbarrier.sync.aligned\\t%0;"
+ : "\\tbar.sync\\t%0;");
else
- return "\\tbar.sync\\t%0, %1;";
+ return (TARGET_PTX_6_0
+ ? "\\tbarrier.sync\\t%0, %1;"
+ : "\\tbar.sync\\t%0, %1;");
}
[(set_attr "predicable" "false")])