From 58f7c7e098b79c96403c8341823ec3ba1e8b3945 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Mon, 17 May 2021 10:11:52 +0200 Subject: [PATCH] [nvptx] Handle memmodel for atomic ops The atomic ops in nvptx.md have memmodel arguments, which are currently ignored. Handle these, fixing test-case fails libgomp.c-c++-common/reduction-{5,6}.c on volta. Tested libgomp on x86_64-linux with nvptx accelerator. gcc/ChangeLog: 2021-05-17 Tom de Vries PR target/100497 * config/nvptx/nvptx-protos.h (nvptx_output_atomic_insn): Declare * config/nvptx/nvptx.c (nvptx_output_barrier) (nvptx_output_atomic_insn): New function. (nvptx_print_operand): Add support for 'B'. * config/nvptx/nvptx.md: Use nvptx_output_atomic_insn for atomic insns. --- gcc/config/nvptx/nvptx-protos.h | 1 + gcc/config/nvptx/nvptx.c | 77 +++++++++++++++++++++++++++++++++++++++++ gcc/config/nvptx/nvptx.md | 31 ++++++++++++++--- 3 files changed, 104 insertions(+), 5 deletions(-) diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index 1512209..b7e6ae2 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -57,5 +57,6 @@ extern const char *nvptx_output_set_softstack (unsigned); extern const char *nvptx_output_simt_enter (rtx, rtx, rtx); extern const char *nvptx_output_simt_exit (rtx); extern const char *nvptx_output_red_partition (rtx, rtx); +extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); #endif #endif diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index ebbfa92..722b0fa 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2444,6 +2444,53 @@ nvptx_output_mov_insn (rtx dst, rtx src) return "%.\tcvt%t0%t1\t%0, %1;"; } +/* Output a pre/post barrier for MEM_OPERAND according to MEMMODEL. 
*/ + + static void + nvptx_output_barrier (rtx *mem_operand, int memmodel, bool pre_p) +{ + bool post_p = !pre_p; + + switch (memmodel) + { + case MEMMODEL_RELAXED: + return; + case MEMMODEL_CONSUME: + case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: + if (post_p) + break; + return; + case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: + if (pre_p) + break; + return; + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: + if (pre_p || post_p) + break; + return; + default: + gcc_unreachable (); + } + + output_asm_insn ("%.\tmembar%B0;", mem_operand); +} + +const char * +nvptx_output_atomic_insn (const char *asm_template, rtx *operands, int mem_pos, + int memmodel_pos) +{ + nvptx_output_barrier (&operands[mem_pos], INTVAL (operands[memmodel_pos]), + true); + output_asm_insn (asm_template, operands); + nvptx_output_barrier (&operands[mem_pos], INTVAL (operands[memmodel_pos]), + false); + return ""; +} + static void nvptx_print_operand (FILE *, rtx, int); /* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this @@ -2660,6 +2707,36 @@ nvptx_print_operand (FILE *file, rtx x, int code) switch (code) { + case 'B': + if (SYMBOL_REF_P (XEXP (x, 0))) + switch (SYMBOL_DATA_AREA (XEXP (x, 0))) + { + case DATA_AREA_GENERIC: + /* Assume worst-case: global. */ + gcc_fallthrough (); /* FALLTHROUGH. */ + case DATA_AREA_GLOBAL: + break; + case DATA_AREA_SHARED: + fputs (".cta", file); + return; + case DATA_AREA_LOCAL: + case DATA_AREA_CONST: + case DATA_AREA_PARAM: + default: + gcc_unreachable (); + } + + /* There are 2 cases where membar.sys differs from membar.gl: + - host accesses global memory (e.g. systemwide atomics) + - 2 or more devices are set up in peer-to-peer mode, and one + peer can access global memory of the other peer. + Neither is currently supported by OpenMP/OpenACC on nvptx, but + that could change, so we default to membar.sys. 
We could support + this more optimally by adding DATA_AREA_SYS and then emitting + .gl for DATA_AREA_GLOBAL and .sys for DATA_AREA_SYS. */ + fputs (".sys", file); + return; + case 'A': x = XEXP (x, 0); gcc_fallthrough (); /* FALLTHROUGH. */ diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 00bb8fe..108de1c 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -1642,7 +1642,11 @@ (set (match_dup 1) (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] "" - "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;" + { + const char *t + = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; + return nvptx_output_atomic_insn (t, operands, 1, 4); + } [(set_attr "atomic" "true")]) (define_insn "atomic_exchange" @@ -1654,7 +1658,11 @@ (set (match_dup 1) (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input "" - "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;" + { + const char *t + = "%.\tatom%A1.exch.b%T0\t%0, %1, %2;"; + return nvptx_output_atomic_insn (t, operands, 1, 3); + } [(set_attr "atomic" "true")]) (define_insn "atomic_fetch_add" @@ -1667,7 +1675,11 @@ (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") (match_dup 1))] "" - "%.\\tatom%A1.add%t0\\t%0, %1, %2;" + { + const char *t + = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; + return nvptx_output_atomic_insn (t, operands, 1, 3); + } [(set_attr "atomic" "true")]) (define_insn "atomic_fetch_addsf" @@ -1680,7 +1692,11 @@ (set (match_operand:SF 0 "nvptx_register_operand" "=R") (match_dup 1))] "" - "%.\\tatom%A1.add%t0\\t%0, %1, %2;" + { + const char *t + = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; + return nvptx_output_atomic_insn (t, operands, 1, 3); + } [(set_attr "atomic" "true")]) (define_code_iterator any_logic [and ior xor]) @@ -1696,7 +1712,12 @@ (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") (match_dup 1))] "mode == SImode || TARGET_SM35" - "%.\\tatom%A1.b%T0.\\t%0, %1, %2;" + { + const char *t + = "%.\\tatom%A1.b%T0.\\t%0, %1, %2;"; + return nvptx_output_atomic_insn (t, operands, 1, 
3); + } + [(set_attr "atomic" "true")]) (define_expand "atomic_test_and_set" -- 2.7.4