From 3e7d4e82dc9fecb051e9ac422c312b26206d5ecd Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 13 Jan 2022 13:13:44 +0100 Subject: [PATCH] [nvptx] Handle pre-sm_7x shared atomic store using atomic exchange The ptx isa specifies (for pre-sm_7x) that atomic operations on shared memory locations do not guarantee atomicity with respect to normal store instructions to the same address. This can be fixed by: - inserting barriers between normal stores and atomic operations to a common address - using atom.exch to store to locations accessed by other atomic operations. It's not clearly spelled out which barriers are needed, and a barrier seems more expensive than atomic exchange. Implement the pre-sm_7x shared atomic store using atomic exchange. That includes stores using generic addressing, since those may also point to shared memory. Tested on x86-64 with nvptx accelerator. gcc/ChangeLog: 2022-02-02 Tom de Vries * config/nvptx/nvptx-protos.h (nvptx_mem_maybe_shared_p): Declare. * config/nvptx/nvptx.cc (nvptx_mem_data_area): New static function. (nvptx_mem_maybe_shared_p): New function. * config/nvptx/nvptx.md (define_expand "atomic_store"): New define_expand. gcc/testsuite/ChangeLog: 2022-02-02 Tom de Vries * gcc.target/nvptx/atomic-store-1.c: New test. * gcc.target/nvptx/atomic-store-3.c: New test. * gcc.target/nvptx/stack-atomics-run.c: Update. 
--- gcc/config/nvptx/nvptx-protos.h | 1 + gcc/config/nvptx/nvptx.cc | 22 ++++++++++++++++ gcc/config/nvptx/nvptx.md | 30 ++++++++++++++++++++++ gcc/testsuite/gcc.target/nvptx/atomic-store-1.c | 26 +++++++++++++++++++ gcc/testsuite/gcc.target/nvptx/atomic-store-3.c | 25 ++++++++++++++++++ gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c | 6 ++++- 6 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/nvptx/atomic-store-1.c create mode 100644 gcc/testsuite/gcc.target/nvptx/atomic-store-3.c diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index a846e34..0bf9af4 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -60,5 +60,6 @@ extern const char *nvptx_output_simt_exit (rtx); extern const char *nvptx_output_red_partition (rtx, rtx); extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); extern bool nvptx_mem_local_p (rtx); +extern bool nvptx_mem_maybe_shared_p (const_rtx); #endif #endif diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 1b0227a..5b26c0f 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -76,6 +76,7 @@ #include "intl.h" #include "opts.h" #include "tree-pretty-print.h" +#include "rtl-iter.h" /* This file should be included last. 
*/ #include "target-def.h" @@ -2787,6 +2788,27 @@ nvptx_print_operand_address (FILE *file, machine_mode mode, rtx addr) nvptx_print_address_operand (file, addr, mode); } +static nvptx_data_area +nvptx_mem_data_area (const_rtx x) +{ + gcc_assert (GET_CODE (x) == MEM); + + const_rtx addr = XEXP (x, 0); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, addr, ALL) + if (SYMBOL_REF_P (*iter)) + return SYMBOL_DATA_AREA (*iter); + + return DATA_AREA_GENERIC; +} + +bool +nvptx_mem_maybe_shared_p (const_rtx x) +{ + nvptx_data_area area = nvptx_mem_data_area (x); + return area == DATA_AREA_SHARED || area == DATA_AREA_GENERIC; +} + /* Print an operand, X, to FILE, with an optional modifier in CODE. Meaning of CODE: diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index cced68e..1a283b4 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -2051,6 +2051,36 @@ } [(set_attr "atomic" "true")]) +(define_expand "atomic_store" + [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory + (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + struct address_info info; + decompose_mem_address (&info, operands[0]); + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + + if (TARGET_SM70) + /* Fall back to expand_atomic_store. */ + FAIL; + + bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]); + if (!maybe_shared_p) + /* Fall back to expand_atomic_store. 
*/ + FAIL; + + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_atomic_exchange (tmpreg, operands[0], operands[1], + operands[2])); + DONE; +}) + (define_insn "atomic_fetch_add" [(set (match_operand:SDIM 1 "memory_operand" "+m") (unspec_volatile:SDIM diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c new file mode 100644 index 0000000..cee3815 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c @@ -0,0 +1,26 @@ +/* Test the atomic store expansion for sm <= sm_6x targets, + shared state space. */ + +/* { dg-do compile } */ +/* { dg-options "-misa=sm_53" } */ + +enum memmodel +{ + MEMMODEL_SEQ_CST = 5 +}; + +unsigned int u32 __attribute__((shared)); +unsigned long long int u64 __attribute__((shared)); + +int +main() +{ + __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST); + __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST); + + return 0; +} + +/* { dg-final { scan-assembler-times "atom.shared.exch.b32" 1 } } */ +/* { dg-final { scan-assembler-times "atom.shared.exch.b64" 1 } } */ +/* { dg-final { scan-assembler-times "membar.cta" 4 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c new file mode 100644 index 0000000..cc0264f --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c @@ -0,0 +1,25 @@ +/* Test the atomic store expansion, global state space. 
*/ + +/* { dg-do compile } */ +/* { dg-additional-options "-Wno-long-long" } */ + +enum memmodel +{ + MEMMODEL_SEQ_CST = 5 +}; + +unsigned int u32; +unsigned long long int u64; + +int +main() +{ + __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST); + __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST); + + return 0; +} + +/* { dg-final { scan-assembler-times "st.global.u32" 1 } } */ +/* { dg-final { scan-assembler-times "st.global.u64" 1 } } */ +/* { dg-final { scan-assembler-times "membar.sys" 4 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c index ad8e2f8..cd04596 100644 --- a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c +++ b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c @@ -39,6 +39,10 @@ main (void) if (b != 1) __builtin_abort (); - + a = 1; + __atomic_store_n (&a, 0, MEMMODEL_RELAXED); + if (a != 0) + __builtin_abort (); + return 0; } -- 2.7.4