nvptx: Add support for subword compare-and-swap
author Kwok Cheung Yeung <kcy@codesourcery.com>
Mon, 3 Aug 2020 15:38:13 +0000 (17:38 +0200)
committer Tom de Vries <tdevries@suse.de>
Thu, 13 Aug 2020 09:11:55 +0000 (11:11 +0200)
This adds support for __sync_val_compare_and_swap and
__sync_bool_compare_and_swap for 1-byte and 2-byte long
values, which are not natively supported on nvptx.

Build and reg-tested on nvptx.
Build and reg-tested libgomp on x86_64 with nvptx accelerator.

2020-07-16  Kwok Cheung Yeung  <kcy@codesourcery.com>

libgcc/
* config/nvptx/atomic.c: New.
* config/nvptx/t-nvptx (LIB2ADD): Add atomic.c.

gcc/testsuite/
* gcc.target/nvptx/ia64-sync-5.c: New.

libgomp/
* testsuite/libgomp.c-c++-common/reduction-16.c: New.

gcc/testsuite/gcc.target/nvptx/ia64-sync-5.c [new file with mode: 0644]
libgcc/config/nvptx/atomic.c [new file with mode: 0644]
libgcc/config/nvptx/t-nvptx
libgomp/testsuite/libgomp.c-c++-common/reduction-16.c [new file with mode: 0644]

diff --git a/gcc/testsuite/gcc.target/nvptx/ia64-sync-5.c b/gcc/testsuite/gcc.target/nvptx/ia64-sync-5.c
new file mode 100644 (file)
index 0000000..ec40f2c
--- /dev/null
@@ -0,0 +1,2 @@
+/* { dg-do run } */
+#include "../../gcc.dg/ia64-sync-5.c"
diff --git a/libgcc/config/nvptx/atomic.c b/libgcc/config/nvptx/atomic.c
new file mode 100644 (file)
index 0000000..e1ea078
--- /dev/null
@@ -0,0 +1,73 @@
+/* NVPTX atomic operations
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Contributed by Mentor Graphics.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+
+/* Implement __sync_val_compare_and_swap and __sync_bool_compare_and_swap
+   for 1 and 2-byte values (which are not natively supported) in terms of
+   __sync_val_compare_and_swap for 4-byte values (which is supported).
+   This assumes that the contents of the word surrounding the subword
+   value that we are interested in are accessible as well (which should
+   normally be the case).  Note that if the contents of the word surrounding
+   the subword changes between the __sync_val_compare_and_swap_4 and the
+   preceeding load of oldword, while the subword does not, the implementation
+   loops, which may manifest worst-case as a hang.  */
+
+#define __SYNC_SUBWORD_COMPARE_AND_SWAP(TYPE, SIZE)                         \
+                                                                            \
+TYPE                                                                        \
+__sync_val_compare_and_swap_##SIZE (TYPE *ptr, TYPE oldval, TYPE newval)     \
+{                                                                           \
+  unsigned int *wordptr = (unsigned int *)((__UINTPTR_TYPE__ ) ptr & ~3UL);  \
+  int shift = ((__UINTPTR_TYPE__ ) ptr & 3UL) * 8;                          \
+  unsigned int valmask = (1 << (SIZE * 8)) - 1;                                     \
+  unsigned int wordmask = ~(valmask << shift);                              \
+  unsigned int oldword = *wordptr;                                          \
+  for (;;)                                                                  \
+    {                                                                       \
+      TYPE prevval = (oldword >> shift) & valmask;                          \
+      /* Exit if the subword value previously read from memory is not */     \
+      /* equal to the expected value OLDVAL.  */                            \
+      if (__builtin_expect (prevval != oldval, 0))                          \
+       return prevval;                                                      \
+      unsigned int newword = oldword & wordmask;                            \
+      newword |= ((unsigned int) newval) << shift;                          \
+      unsigned int prevword                                                 \
+         = __sync_val_compare_and_swap_4 (wordptr, oldword, newword);       \
+      /* Exit only if the compare-and-swap succeeds on the whole word */     \
+      /* (i.e. the contents of *WORDPTR have not changed since the last */   \
+      /* memory read).  */                                                  \
+      if (__builtin_expect (prevword == oldword, 1))                        \
+       return oldval;                                                       \
+      oldword = prevword;                                                   \
+    }                                                                       \
+}                                                                           \
+                                                                            \
+bool                                                                        \
+__sync_bool_compare_and_swap_##SIZE (TYPE *ptr, TYPE oldval, TYPE newval)    \
+{                                                                           \
+  return __sync_val_compare_and_swap_##SIZE (ptr, oldval, newval) == oldval; \
+}
+
+__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned char, 1)   /* Defines the _1 pair.  */
+__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned short, 2)  /* Defines the _2 pair.  */
diff --git a/libgcc/config/nvptx/t-nvptx b/libgcc/config/nvptx/t-nvptx
index c4d20c9..ede0bf0 100644 (file)
@@ -1,5 +1,6 @@
 LIB2ADD=$(srcdir)/config/nvptx/reduction.c \
-       $(srcdir)/config/nvptx/mgomp.c
+       $(srcdir)/config/nvptx/mgomp.c \
+       $(srcdir)/config/nvptx/atomic.c
 
 LIB2ADDEH=
 LIB2FUNCS_EXCLUDE=__main
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c
new file mode 100644 (file)
index 0000000..d0e82b0
--- /dev/null
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 512
+
+#define GENERATE_TEST(T) /* Defines test_T (); it returns 0 on success.  */ \
+int test_##T (void)            \
+{                              \
+  T a[N], res = 0;             \
+  /* Fill a[] with alternating 0 and 1 values (a[0] == 0).  */ \
+  for (int i = 0; i < N; ++i)  \
+    a[i] = i & 1;              \
+  /* OpenMP target loop: OR-reduce a[] into res, which starts at 0.  */ \
+_Pragma("omp target teams distribute reduction(||:res) defaultmap(tofrom:scalar)") \
+  for (int i = 0; i < N; ++i)  \
+    res = res || a[i];         \
+                               \
+  /* a[] holds non-zero elements, so res should be non-zero.  */\
+  if (!res)                    \
+    return 1;                  \
+  /* OpenMP target loop: AND-reduce a[] into res, non-zero on entry.  */ \
+_Pragma("omp target teams distribute reduction(&&:res) defaultmap(tofrom:scalar)") \
+  for (int i = 0; i < N; ++i)  \
+    res = res && a[i];         \
+                               \
+  /* a[] holds zero elements, so res should be zero.  */   \
+  return res;                  \
+}
+
+GENERATE_TEST(char)   /* Defines test_char ().  */
+GENERATE_TEST(short)  /* Defines test_short ().  */
+GENERATE_TEST(int)    /* Defines test_int ().  */
+GENERATE_TEST(long)   /* Defines test_long ().  */
+#ifdef __SIZEOF_INT128__  /* Guard: generate the __int128 test only if set.  */
+GENERATE_TEST(__int128)
+#endif
+
+int main(void)
+{
+  /* Run each generated test in turn, stopping at the first one that
+     reports failure (a non-zero return).  */
+  int failed = test_char () || test_short () || test_int () || test_long ();
+
+#ifdef __SIZEOF_INT128__
+  /* The __int128 variant is only generated under the same guard.  */
+  failed = failed || test___int128 ();
+#endif
+
+  if (failed)
+    abort ();
+  return 0;
+}