From 1bf767fb8e6066bd87560378c23185a412fb2538 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Sun, 30 Sep 2018 09:23:21 +0000 Subject: [PATCH] [libomptarget-nvptx] Align data sharing stack NVPTX requires addresses of pointer locations to be 8-byte aligned or there will be an exception during runtime. This could happen without this patch as shown in the added test: getId() requires 4 byte of stack and putValueInParallel() uses 16 bytes to store the addresses of the captured variables. Differential Revision: https://reviews.llvm.org/D52655 llvm-svn: 343402 --- .../deviceRTLs/nvptx/src/data_sharing.cu | 7 +++ .../deviceRTLs/nvptx/test/data_sharing/alignment.c | 55 ++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 openmp/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu index 2125c36..c7b9bdf 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -384,6 +384,13 @@ EXTERN void* __kmpc_data_sharing_push_stack(size_t DataSize, return omptarget_nvptx_SimpleThreadPrivateContext::Allocate(DataSize); } + // Add worst-case padding to DataSize so that future stack allocations are + // correctly aligned. + const size_t Alignment = 8; + if (DataSize % Alignment != 0) { + DataSize += (Alignment - DataSize % Alignment); + } + // Frame pointer must be visible to all workers in the same warp. unsigned WID = getWarpId(); void *&FrameP = DataSharingState.FramePtr[WID]; diff --git a/openmp/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c b/openmp/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c new file mode 100644 index 0000000..dd17ae7 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c @@ -0,0 +1,55 @@ +// RUN: %compile-run-and-check + +#include +#include + +#pragma omp declare target +static void putValueInParallel(int *ptr, int value) { + #pragma omp parallel + { + *ptr = value; + } +} + +static int getId() { + int id; + putValueInParallel(&id, omp_get_thread_num()); + return id; +} +#pragma omp end declare target + +const int MaxThreads = 1024; +const int Threads = 64; + +int main(int argc, char *argv[]) { + int master; + int check[MaxThreads]; + for (int i = 0; i < MaxThreads; i++) { + check[i] = 0; + } + + #pragma omp target map(master, check[:]) + { + master = getId(); + + #pragma omp parallel num_threads(Threads) + { + check[omp_get_thread_num()] = getId(); + } + } + + // CHECK: master = 0. + printf("master = %d.\n", master); + // CHECK-NOT: invalid + for (int i = 0; i < MaxThreads; i++) { + if (i < Threads) { + if (check[i] != i) { + printf("invalid: check[%d] should be %d, is %d\n", i, i, check[i]); + } + } else if (check[i] != 0) { + printf("invalid: check[%d] should be 0, is %d\n", i, check[i]); + } + } + + return 0; +} -- 2.7.4