From 4d50803ce49ce6b57c4865361c9ba0ad7063b7be Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 28 Oct 2021 12:33:25 +0100 Subject: [PATCH] [libomptarget] Build DeviceRTL for amdgpu Passes same tests as the current deviceRTL. Includes cmake change from D111987. CI is showing a different set of pass/fails to local, committing this without the tests enabled by default while debugging that difference. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D112227 --- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 2 +- openmp/libomptarget/DeviceRTL/CMakeLists.txt | 3 +- .../libomptarget/DeviceRTL/src/Configuration.cpp | 4 +- .../libomptarget/DeviceRTL/src/Synchronization.cpp | 76 ++++++++++++++++++++-- .../libomptarget/test/mapping/data_member_ref.cpp | 1 + .../declare_mapper_nested_default_mappers.cpp | 1 + .../test/mapping/declare_mapper_nested_mappers.cpp | 1 + .../test/mapping/delete_inf_refcount.c | 1 + .../libomptarget/test/mapping/lambda_by_value.cpp | 1 + .../libomptarget/test/mapping/ompx_hold/struct.c | 1 + .../libomptarget/test/mapping/ptr_and_obj_motion.c | 1 + .../test/mapping/reduction_implicit_map.cpp | 1 + openmp/libomptarget/test/offloading/bug49021.cpp | 1 + openmp/libomptarget/test/offloading/bug49334.cpp | 2 +- openmp/libomptarget/test/offloading/bug50022.cpp | 1 + .../test/offloading/global_constructor.cpp | 1 + .../libomptarget/test/offloading/host_as_target.c | 1 + .../libomptarget/test/unified_shared_memory/api.c | 1 + .../test/unified_shared_memory/close_enter_exit.c | 1 + .../test/unified_shared_memory/close_modifier.c | 1 + .../test/unified_shared_memory/shared_update.c | 1 + 21 files changed, 90 insertions(+), 13 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 5400e26..b138000 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -252,7 +252,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, false)) - BitcodeSuffix = "new-amdgcn-" + GPUArch; + BitcodeSuffix = "new-amdgpu-" + GPUArch; else BitcodeSuffix = "amdgcn-" + GPUArch; diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index a4f9862..419c64d 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -226,6 +226,5 @@ foreach(sm ${nvptx_sm_list}) endforeach() foreach(mcpu ${amdgpu_mcpus}) - # require D112227 or similar to enable the compilation for amdgpu - # compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib) + compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib) endforeach() diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp index 2b6f20f..f7c61dc 100644 --- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -20,9 +20,9 @@ using namespace _OMP; #pragma omp declare target -extern uint32_t __omp_rtl_debug_kind; +extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU -// TOOD: We want to change the name as soon as the old runtime is gone. +// TODO: We want to change the name as soon as the old runtime is gone. DeviceEnvironmentTy CONSTANT(omptarget_device_environment) __attribute__((used)); diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp index d09461a..931dffc 100644 --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -68,8 +68,23 @@ uint64_t atomicAdd(uint64_t *Address, uint64_t Val, int Ordering) { ///{ #pragma omp begin declare variant match(device = {arch(amdgcn)}) -uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) { - return __builtin_amdgcn_atomic_inc32(Address, Val, Ordering, ""); +uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) { + // builtin_amdgcn_atomic_inc32 should expand to this switch when + // passed a runtime value, but does not do so yet. Workaround here. + switch (Ordering) { + default: + __builtin_unreachable(); + case __ATOMIC_RELAXED: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, ""); + case __ATOMIC_ACQUIRE: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, ""); + case __ATOMIC_RELEASE: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, ""); + case __ATOMIC_ACQ_REL: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, ""); + case __ATOMIC_SEQ_CST: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, ""); + } } uint32_t SHARED(namedBarrierTracker); @@ -126,6 +141,52 @@ void namedBarrier() { fence::team(__ATOMIC_RELEASE); } +// sema checking of amdgcn_fence is aggressive. Intention is to patch clang +// so that it is usable within a template environment and so that a runtime +// value of the memory order is expanded to this switch within clang/llvm. +void fenceTeam(int Ordering) { + switch (Ordering) { + default: + __builtin_unreachable(); + case __ATOMIC_ACQUIRE: + return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup"); + case __ATOMIC_RELEASE: + return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup"); + case __ATOMIC_ACQ_REL: + return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup"); + case __ATOMIC_SEQ_CST: + return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); + } +} +void fenceKernel(int Ordering) { + switch (Ordering) { + default: + __builtin_unreachable(); + case __ATOMIC_ACQUIRE: + return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent"); + case __ATOMIC_RELEASE: + return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent"); + case __ATOMIC_ACQ_REL: + return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent"); + case __ATOMIC_SEQ_CST: + return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); + } +} +void fenceSystem(int Ordering) { + switch (Ordering) { + default: + __builtin_unreachable(); + case __ATOMIC_ACQUIRE: + return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ""); + case __ATOMIC_RELEASE: + return __builtin_amdgcn_fence(__ATOMIC_RELEASE, ""); + case __ATOMIC_ACQ_REL: + return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, ""); + case __ATOMIC_SEQ_CST: + return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); + } +} + void syncWarp(__kmpc_impl_lanemask_t) { // AMDGCN doesn't need to sync threads in a warp } @@ -133,11 +194,12 @@ void syncWarp(__kmpc_impl_lanemask_t) { void syncThreads() { __builtin_amdgcn_s_barrier(); } void syncThreadsAligned() { syncThreads(); } -void fenceTeam(int Ordering) { __builtin_amdgcn_fence(Ordering, "workgroup"); } - -void fenceKernel(int Ordering) { __builtin_amdgcn_fence(Ordering, "agent"); } - -void fenceSystem(int Ordering) { __builtin_amdgcn_fence(Ordering, ""); } +// TODO: Don't have wavefront lane locks. Possibly can't have them. +void unsetLock(omp_lock_t *) { __builtin_trap(); } +int testLock(omp_lock_t *) { __builtin_trap(); } +void initLock(omp_lock_t *) { __builtin_trap(); } +void destroyLock(omp_lock_t *) { __builtin_trap(); } +void setLock(omp_lock_t *) { __builtin_trap(); } #pragma omp end declare variant ///} diff --git a/openmp/libomptarget/test/mapping/data_member_ref.cpp b/openmp/libomptarget/test/mapping/data_member_ref.cpp index ec23890..dff5987 100644 --- a/openmp/libomptarget/test/mapping/data_member_ref.cpp +++ b/openmp/libomptarget/test/mapping/data_member_ref.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include diff --git a/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp b/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp index 7edd7db..7825d98 100644 --- a/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp +++ b/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp b/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp index c8986dd..bf2addd 100644 --- a/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp +++ b/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/delete_inf_refcount.c b/openmp/libomptarget/test/mapping/delete_inf_refcount.c index cd67ddd..c6d2bda 100644 --- a/openmp/libomptarget/test/mapping/delete_inf_refcount.c +++ b/openmp/libomptarget/test/mapping/delete_inf_refcount.c @@ -2,6 +2,7 @@ // fails with error message 'Unable to generate target entries' on amdgcn // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/lambda_by_value.cpp b/openmp/libomptarget/test/mapping/lambda_by_value.cpp index 6e35324..9cd3833 100644 --- a/openmp/libomptarget/test/mapping/lambda_by_value.cpp +++ b/openmp/libomptarget/test/mapping/lambda_by_value.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/ompx_hold/struct.c b/openmp/libomptarget/test/mapping/ompx_hold/struct.c index 2a0626b..fc63e86 100644 --- a/openmp/libomptarget/test/mapping/ompx_hold/struct.c +++ b/openmp/libomptarget/test/mapping/ompx_hold/struct.c @@ -3,6 +3,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/ptr_and_obj_motion.c b/openmp/libomptarget/test/mapping/ptr_and_obj_motion.c index ddea2fb..4852561 100644 --- a/openmp/libomptarget/test/mapping/ptr_and_obj_motion.c +++ b/openmp/libomptarget/test/mapping/ptr_and_obj_motion.c @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include diff --git a/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp b/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp index 040accd..24b97bd 100644 --- a/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp +++ b/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // UNSUPPORTED: amdgcn-amd-amdhsa +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL #include diff --git a/openmp/libomptarget/test/offloading/bug49021.cpp b/openmp/libomptarget/test/offloading/bug49021.cpp index 521adf2..1e456af 100644 --- a/openmp/libomptarget/test/offloading/bug49021.cpp +++ b/openmp/libomptarget/test/offloading/bug49021.cpp @@ -2,6 +2,7 @@ // Wrong results on amdgcn // UNSUPPORTED: amdgcn-amd-amdhsa +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL #include diff --git a/openmp/libomptarget/test/offloading/bug49334.cpp b/openmp/libomptarget/test/offloading/bug49334.cpp index 0ba0815..4907d32 100644 --- a/openmp/libomptarget/test/offloading/bug49334.cpp +++ b/openmp/libomptarget/test/offloading/bug49334.cpp @@ -2,7 +2,7 @@ // Currently hangs on amdgpu // UNSUPPORTED: amdgcn-amd-amdhsa - +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL // UNSUPPORTED: x86_64-pc-linux-gnu #include diff --git a/openmp/libomptarget/test/offloading/bug50022.cpp b/openmp/libomptarget/test/offloading/bug50022.cpp index a520442..ca1f0e1 100644 --- a/openmp/libomptarget/test/offloading/bug50022.cpp +++ b/openmp/libomptarget/test/offloading/bug50022.cpp @@ -1,6 +1,7 @@ // RUN: %libomptarget-compilexx-and-run-generic // UNSUPPORTED: amdgcn-amd-amdhsa +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/offloading/global_constructor.cpp b/openmp/libomptarget/test/offloading/global_constructor.cpp index d73fe1a..ae602df 100644 --- a/openmp/libomptarget/test/offloading/global_constructor.cpp +++ b/openmp/libomptarget/test/offloading/global_constructor.cpp @@ -2,6 +2,7 @@ // Fails in DAGToDAG on an address space problem // UNSUPPORTED: amdgcn-amd-amdhsa +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/offloading/host_as_target.c b/openmp/libomptarget/test/offloading/host_as_target.c index c25a480..1e7cdef 100644 --- a/openmp/libomptarget/test/offloading/host_as_target.c +++ b/openmp/libomptarget/test/offloading/host_as_target.c @@ -9,6 +9,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/unified_shared_memory/api.c b/openmp/libomptarget/test/unified_shared_memory/api.c index 7282491..fcb5318 100644 --- a/openmp/libomptarget/test/unified_shared_memory/api.c +++ b/openmp/libomptarget/test/unified_shared_memory/api.c @@ -4,6 +4,7 @@ // Fails on amdgcn with error: GPU Memory Error // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c b/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c index e159ed8..62555d2 100644 --- a/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c +++ b/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c @@ -5,6 +5,7 @@ // Fails on amdgcn with error: GPU Memory Error // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/unified_shared_memory/close_modifier.c b/openmp/libomptarget/test/unified_shared_memory/close_modifier.c index 6667fd8..98f1322 100644 --- a/openmp/libomptarget/test/unified_shared_memory/close_modifier.c +++ b/openmp/libomptarget/test/unified_shared_memory/close_modifier.c @@ -5,6 +5,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/unified_shared_memory/shared_update.c b/openmp/libomptarget/test/unified_shared_memory/shared_update.c index ab9b3e8..2b90cf3 100644 --- a/openmp/libomptarget/test/unified_shared_memory/shared_update.c +++ b/openmp/libomptarget/test/unified_shared_memory/shared_update.c @@ -4,6 +4,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include -- 2.7.4