From 09bc755deaa69b1377a8c050131f67cd276a51f3 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 22 Oct 2020 15:02:44 +0100 Subject: [PATCH] [OpenMP] Emit calls to int64_t functions for amdgcn [OpenMP] Emit calls to int64_t functions for amdgcn Two functions, syncwarp and active_thread_mask, return lanemask_t. Currently this is assumed to be int32, which is true for nvptx. Patch makes the type target architecture dependent. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D89746 --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 3 +++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 6 +++-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 9 +++++++ llvm/test/Transforms/OpenMP/add_attributes.ll | 16 +++++++++++++ .../Transforms/OpenMP/add_attributes_amdgcn.ll | 28 ++++++++++++++++++++++ 5 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 3228c1b..e814de6 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -226,6 +226,9 @@ public: omp::IdentFlag Flags = omp::IdentFlag(0), unsigned Reserve2Flags = 0); + // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL + Type *getLanemaskType(); + /// Generate control flow and cleanup for cancellation. /// /// \param CancelFlag Flag indicating if the cancellation is performed. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 337e942..b7a1558 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -153,6 +153,7 @@ __OMP_TYPE(Int32Ptr) __OMP_TYPE(Int64Ptr) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) +OMP_TYPE(LanemaskTy, getLanemaskType()) #define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo()) @@ -553,8 +554,9 @@ __OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy, Int16, VoidPtrPtr) __OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, ) -__OMP_RTL(__kmpc_syncwarp, false, Void, Int32) + +__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,) +__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy) __OMP_RTL(__last, false, Void, ) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f5b7e4e..dafa45c 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" @@ -217,6 +218,14 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, return Ident; } +Type *OpenMPIRBuilder::getLanemaskType() { + LLVMContext &Ctx = M.getContext(); + Triple triple(M.getTargetTriple()); + + // This test is adequate until deviceRTL has finer grained lane widths + return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx); +} + Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { Constant *&SrcLocStr = SrcLocStrMap[LocStr]; if (!SrcLocStr) { diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index cf1bd24..a448117 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -629,6 +629,10 @@ declare void @__kmpc_destroy_allocator(i32, i8*) declare void @__kmpc_push_target_tripcount(i64, i64) +declare i32 @__kmpc_warp_active_thread_mask() + +declare void @__kmpc_syncwarp(i32) + declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) @@ -1142,6 +1146,12 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask() + +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_syncwarp(i32) + ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) @@ -1661,6 +1671,12 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) +; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask() + +; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i32) + ; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) diff --git a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll new file mode 100644 index 0000000..acf547d --- /dev/null +++ b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -S -openmpopt | FileCheck %s +; RUN: opt < %s -S -passes=openmpopt | FileCheck %s +; RUN: opt < %s -S -openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC +; RUN: opt < %s -S -passes=openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC + +target triple = "amdgcn-amd-amdhsa" + +define void @call_all(i64 %arg) { + call void @__kmpc_syncwarp(i64 %arg) + call i64 @__kmpc_warp_active_thread_mask() + ret void +} + +declare i64 @__kmpc_warp_active_thread_mask() + +declare void @__kmpc_syncwarp(i64) + +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i64 @__kmpc_warp_active_thread_mask() + +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_syncwarp(i64) + +; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC-NEXT: declare i64 @__kmpc_warp_active_thread_mask() + +; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64) -- 2.7.4