From 90eaedda9b8ef46e2c0c1b8bce33e98a3adbb68c Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Sep 2020 10:23:14 -0400 Subject: [PATCH] [OpenMP] Replace OpenMP RTL Functions With OMPIRBuilder and OMPKinds.def Summary: Replace the OpenMP Runtime Library functions used in CGOpenMPRuntimeGPU for OpenMP device code generation with ones in OMPKinds.def and use OMPIRBuilder for generating runtime calls. This allows us to consolidate more OpenMP code generation into the OMPIRBuilder. This patch also invalidates specifying target architectures with conflicting pointer sizes. Reviewers: jdoerfert Subscribers: aaron.ballman cfe-commits guansong llvm-commits sstefan1 yaxunl Tags: #OpenMP #Clang #LLVM Differential Revision: https://reviews.llvm.org/D88430 --- clang/lib/CodeGen/CGOpenMPRuntime.h | 5 +- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 590 +++++-------------------- clang/lib/CodeGen/CodeGenModule.h | 10 - clang/test/OpenMP/nvptx_parallel_codegen.cpp | 8 +- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 55 ++- llvm/test/Transforms/OpenMP/add_attributes.ll | 338 +++++++------- 6 files changed, 319 insertions(+), 687 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 41fa9f5..e39c2e1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -306,6 +306,9 @@ protected: CodeGenModule &CGM; StringRef FirstSeparator, Separator; + /// An OpenMP-IR-Builder instance. + llvm::OpenMPIRBuilder OMPBuilder; + /// Constructor allowing to redefine the name separator for the variables. explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator); @@ -386,8 +389,6 @@ protected: llvm::Value *getCriticalRegionLock(StringRef CriticalName); private: - /// An OpenMP-IR-Builder instance. - llvm::OpenMPIRBuilder OMPBuilder; /// Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap OpenMPDebugLocMapTy; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index d9ef6c2..dbd24d33c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -28,96 +28,6 @@ using namespace CodeGen; using namespace llvm::omp; namespace { -enum OpenMPRTLFunctionNVPTX { - /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime); - OMPRTL_NVPTX__kmpc_kernel_init, - /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); - OMPRTL_NVPTX__kmpc_kernel_deinit, - /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - OMPRTL_NVPTX__kmpc_spmd_kernel_init, - /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, - /// Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function); - OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// Call to bool __kmpc_kernel_parallel(void **outlined_function); - OMPRTL_NVPTX__kmpc_kernel_parallel, - /// Call to void __kmpc_kernel_end_parallel(); - OMPRTL_NVPTX__kmpc_kernel_end_parallel, - /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_serialized_parallel, - /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_end_serialized_parallel, - /// Call to int32_t __kmpc_shuffle_int32(int32_t element, - /// int16_t lane_offset, int16_t warp_size); - OMPRTL_NVPTX__kmpc_shuffle_int32, - /// Call to int64_t __kmpc_shuffle_int64(int64_t element, - /// int16_t lane_offset, int16_t warp_size); - OMPRTL_NVPTX__kmpc_shuffle_int64, - /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32 - /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2, - /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 - /// global_tid, void *global_buffer, int32_t num_of_records, void* - /// reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void - /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), - /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, - /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, - /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void - /// *buffer, int idx, void *reduce_data)); - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2, - /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); - OMPRTL_NVPTX__kmpc_end_reduce_nowait, - /// Call to void __kmpc_data_sharing_init_stack(); - OMPRTL_NVPTX__kmpc_data_sharing_init_stack, - /// Call to void __kmpc_data_sharing_init_stack_spmd(); - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, - /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size, - /// int16_t UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, - /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t - /// UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_push_stack, - /// Call to void __kmpc_data_sharing_pop_stack(void *a); - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, - /// Call to void __kmpc_begin_sharing_variables(void ***args, - /// size_t n_args); - OMPRTL_NVPTX__kmpc_begin_sharing_variables, - /// Call to void __kmpc_end_sharing_variables(); - OMPRTL_NVPTX__kmpc_end_sharing_variables, - /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs) - OMPRTL_NVPTX__kmpc_get_shared_variables, - /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_parallel_level, - /// Call to int8_t __kmpc_is_spmd_exec_mode(); - OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, - /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, - /// const void *buf, size_t size, int16_t is_shared, const void **res); - OMPRTL_NVPTX__kmpc_get_team_static_memory, - /// Call to void __kmpc_restore_team_static_memory(int16_t - /// isSPMDExecutionMode, int16_t is_shared); - OMPRTL_NVPTX__kmpc_restore_team_static_memory, - /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_barrier, - /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL__kmpc_barrier_simple_spmd, - /// Call to int32_t __kmpc_warp_active_thread_mask(void); - OMPRTL_NVPTX__kmpc_warp_active_thread_mask, - /// Call to void __kmpc_syncwarp(int32_t Mask); - OMPRTL_NVPTX__kmpc_syncwarp, -}; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. class NVPTXActionTy final : public PrePostActionTy { @@ -1243,13 +1153,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {getThreadLimit(CGF), Bld.getInt16(/*RequiresOMPRuntime=*/1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_init), + Args); // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack)); emitGenericVarsProlog(CGF, WST.Loc); } @@ -1272,8 +1182,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryFooter(CodeGenFunction &CGF, // Signal termination condition. // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_deinit), + Args); // Barrier to terminate worker threads. syncCTAThreads(CGF); // Master thread jumps to exit point. @@ -1347,13 +1258,14 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryHeader( /*RequiresOMPRuntime=*/ Bld.getInt16(RequiresFullRuntime ? 1 : 0), /*RequiresDataSharing=*/Bld.getInt16(0)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init), + Args); if (RequiresFullRuntime) { // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd)); } CGF.EmitBranch(ExecuteBB); @@ -1379,9 +1291,9 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryFooter(CodeGenFunction &CGF, // DeInitialize the OMP state in the runtime; called by all active threads. llvm::Value *Args[] = {/*RequiresOMPRuntime=*/ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2), + Args); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(EST.ExitBB); @@ -1415,7 +1327,7 @@ void CGOpenMPRuntimeGPU::emitWorkerFunction(WorkerFunctionState &WST) { } void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, - WorkerFunctionState &WST) { + WorkerFunctionState &WST) { // // The workers enter this loop and wait for parallel work from the master. // When the master encounters a parallel region it sets up the work + variable @@ -1450,8 +1362,10 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {WorkFn.getPointer()}; - llvm::Value *Ret = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); + llvm::Value *Ret = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_parallel), + Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); // On termination condition (workid == 0), exit loop. @@ -1516,9 +1430,9 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, // Signal end of parallel region. CGF.EmitBlock(TerminateBB); - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel), - llvm::None); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel), + llvm::None); CGF.EmitBranch(BarrierBB); // All active and inactive workers wait at a barrier after parallel region. @@ -1533,328 +1447,6 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, clearLocThreadIdInsertPt(CGF); } -/// Returns specified OpenMP runtime function for the current OpenMP -/// implementation. Specialized for the NVPTX device. -/// \param Function OpenMP runtime function. -/// \return Specified function. -llvm::FunctionCallee -CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) { - llvm::FunctionCallee RTLFn = nullptr; - switch (static_cast(Function)) { - case OMPRTL_NVPTX__kmpc_kernel_init: { - // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t - // RequiresOMPRuntime); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_deinit: { - // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); - break; - } - case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { - // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); - break; - } - case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: { - // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); - llvm::Type *TypeParams[] = {CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { - /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; - llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); - auto *FnTy = - llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { - /// Build void __kmpc_kernel_end_parallel(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_serialized_parallel: { - // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_end_serialized_parallel: { - // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_shuffle_int32: { - // Build int32_t __kmpc_shuffle_int32(int32_t element, - // int16_t lane_offset, int16_t warp_size); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); - break; - } - case OMPRTL_NVPTX__kmpc_shuffle_int64: { - // Build int64_t __kmpc_shuffle_int64(int64_t element, - // int16_t lane_offset, int16_t warp_size); - llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: { - // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, - // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void* - // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t - // lane_id, int16_t lane_offset, int16_t Algorithm Version), void - // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.Int32Ty, - CGM.SizeTy, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { - // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); - llvm::Type *TypeParams[] = {CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: { - // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 - // global_tid, void *global_buffer, int32_t num_of_records, void* - // reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t shortCircuit), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void - // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), - // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, - // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, - // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void - // *buffer, int idx, void *reduce_data)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy, - CGM.VoidPtrTy}; - auto *GlobalListFnTy = - llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.VoidPtrTy, - CGM.Int32Ty, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { - /// Build void __kmpc_data_sharing_init_stack(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: { - /// Build void __kmpc_data_sharing_init_stack_spmd(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: { - // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size, - // int16_t UseSharedMemory); - llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { - // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t - // UseSharedMemory); - llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { - // Build void __kmpc_data_sharing_pop_stack(void *a); - llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, - /*Name=*/"__kmpc_data_sharing_pop_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_begin_sharing_variables: { - /// Build void __kmpc_begin_sharing_variables(void ***args, - /// size_t n_args); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_end_sharing_variables: { - /// Build void __kmpc_end_sharing_variables(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_get_shared_variables: { - /// Build void __kmpc_get_shared_variables(void ***GlobalArgs); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_parallel_level: { - // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level"); - break; - } - case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: { - // Build int8_t __kmpc_is_spmd_exec_mode(); - auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); - break; - } - case OMPRTL_NVPTX__kmpc_get_team_static_memory: { - // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, - // const void *buf, size_t size, int16_t is_shared, const void **res); - llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy, - CGM.Int16Ty, CGM.VoidPtrPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory"); - break; - } - case OMPRTL_NVPTX__kmpc_restore_team_static_memory: { - // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode, - // int16_t is_shared); - llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); - break; - } - case OMPRTL__kmpc_barrier: { - // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = - CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); - break; - } - case OMPRTL__kmpc_barrier_simple_spmd: { - // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateConvergentRuntimeFunction( - FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); - break; - } - case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { - // Build int32_t __kmpc_warp_active_thread_mask(void); - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); - RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); - break; - } - case OMPRTL_NVPTX__kmpc_syncwarp: { - // Build void __kmpc_syncwarp(kmp_int32 Mask); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); - RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp"); - break; - } - } - return RTLFn; -} - void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t, @@ -2157,12 +1749,14 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_parallel_level), {RTLoc, ThreadID}); IsTTD = Bld.CreateIsNull(PL); } - llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull( + CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2196,8 +1790,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2259,9 +1853,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, CGM.Int16Ty, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), StaticGlobalized, Ld, IsInSharedMemory, ResAddr}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_get_team_static_memory), - GlobalRecordSizeArg); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_team_static_memory), + GlobalRecordSizeArg); GlobalizedRecords.back().Buffer = StaticGlobalized; GlobalizedRecords.back().RecSize = RecSize; GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; @@ -2288,10 +1883,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - IsInTTDRegion - ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack - : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), + IsInTTDRegion ? OMPRTL___kmpc_data_sharing_push_stack + : OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2390,8 +1985,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); @@ -2419,7 +2014,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, for (llvm::Value *Addr : llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), Addr); } if (I->getSecond().GlobalRecordAddr) { @@ -2434,8 +2030,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(NonSPMDBB); CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); CGF.EmitBlock(ExitBB); } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { @@ -2456,14 +2052,15 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), IsInSharedMemory}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_restore_team_static_memory), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory), Args); } } else { - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), - I->getSecond().GlobalRecordAddr); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), + I->getSecond().GlobalRecordAddr); } } } @@ -2535,9 +2132,11 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_serialized_parallel), Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2553,7 +2152,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( // Prepare for parallel region. Indicate the outlined function. llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_prepare_parallel), Args); // Create a private scope that will globalize the arguments @@ -2570,9 +2170,10 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *DataSharingArgs[] = { SharedArgsPtr, llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_begin_sharing_variables), - DataSharingArgs); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_begin_sharing_variables), + DataSharingArgs); // Store variable address in a list of references to pass to workers. unsigned Idx = 0; @@ -2606,8 +2207,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( syncCTAThreads(CGF); if (!CapturedVars.empty()) - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_sharing_variables)); // Remember for post-processing in worker loop. Work.emplace_back(WFn); @@ -2631,8 +2232,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); - llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull( + CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2640,7 +2242,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_parallel_level), {RTLoc, ThreadID}); llvm::Value *Res = Bld.CreateIsNotNull(PL); Bld.CreateCondBr(Res, SeqBB, MasterBB); @@ -2704,9 +2307,11 @@ void CGOpenMPRuntimeGPU::emitSPMDParallelCall( llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_serialized_parallel), Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2736,9 +2341,9 @@ void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) { llvm::ConstantPointerNull::get( cast(getIdentTyPointerTy())), llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; - llvm::CallInst *Call = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); - Call->setConvergent(); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd), + Args); } void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, @@ -2752,9 +2357,10 @@ void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, unsigned Flags = getDefaultFlagsForBarriers(Kind); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - llvm::CallInst *Call = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); - Call->setConvergent(); + + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier), + Args); } void CGOpenMPRuntimeGPU::emitCriticalRegion( @@ -2770,8 +2376,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); // Get the mask of active threads in the warp. - llvm::Value *Mask = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); + llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. llvm::Value *ThreadID = RT.getGPUThreadID(CGF); @@ -2813,8 +2419,9 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); // Reconverge active threads in the warp. - (void)CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask); + (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_syncwarp), + Mask); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -2864,14 +2471,15 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, CGBuilderTy &Bld = CGF.Builder; CGOpenMPRuntimeGPU &RT = *(static_cast(&CGM.getOpenMPRuntime())); + llvm::OpenMPIRBuilder &OMPBuilder = RT.getOMPBuilder(); CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType); assert(Size.getQuantity() <= 8 && "Unsupported bitwidth in shuffle instruction."); - OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4 - ? OMPRTL_NVPTX__kmpc_shuffle_int32 - : OMPRTL_NVPTX__kmpc_shuffle_int64; + RuntimeFunction ShuffleFn = Size.getQuantity() <= 4 + ? OMPRTL___kmpc_shuffle_int32 + : OMPRTL___kmpc_shuffle_int64; // Cast all types to 32- or 64-bit values before calling shuffle routines. QualType CastTy = CGF.getContext().getIntTypeForBitwidth( @@ -2881,7 +2489,8 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); llvm::Value *ShuffledVal = CGF.EmitRuntimeCall( - RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn), + {ElemCast, Offset, WarpSize}); return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc); } @@ -4391,8 +4000,8 @@ void CGOpenMPRuntimeGPU::emitReduction( InterWarpCopyFn}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2), Args); } else { assert(TeamsReduction && "expected teams reduction."); @@ -4441,8 +4050,8 @@ void CGOpenMPRuntimeGPU::emitReduction( BufferToGlobalRedFn}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2), Args); } @@ -4477,7 +4086,8 @@ void CGOpenMPRuntimeGPU::emitReduction( RegionCodeGenTy RCG(CodeGen); NVPTXActionTy Action( nullptr, llvm::None, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait), EndArgs); RCG.setAction(Action); RCG(CGF); @@ -4488,7 +4098,7 @@ void CGOpenMPRuntimeGPU::emitReduction( const VarDecl * CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD, - const VarDecl *NativeParam) const { + const VarDecl *NativeParam) const { if (!NativeParam->getType()->isReferenceType()) return NativeParam; QualType ArgType = NativeParam->getType(); @@ -4638,9 +4248,9 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper( CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables), - DataSharingArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_shared_variables), + DataSharingArgs); // Retrieve the shared variables from the list of references returned // by the runtime. Pass the variables to the outlined function. diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 19085b5..088ed28 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1068,16 +1068,6 @@ public: llvm::AttributeList ExtraAttrs = llvm::AttributeList(), bool Local = false, bool AssumeConvergent = false); - /// Create or return a runtime function declaration with the specified type - /// and name. This will automatically add the convergent attribute to the - /// function declaration. - llvm::FunctionCallee CreateConvergentRuntimeFunction( - llvm::FunctionType *Ty, StringRef Name, - llvm::AttributeList ExtraAttrs = llvm::AttributeList(), - bool Local = false) { - return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local, true); - } - /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index ad25e0d..bd9c988 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -91,7 +91,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] -// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]] +// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 @@ -321,10 +321,10 @@ int bar(int n){ // CHECK: define internal void [[PARALLEL_FN4]]( // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]], // CHECK: store i[[SZ]] 45, i[[SZ]]* %a, -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#CONVERGENT:]] +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) // CHECK: ret void -// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]] +// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]] // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker() // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}( @@ -377,6 +377,6 @@ int bar(int n){ // CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]] // CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]] -// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}} +// CHECK: attributes #[[#CONVERGENT:]] = {{.*}} convergent {{.*}} #endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index e93f836..ff5e69d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -220,6 +220,9 @@ __OMP_FUNCTION_TYPE(KmpcDtor, false, Void, VoidPtr) __OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr) __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, /* kmp_task_t */ VoidPtr) +__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16) +__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32) +__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr) #undef __OMP_FUNCTION_TYPE #undef OMP_FUNCTION_TYPE @@ -311,8 +314,6 @@ __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) -__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, /* kmp_task_t */ VoidPtr, Int32, /* kmp_task_affinity_info_t */ VoidPtr) @@ -518,17 +519,42 @@ __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr, __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) +/// OpenMP Device runtime functions +__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16) +__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16) +__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16, Int16) +__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16) +__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) +__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, ) +__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16) +__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32, + Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr) +__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32) +__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32, + VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr, + GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr) + +__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16) __OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, ) -__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) -__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, - Int16) +__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) + +__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16) __OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16) __OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr) - -/// Note that device runtime functions (in the following) do not necessarily -/// need attributes as we expect to see the definitions. -__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) -__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) +__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) +__OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) +__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) +__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32) +__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) +__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy, + Int16, VoidPtrPtr) +__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16) +__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, ) +__OMP_RTL(__kmpc_syncwarp, false, Void, Int32) __OMP_RTL(__last, false, Void, ) @@ -577,8 +603,8 @@ __OMP_ATTRS_SET(DefaultAttrs, __OMP_ATTRS_SET(BarrierAttrs, OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind)) - : AttributeSet(EnumAttr(NoUnwind))) + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)) + : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))) __OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, OptimisticAttributes @@ -650,6 +676,11 @@ __OMP_ATTRS_SET(ReturnAlignedPtrAttrs, __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(), diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index e92447d7..cf1bd24 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -888,313 +888,313 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK: declare dso_local i32 @omp_pause_resource_all(i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0 +; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0 +; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 +; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0 +; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0 +; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0 +; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0 +; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 +; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0 +; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0 +; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0 +; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 +; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 +; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0 +; CHECK-NEXT: declare void @__tgt_register_requires(i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0 +; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0 +; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0 +; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) @@ -1212,52 +1212,52 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2 +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_max_task_priority() @@ -1326,7 +1326,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_get_team_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_initial_device() @@ -1356,25 +1356,25 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) #2 +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) #1 +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, i8*) @@ -1419,7 +1419,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) @@ -1427,7 +1427,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1451,13 +1451,13 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1466,22 +1466,22 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1523,10 +1523,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1535,10 +1535,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1598,7 +1598,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1622,7 +1622,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1634,16 +1634,16 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -- 2.7.4