From bd756286d2e774716a12f55db27d070595058d53 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 8 Jan 2021 10:30:28 -0600 Subject: [PATCH] [OpenMP][FIX] Enforce a function boundary for a new data environment Whenever we enter a new OpenMP data environment we want to enter a function to simplify reasoning. Later we probably want to remove the entire specialization wrt. the if clause and pass the result to the runtime, for now this should fix PR48686. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D94315 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 8 ++++++++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 23 ++++++++--------------- clang/test/OpenMP/parallel_if_codegen.cpp | 13 +++++++++++++ 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 7a69fe2..57cc2d6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2098,6 +2098,14 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + + // Ensure we do not inline the function. This is trivially true for the ones + // passed to __kmpc_fork_call but the ones calles in serialized regions + // could be inlined. This is not a perfect but it is closer to the invariant + // we want, namely, every data environment starts with a new function. + // TODO: We should pass the if condition to the runtime function and do the + // handling there. Much cleaner code. + OutlinedFn->addFnAttr(llvm::Attribute::NoInline); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index a3540bd..9c8c7b8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1576,11 +1576,6 @@ llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction( auto *OutlinedFun = cast(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); - if (CGM.getLangOpts().Optimize) { - OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); - OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); - OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); - } IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; IsInTTDRegion = PrevIsInTTDRegion; if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD && @@ -1698,11 +1693,6 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction( CodeGen.setAction(Action); llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); - if (CGM.getLangOpts().Optimize) { - OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); - OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); - OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); - } return OutlinedFun; } @@ -2102,6 +2092,14 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( // Force inline this outlined function at its call site. Fn->setLinkage(llvm::GlobalValue::InternalLinkage); + // Ensure we do not inline the function. This is trivially true for the ones + // passed to __kmpc_fork_call but the ones calles in serialized regions + // could be inlined. This is not a perfect but it is closer to the invariant + // we want, namely, every data environment starts with a new function. + // TODO: We should pass the if condition to the runtime function and do the + // handling there. Much cleaner code. + cast(OutlinedFn)->addFnAttr(llvm::Attribute::NoInline); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); @@ -3134,11 +3132,6 @@ static llvm::Function *emitShuffleAndReduceFunction( "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setDoesNotRecurse(); - if (CGM.getLangOpts().Optimize) { - Fn->removeFnAttr(llvm::Attribute::NoInline); - Fn->removeFnAttr(llvm::Attribute::OptimizeNone); - Fn->addFnAttr(llvm::Attribute::AlwaysInline); - } CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); diff --git a/clang/test/OpenMP/parallel_if_codegen.cpp b/clang/test/OpenMP/parallel_if_codegen.cpp index 6bb4da9..d72aa57 100644 --- a/clang/test/OpenMP/parallel_if_codegen.cpp +++ b/clang/test/OpenMP/parallel_if_codegen.cpp @@ -7,6 +7,7 @@ // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -disable-O0-optnone -o - | FileCheck %s --check-prefix=WOPT // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s @@ -96,14 +97,20 @@ int main() { return tmain(Arg); } +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN4]] // CHECK: call {{.*}}void @{{.+}}fn4 // CHECK: ret void +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN5]] // CHECK: call {{.*}}void @{{.+}}fn5 // CHECK: ret void +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN6]] // CHECK: call {{.*}}void @{{.+}}fn6 // CHECK: ret void @@ -129,14 +136,20 @@ int main() { // CHECK: br label %[[OMP_END]] // CHECK: [[OMP_END]] +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN1]] // CHECK: call {{.*}}void @{{.+}}fn1 // CHECK: ret void +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN2]] // CHECK: call {{.*}}void @{{.+}}fn2 // CHECK: ret void +// WOPT: noinline +// WOPT-NOT: optnone // CHECK: define internal {{.*}}void [[CAP_FN3]] // CHECK: call {{.*}}void @{{.+}}fn3 // CHECK: ret void -- 2.7.4