From e754b18f5e10155673ceff9af9a34b473a802764 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 9 Aug 2017 19:38:53 +0000 Subject: [PATCH] [OPENMP] Emit non-debug version of outlined functions with original name. If the host code is compiled with the debug info, while the target without, there is a problem that the compiler is unable to find the debug wrapper. Patch fixes this problem by emitting special name for the debug version of the code. llvm-svn: 310511 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 31 +++++++++++----------- clang/test/OpenMP/distribute_codegen.cpp | 2 +- clang/test/OpenMP/parallel_codegen.cpp | 6 +++-- clang/test/OpenMP/parallel_for_simd_codegen.cpp | 1 + .../test/OpenMP/target_parallel_debug_codegen.cpp | 25 +++++++++++------ 5 files changed, 38 insertions(+), 27 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 565f1f2..4b3a201 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -247,7 +247,7 @@ namespace { /// true if cast to/from UIntPtr is required for variables captured by /// value. const bool UIntPtrCastRequired = true; - /// true if only casted argumefnts must be registered as local args or VLA + /// true if only casted arguments must be registered as local args or VLA /// sizes. const bool RegisterCastedArgsOnly = false; /// Name of the generated function. @@ -261,7 +261,7 @@ namespace { }; } -static std::pair emitOutlinedFunctionPrologue( +static llvm::Function *emitOutlinedFunctionPrologue( CodeGenFunction &CGF, FunctionArgList &Args, llvm::MapVector> &LocalAddrs, @@ -277,7 +277,6 @@ static std::pair emitOutlinedFunctionPrologue( CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGM.getContext(); FunctionArgList TargetArgs; - bool HasUIntPtrArgs = false; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); TargetArgs.append( @@ -296,7 +295,6 @@ static std::pair emitOutlinedFunctionPrologue( // outlined function. if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || I->capturesVariableArrayType()) { - HasUIntPtrArgs = true; if (FO.UIntPtrCastRequired) ArgType = Ctx.getUIntPtrType(); } @@ -432,7 +430,7 @@ static std::pair emitOutlinedFunctionPrologue( ++I; } - return {F, HasUIntPtrArgs}; + return F; } llvm::Function * @@ -448,12 +446,15 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { FunctionArgList Args; llvm::MapVector> LocalAddrs; llvm::DenseMap> VLASizes; + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << CapturedStmtInfo->getHelperName(); + if (NeedWrapperFunction) + Out << "_debug__"; FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, - CapturedStmtInfo->getHelperName()); - llvm::Function *F; - bool HasUIntPtrArgs; - std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue( - *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); + Out.str()); + llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, + VLASizes, CXXThisValue, FO); for (const auto &LocalAddrPair : LocalAddrs) { if (LocalAddrPair.second.first) { setAddrOfLocalVar(LocalAddrPair.second.first, @@ -465,14 +466,12 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { PGO.assignRegionCounters(GlobalDecl(CD), F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); FinishFunction(CD->getBodyRBrace()); - if (!NeedWrapperFunction || !HasUIntPtrArgs) + if (!NeedWrapperFunction) return F; - SmallString<256> Buffer; - llvm::raw_svector_ostream Out(Buffer); - Out << "__nondebug_wrapper_" << CapturedStmtInfo->getHelperName(); FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, - /*RegisterCastedArgsOnly=*/true, Out.str()); + /*RegisterCastedArgsOnly=*/true, + CapturedStmtInfo->getHelperName()); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); WrapperCGF.disableDebugInfo(); Args.clear(); @@ -480,7 +479,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { VLASizes.clear(); llvm::Function *WrapperF = emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, - WrapperCGF.CXXThisValue, WrapperFO).first; + WrapperCGF.CXXThisValue, WrapperFO); LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); llvm::SmallVector CallArgs; for (const auto *Arg : Args) { diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp index 37f00f0..4646351 100644 --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -34,7 +34,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { } } -// CHECK: define {{.*}}void @.omp_outlined.(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) +// CHECK: define {{.*}}void @{{.+}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) // CHECK: [[TID_ADDR:%.+]] = alloca i32* // CHECK: [[IV:%.+iv]] = alloca i32 // CHECK: [[LB:%.+lb]] = alloca i32 diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index 23b3237..8bf265c 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -109,7 +109,7 @@ int main (int argc, char **argv) { // CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } -// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc) +// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc) // CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // CHECK-DEBUG-NEXT: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] @@ -120,7 +120,9 @@ int main (int argc, char **argv) { // CHECK-DEBUG-NEXT: } // CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc) -// CHECK-DEBUG: define linkonce_odr void [[FOO1]](i8** %argc) +// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc) +// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc) +// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]] // CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind // CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind diff --git a/clang/test/OpenMP/parallel_for_simd_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_codegen.cpp index 06d9635..369ea17 100644 --- a/clang/test/OpenMP/parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_simd_codegen.cpp @@ -114,6 +114,7 @@ void simple(float *a, float *b, float *c, float *d) { int lin = 12; #pragma omp parallel for simd linear(lin : get_val()), linear(g_ptr) +// CHECK: alloca i32, // Init linear private var. // CHECK: [[LIN_VAR:%.+]] = load i32*, i32** % // CHECK: [[LIN_LOAD:%.+]] = load i32, i32* [[LIN_VAR]] diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp index 41ef09c..d7b6b89 100644 --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -58,7 +58,7 @@ int main() { // CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* // CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) -// CHECK: define void @__nondebug_wrapper___omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) // CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* // CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) @@ -76,22 +76,31 @@ int main() { // CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* // CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) -// CHECK: define void @__nondebug_wrapper___omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) // CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* // CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* // CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) -// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias {{[^,]+}}, i32 addrspace(1)* noalias {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias {{[^)]+}}) +// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias {{[^,]+}}, i32 addrspace(1)* noalias {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias {{[^)]+}}) // CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* // CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* // CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* -// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* -// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* -// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* -// CHECK: call void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) +// CHECK: call void [[NONDEBUG_WRAPPER:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) -// CHECK: define internal void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^)]+}}) +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^)]+}}) // CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* // CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* // CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) + -- 2.7.4